Fix batch size issue. (#3675)

### What problem does this PR solve?

#3657

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
This commit is contained in:
Kevin Hu
2024-11-27 18:06:43 +08:00
committed by GitHub
parent 535b15ace9
commit 57208d8e53
3 changed files with 20 additions and 20 deletions

View File

@@ -54,7 +54,7 @@ class FulltextQueryer:
def rmWWW(txt):
patts = [
(
r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀)是*",
r"是*(什么样的|哪家|一下|那家|请问|啥样|咋样了|什么时候|何时|何地|何人|是否|是不是|多少|哪里|怎么|哪儿|怎么样|如何|哪些|是啥|啥是|啊|吗|呢|吧|咋|什么|有没有|呀|谁|哪位|哪个)是*",
"",
),
(r"(^| )(what|who|how|which|where|why)('re|'s)? ", " "),

View File

@@ -228,7 +228,7 @@ class Dealer:
idf2 = np.array([idf(df(t), 1000000000) for t in tks])
wts = (0.3 * idf1 + 0.7 * idf2) * \
np.array([ner(t) * postag(t) for t in tks])
wts = [math.pow(s, 2) for s in wts]
wts = [s for s in wts]
tw = list(zip(tks, wts))
else:
for tk in tks:
@@ -237,7 +237,7 @@ class Dealer:
idf2 = np.array([idf(df(t), 1000000000) for t in tt])
wts = (0.3 * idf1 + 0.7 * idf2) * \
np.array([ner(t) * postag(t) for t in tt])
wts = [math.pow(s, 2) for s in wts]
wts = [s for s in wts]
tw.extend(zip(tt, wts))
S = np.sum([s for _, s in tw])