solve task execution issues (#90)
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
|
||||
import copy
|
||||
|
||||
from nltk.stem import PorterStemmer
|
||||
# Module-level PorterStemmer instance, created once when the module is imported.
stemmer = PorterStemmer()
|
||||
@@ -80,6 +80,20 @@ def tokenize(d, t, eng):
|
||||
d["content_sm_ltks"] = huqie.qieqie(d["content_ltks"])
|
||||
|
||||
|
||||
def tokenize_table(tbls, doc, eng, batch_size=10):
    """Build one tokenized copy of ``doc`` for every batch of table rows.

    Args:
        tbls: Iterable of ``(img, rows)`` pairs. ``rows`` must be a sequence
            of strings, since it is sliced and joined with a string separator.
        doc: Template document. It is deep-copied for each batch, so the
            object passed in is not modified by this function.
        eng: Flag forwarded unchanged to ``tokenize``; it also selects the
            separator placed between rows.
        batch_size: Maximum number of rows joined into a single chunk.

    Returns:
        A list of document copies. Each copy has been passed through
        ``tokenize`` with the joined row text and carries the table's
        ``img`` under the ``"image"`` key.
    """
    chunks = []
    for image, table_rows in tbls:
        # NOTE(review): both branches produce the same separator as written
        # here — confirm whether a different non-English separator was meant.
        separator = "; " if eng else "; "
        for start in range(0, len(table_rows), batch_size):
            # Work on a fresh copy so batches never share mutable state.
            chunk = copy.deepcopy(doc)
            batch_text = separator.join(table_rows[start:start + batch_size])
            tokenize(chunk, batch_text, eng)
            chunk["image"] = image
            chunks.append(chunk)
    return chunks
|
||||
|
||||
|
||||
def remove_contents_table(sections, eng=False):
|
||||
i = 0
|
||||
while i < len(sections):
|
||||
@@ -201,10 +215,12 @@ def naive_merge(sections, chunk_token_num=128, delimiter="\n。;!?"):
|
||||
tnum = num_tokens_from_string(t)
|
||||
if tnum < 8: pos = ""
|
||||
if tk_nums[-1] > chunk_token_num:
|
||||
cks.append(t + pos)
|
||||
if t.find(pos) < 0: t += pos
|
||||
cks.append(t)
|
||||
tk_nums.append(tnum)
|
||||
else:
|
||||
cks[-1] += t + pos
|
||||
if cks[-1].find(pos) < 0: t += pos
|
||||
cks[-1] += t
|
||||
tk_nums[-1] += tnum
|
||||
|
||||
for sec, pos in sections:
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import json
|
||||
import re
|
||||
from copy import deepcopy
|
||||
|
||||
from elasticsearch_dsl import Q, Search
|
||||
from typing import List, Optional, Dict, Union
|
||||
from dataclasses import dataclass
|
||||
@@ -98,7 +100,7 @@ class Dealer:
|
||||
del s["highlight"]
|
||||
q_vec = s["knn"]["query_vector"]
|
||||
es_logger.info("【Q】: {}".format(json.dumps(s)))
|
||||
res = self.es.search(s, idxnm=idxnm, timeout="600s", src=src)
|
||||
res = self.es.search(deepcopy(s), idxnm=idxnm, timeout="600s", src=src)
|
||||
es_logger.info("TOTAL: {}".format(self.es.getTotal(res)))
|
||||
if self.es.getTotal(res) == 0 and "knn" in s:
|
||||
bqry, _ = self.qryr.question(qst, min_match="10%")
|
||||
|
||||
Reference in New Issue
Block a user