build dialog server; add thumbnail to docinfo; (#17)
This commit is contained in:
@@ -6,11 +6,10 @@ from tornado.ioloop import IOLoop
|
||||
from tornado.httpserver import HTTPServer
|
||||
from tornado.options import define,options
|
||||
from util import es_conn, setup_logging
|
||||
from svr import sec_search as search
|
||||
from svr.rpc_proxy import RPCProxy
|
||||
from sklearn.metrics.pairwise import cosine_similarity as CosineSimilarity
|
||||
from nlp import huqie
|
||||
from nlp import query as Query
|
||||
from nlp import search
|
||||
from llm import HuEmbedding, GptTurbo
|
||||
import numpy as np
|
||||
from io import BytesIO
|
||||
@@ -38,7 +37,7 @@ def get_QA_pairs(hists):
|
||||
|
||||
|
||||
|
||||
def get_instruction(sres, top_i, max_len=8096 fld="content_ltks"):
|
||||
def get_instruction(sres, top_i, max_len=8096, fld="content_ltks"):
|
||||
max_len //= len(top_i)
|
||||
# add instruction to prompt
|
||||
instructions = [re.sub(r"[\r\n]+", " ", sres.field[sres.ids[i]][fld]) for i in top_i]
|
||||
@@ -96,10 +95,11 @@ class Handler(RequestHandler):
|
||||
try:
|
||||
question = param.get("history",[{"user": "Hi!"}])[-1]["user"]
|
||||
res = SE.search({
|
||||
"question": question,
|
||||
"kb_ids": param.get("kb_ids", []),
|
||||
"size": param.get("topn", 15)
|
||||
})
|
||||
"question": question,
|
||||
"kb_ids": param.get("kb_ids", []),
|
||||
"size": param.get("topn", 15)},
|
||||
search.index_name(param["uid"])
|
||||
)
|
||||
|
||||
sim = SE.rerank(res, question)
|
||||
rk_idx = np.argsort(sim*-1)
|
||||
@@ -112,12 +112,12 @@ class Handler(RequestHandler):
|
||||
refer = OrderedDict()
|
||||
docnms = {}
|
||||
for i in rk_idx:
|
||||
did = res.field[res.ids[i]]["doc_id"])
|
||||
if did not in docnms: docnms[did] = res.field[res.ids[i]]["docnm_kwd"])
|
||||
did = res.field[res.ids[i]]["doc_id"]
|
||||
if did not in docnms: docnms[did] = res.field[res.ids[i]]["docnm_kwd"]
|
||||
if did not in refer: refer[did] = []
|
||||
refer[did].append({
|
||||
"chunk_id": res.ids[i],
|
||||
"content": res.field[res.ids[i]]["content_ltks"]),
|
||||
"content": res.field[res.ids[i]]["content_ltks"],
|
||||
"image": ""
|
||||
})
|
||||
|
||||
@@ -128,7 +128,7 @@ class Handler(RequestHandler):
|
||||
"data":{
|
||||
"uid": param["uid"],
|
||||
"dialog_id": param["dialog_id"],
|
||||
"assistant": ans
|
||||
"assistant": ans,
|
||||
"refer": [{
|
||||
"did": did,
|
||||
"doc_name": docnms[did],
|
||||
@@ -153,7 +153,7 @@ if __name__ == '__main__':
|
||||
parser.add_argument("--port", default=4455, type=int, help="Port used for service")
|
||||
ARGS = parser.parse_args()
|
||||
|
||||
SE = search.ResearchReportSearch(es_conn.HuEs("infiniflow"), EMBEDDING)
|
||||
SE = search.Dealer(es_conn.HuEs("infiniflow"), EMBEDDING)
|
||||
|
||||
app = Application([(r'/v1/chat/completions', Handler)],debug=False)
|
||||
http_server = HTTPServer(app)
|
||||
|
||||
@@ -6,7 +6,7 @@ from util.db_conn import Postgres
|
||||
from util.minio_conn import HuMinio
|
||||
from util import rmSpace, findMaxDt
|
||||
from FlagEmbedding import FlagModel
|
||||
from nlp import huchunk, huqie
|
||||
from nlp import huchunk, huqie, search
|
||||
import base64, hashlib
|
||||
from io import BytesIO
|
||||
import pandas as pd
|
||||
@@ -103,7 +103,7 @@ def build(row):
|
||||
if(!ctx._source.kb_id.contains('%s'))
|
||||
ctx._source.kb_id.add('%s');
|
||||
"""%(str(row["kb_id"]), str(row["kb_id"])),
|
||||
idxnm = index_name(row["uid"])
|
||||
idxnm = search.index_name(row["uid"])
|
||||
)
|
||||
set_progress(row["kb2doc_id"], 1, "Done")
|
||||
return []
|
||||
@@ -171,10 +171,8 @@ def build(row):
|
||||
return docs
|
||||
|
||||
|
||||
def index_name(uid):
    """Return the index name used for the user identified by ``uid``.

    The name is the fixed prefix ``docgpt_`` followed by ``uid`` rendered
    with its default formatting.
    """
    name = f"docgpt_{uid}"
    return name
|
||||
|
||||
def init_kb(row):
    """Create the index for the user in ``row`` unless it already exists.

    Args:
        row: Mapping with a ``"uid"`` entry; the uid is turned into an index
            name through ``search.index_name``.

    Returns:
        ``None`` when the index already exists, otherwise whatever
        ``ES.createIdx`` returns for the newly created index.
    """
    idxnm = search.index_name(row["uid"])
    if ES.indexExist(idxnm):
        return None
    # Read the mapping inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("conf/mapping.json", "r") as mapping_file:
        mapping = json.load(mapping_file)
    return ES.createIdx(idxnm, mapping)
|
||||
|
||||
@@ -199,7 +197,7 @@ def rm_doc_from_kb(df):
|
||||
ctx._source.kb_id.indexOf('%s')
|
||||
);
|
||||
"""%(str(r["kb_id"]),str(r["kb_id"])),
|
||||
idxnm = index_name(r["uid"])
|
||||
idxnm = search.index_name(r["uid"])
|
||||
)
|
||||
if len(df) == 0:return
|
||||
sql = """
|
||||
@@ -233,7 +231,7 @@ def main(comm, mod):
|
||||
set_progress(r["kb2doc_id"], random.randint(70, 95)/100.,
|
||||
"Finished embedding! Start to build index!")
|
||||
init_kb(r)
|
||||
es_r = ES.bulk(cks, index_name(r["uid"]))
|
||||
es_r = ES.bulk(cks, search.index_name(r["uid"]))
|
||||
if es_r:
|
||||
set_progress(r["kb2doc_id"], -1, "Index failure!")
|
||||
print(es_r)
|
||||
|
||||
Reference in New Issue
Block a user