go through upload, create kb, add doc to kb (#11)
* add field progress msg into docinfo; add file processing procedure * go through upload, create kb, add doc to kb
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
import re
|
||||
|
||||
def rmSpace(txt):
    """Remove runs of spaces that touch a non-alphanumeric character.

    A run of spaces is dropped when the character on either side is
    anything other than an ASCII letter, a digit, ``.`` or ``,`` (for
    example a CJK character or most punctuation).  Spaces that separate
    two alphanumeric tokens, such as ordinary English words, are kept.

    Args:
        txt: The text to clean up.

    Returns:
        The text with the qualifying spaces removed.
    """
    # Match case-insensitively: with a lowercase-only class, uppercase
    # letters were treated as "non-alphanumeric" and "Hello World" was
    # glued into "HelloWorld".
    # Pass 1: spaces that follow a non-alphanumeric character.
    txt = re.sub(r"([^a-z0-9.,]) +([^ ])", r"\1\2", txt, flags=re.IGNORECASE)
    # Pass 2: spaces that precede a non-alphanumeric character.  A second
    # pass is also needed because matches cannot overlap, so pass 1 alone
    # leaves every other gap in a sequence like "A B C" of such characters.
    return re.sub(r"([^ ]) +([^a-z0-9.,])", r"\1\2", txt, flags=re.IGNORECASE)
|
||||
|
||||
def findMaxDt(fnm):
    """Return the lexicographically greatest line stored in a file.

    Each line is compared as a plain string after its newline characters
    are stripped; lines equal to ``'nan'`` are skipped.  The result never
    falls below the baseline ``"1970-01-01 00:00:00"``.

    Args:
        fnm: Path of the text file to scan.

    Returns:
        The greatest line found, or the baseline when the file holds
        nothing greater.  If reading fails, a warning is printed and the
        greatest value seen up to that point is returned.
    """
    latest = "1970-01-01 00:00:00"
    try:
        with open(fnm, "r") as src:
            for raw in src:
                stamp = raw.strip("\n")
                # 'nan' is a placeholder, not a value; without this check
                # it would sort above any digit-leading line.
                if stamp != 'nan' and stamp > latest:
                    latest = stamp
    except Exception:
        # Best effort: any failure is reported and the current maximum
        # is still returned.
        print("WARNING: can't find "+ fnm)
    return latest
|
||||
|
||||
@@ -9,7 +9,6 @@ if not os.path.exists(__fnm): __fnm = "./sys.cnf"
|
||||
|
||||
CF.read(__fnm)
|
||||
|
||||
|
||||
class Config:
|
||||
def __init__(self, env):
|
||||
self.env = env
|
||||
|
||||
@@ -3,7 +3,7 @@ import time
|
||||
from util import config
|
||||
import pandas as pd
|
||||
|
||||
class Postgre(object):
|
||||
class Postgres(object):
|
||||
def __init__(self, env, dbnm):
|
||||
self.config = config.init(env)
|
||||
self.conn = None
|
||||
@@ -36,9 +36,28 @@ class Postgre(object):
|
||||
try:
|
||||
return pd.read_sql(sql, self.conn)
|
||||
except Exception as e:
|
||||
logging.error(f"Fail to exec {sql}l "+str(e))
|
||||
logging.error(f"Fail to exec {sql} "+str(e))
|
||||
self.__open__()
|
||||
time.sleep(1)
|
||||
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
def update(self, sql):
    """Execute a data-modifying SQL statement and commit it.

    The statement is attempted up to 10 times.  After a failed attempt
    the error is logged, ``self.__open__()`` is called (presumably to
    re-establish the connection -- confirm against its definition) and
    the method sleeps one second before retrying.

    Args:
        sql: The SQL statement to execute.

    Returns:
        The cursor's ``rowcount`` for the first attempt that succeeds,
        or 0 if all attempts fail.
    """
    for _ in range(10):
        try:
            cur = self.conn.cursor()
            cur.execute(sql)
            updated_rows = cur.rowcount
            # Commit on the instance's connection.  The bare name `conn`
            # was undefined here, so every attempt raised NameError after
            # executing the statement and nothing was ever committed.
            self.conn.commit()
            cur.close()
            return updated_rows
        except Exception as e:
            logging.error(f"Fail to exec {sql} "+str(e))
            self.__open__()
            time.sleep(1)
    return 0
|
||||
|
||||
if __name__ == "__main__":
|
||||
Postgres("infiniflow", "docgpt")
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ class HuEs:
|
||||
self.info = {}
|
||||
self.config = config.init(env)
|
||||
self.conn()
|
||||
self.idxnm = self.config.get("idx_nm")
|
||||
self.idxnm = self.config.get("idx_nm","")
|
||||
if not self.es.ping():
|
||||
raise Exception("Can't connect to ES cluster")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user