Add resume parser and fix bugs (#59)

* Update .gitignore

* Update .gitignore

* Add resume parser and fix bugs
This commit is contained in:
KevinHuSh
2024-02-07 19:27:23 +08:00
committed by GitHub
parent eb8254e688
commit c5ea37cd30
16 changed files with 451 additions and 57 deletions

View File

@@ -474,7 +474,7 @@ class Knowledgebase(DataBaseModel):
vector_similarity_weight = FloatField(default=0.3)
parser_id = CharField(max_length=32, null=False, help_text="default parser ID", default=ParserType.GENERAL.value)
parser_config = JSONField(null=False, default={"from_page":0, "to_page": 100000})
parser_config = JSONField(null=False, default={"pages":[[0,1000000]]})
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted1: validate)", default="1")
def __str__(self):
@@ -489,7 +489,7 @@ class Document(DataBaseModel):
thumbnail = TextField(null=True, help_text="thumbnail base64 string")
kb_id = CharField(max_length=256, null=False, index=True)
parser_id = CharField(max_length=32, null=False, help_text="default parser ID")
parser_config = JSONField(null=False, default={"from_page":0, "to_page": 100000})
parser_config = JSONField(null=False, default={"pages":[[0,1000000]]})
source_type = CharField(max_length=128, null=False, default="local", help_text="where dose this document from")
type = CharField(max_length=32, null=False, help_text="file extension")
created_by = CharField(max_length=32, null=False, help_text="who created it")

View File

@@ -21,5 +21,6 @@ class DialogService(CommonService):
model = Dialog
class ConversationService(CommonService):
model = Conversation

View File

@@ -63,3 +63,31 @@ class KnowledgebaseService(CommonService):
d = kbs[0].to_dict()
d["embd_id"] = kbs[0].tenant.embd_id
return d
@classmethod
@DB.connection_context()
def update_parser_config(cls, id, config):
e, m = cls.get_by_id(id)
if not e:raise LookupError(f"knowledgebase({id}) not found.")
def dfs_update(old, new):
for k,v in new.items():
if k not in old:
old[k] = v
continue
if isinstance(v, dict):
assert isinstance(old[k], dict)
dfs_update(old[k], v)
else: old[k] = v
dfs_update(m.parser_config, config)
cls.update_by_id(id, m.parser_config)
@classmethod
@DB.connection_context()
def get_field_map(cls, ids):
conf = {}
for k in cls.get_by_ids(ids):
if k.parser_config and "field_map" in k.parser_config:
conf.update(k.parser_config)
return conf