refactor retrieval_test, add SQL retrieval methods (#61)

This commit is contained in:
KevinHuSh
2024-02-08 17:01:01 +08:00
committed by GitHub
parent 0a903c7714
commit 5e0a689c43
16 changed files with 238 additions and 74 deletions

View File

@@ -78,3 +78,5 @@ class ParserType(StrEnum):
BOOK = "book"
QA = "qa"
TABLE = "table"
NAIVE = "naive"
PICTURE = "picture"

View File

@@ -381,7 +381,7 @@ class Tenant(DataBaseModel):
embd_id = CharField(max_length=128, null=False, help_text="default embedding model ID")
asr_id = CharField(max_length=128, null=False, help_text="default ASR model ID")
img2txt_id = CharField(max_length=128, null=False, help_text="default image to text model ID")
parser_ids = CharField(max_length=128, null=False, help_text="document processors")
parser_ids = CharField(max_length=256, null=False, help_text="document processors")
credit = IntegerField(default=512)
status = CharField(max_length=1, null=True, help_text="is it validate(0: wasted1: validate)", default="1")

View File

@@ -63,7 +63,9 @@ def init_llm_factory():
"status": "1",
},
]
llm_infos = [{
llm_infos = [
# ---------------------- OpenAI ------------------------
{
"fid": factory_infos[0]["name"],
"llm_name": "gpt-3.5-turbo",
"tags": "LLM,CHAT,4K",
@@ -105,7 +107,9 @@ def init_llm_factory():
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 765,
"model_type": LLMType.IMAGE2TEXT.value
},{
},
# ----------------------- Qwen -----------------------
{
"fid": factory_infos[1]["name"],
"llm_name": "qwen-turbo",
"tags": "LLM,CHAT,8K",
@@ -135,7 +139,9 @@ def init_llm_factory():
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 765,
"model_type": LLMType.IMAGE2TEXT.value
},{
},
# ----------------------- Infiniflow -----------------------
{
"fid": factory_infos[2]["name"],
"llm_name": "gpt-3.5-turbo",
"tags": "LLM,CHAT,4K",
@@ -160,6 +166,33 @@ def init_llm_factory():
"max_tokens": 765,
"model_type": LLMType.IMAGE2TEXT.value
},
# ---------------------- ZhipuAI ----------------------
{
"fid": factory_infos[3]["name"],
"llm_name": "glm-3-turbo",
"tags": "LLM,CHAT,",
"max_tokens": 128 * 1000,
"model_type": LLMType.CHAT.value
}, {
"fid": factory_infos[3]["name"],
"llm_name": "glm-4",
"tags": "LLM,CHAT,",
"max_tokens": 128 * 1000,
"model_type": LLMType.CHAT.value
}, {
"fid": factory_infos[3]["name"],
"llm_name": "glm-4v",
"tags": "LLM,CHAT,IMAGE2TEXT",
"max_tokens": 2000,
"model_type": LLMType.IMAGE2TEXT.value
},
{
"fid": factory_infos[3]["name"],
"llm_name": "embedding-2",
"tags": "TEXT EMBEDDING",
"max_tokens": 512,
"model_type": LLMType.SPEECH2TEXT.value
},
]
for info in factory_infos:
LLMFactoriesService.save(**info)