# Add bce-embedding and fastembed (#383)
### What problem does this PR solve?

Issue link: #326

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
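Both additions are local embedding backends, so they can be exercised without any API key. Below is a minimal smoke test of the two upstream libraries this PR targets; the package names and call signatures are taken from the fastembed and BCEmbedding projects, not from this repository, so treat them as assumptions:

```python
# pip install fastembed BCEmbedding -- both run locally, no API key needed.
from fastembed import TextEmbedding
from BCEmbedding import EmbeddingModel

texts = ["What is RAG?", "Retrieval grounds LLM answers in your documents."]

# FastEmbed serves quantized ONNX embedding models on CPU.
fe = TextEmbedding(model_name="BAAI/bge-small-en-v1.5")
fe_vecs = list(fe.embed(texts))  # one numpy vector per input text

# BCEmbedding serves the bce-embedding-base_v1 model registered below.
bce = EmbeddingModel(model_name_or_path="maidalun1020/bce-embedding-base_v1")
bce_vecs = bce.encode(texts)  # numpy array, shape (len(texts), dim)
```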
```diff
@@ -18,7 +18,7 @@ import time
 import uuid
 
 from api.db import LLMType, UserTenantRole
-from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM
+from api.db.db_models import init_database_tables as init_web_db, LLMFactories, LLM, TenantLLM
 from api.db.services import UserService
 from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantLLMService, LLMBundle
 from api.db.services.user_service import TenantService, UserTenantService
```
```diff
@@ -114,12 +114,16 @@ factory_infos = [{
     "logo": "",
     "tags": "TEXT EMBEDDING",
     "status": "1",
-},
-{
+}, {
     "name": "Xinference",
     "logo": "",
     "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
     "status": "1",
+},{
+    "name": "QAnything",
+    "logo": "",
+    "tags": "LLM,TEXT EMBEDDING,SPEECH2TEXT,MODERATION",
+    "status": "1",
 },
 # {
 #     "name": "文心一言",
```
```diff
@@ -254,12 +258,6 @@ def init_llm_factory():
         "tags": "LLM,CHAT,",
         "max_tokens": 7900,
         "model_type": LLMType.CHAT.value
-    }, {
-        "fid": factory_infos[4]["name"],
-        "llm_name": "flag-embedding",
-        "tags": "TEXT EMBEDDING,",
-        "max_tokens": 128 * 1000,
-        "model_type": LLMType.EMBEDDING.value
     }, {
         "fid": factory_infos[4]["name"],
         "llm_name": "moonshot-v1-32k",
```
```diff
@@ -325,6 +323,14 @@ def init_llm_factory():
         "max_tokens": 2147483648,
         "model_type": LLMType.EMBEDDING.value
     },
+    # ------------------------ QAnything -----------------------
+    {
+        "fid": factory_infos[7]["name"],
+        "llm_name": "maidalun1020/bce-embedding-base_v1",
+        "tags": "TEXT EMBEDDING,",
+        "max_tokens": 512,
+        "model_type": LLMType.EMBEDDING.value
+    },
     ]
     for info in factory_infos:
         try:
```
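For context, the two lists above are consumed by insert-and-ignore seeding loops, only the first lines of which are visible in these hunks. A sketch of the pattern, assuming `save(**info)` inserts one row and raises on a duplicate (the name `llm_infos` for the model list is a guess):

```python
for info in factory_infos:
    try:
        LLMFactoriesService.save(**info)
    except Exception:
        pass  # factory already seeded on a previous startup
for info in llm_infos:
    try:
        LLMService.save(**info)
    except Exception:
        pass  # model row already present
```

This makes `init_llm_factory()` safe to re-run on every startup, which is also why stale rows have to be deleted explicitly, as the next hunk does.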
```diff
@@ -337,8 +343,10 @@ def init_llm_factory():
         except Exception as e:
             pass
 
-    LLMFactoriesService.filter_delete([LLMFactories.name=="Local"])
-    LLMService.filter_delete([LLM.fid=="Local"])
+    LLMFactoriesService.filter_delete([LLMFactories.name == "Local"])
+    LLMService.filter_delete([LLM.fid == "Local"])
+    LLMService.filter_delete([LLM.fid == "Moonshot", LLM.llm_name == "flag-embedding"])
+    TenantLLMService.filter_delete([TenantLLM.llm_factory == "Moonshot", TenantLLM.llm_name == "flag-embedding"])
 
     """
     drop table llm;
```
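The two new `filter_delete` calls scrub `flag-embedding` out of the Moonshot factory, where the earlier hunk shows it was misfiled, including any per-tenant configurations of it — which is what the new `TenantLLM` import at the top of the file is for. With those rows gone, the seeded data stays consistent across restarts.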
```diff
@@ -80,8 +80,12 @@ def chat(dialog, messages, **kwargs):
             raise LookupError("LLM(%s) not found" % dialog.llm_id)
         max_tokens = 1024
     else: max_tokens = llm[0].max_tokens
+    kbs = KnowledgebaseService.get_by_ids(dialog.kb_ids)
+    embd_nms = list(set([kb.embd_id for kb in kbs]))
+    assert len(embd_nms) == 1, "Knowledge bases use different embedding models."
+
     questions = [m["content"] for m in messages if m["role"] == "user"]
-    embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING)
+    embd_mdl = LLMBundle(dialog.tenant_id, LLMType.EMBEDDING, embd_nms[0])
     chat_mdl = LLMBundle(dialog.tenant_id, LLMType.CHAT, dialog.llm_id)
 
     prompt_config = dialog.prompt_config
```
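The new assertion encodes a real constraint: query vectors are only comparable with chunk vectors produced by the same model, so a dialog spanning several knowledge bases must refuse mixed embeddings. The same rule as a standalone helper (`pick_embedding_model` is illustrative, not code from this PR):

```python
def pick_embedding_model(kbs):
    """Return the one embedding model shared by every knowledge base.

    Chunks indexed with model A live in a different vector space than a
    query embedded with model B, so mixing models silently breaks retrieval.
    """
    names = {kb.embd_id for kb in kbs}
    if len(names) != 1:
        raise ValueError("Knowledge bases use different embedding models: %s" % sorted(names))
    return names.pop()
```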
```diff
@@ -66,7 +66,7 @@ class TenantLLMService(CommonService):
             raise LookupError("Tenant not found")
 
         if llm_type == LLMType.EMBEDDING.value:
-            mdlnm = tenant.embd_id
+            mdlnm = tenant.embd_id if not llm_name else llm_name
         elif llm_type == LLMType.SPEECH2TEXT.value:
             mdlnm = tenant.asr_id
         elif llm_type == LLMType.IMAGE2TEXT.value:
```
```diff
@@ -77,9 +77,14 @@ class TenantLLMService(CommonService):
             assert False, "LLM type error"
 
         model_config = cls.get_api_key(tenant_id, mdlnm)
-        if not model_config:
-            raise LookupError("Model({}) not authorized".format(mdlnm))
-        model_config = model_config.to_dict()
+        if model_config: model_config = model_config.to_dict()
+        if llm_type == LLMType.EMBEDDING.value:
+            llm = LLMService.query(llm_name=llm_name)
+            if llm and llm[0].fid in ["QAnything", "FastEmbed"]:
+                model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": llm_name, "api_base": ""}
+        if not model_config: raise LookupError("Model({}) not authorized".format(mdlnm))
+
         if llm_type == LLMType.EMBEDDING.value:
             if model_config["llm_factory"] not in EmbeddingModel:
                 return
```
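Taken together, the two hunks change embedding lookup in two ways: an explicit `llm_name` now overrides the tenant's default `embd_id`, and models under the local QAnything and FastEmbed factories fall back to an empty, keyless config instead of failing the API-key check. A hedged restatement of that flow (the helper name and shape are mine; the field values come from the diff):

```python
def resolve_embedding_config(tenant_id: str, llm_name: str) -> dict:
    # 1) Prefer a per-tenant API-key record for this exact model.
    config = TenantLLMService.get_api_key(tenant_id, llm_name)
    if config:
        return config.to_dict()
    # 2) QAnything / FastEmbed models run locally: no key or endpoint needed.
    llm = LLMService.query(llm_name=llm_name)
    if llm and llm[0].fid in ["QAnything", "FastEmbed"]:
        return {"llm_factory": llm[0].fid, "api_key": "", "llm_name": llm_name, "api_base": ""}
    # 3) Anything else without credentials stays an authorization error.
    raise LookupError("Model({}) not authorized".format(llm_name))
```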
```diff
@@ -41,7 +41,7 @@ class TaskService(CommonService):
             Document.size,
             Knowledgebase.tenant_id,
             Knowledgebase.language,
-            Tenant.embd_id,
+            Knowledgebase.embd_id,
             Tenant.img2txt_id,
             Tenant.asr_id,
             cls.model.update_time]
```
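This last hunk completes the per-knowledge-base story: background parsing tasks now take the embedding model from the knowledge base that owns the document, while image-to-text and ASR remain tenant-wide defaults. Roughly the query shape involved, sketched with the repo's peewee models (the join conditions here are assumptions; only the selected columns appear in the diff):

```python
from api.db.db_models import Task, Document, Knowledgebase, Tenant

fields = [
    Document.size,
    Knowledgebase.tenant_id,
    Knowledgebase.language,
    Knowledgebase.embd_id,  # per-KB embedding model (was Tenant.embd_id)
    Tenant.img2txt_id,      # still a tenant-wide default
    Tenant.asr_id,          # still a tenant-wide default
    Task.update_time,
]
tasks = (Task.select(*fields)
             .join(Document, on=(Task.doc_id == Document.id))
             .join(Knowledgebase, on=(Document.kb_id == Knowledgebase.id))
             .join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)))
```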