Refactor Dataset API (#2783)

### What problem does this PR solve?

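Refactor the Dataset API: dataset operations in the SDK now go through the `/dataset` route (POST to create, GET to list, PUT `/dataset/{id}` to update, DELETE to remove), and response handling checks `code`/`message` instead of `retmsg`.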

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
liuhua
2024-10-11 09:55:27 +08:00
committed by GitHub
parent a2f9c03a95
commit cbd7cd7c4d
11 changed files with 449 additions and 393 deletions

View File

@@ -30,5 +30,9 @@ class Base(object):
res = self.rag.delete(path, params)
return res
# Delegates a PUT request: passes `path` and the `json` payload to self.rag.put
# and returns whatever that call returns, unchanged.
def put(self,path, json):
res = self.rag.put(path,json)
return res
# String form of the object: str() applied to whatever self.to_json() returns.
def __str__(self):
return str(self.to_json())

View File

@@ -32,24 +32,13 @@ class DataSet(Base):
res_dict.pop(k)
super().__init__(rag, res_dict)
def save(self) -> bool:
res = self.post('/dataset/save',
{"id": self.id, "name": self.name, "avatar": self.avatar, "tenant_id": self.tenant_id,
"description": self.description, "language": self.language, "embedding_model": self.embedding_model,
"permission": self.permission,
"document_count": self.document_count, "chunk_count": self.chunk_count, "parse_method": self.parse_method,
"parser_config": self.parser_config.to_json()
})
# Sends `update_message` via self.put to /dataset/{self.id} and parses the reply as JSON.
# NOTE(review): this diff hunk lost its +/- markers. The two "retmsg" lines below look like
# the removed pre-refactor check, and the "code"/"message" lines like the added one
# (raise Exception with the response "message" when "code" is not 0) — confirm against the commit.
def update(self, update_message: dict):
res = self.put(f'/dataset/{self.id}',
update_message)
res = res.json()
if res.get("retmsg") == "success": return True
raise Exception(res["retmsg"])
if res.get("code") != 0:
raise Exception(res["message"])
def delete(self) -> bool:
res = self.rm('/dataset/delete',
{"id": self.id})
res = res.json()
if res.get("retmsg") == "success": return True
raise Exception(res["retmsg"])
def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
"""

View File

@@ -18,9 +18,9 @@ from typing import List
import requests
from .modules.assistant import Assistant
from .modules.chunk import Chunk
from .modules.dataset import DataSet
from .modules.document import Document
from .modules.chunk import Chunk
class RAGFlow:
@@ -41,7 +41,11 @@ class RAGFlow:
return res
# Issues an HTTP DELETE to self.api_url + path with self.authorization_header and returns
# the requests response object.
# NOTE(review): the two requests.delete lines below appear to be the old/new pair of this
# diff hunk (+/- markers lost in rendering): the first passes `params` as query-string
# parameters (params=), the second as a JSON request body (json=). Presumably only the
# json= variant remains after the commit — confirm against the commit.
def delete(self, path, params):
res = requests.delete(url=self.api_url + path, params=params, headers=self.authorization_header)
res = requests.delete(url=self.api_url + path, json=params, headers=self.authorization_header)
return res
# Issues an HTTP PUT to self.api_url + path, sending `json` as the JSON request body and
# self.authorization_header as the headers; returns the requests response object as-is.
def put(self, path, json):
res = requests.put(url=self.api_url + path, json= json,headers=self.authorization_header)
return res
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
@@ -52,7 +56,7 @@ class RAGFlow:
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
"delimiter": "\n!?。;!?", "task_page_size": 12})
parser_config = parser_config.to_json()
res = self.post("/dataset/save",
res = self.post("/dataset",
{"name": name, "avatar": avatar, "description": description, "language": language,
"permission": permission,
"document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
@@ -60,27 +64,28 @@ class RAGFlow:
}
)
res = res.json()
if res.get("retmsg") == "success":
if res.get("code") == 0:
return DataSet(self, res["data"])
raise Exception(res["retmsg"])
raise Exception(res["message"])
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True) -> \
# Requests deletion of datasets by passing {"ids": ids, "names": names} through self.delete
# to "/dataset", then parses the reply as JSON.
# Returns nothing on success; raises Exception carrying the response "message" when the
# response "code" is not 0.
def delete_dataset(self, ids: List[str] = None, names: List[str] = None):
res = self.delete("/dataset",{"ids": ids, "names": names})
res=res.json()
if res.get("code") != 0:
raise Exception(res["message"])
def list_datasets(self, page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
id: str = None, name: str = None) -> \
List[DataSet]:
res = self.get("/dataset/list", {"page": page, "page_size": page_size, "orderby": orderby, "desc": desc})
res = self.get("/dataset",
{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name})
res = res.json()
result_list = []
if res.get("retmsg") == "success":
if res.get("code") == 0:
for data in res['data']:
result_list.append(DataSet(self, data))
return result_list
raise Exception(res["retmsg"])
def get_dataset(self, id: str = None, name: str = None) -> DataSet:
res = self.get("/dataset/detail", {"id": id, "name": name})
res = res.json()
if res.get("retmsg") == "success":
return DataSet(self, res['data'])
raise Exception(res["retmsg"])
raise Exception(res["message"])
def create_assistant(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
llm: Assistant.LLM = None, prompt: Assistant.Prompt = None) -> Assistant:
@@ -272,4 +277,3 @@ class RAGFlow:
except Exception as e:
print(f"An error occurred during retrieval: {e}")
raise