Refactor Document API (#2833)

### What problem does this PR solve?

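Refactor the Document API. Among the changes visible in this diff: `DataSet` gains `upload_documents`, `list_documents` (in place of `list_docs`) and `delete_documents`; `Document.save` becomes `Document.update`; and requests move to `/dataset/{id}/...` endpoints whose responses are checked through `code`/`message` rather than `retmsg`.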

### Type of change


- [x] Refactoring

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>
This commit is contained in:
liuhua
2024-10-14 20:03:33 +08:00
committed by GitHub
parent df223eddf3
commit 6329427ad5
11 changed files with 393 additions and 418 deletions

View File

@@ -18,8 +18,8 @@ class Base(object):
pr[name] = value
return pr
def post(self, path, json, stream=False):
res = self.rag.post(path, json, stream=stream)
# Forward a POST request to the underlying client object (self.rag) and
# return whatever that call returns, unchanged.
#   path   - request path, passed through positionally
#   json   - request payload, passed through positionally (defaults to None)
#   stream - forwarded as the `stream` keyword (defaults to False)
#   files  - forwarded as the `files` keyword (defaults to None); this is the
#            parameter callers use to attach file uploads
def post(self, path, json=None, stream=False, files=None):
res = self.rag.post(path, json, stream=stream,files=files)
return res
def get(self, path, params):

View File

@@ -1,5 +1,7 @@
from typing import Optional, List
from transformers.models.bloom.modeling_bloom import bloom_gelu_back
from .document import Document
from .base import Base
@@ -39,39 +41,27 @@ class DataSet(Base):
if res.get("code") != 0:
raise Exception(res["message"])
# Upload the given documents to this dataset.
#   document_list - list of dicts; each entry is read via ele["name"] and
#                   ele["blob"] (a missing key raises KeyError)
# Returns nothing on success.
# Raises Exception carrying the response's "message" when the response
# "code" is not 0.
def upload_documents(self,document_list: List[dict]):
url = f"/dataset/{self.id}/document"
# One ("file", (name, blob)) tuple per document, all under the same field name
files = [("file",(ele["name"],ele["blob"])) for ele in document_list]
# json is explicitly None; the documents travel in the `files` argument
# NOTE(review): presumably sent as multipart form data by the HTTP client - confirm
res = self.post(path=url,json=None,files=files)
res = res.json()
if res.get("code") != 0:
raise Exception(res.get("message"))
def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
"""
List the documents in the dataset, optionally filtering by keywords, with pagination support.
Args:
keywords (Optional[str]): A string of keywords to filter the documents. Defaults to None.
offset (int): The starting point for pagination. Defaults to 0.
limit (int): The maximum number of documents to return. Defaults to -1 (no limit).
Returns:
List[Document]: A list of Document objects.
"""
# Construct the request payload for listing documents
payload = {
"knowledgebase_id": self.id,
"keywords": keywords,
"offset": offset,
"limit": limit
}
# Send the request to the server to list documents
res = self.get(f'/doc/dataset/{self.id}/documents', payload)
res_json = res.json()
# Handle response and error checking
if res_json.get("retmsg") != "success":
raise Exception(res_json.get("retmsg"))
# Parse the document data from the response
def list_documents(self, id: str = None, keywords: str = None, offset: int =1, limit: int = 1024, orderby: str = "create_time", desc: bool = True):
res = self.get(f"/dataset/{self.id}/info",params={"id": id,"keywords": keywords,"offset": offset,"limit": limit,"orderby": orderby,"desc": desc})
res = res.json()
documents = []
for doc_data in res_json["data"].get("docs", []):
doc = Document(self.rag, doc_data)
documents.append(doc)
if res.get("code") == 0:
for document in res["data"].get("docs"):
documents.append(Document(self.rag,document))
return documents
raise Exception(res["message"])
# Delete documents from this dataset.
#   ids - list of document IDs, sent to the server as {"ids": ids}; defaults
#         to None, which is forwarded as-is
#         NOTE(review): server behavior for ids=None is not visible here -
#         confirm whether it is rejected or treated as "delete all"
# Returns nothing on success.
# Raises Exception carrying the response's "message" when the response
# "code" is not 0.
def delete_documents(self,ids: List[str] = None):
res = self.rm(f"/dataset/{self.id}/document",{"ids":ids})
res = res.json()
if res.get("code") != 0:
raise Exception(res["message"])
return documents

View File

@@ -29,18 +29,14 @@ class Document(Base):
res_dict.pop(k)
super().__init__(rag, res_dict)
def save(self) -> bool:
def update(self,update_message:dict) -> bool:
"""
Save the document details to the server.
"""
res = self.post('/doc/save',
{"id": self.id, "name": self.name, "thumbnail": self.thumbnail, "knowledgebase_id": self.knowledgebase_id,
"parser_method": self.parser_method, "parser_config": self.parser_config.to_json(),
})
res = self.post(f'/dataset/{self.knowledgebase_id}/info/{self.id}',update_message)
res = res.json()
if res.get("retmsg") == "success":
return True
raise Exception(res["retmsg"])
if res.get("code") != 0:
raise Exception(res["message"])
def delete(self) -> bool:
"""
@@ -60,8 +56,7 @@ class Document(Base):
:return: The downloaded document content in bytes.
"""
# Construct the URL for the API request using the document ID and knowledge base ID
res = self.get(f"/doc/{self.id}",
{"headers": self.rag.authorization_header, "id": self.id, "name": self.name, "stream": True})
res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
# Check the response status code to ensure the request was successful
if res.status_code == 200: