update sdk document (#2374)

### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
2024-09-12 14:19:45 +08:00
parent 6000c3e304
commit 6722b3d558
7 changed files with 472 additions and 2 deletions
--- a/sdk/python/ragflow/modules/dataset.py
+++ b/sdk/python/ragflow/modules/dataset.py
@@ -1,3 +1,7 @@
+from typing import Optional, List
+
+from .document import Document
+
 from .base import Base


@@ -46,3 +50,39 @@ class DataSet(Base):
        res = res.json()
        if res.get("retmsg") == "success": return True
        raise Exception(res["retmsg"])
+
+    def list_docs(self, keywords: Optional[str] = None, offset: int = 0, limit: int = -1) -> List[Document]:
+        """
+        List the documents in the dataset, optionally filtering by keywords, with pagination support.
+
+        Args:
+            keywords (Optional[str]): A string of keywords to filter the documents. Defaults to None.
+            offset (int): The starting point for pagination. Defaults to 0.
+            limit (int): The maximum number of documents to return. Defaults to -1 (no limit).
+
+        Returns:
+            List[Document]: A list of Document objects.
+        """
+        # Construct the request payload for listing documents
+        payload = {
+            "kb_id": self.id,
+            "keywords": keywords,
+            "offset": offset,
+            "limit": limit
+        }
+
+        # Send the request to the server to list documents
+        res = self.get(f'/doc/dataset/{self.id}/documents', payload)
+        res_json = res.json()
+
+        # Handle response and error checking
+        if res_json.get("retmsg") != "success":
+            raise Exception(res_json.get("retmsg"))
+
+        # Parse the document data from the response
+        documents = []
+        for doc_data in res_json["data"].get("docs", []):
+            doc = Document(self.rag, doc_data)
+            documents.append(doc)
+
+        return documents