diff --git a/deepdoc/parser/excel_parser.py b/deepdoc/parser/excel_parser.py index 2c3e6775..736ac32e 100644 --- a/deepdoc/parser/excel_parser.py +++ b/deepdoc/parser/excel_parser.py @@ -69,7 +69,7 @@ class RAGFlowExcelParser: if fnm.split(".")[-1].lower() in ["csv", "txt"]: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") return len(txt.split("\n")) diff --git a/rag/app/book.py b/rag/app/book.py index 70aee29c..c4bc62ab 100644 --- a/rag/app/book.py +++ b/rag/app/book.py @@ -91,7 +91,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/laws.py b/rag/app/laws.py index 473eca9c..6361d62c 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -113,7 +113,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/naive.py b/rag/app/naive.py index 55fab84c..c557a626 100644 --- a/rag/app/naive.py +++ b/rag/app/naive.py @@ -141,7 +141,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/one.py b/rag/app/one.py index f5c78f5a..531fd0a7 100644 --- a/rag/app/one.py +++ b/rag/app/one.py @@ -85,7 +85,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/qa.py b/rag/app/qa.py index a37ff63f..1ecf9b18 100644 --- a/rag/app/qa.py +++ b/rag/app/qa.py @@ -107,7 +107,7 @@ def chunk(filename, binary=None, lang="Chinese", callback=None, **kwargs): txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: diff --git a/rag/app/table.py b/rag/app/table.py index 96a53aac..368d1ce8 100644 --- a/rag/app/table.py +++ b/rag/app/table.py @@ -149,7 +149,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000, txt = "" if binary: encoding = find_codec(binary) - txt = binary.decode(encoding) + txt = binary.decode(encoding, errors="ignore") else: with open(filename, "r") as f: while True: