refactor code (#583)

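### What problem does this PR solve?

Among other changes, this PR renames the `Hu*` parser classes to `RAGFlow*` (`HuParser` → `RAGFlowPdfParser`, `HuDocxParser` → `RAGFlowDocxParser`, `HuExcelParser` → `RAGFlowExcelParser`, `HuPptParser` → `RAGFlowPptParser`) while keeping the package-level aliases (`PdfParser`, `DocxParser`, `ExcelParser`, `PptParser`) unchanged, and moves the project imports in the vision test scripts below the `sys.path.insert(...)` call.
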
### Type of change

- [x] Refactoring
This commit is contained in:
KevinHuSh
2024-04-28 13:19:54 +08:00
committed by GitHub
parent aadb9cbec8
commit 9d60a84958
25 changed files with 48 additions and 525 deletions

View File

@@ -1,6 +1,6 @@
from .pdf_parser import HuParser as PdfParser, PlainParser
from .docx_parser import HuDocxParser as DocxParser
from .excel_parser import HuExcelParser as ExcelParser
from .ppt_parser import HuPptParser as PptParser
from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
from .docx_parser import RAGFlowDocxParser as DocxParser
from .excel_parser import RAGFlowExcelParser as ExcelParser
from .ppt_parser import RAGFlowPptParser as PptParser

View File

@@ -7,7 +7,7 @@ from rag.nlp import huqie
from io import BytesIO
class HuDocxParser:
class RAGFlowDocxParser:
def __extract_table_content(self, tb):
df = []

View File

@@ -6,7 +6,7 @@ from io import BytesIO
from rag.nlp import find_codec
class HuExcelParser:
class RAGFlowExcelParser:
def html(self, fnm):
if isinstance(fnm, str):
wb = load_workbook(fnm)
@@ -74,5 +74,5 @@ class HuExcelParser:
if __name__ == "__main__":
psr = HuExcelParser()
psr = RAGFlowExcelParser()
psr(sys.argv[1])

View File

@@ -23,7 +23,7 @@ from huggingface_hub import snapshot_download
logging.getLogger("pdfminer").setLevel(logging.WARNING)
class HuParser:
class RAGFlowPdfParser:
def __init__(self):
self.ocr = OCR()
if hasattr(self, "model_speciess"):

View File

@@ -14,7 +14,7 @@ from io import BytesIO
from pptx import Presentation
class HuPptParser(object):
class RAGFlowPptParser(object):
def __init__(self):
super().__init__()

View File

@@ -11,10 +11,6 @@
# limitations under the License.
#
from deepdoc.vision.seeit import draw_box
from deepdoc.vision import OCR, init_in_out
import argparse
import numpy as np
import os
import sys
sys.path.insert(
@@ -25,6 +21,11 @@ sys.path.insert(
os.path.abspath(__file__)),
'../../')))
from deepdoc.vision.seeit import draw_box
from deepdoc.vision import OCR, init_in_out
import argparse
import numpy as np
def main(args):
ocr = OCR()

View File

@@ -10,17 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from deepdoc.vision.seeit import draw_box
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
from api.utils.file_utils import get_project_base_directory
import argparse
import os
import sys
import re
import numpy as np
import os, sys
sys.path.insert(
0,
os.path.abspath(
@@ -29,6 +19,13 @@ sys.path.insert(
os.path.abspath(__file__)),
'../../')))
from deepdoc.vision.seeit import draw_box
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
from api.utils.file_utils import get_project_base_directory
import argparse
import re
import numpy as np
def main(args):
images, outputs = init_in_out(args)