refactor code (#583)
### What problem does this PR solve?

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
|
||||
|
||||
from .pdf_parser import HuParser as PdfParser, PlainParser
|
||||
from .docx_parser import HuDocxParser as DocxParser
|
||||
from .excel_parser import HuExcelParser as ExcelParser
|
||||
from .ppt_parser import HuPptParser as PptParser
|
||||
from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
|
||||
from .docx_parser import RAGFlowDocxParser as DocxParser
|
||||
from .excel_parser import RAGFlowExcelParser as ExcelParser
|
||||
from .ppt_parser import RAGFlowPptParser as PptParser
|
||||
|
||||
@@ -7,7 +7,7 @@ from rag.nlp import huqie
|
||||
from io import BytesIO
|
||||
|
||||
|
||||
class HuDocxParser:
|
||||
class RAGFlowDocxParser:
|
||||
|
||||
def __extract_table_content(self, tb):
|
||||
df = []
|
||||
|
||||
@@ -6,7 +6,7 @@ from io import BytesIO
|
||||
from rag.nlp import find_codec
|
||||
|
||||
|
||||
class HuExcelParser:
|
||||
class RAGFlowExcelParser:
|
||||
def html(self, fnm):
|
||||
if isinstance(fnm, str):
|
||||
wb = load_workbook(fnm)
|
||||
@@ -74,5 +74,5 @@ class HuExcelParser:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
psr = HuExcelParser()
|
||||
psr = RAGFlowExcelParser()
|
||||
psr(sys.argv[1])
|
||||
|
||||
@@ -23,7 +23,7 @@ from huggingface_hub import snapshot_download
|
||||
logging.getLogger("pdfminer").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
class HuParser:
|
||||
class RAGFlowPdfParser:
|
||||
def __init__(self):
|
||||
self.ocr = OCR()
|
||||
if hasattr(self, "model_speciess"):
|
||||
|
||||
@@ -14,7 +14,7 @@ from io import BytesIO
|
||||
from pptx import Presentation
|
||||
|
||||
|
||||
class HuPptParser(object):
|
||||
class RAGFlowPptParser(object):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
|
||||
|
||||
@@ -11,10 +11,6 @@
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from deepdoc.vision.seeit import draw_box
|
||||
from deepdoc.vision import OCR, init_in_out
|
||||
import argparse
|
||||
import numpy as np
|
||||
import os
|
||||
import sys
|
||||
sys.path.insert(
|
||||
@@ -25,6 +21,11 @@ sys.path.insert(
|
||||
os.path.abspath(__file__)),
|
||||
'../../')))
|
||||
|
||||
from deepdoc.vision.seeit import draw_box
|
||||
from deepdoc.vision import OCR, init_in_out
|
||||
import argparse
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main(args):
|
||||
ocr = OCR()
|
||||
|
||||
@@ -10,17 +10,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from deepdoc.vision.seeit import draw_box
|
||||
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
|
||||
import os, sys
|
||||
sys.path.insert(
|
||||
0,
|
||||
os.path.abspath(
|
||||
@@ -29,6 +19,13 @@ sys.path.insert(
|
||||
os.path.abspath(__file__)),
|
||||
'../../')))
|
||||
|
||||
from deepdoc.vision.seeit import draw_box
|
||||
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
|
||||
from api.utils.file_utils import get_project_base_directory
|
||||
import argparse
|
||||
import re
|
||||
import numpy as np
|
||||
|
||||
|
||||
def main(args):
|
||||
images, outputs = init_in_out(args)
|
||||
|
||||
Reference in New Issue
Block a user