refine manual parser (#131)

This commit is contained in:
KevinHuSh
2024-03-19 12:26:04 +08:00
committed by GitHub
parent d56c9e7630
commit 9da671b951
13 changed files with 145 additions and 52 deletions

View File

@@ -247,7 +247,7 @@ class HuParser:
b["SP"] = ii
def __ocr(self, pagenum, img, chars, ZM=3):
bxs = self.ocr(np.array(img))
bxs = self.ocr.detect(np.array(img))
if not bxs:
self.boxes.append([])
return
@@ -278,8 +278,10 @@ class HuParser:
for b in bxs:
if not b["text"]:
b["text"] = b["txt"]
left, right, top, bott = b["x0"]*ZM, b["x1"]*ZM, b["top"]*ZM, b["bottom"]*ZM
b["text"] = self.ocr.recognize(np.array(img), np.array([[left, top], [right, top], [right, bott], [left, bott]], dtype=np.float32))
del b["txt"]
bxs = [b for b in bxs if b["text"]]
if self.mean_height[-1] == 0:
self.mean_height[-1] = np.median([b["bottom"] - b["top"]
for b in bxs])

View File

@@ -69,7 +69,7 @@ def load_model(model_dir, nm):
options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
options.intra_op_num_threads = 2
options.inter_op_num_threads = 2
if ort.get_device() == "GPU":
if False and ort.get_device() == "GPU":
sess = ort.InferenceSession(model_file_path, options=options, providers=['CUDAExecutionProvider'])
else:
sess = ort.InferenceSession(model_file_path, options=options, providers=['CPUExecutionProvider'])
@@ -366,7 +366,7 @@ class TextDetector(object):
'keep_keys': ['image', 'shape']
}
}]
postprocess_params = {"name": "DBPostProcess", "thresh": 0.3, "box_thresh": 0.6, "max_candidates": 1000,
postprocess_params = {"name": "DBPostProcess", "thresh": 0.3, "box_thresh": 0.5, "max_candidates": 1000,
"unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
self.postprocess_op = build_post_process(postprocess_params)
@@ -534,6 +534,34 @@ class OCR(object):
break
return _boxes
def detect(self, img):
    """Run text detection only and pair each box with a blank recognition slot.

    Args:
        img: Image handed straight to ``self.text_detector``; ``None`` is
            treated as "nothing to do".

    Returns:
        On success, a list of ``(box, ("", 0))`` pairs, with boxes ordered
        by ``self.sorted_boxes``. The list is empty (and therefore falsy)
        when the detector reports no boxes, so callers can guard with
        ``if not result``.
        When ``img`` is ``None`` or the detector returns ``None`` for its
        boxes, the tuple ``(None, None, time_dict)`` is returned instead.
    """
    time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0}

    # NOTE(review): both early exits below return a 3-tuple, which is truthy
    # and shaped differently from the success path. Kept as-is so existing
    # callers are not broken -- confirm whether an empty list would be safer.
    if img is None:
        return None, None, time_dict

    start = time.time()
    dt_boxes, elapse = self.text_detector(img)
    time_dict['det'] = elapse

    if dt_boxes is None:
        time_dict['all'] = time.time() - start
        return None, None, time_dict

    cron_logger.debug("dt_boxes num : {}, elapsed : {}".format(
        len(dt_boxes), elapse))

    # Materialize the pairs: a bare zip object is always truthy, which would
    # make a caller's "no boxes" check impossible to satisfy.
    placeholders = [("", 0) for _ in range(len(dt_boxes))]
    return list(zip(self.sorted_boxes(dt_boxes), placeholders))
def recognize(self, ori_im, box):
    """Recognize the text inside a single box of an image.

    Args:
        ori_im: Source image, forwarded to ``self.get_rotate_crop_image``.
        box: Region to crop, forwarded to ``self.get_rotate_crop_image``
            (presumably four corner points -- verify against the caller).

    Returns:
        The recognized text, or ``""`` when the recognizer's score is
        below ``self.drop_score``.
    """
    cropped = self.get_rotate_crop_image(ori_im, box)
    # The recognizer works on batches; send a one-element batch and take
    # its only result. The elapsed time it reports is not needed here.
    results, _ = self.text_recognizer([cropped])
    text, score = results[0]
    return "" if score < self.drop_score else text
def __call__(self, img, cls=True):
time_dict = {'det': 0, 'rec': 0, 'cls': 0, 'all': 0}
@@ -562,6 +590,7 @@ class OCR(object):
img_crop_list.append(img_crop)
rec_res, elapse = self.text_recognizer(img_crop_list)
time_dict['rec'] = elapse
cron_logger.debug("rec_res num : {}, elapsed : {}".format(
len(rec_res), elapse))
@@ -575,6 +604,7 @@ class OCR(object):
end = time.time()
time_dict['all'] = end - start
#for bno in range(len(img_crop_list)):
# print(f"{bno}, {rec_res[bno]}")

View File

@@ -41,7 +41,7 @@ class Recognizer(object):
if not os.path.exists(model_file_path):
raise ValueError("not find model file path {}".format(
model_file_path))
if ort.get_device() == "GPU":
if False and ort.get_device() == "GPU":
options = ort.SessionOptions()
options.enable_cpu_mem_arena = False
self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])