layout refine (#115)
This commit is contained in:
@@ -101,7 +101,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, lang="Chinese", ca
|
||||
d = copy.deepcopy(doc)
|
||||
if pdf_parser:
|
||||
d["image"], poss = pdf_parser.crop(ck, need_position=True)
|
||||
add_positions(d, poss, from_page)
|
||||
add_positions(d, poss)
|
||||
ck = pdf_parser.remove_tag(ck)
|
||||
tokenize(d, ck, eng)
|
||||
res.append(d)
|
||||
|
||||
Reference in New Issue
Block a user