未验证 提交 f68813eb 编写于 作者: U user1018 提交者: GitHub

optimize recovery (#8346)

* optimize recovery

* update
上级 70987157
......@@ -229,7 +229,9 @@ def main(args):
if args.recovery and args.use_pdf2docx_api and flag_pdf:
from pdf2docx.converter import Converter
-docx_file = os.path.join(args.output, '{}.docx'.format(img_name))
+os.makedirs(args.output, exist_ok=True)
+docx_file = os.path.join(args.output,
+'{}_api.docx'.format(img_name))
cv = Converter(image_file)
cv.convert(docx_file)
cv.close()
......
......@@ -73,7 +73,7 @@ def convert_info_docx(img, res, save_folder, img_name):
text_run.font.size = shared.Pt(10)
# save to docx
-docx_path = os.path.join(save_folder, '{}.docx'.format(img_name))
+docx_path = os.path.join(save_folder, '{}_ocr.docx'.format(img_name))
doc.save(docx_path)
logger.info('docx save to {}'.format(docx_path))
......
python-docx
PyMuPDF==1.19.0
beautifulsoup4
fonttools>=4.24.0
fire>=0.3.0
......
......@@ -278,8 +278,6 @@ class HtmlToDocx(HTMLParser):
cell_col += colspan
cell_row += 1
-doc.save('1.docx')
def handle_data(self, data):
if self.skip:
return
......
......@@ -14,4 +14,4 @@ lxml
premailer
openpyxl
attrdict
-PyMuPDF==1.19.0
\ No newline at end of file
+PyMuPDF<1.21.0
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册