From 1a0a75e3fa896cf3c095e4146a54acef7223657e Mon Sep 17 00:00:00 2001
From: Leif <4603009@qq.com>
Date: Wed, 24 Aug 2022 16:34:36 +0800
Subject: [PATCH] Add pdf2word exe
Add pdf2word exe
---
image2doc.py | 349 --------------
.../pdf2word/icons}/chinese.png | Bin
.../pdf2word/icons}/english.png | Bin
.../pdf2word/icons}/folder-open.png | Bin
.../pdf2word/icons}/folder-plus.png | Bin
.../pdf2word/pdf2word.md | 17 +-
ppstructure/pdf2word/pdf2word.py | 441 ++++++++++++++++++
7 files changed, 450 insertions(+), 357 deletions(-)
delete mode 100644 image2doc.py
rename {icons => ppstructure/pdf2word/icons}/chinese.png (100%)
rename {icons => ppstructure/pdf2word/icons}/english.png (100%)
rename {icons => ppstructure/pdf2word/icons}/folder-open.png (100%)
rename {icons => ppstructure/pdf2word/icons}/folder-plus.png (100%)
rename PDF2WORD.md => ppstructure/pdf2word/pdf2word.md (74%)
create mode 100644 ppstructure/pdf2word/pdf2word.py
diff --git a/image2doc.py b/image2doc.py
deleted file mode 100644
index 700126e0..00000000
--- a/image2doc.py
+++ /dev/null
@@ -1,349 +0,0 @@
-import sys
-import tarfile
-import os
-import time
-import functools
-import cv2
-import platform
-import numpy as np
-from qtpy import QtWidgets
-from qtpy.QtGui import QImage, QPixmap, QIcon
-
-from ppstructure.predict_system import StructureSystem, save_structure_res
-from ppstructure.utility import parse_args, draw_structure_result
-from ppocr.utils.network import download_with_progressbar
-from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
-from ScreenShotWidget import ScreenShotWidget
-
-__APPNAME__ = "Image2Doc"
-__VERSION__ = "0.0.2"
-here = os.path.dirname(os.path.abspath(__file__))
-URLs_EN = {
- # 下载超英文轻量级PP-OCRv3模型的检测模型并解压
- "en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar",
- # 下载英文轻量级PP-OCRv3模型的识别模型并解压
- "en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar",
- # 下载超轻量级英文表格英文模型并解压
- "en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
- # 英文版面分析模型
- "picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar",
-}
-DICT_EN = {
- "rec_char_dict_path": "en_dict.txt",
- "layout_dict_path": "layout_publaynet_dict.txt",
-}
-
-URLs_CN = {
- # 下载超中文轻量级PP-OCRv3模型的检测模型并解压
- "cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar",
- # 下载中文轻量级PP-OCRv3模型的识别模型并解压
- "cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar",
- # 下载超轻量级英文表格英文模型并解压
- "cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
- # 中文版面分析模型
- "picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar",
-}
-DICT_CN = {
- "rec_char_dict_path": "ppocr_keys_v1.txt",
- "layout_dict_path": "layout_cdla_dict.txt",
-}
-
-
-def QImageToCvMat(incomingImage):
- '''
- Converts a QImage into an opencv MAT format
- '''
-
- incomingImage = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888)
-
- width = incomingImage.width()
- height = incomingImage.height()
-
- ptr = incomingImage.bits()
- ptr.setsize(height * width * 4)
- arr = np.frombuffer(ptr, np.uint8).reshape((height, width, 4))
- return arr
-
-
-class APP_Image2Doc(QtWidgets.QWidget):
- def __init__(self):
- super(QtWidgets.QWidget, self).__init__()
- self.pb = None # 进度条
- self.pb_text = "已载入: {} / 已转换: {}"
- self.imagePaths = []
- # self.resultPath = os.path.join(here, "output")
- self.screenShotWg = ScreenShotWidget()
- self.screenShot = None
- self.save_pdf = False
-
- self.vis_font_path = os.path.join(here,
- "doc", "fonts", "simfang.ttf")
-
- # 初始化界面
- self.setupUi()
-
- # 下载模型
- self.downloadModels(URLs_EN)
- self.downloadModels(URLs_CN)
-
- self.structure_sys_en = self.initPredictor('EN')
- self.structure_sys_cn = self.initPredictor('CN')
-
- def setupUi(self):
- self.setObjectName("MainWindow")
- self.setWindowTitle(__APPNAME__ + " " + __VERSION__)
-
- layout = QtWidgets.QGridLayout()
-
- openFileButton = QtWidgets.QPushButton("打开文件")
- openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png")))
- layout.addWidget(openFileButton, 0, 0, 1, 1)
- openFileButton.clicked.connect(self.openFileSlot)
-
- # screenShotButton = QtWidgets.QPushButton("截图识别")
- # layout.addWidget(screenShotButton, 0, 1, 1, 1)
- # screenShotButton.clicked.connect(self.screenShotSlot)
- # screenShotButton.setEnabled(False) # temporarily disenble
-
- startCNShotButton = QtWidgets.QPushButton("中文转换")
- startCNShotButton.setIcon(QIcon(QPixmap("./icons/chinese.png")))
- layout.addWidget(startCNShotButton, 0, 1, 1, 1)
- startCNShotButton.clicked.connect(
- functools.partial(self.startSlot, 'CN'))
-
- startENButton = QtWidgets.QPushButton("英文转换")
- startENButton.setIcon(QIcon(QPixmap("./icons/english.png")))
- layout.addWidget(startENButton, 0, 2, 1, 1)
- startENButton.clicked.connect(
- functools.partial(self.startSlot, 'EN'))
-
- showResultButton = QtWidgets.QPushButton("显示结果")
- showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png")))
- layout.addWidget(showResultButton, 0, 3, 1, 1)
- showResultButton.clicked.connect(self.showResultSlot)
-
- self.pb = QtWidgets.QLabel(
- self.pb_text.format(0, 0))
- layout.addWidget(self.pb, 1, 0, 1, 4)
-
- self.setLayout(layout)
-
- def downloadModels(self, URLs):
- # using custom model
- tar_file_name_list = [
- 'inference.pdiparams',
- 'inference.pdiparams.info',
- 'inference.pdmodel',
- 'model.pdiparams',
- 'model.pdiparams.info',
- 'model.pdmodel'
- ]
- model_path = os.path.join(here, 'inference')
- os.makedirs(model_path, exist_ok=True)
-
- # download and unzip models
- for name in URLs.keys():
- url = URLs[name]
- print("Try downloading file: {}".format(url))
- tarname = url.split('/')[-1]
- tarpath = os.path.join(model_path, tarname)
- if os.path.exists(tarpath):
- print("File have already exist. skip")
- else:
- try:
- download_with_progressbar(url, tarpath)
- except Exception as e:
- print("Error occurred when downloading file, error message:")
- print(e)
-
- # unzip model tar
- try:
- with tarfile.open(tarpath, 'r') as tarObj:
- storage_dir = os.path.join(model_path, name)
- os.makedirs(storage_dir, exist_ok=True)
- for member in tarObj.getmembers():
- filename = None
- for tar_file_name in tar_file_name_list:
- if tar_file_name in member.name:
- filename = tar_file_name
- if filename is None:
- continue
- file = tarObj.extractfile(member)
- with open(
- os.path.join(storage_dir, filename),
- 'wb') as f:
- f.write(file.read())
- except Exception as e:
- print("Error occurred when unziping file, error message:")
- print(e)
-
- def initPredictor(self, lang='EN'):
- # init predictor args
- args = parse_args()
- args.table_max_len = 488
- args.ocr = True
- args.recovery = True
- args.save_pdf = self.save_pdf
- args.table_char_dict_path = os.path.join(here,
- "ppocr", "utils", "dict", "table_structure_dict.txt")
- if lang == 'EN':
- args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
- "inference", "en_PP-OCRv3_det_infer")
- args.rec_model_dir = os.path.join(here,
- "inference", "en_PP-OCRv3_rec_infer")
- args.table_model_dir = os.path.join(here,
- "inference", "en_ppstructure_mobile_v2.0_SLANet_infer")
- args.output = os.path.join(here, "output") # 结果保存路径
- args.layout_model_dir = os.path.join(here,
- "inference", "picodet_lcnet_x1_0_fgd_layout_infer")
- lang_dict = DICT_EN
- elif lang == 'CN':
- args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
- "inference", "cn_PP-OCRv3_det_infer")
- args.rec_model_dir = os.path.join(here,
- "inference", "cn_PP-OCRv3_rec_infer")
- args.table_model_dir = os.path.join(here,
- "inference", "cn_ppstructure_mobile_v2.0_SLANet_infer")
- args.output = os.path.join(here, "output") # 结果保存路径
- args.layout_model_dir = os.path.join(here,
- "inference", "picodet_lcnet_x1_0_fgd_layout_cdla_infer")
- lang_dict = DICT_CN
- else:
- raise ValueError("Unsupported language")
- args.rec_char_dict_path = os.path.join(here,
- "ppocr", "utils",
- lang_dict['rec_char_dict_path'])
- args.layout_dict_path = os.path.join(here,
- "ppocr", "utils", "dict", "layout_dict",
- lang_dict['layout_dict_path'])
- # init predictor
- return StructureSystem(args)
-
- def openFileSlot(self):
- '''
- 可以多选图像文件
- '''
- selectedFiles = QtWidgets.QFileDialog.getOpenFileNames(self,
- "多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0]
- if len(selectedFiles) > 0:
- self.imagePaths = selectedFiles
- self.screenShot = None # discard screenshot temp image
- self.updateProgressBar(len(selectedFiles), 0)
-
- def screenShotSlot(self):
- '''
- 选定图像文件和截图的转换过程只能同时进行一个
- 截图只能同时转换一个
- '''
- self.screenShotWg.start()
- if self.screenShotWg.captureImage:
- self.screenShot = self.screenShotWg.captureImage
- self.imagePaths.clear() # discard openfile temp list
- self.updateProgressBar(1, 0)
-
- def startSlot(self, lang):
- if self.screenShot: # for screenShot
- img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
- image = QImageToCvMat(self.screenShot)
- self.predictAndSave(image, img_name, lang)
- # update Progress Bar
- self.updateProgressBar(1, 1)
- QtWidgets.QMessageBox.information(self,
- u'Information', "文档提取完成")
- elif len(self.imagePaths) > 0 : # for image file selection
- self.output_dir = os.path.join(
- os.path.dirname(self.imagePaths[0]), "output") # output_dir shold be same as imagepath
- os.makedirs(self.output_dir, exist_ok=True)
- for i, image_file in enumerate(self.imagePaths):
- if os.path.basename(image_file)[-3:] in ['pdf']:
- import fitz
- from PIL import Image
- imgs = []
- with fitz.open(image_file) as pdf:
- for pg in range(0, pdf.pageCount):
- page = pdf[pg]
- mat = fitz.Matrix(2, 2)
- pm = page.getPixmap(matrix=mat, alpha=False)
-
- # if width or height > 2000 pixels, don't enlarge the image
- if pm.width > 2000 or pm.height > 2000:
- pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
-
- img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
- img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
- imgs.append(img)
-
- else:
- img = cv2.imread(image_file)
- if img is None:
- print("error in loading image:{}".format(image_file))
- continue
- imgs = [img]
-
- img_name = os.path.basename(image_file).split('.')[0]
- os.makedirs(os.path.join(self.output_dir, img_name), exist_ok=True)
- self.predictAndSave(imgs, img_name, lang)
-
- # update Progress Bar
- self.updateProgressBar(len(self.imagePaths), i+1)
- QtWidgets.QMessageBox.information(self,
- u'Information', "文档提取完成")
- else:
- print('empty input')
-
- def predictAndSave(self, imgs, img_name, lang):
- all_res = []
- for index, img in enumerate(imgs):
- if lang == 'EN':
- res, time_dict = self.structure_sys_en(img)
- elif lang == 'CN':
- res, time_dict = self.structure_sys_cn(img)
-
- # save output
- save_structure_res(res, self.output_dir, img_name)
- draw_img = draw_structure_result(img, res, self.vis_font_path)
- img_save_path = os.path.join(self.output_dir, img_name, 'show_{}.jpg'.format(index))
- if res != []:
- cv2.imwrite(img_save_path, draw_img)
-
- # recovery
- h, w, _ = img.shape
- res = sorted_layout_boxes(res, w)
- all_res += res
-
- try:
- convert_info_docx(img, all_res, self.output_dir, img_name, self.save_pdf)
- except Exception as ex:
- QtWidgets.QMessageBox.information(self,
- u'Information', "error in layout recovery image:{}, err msg: {}".format(
- img_name, ex))
-
- print('result save to {}'.format(self.output_dir))
-
- def showResultSlot(self):
- if os.path.exists(self.output_dir):
- if platform.system() == 'Windows':
- os.startfile(self.output_dir)
- else:
- os.system('open ' + os.path.normpath(self.lastOpenDir))
- else:
- QtWidgets.QMessageBox.information(self,
- u'Information', "输出文件不存在")
-
- def updateProgressBar(self, loaded, finished):
- self.pb.setText(
- self.pb_text.format(loaded, finished))
-
-
-def main():
- app = QtWidgets.QApplication(sys.argv)
-
- window = APP_Image2Doc() # 创建对象
- window.show() # 全屏显示窗口
-
- QtWidgets.QApplication.processEvents()
- sys.exit(app.exec())
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/icons/chinese.png b/ppstructure/pdf2word/icons/chinese.png
similarity index 100%
rename from icons/chinese.png
rename to ppstructure/pdf2word/icons/chinese.png
diff --git a/icons/english.png b/ppstructure/pdf2word/icons/english.png
similarity index 100%
rename from icons/english.png
rename to ppstructure/pdf2word/icons/english.png
diff --git a/icons/folder-open.png b/ppstructure/pdf2word/icons/folder-open.png
similarity index 100%
rename from icons/folder-open.png
rename to ppstructure/pdf2word/icons/folder-open.png
diff --git a/icons/folder-plus.png b/ppstructure/pdf2word/icons/folder-plus.png
similarity index 100%
rename from icons/folder-plus.png
rename to ppstructure/pdf2word/icons/folder-plus.png
diff --git a/PDF2WORD.md b/ppstructure/pdf2word/pdf2word.md
similarity index 74%
rename from PDF2WORD.md
rename to ppstructure/pdf2word/pdf2word.md
index 382a0feb..8d69d607 100644
--- a/PDF2WORD.md
+++ b/ppstructure/pdf2word/pdf2word.md
@@ -1,11 +1,12 @@
# PDF2WORD
-PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系统实现的PDF转换word应用程序,提供可直接安装的exe,方便windows用户运行
+PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析模型实现的PDF转换Word应用程序,提供可直接安装的exe,方便Windows用户运行。
-
+
+
## 1.使用
### 应用程序
@@ -16,17 +17,15 @@ PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系
### 脚本运行
-首次运行需要将
+首次运行需要先将路径切换到 `/ppstructure/pdf2word`,然后运行代码
```
python pdf2word.py
```
-
-
## 2.自行打包
-PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `ppstructure` 文件夹下运行下方指令
+PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `PaddleOCR` 文件夹下运行下方指令
```
cd ./
@@ -34,9 +33,11 @@ mv ./ppstructure/pdf2word .. -r
python GenEXE.py
```
-
-
## 3.软件下载
如需获取已打包程序,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群免费获取20G OCR学习大礼包,内含OCR场景应用集合(包含数码管、液晶屏、车牌、高精度SVTR模型等7个垂类模型)、《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料
+
+
+
+
diff --git a/ppstructure/pdf2word/pdf2word.py b/ppstructure/pdf2word/pdf2word.py
new file mode 100644
index 00000000..add8a6f4
--- /dev/null
+++ b/ppstructure/pdf2word/pdf2word.py
@@ -0,0 +1,441 @@
+import sys
+import tarfile
+import os
+import time
+import datetime
+import functools
+import cv2
+import platform
+import numpy as np
+from qtpy.QtWidgets import QApplication, QWidget, QPushButton, QProgressBar, \
+ QGridLayout, QMessageBox, QLabel, QFileDialog
+from qtpy.QtCore import Signal, QThread, QObject
+from qtpy.QtGui import QImage, QPixmap, QIcon
+
+file = os.path.dirname(os.path.abspath(__file__))
+root = os.path.abspath(os.path.join(file, '../../'))
+sys.path.append(file)
+sys.path.insert(0, root)
+
+from ppstructure.predict_system import StructureSystem, save_structure_res
+from ppstructure.utility import parse_args, draw_structure_result
+from ppocr.utils.network import download_with_progressbar
+from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
+from ScreenShotWidget import ScreenShotWidget
+
+__APPNAME__ = "pdf2word"
+__VERSION__ = "0.1.1"
+
+# Model archives fetched by APP_Image2Doc.downloadModels. Each key is the
+# directory name (under <root>/inference) the archive is unpacked into.
+URLs_EN = {
+ # Detection model of the English ultra-lightweight PP-OCRv3 (download and unpack)
+ "en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar",
+ # Recognition model of the English lightweight PP-OCRv3 (download and unpack)
+ "en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar",
+ # Ultra-lightweight English table structure model (download and unpack)
+ "en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
+ # English layout analysis model
+ "picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar",
+}
+# Dictionary file names used with the English models (joined to paths in initPredictor).
+DICT_EN = {
+ "rec_char_dict_path": "en_dict.txt",
+ "layout_dict_path": "layout_publaynet_dict.txt",
+}
+
+URLs_CN = {
+ # Detection model of the Chinese ultra-lightweight PP-OCRv3 (download and unpack)
+ "cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar",
+ # Recognition model of the Chinese lightweight PP-OCRv3 (download and unpack)
+ "cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar",
+ # Table structure model (download and unpack).
+ # NOTE(review): the URL points at the *English* SLANet archive - confirm this is intentional.
+ "cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
+ # Chinese layout analysis model
+ "picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar",
+}
+# Dictionary file names used with the Chinese models (joined to paths in initPredictor).
+DICT_CN = {
+ "rec_char_dict_path": "ppocr_keys_v1.txt",
+ "layout_dict_path": "layout_cdla_dict.txt",
+}
+
+
+
+def QImageToCvMat(incomingImage) -> np.ndarray:
+    """Convert a QImage into a NumPy array usable by OpenCV.
+
+    The image is first converted to ``Format_RGBA8888`` so that every pixel
+    occupies exactly four bytes.
+
+    Args:
+        incomingImage: The ``QImage`` to convert. It is not modified.
+
+    Returns:
+        A ``uint8`` array of shape ``(height, width, 4)`` holding the pixels
+        in RGBA channel order. The array owns its own copy of the data.
+    """
+    rgba = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888)
+
+    width = rgba.width()
+    height = rgba.height()
+
+    ptr = rgba.bits()
+    # Some bindings return a pointer object whose size has to be declared
+    # before it can be read as a buffer; others hand back an already sized
+    # buffer that has no ``setsize`` (presumably PySide - TODO confirm).
+    if hasattr(ptr, "setsize"):
+        ptr.setsize(height * width * 4)
+    view = np.frombuffer(ptr, np.uint8).reshape((height, width, 4))
+    # ``view`` aliases memory owned by the local ``rgba`` image, which goes
+    # away when this function returns - hand out an independent copy.
+    return view.copy()
+
+
+def readImage(image_file) -> list:
+    """Load an image file or a PDF as a list of BGR images.
+
+    PDF pages are rendered at 2x zoom; a page whose 2x rendering is wider or
+    taller than 2000 pixels is rendered again at 1x instead.
+
+    Args:
+        image_file: Path of the image or PDF file to load.
+
+    Returns:
+        A list of ``numpy`` arrays in BGR channel order: one entry per PDF
+        page, or a single entry for an image file. The list is empty when
+        the image cannot be decoded or the PDF has no pages.
+    """
+    imgs = []
+    # Compare the real extension, case-insensitively, so "SCAN.PDF" is
+    # handled as a PDF and a name merely ending in "pdf" is not.
+    if os.path.splitext(image_file)[1].lower() == '.pdf':
+        import fitz
+        from PIL import Image
+        with fitz.open(image_file) as pdf:
+            for pg in range(len(pdf)):
+                page = pdf[pg]
+                # PyMuPDF renamed getPixmap to get_pixmap; accept either so
+                # both old and new releases work.
+                get_pixmap = getattr(page, "get_pixmap", None) or page.getPixmap
+                pm = get_pixmap(matrix=fitz.Matrix(2, 2), alpha=False)
+
+                # if width or height > 2000 pixels, don't enlarge the image
+                if pm.width > 2000 or pm.height > 2000:
+                    pm = get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
+
+                img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
+                imgs.append(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
+    else:
+        img = cv2.imread(image_file, cv2.IMREAD_COLOR)
+        # cv2.imread returns None for unreadable files; report that as an
+        # empty list instead of failing on an unbound variable.
+        if img is not None:
+            imgs.append(img)
+
+    return imgs
+
+
+class Worker(QThread):
+    """Background thread that converts the selected files one after another.
+
+    Signals:
+        progressBarValue(int): number of input files handled so far
+            (converted or skipped), emitted after each file.
+        endsignal(): emitted once when the batch is over, whether it
+            completed, was stopped through ``loopFlag`` or failed.
+    """
+    progressBarValue = Signal(int)
+    endsignal = Signal()
+    # Cooperative stop switch: run() stops before the next file once this is False.
+    loopFlag = True
+
+    def __init__(self, predictors, save_pdf, vis_font_path):
+        """Store the conversion settings.
+
+        Args:
+            predictors: Mapping from language code to a callable predictor;
+                it is indexed with the value given to ``setLang``.
+            save_pdf: Forwarded to ``convert_info_docx``.
+            vis_font_path: Font file forwarded to ``draw_structure_result``.
+        """
+        super(Worker, self).__init__()
+        self.predictors = predictors
+        self.save_pdf = save_pdf
+        self.vis_font_path = vis_font_path
+        self.lang = 'EN'
+        self.imagePaths = []
+        self.outputDir = None
+        self.setStackSize(1024*1024)
+
+    def setImagePath(self, imagePaths):
+        """Set the list of files the next run() will convert."""
+        self.imagePaths = imagePaths
+
+    def setLang(self, lang):
+        """Select which entry of ``predictors`` the next run() uses."""
+        self.lang = lang
+
+    def setOutputDir(self, outputDir):
+        """Set the directory that results are written to."""
+        self.outputDir = outputDir
+
+    def predictAndSave(self, imgs, img_name):
+        """Analyse every page image and write the recovered document.
+
+        Args:
+            imgs: Page images of one input file, as ``numpy`` arrays.
+            img_name: Name used for the result sub-directory and files.
+        """
+        if len(imgs) == 0:
+            # Nothing to analyse; the recovery step below needs a page image.
+            return
+
+        all_res = []
+        for index, img in enumerate(imgs):
+            res, _ = self.predictors[self.lang](img)
+
+            # save output
+            save_structure_res(res, self.outputDir, img_name)
+            draw_img = draw_structure_result(img, res, self.vis_font_path)
+            img_save_path = os.path.join(self.outputDir, img_name, 'show_{}.jpg'.format(index))
+            if res != []:
+                cv2.imwrite(img_save_path, draw_img)
+
+            # recovery: order the layout boxes using the page width
+            w = img.shape[1]
+            res = sorted_layout_boxes(res, w)
+            all_res += res
+
+        try:
+            # ``img`` is the last page image at this point.
+            convert_info_docx(img, all_res, self.outputDir, img_name, self.save_pdf)
+        except Exception as ex:
+            print("error in layout recovery image:{}, err msg: {}".format(
+                img_name, ex))
+
+        print('result save to {}'.format(self.outputDir))
+
+    def run(self):
+        """Thread body: convert every file in ``imagePaths``.
+
+        ``endsignal`` is always emitted so listeners can leave their busy
+        state; an unexpected error is printed and re-raised afterwards.
+        """
+        try:
+            os.makedirs(self.outputDir, exist_ok=True)
+            for done, image_file in enumerate(self.imagePaths, start=1):
+                if not self.loopFlag:
+                    break
+                imgs = readImage(image_file)
+                if len(imgs) > 0:
+                    img_name = os.path.basename(image_file).split('.')[0]
+                    os.makedirs(os.path.join(self.outputDir, img_name), exist_ok=True)
+                    self.predictAndSave(imgs, img_name)
+                # Count skipped files as well, otherwise the progress bar
+                # can never reach its maximum.
+                self.progressBarValue.emit(done)
+        except Exception as e:
+            print(e)
+            raise
+        finally:
+            self.endsignal.emit()
+        # Enter this thread's event loop. run() only returns after quit() or
+        # exit() is called on the thread; until then isRunning() stays True
+        # and another start() has no effect.
+        self.exec()
+
+
+class APP_Image2Doc(QWidget):
+    """Main window of the pdf2word tool.
+
+    Lets the user pick image/PDF files, converts them with the Chinese or
+    English predictor on a background ``Worker`` thread, and shows progress
+    together with an estimate of the remaining time.
+    """
+
+    def __init__(self):
+        """Build the UI, download the models and create the worker thread."""
+        super().__init__()
+        self.setFixedHeight(90)
+        self.setFixedWidth(400)
+
+        # settings
+        self.imagePaths = []
+        self.screenShotWg = ScreenShotWidget()
+        self.screenShot = None
+        self.save_pdf = False
+        self.output_dir = None
+        self.vis_font_path = os.path.join(root,
+                                          "doc", "fonts", "simfang.ttf")
+
+        # ProgressBar
+        self.pb = QProgressBar()
+        self.pb.setRange(0, 100)
+        self.pb.setValue(0)
+
+        # build the widgets
+        self.setupUi()
+
+        # download the models
+        self.downloadModels(URLs_EN)
+        self.downloadModels(URLs_CN)
+
+        # create one predictor per language
+        predictors = {
+            'EN': self.initPredictor('EN'),
+            'CN': self.initPredictor('CN'),
+        }
+
+        # Worker thread. The same Worker object is reused for every
+        # conversion, so its ``finished`` signal must not be wired to
+        # deleteLater.
+        self._thread = Worker(predictors, self.save_pdf, self.vis_font_path)
+        self._thread.progressBarValue.connect(self.handleProgressBarSingal)
+        self._thread.endsignal.connect(self.handleEndsignalSignal)
+        self.time_start = 0  # save start time
+
+    def setupUi(self):
+        """Create the buttons, the progress bar and the time label."""
+        self.setObjectName("MainWindow")
+        self.setWindowTitle(__APPNAME__ + " " + __VERSION__)
+
+        # Resolve icons next to this script so they load regardless of the
+        # current working directory.
+        icon_dir = os.path.join(file, "icons")
+
+        layout = QGridLayout()
+
+        self.openFileButton = QPushButton("打开文件")
+        self.openFileButton.setIcon(
+            QIcon(QPixmap(os.path.join(icon_dir, "folder-plus.png"))))
+        layout.addWidget(self.openFileButton, 0, 0, 1, 1)
+        self.openFileButton.clicked.connect(self.handleOpenFileSignal)
+
+        # The screenshot button is not created, so screenShotSlot is
+        # currently unreachable from the UI.
+
+        self.startCNButton = QPushButton("中文转换")
+        self.startCNButton.setIcon(
+            QIcon(QPixmap(os.path.join(icon_dir, "chinese.png"))))
+        layout.addWidget(self.startCNButton, 0, 1, 1, 1)
+        self.startCNButton.clicked.connect(
+            functools.partial(self.handleStartSignal, 'CN'))
+
+        self.startENButton = QPushButton("英文转换")
+        self.startENButton.setIcon(
+            QIcon(QPixmap(os.path.join(icon_dir, "english.png"))))
+        layout.addWidget(self.startENButton, 0, 2, 1, 1)
+        self.startENButton.clicked.connect(
+            functools.partial(self.handleStartSignal, 'EN'))
+
+        self.showResultButton = QPushButton("显示结果")
+        self.showResultButton.setIcon(
+            QIcon(QPixmap(os.path.join(icon_dir, "folder-open.png"))))
+        layout.addWidget(self.showResultButton, 0, 3, 1, 1)
+        self.showResultButton.clicked.connect(self.handleShowResultSignal)
+
+        # ProgressBar
+        layout.addWidget(self.pb, 2, 0, 1, 4)
+        # time estimate label
+        self.timeEstLabel = QLabel(
+            ("Time Left: --"))
+        layout.addWidget(self.timeEstLabel, 3, 0, 1, 4)
+
+        self.setLayout(layout)
+
+    def downloadModels(self, URLs):
+        """Download and unpack the model archives listed in ``URLs``.
+
+        Archives are stored in ``<root>/inference``. An archive that already
+        exists is not downloaded again. Only the model files named in
+        ``tar_file_name_list`` are extracted, into ``<root>/inference/<key>``.
+        Errors are printed and the remaining archives are still processed.
+
+        Args:
+            URLs: Mapping from target directory name to archive URL.
+        """
+        # file names to pull out of each archive
+        tar_file_name_list = [
+            'inference.pdiparams',
+            'inference.pdiparams.info',
+            'inference.pdmodel',
+            'model.pdiparams',
+            'model.pdiparams.info',
+            'model.pdmodel'
+        ]
+        model_path = os.path.join(root, 'inference')
+        os.makedirs(model_path, exist_ok=True)
+
+        # download and unzip models
+        for name, url in URLs.items():
+            print("Try downloading file: {}".format(url))
+            tarname = url.split('/')[-1]
+            tarpath = os.path.join(model_path, tarname)
+            if os.path.exists(tarpath):
+                print("File have already exist. skip")
+            else:
+                try:
+                    download_with_progressbar(url, tarpath)
+                except Exception as e:
+                    print("Error occurred when downloading file, error message:")
+                    print(e)
+                    # Remove a partial archive, otherwise the "already
+                    # exist" check above would block every later retry.
+                    try:
+                        if os.path.exists(tarpath):
+                            os.remove(tarpath)
+                    except OSError as rm_err:
+                        print(rm_err)
+                    continue
+
+            # unzip model tar
+            try:
+                with tarfile.open(tarpath, 'r') as tarObj:
+                    storage_dir = os.path.join(model_path, name)
+                    os.makedirs(storage_dir, exist_ok=True)
+                    for member in tarObj.getmembers():
+                        matches = [n for n in tar_file_name_list
+                                   if n in member.name]
+                        if not matches:
+                            continue
+                        # The last match wins, so "x.pdiparams.info" is not
+                        # mistaken for its prefix "x.pdiparams".
+                        filename = matches[-1]
+                        src = tarObj.extractfile(member)
+                        if src is None:  # not a regular file
+                            continue
+                        with src, open(os.path.join(storage_dir, filename),
+                                       'wb') as dst:
+                            dst.write(src.read())
+            except Exception as e:
+                print("Error occurred when unziping file, error message:")
+                print(e)
+
+    def initPredictor(self, lang='EN'):
+        """Create the ``StructureSystem`` predictor for one language.
+
+        Args:
+            lang: ``'EN'`` or ``'CN'``.
+
+        Returns:
+            A ``StructureSystem`` built from the parsed default arguments
+            with the model and dictionary paths of ``lang`` filled in.
+
+        Raises:
+            ValueError: If ``lang`` is neither ``'EN'`` nor ``'CN'``.
+        """
+        # per language: (det dir, rec dir, table dir, layout dir, dict names)
+        model_dirs = {
+            'EN': ("en_PP-OCRv3_det_infer",
+                   "en_PP-OCRv3_rec_infer",
+                   "en_ppstructure_mobile_v2.0_SLANet_infer",
+                   "picodet_lcnet_x1_0_fgd_layout_infer",
+                   DICT_EN),
+            'CN': ("cn_PP-OCRv3_det_infer",
+                   "cn_PP-OCRv3_rec_infer",
+                   "cn_ppstructure_mobile_v2.0_SLANet_infer",
+                   "picodet_lcnet_x1_0_fgd_layout_cdla_infer",
+                   DICT_CN),
+        }
+
+        # init predictor args
+        args = parse_args()
+        args.table_max_len = 488
+        args.ocr = True
+        args.recovery = True
+        args.save_pdf = self.save_pdf
+        args.table_char_dict_path = os.path.join(
+            root, "ppocr", "utils", "dict", "table_structure_dict.txt")
+
+        if lang not in model_dirs:
+            raise ValueError("Unsupported language")
+        det_dir, rec_dir, table_dir, layout_dir, lang_dict = model_dirs[lang]
+
+        # models are looked up where downloadModels unpacked them
+        args.det_model_dir = os.path.join(root, "inference", det_dir)
+        args.rec_model_dir = os.path.join(root, "inference", rec_dir)
+        args.table_model_dir = os.path.join(root, "inference", table_dir)
+        args.layout_model_dir = os.path.join(root, "inference", layout_dir)
+        args.output = os.path.join(root, "output")  # result directory
+
+        args.rec_char_dict_path = os.path.join(
+            root, "ppocr", "utils", lang_dict['rec_char_dict_path'])
+        args.layout_dict_path = os.path.join(
+            root, "ppocr", "utils", "dict", "layout_dict",
+            lang_dict['layout_dict_path'])
+        # init predictor
+        return StructureSystem(args)
+
+    def handleOpenFileSignal(self):
+        """Ask for the input files (multiple selection allowed)."""
+        selectedFiles = QFileDialog.getOpenFileNames(
+            self,
+            "多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0]
+        if len(selectedFiles) > 0:
+            self.imagePaths = selectedFiles
+            self.screenShot = None  # discard screenshot temp image
+            self.pb.setRange(0, len(self.imagePaths))
+            self.pb.setValue(0)
+
+    def screenShotSlot(self):
+        """Capture a screenshot as the conversion input.
+
+        File selection and screenshot are mutually exclusive, and only one
+        screenshot is kept at a time.
+        """
+        self.screenShotWg.start()
+        if self.screenShotWg.captureImage:
+            self.screenShot = self.screenShotWg.captureImage
+            self.imagePaths.clear()  # discard openfile temp list
+            self.pb.setRange(0, 1)
+            self.pb.setValue(0)
+
+    def handleStartSignal(self, lang):
+        """Start a conversion with the predictor of ``lang``.
+
+        A pending screenshot is converted synchronously; selected files are
+        converted on the worker thread.
+
+        Args:
+            lang: ``'EN'`` or ``'CN'``.
+        """
+        if self.screenShot:  # for screenShot
+            img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
+            # QImageToCvMat yields RGBA while files are loaded as BGR.
+            # NOTE(review): assumes the predictors expect BGR input - confirm.
+            image = cv2.cvtColor(QImageToCvMat(self.screenShot),
+                                 cv2.COLOR_RGBA2BGR)
+            # A screenshot has no source folder; use <root>/output.
+            self.output_dir = os.path.join(root, "output")
+            os.makedirs(os.path.join(self.output_dir, img_name), exist_ok=True)
+            # The prediction code lives on the worker; call it directly
+            # (synchronously) for the single screenshot image.
+            self._thread.setOutputDir(self.output_dir)
+            self._thread.setLang(lang)
+            self._thread.predictAndSave([image], img_name)
+            # update Progress Bar
+            self.pb.setValue(1)
+            QMessageBox.information(self,
+                                    u'Information', "文档提取完成")
+        elif len(self.imagePaths) > 0:  # for image file selection
+            # Results go to an "output" folder next to the first input file.
+            self.output_dir = os.path.join(
+                os.path.dirname(self.imagePaths[0]), "output")
+            # Worker.run() ends by entering the thread's event loop, so after
+            # a previous batch the thread may still count as running, and
+            # QThread.start() does nothing on a running thread. Stop it first.
+            if self._thread.isRunning():
+                self._thread.quit()
+                self._thread.wait()
+            # Must set output dir, image path list and language before start
+            self._thread.setOutputDir(self.output_dir)
+            self._thread.setImagePath(self.imagePaths)
+            self._thread.setLang(lang)
+            # reset the progress display for the new batch
+            self.pb.setValue(0)
+            self.timeEstLabel.setText("Time Left: --")
+            # disable buttons while the conversion runs
+            self.openFileButton.setEnabled(False)
+            self.startCNButton.setEnabled(False)
+            self.startENButton.setEnabled(False)
+            # record the start time before the worker can report progress
+            self.time_start = time.time()
+            self._thread.start()
+            QMessageBox.information(self,
+                                    u'Information', "开始转换")
+        else:
+            QMessageBox.warning(self,
+                                u'Information', "请选择要识别的文件或截图")
+
+    def handleShowResultSignal(self):
+        """Open the output directory in the system file browser."""
+        if self.output_dir is None:
+            return
+        if not os.path.exists(self.output_dir):
+            QMessageBox.information(self,
+                                    u'Information', "输出文件不存在")
+            return
+
+        system = platform.system()
+        if system == 'Windows':
+            os.startfile(self.output_dir)
+            return
+
+        import subprocess
+        # "open" exists on macOS only; other desktops provide xdg-open.
+        opener = 'open' if system == 'Darwin' else 'xdg-open'
+        try:
+            # Argument list instead of a shell string, so paths containing
+            # spaces or shell metacharacters are passed through unchanged.
+            subprocess.run([opener, os.path.normpath(self.output_dir)],
+                           check=False)
+        except OSError as e:
+            print(e)
+
+    def handleProgressBarSingal(self, i):
+        """Update the progress bar and the remaining-time estimate.
+
+        Args:
+            i: Number of files handled so far, as reported by the worker.
+        """
+        self.pb.setValue(i)
+        if i <= 0:
+            # No finished file yet, so there is no average to extrapolate.
+            return
+        # calculate time left of recognition
+        lenbar = self.pb.maximum()
+        # Use the average time per file to smooth out fluctuations.
+        avg_time = (time.time() - self.time_start) / i
+        # str(timedelta) may end in ".microseconds"; keep only H:MM:SS.
+        time_left = str(datetime.timedelta(seconds=avg_time * (lenbar - i))).split(".")[0]
+        self.timeEstLabel.setText(f"Time Left: {time_left}")  # show time left
+
+    def handleEndsignalSignal(self):
+        """Leave the busy state once the worker reports the batch is over."""
+        # Let the worker leave its event loop so the thread really finishes.
+        self._thread.quit()
+        # enable buttons
+        self.openFileButton.setEnabled(True)
+        self.startCNButton.setEnabled(True)
+        self.startENButton.setEnabled(True)
+        QMessageBox.information(self, u'Information', "转换结束")
+
+
+def main():
+    """Create the Qt application, show the main window and run the event loop.
+
+    The process exits with the return value of the event loop.
+    """
+    qt_app = QApplication(sys.argv)
+
+    # Constructing the window also downloads and initialises the models.
+    main_window = APP_Image2Doc()
+    main_window.show()
+
+    QApplication.processEvents()
+    exit_code = qt_app.exec()
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
--
GitLab