提交 1a0a75e3 编写于 作者: qq_25193841's avatar qq_25193841

Add pdf2word exe

Add pdf2word exe
上级 d18e8172
# PDF2WORD
PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系统实现的PDF转换word应用程序,提供可直接安装的exe,方便windows用户运行
PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析模型实现的PDF转换Word应用程序,提供可直接安装的exe,方便 Windows 用户运行
<div align="center">
<img src="./doc/imgs_results/PP-OCRv3/en/en_4.png" width="800">
<img src="./doc/imgs_results/PP-OCRv3/en/en_4.png" width="200">
</div>
## 1.使用
### 应用程序
......@@ -16,17 +17,15 @@ PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系
### 脚本运行
首次运行需要将
首次运行需要先将路径切换到 `/ppstructure/pdf2word`,然后运行代码
```
python pdf2word.py
```
## 2.自行打包
PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `ppstructure` 文件夹下运行下方指令
PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `PaddleOCR` 文件夹下运行下方指令
```
cd ./
......@@ -34,9 +33,11 @@ mv ./ppstructure/pdf2word .. -r
python GenEXE.py
```
## 3.软件下载
如需获取已打包程序,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群免费获取20G OCR学习大礼包,内含OCR场景应用集合(包含数码管、液晶屏、车牌、高精度SVTR模型等7个垂类模型)、《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料
<div align="center">
<img src="https://user-images.githubusercontent.com/50011306/186369636-35f2008b-df5a-4784-b1f5-cebebcb2b7a5.jpg" width = "150" height = "150" />
</div>
......@@ -2,22 +2,30 @@ import sys
import tarfile
import os
import time
import datetime
import functools
import cv2
import platform
import numpy as np
from qtpy import QtWidgets
from qtpy.QtWidgets import QApplication, QWidget, QPushButton, QProgressBar, \
QGridLayout, QMessageBox, QLabel, QFileDialog
from qtpy.QtCore import Signal, QThread, QObject
from qtpy.QtGui import QImage, QPixmap, QIcon
file = os.path.dirname(os.path.abspath(__file__))
root = os.path.abspath(os.path.join(file, '../../'))
sys.path.append(file)
sys.path.insert(0, root)
from ppstructure.predict_system import StructureSystem, save_structure_res
from ppstructure.utility import parse_args, draw_structure_result
from ppocr.utils.network import download_with_progressbar
from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
from ScreenShotWidget import ScreenShotWidget
__APPNAME__ = "Image2Doc"
__VERSION__ = "0.0.2"
here = os.path.dirname(os.path.abspath(__file__))
__APPNAME__ = "pdf2word"
__VERSION__ = "0.1.1"
URLs_EN = {
# 下载超英文轻量级PP-OCRv3模型的检测模型并解压
"en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar",
......@@ -49,7 +57,8 @@ DICT_CN = {
}
def QImageToCvMat(incomingImage):
def QImageToCvMat(incomingImage) -> np.array:
'''
Converts a QImage into an opencv MAT format
'''
......@@ -65,20 +74,125 @@ def QImageToCvMat(incomingImage):
return arr
class APP_Image2Doc(QtWidgets.QWidget):
def readImage(image_file) -> list:
    """Load an input file as a list of BGR images.

    A file whose extension is ``.pdf`` (compared case-insensitively) is
    rendered page by page with PyMuPDF; any other file is read with
    ``cv2.imread``.

    Args:
        image_file: Path of the image or PDF file to load.

    Returns:
        A list of images, one per PDF page or a single entry for an image
        file. The list is empty when ``cv2.imread`` yields ``None``, so the
        caller can test ``len(imgs) == 0`` instead of hitting an unbound
        local variable.
    """
    if os.path.splitext(image_file)[1].lower() != '.pdf':
        img = cv2.imread(image_file, cv2.IMREAD_COLOR)
        return [] if img is None else [img]

    # Imported lazily so the PDF dependencies are only needed for PDF input.
    import fitz
    from PIL import Image

    imgs = []
    with fitz.open(image_file) as pdf:
        # len(pdf) replaces the legacy ``pageCount`` attribute.
        for pg in range(len(pdf)):
            page = pdf[pg]
            # Prefer the snake_case renderer and fall back to the legacy
            # camelCase name so both old and new PyMuPDF releases work.
            render = getattr(page, 'get_pixmap', None) or page.getPixmap
            pm = render(matrix=fitz.Matrix(2, 2), alpha=False)
            # if width or height > 2000 pixels, don't enlarge the image
            if pm.width > 2000 or pm.height > 2000:
                pm = render(matrix=fitz.Matrix(1, 1), alpha=False)
            img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
            imgs.append(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
    return imgs
class Worker(QThread):
    """Thread that converts the selected files and reports progress.

    Signals:
        progressBarValue(int): emitted after each converted file with the
            number of files converted so far.
        endsignal(): emitted once the loop over the input files has ended.
    """

    progressBarValue = Signal(int)
    endsignal = Signal()
    # run() stops before the next file as soon as this flag is falsy.
    loopFlag = True

    def __init__(self, predictors, save_pdf, vis_font_path):
        """Store the predictors and output options.

        Args:
            predictors: Mapping from language key (e.g. ``'EN'``) to a
                callable that takes an image and returns ``(res, time_dict)``.
            save_pdf: Forwarded to ``convert_info_docx``.
            vis_font_path: Font path forwarded to ``draw_structure_result``.
        """
        super(Worker, self).__init__()
        self.predictors = predictors
        self.save_pdf = save_pdf
        self.vis_font_path = vis_font_path
        self.lang = 'EN'
        self.imagePaths = []
        self.outputDir = None
        self.setStackSize(1024 * 1024)

    def setImagePath(self, imagePaths):
        """Set the list of input file paths processed by ``run``."""
        self.imagePaths = imagePaths

    def setLang(self, lang):
        """Set the key used to pick a predictor from ``self.predictors``."""
        self.lang = lang

    def setOutputDir(self, outputDir):
        """Set the directory that receives all results."""
        self.outputDir = outputDir

    def predictAndSave(self, imgs, img_name):
        """Run structure analysis on every image and export the document.

        Args:
            imgs: Images belonging to one input file (one per page).
            img_name: Name of the result sub-directory and output files.
        """
        if not imgs:
            # Without any page there is nothing to recover, and the export
            # call below would reference an image that was never bound.
            return

        predictor = self.predictors[self.lang]
        all_res = []
        for index, img in enumerate(imgs):
            res, _ = predictor(img)

            # save output
            save_structure_res(res, self.outputDir, img_name)
            draw_img = draw_structure_result(img, res, self.vis_font_path)
            img_save_path = os.path.join(
                self.outputDir, img_name, 'show_{}.jpg'.format(index))
            if res:
                cv2.imwrite(img_save_path, draw_img)

            # recovery
            _, w, _ = img.shape
            all_res += sorted_layout_boxes(res, w)

        try:
            # The last page image is passed on, as in the original code.
            convert_info_docx(img, all_res, self.outputDir, img_name,
                              self.save_pdf)
        except Exception as ex:
            # Best effort: report the failure and carry on with other files.
            print("error in layout recovery image:{}, err msg: {}".format(
                img_name, ex))
        print('result save to {}'.format(self.outputDir))

    def run(self):
        """Convert every configured file, emitting progress after each one."""
        try:
            findex = 0
            os.makedirs(self.outputDir, exist_ok=True)
            for image_file in self.imagePaths:
                if not self.loopFlag:
                    break
                imgs = readImage(image_file)
                if len(imgs) == 0:
                    # Unreadable or empty input: skipped without progress.
                    continue
                img_name = os.path.basename(image_file).split('.')[0]
                os.makedirs(
                    os.path.join(self.outputDir, img_name), exist_ok=True)
                self.predictAndSave(imgs, img_name)
                findex += 1
                self.progressBarValue.emit(findex)
            self.endsignal.emit()
            # NOTE(review): exec() enters the thread's event loop, so run()
            # does not return here - confirm that a later start() call on
            # this same Worker still triggers a new conversion.
            self.exec()
        except Exception as e:
            print(e)
            raise
class APP_Image2Doc(QWidget):
def __init__(self):
super(QtWidgets.QWidget, self).__init__()
self.pb = None # 进度条
self.pb_text = "已载入: {} / 已转换: {}"
super().__init__()
self.setFixedHeight(90)
self.setFixedWidth(400)
# settings
self.imagePaths = []
# self.resultPath = os.path.join(here, "output")
self.screenShotWg = ScreenShotWidget()
self.screenShot = None
self.save_pdf = False
self.vis_font_path = os.path.join(here,
self.output_dir = None
self.vis_font_path = os.path.join(root,
"doc", "fonts", "simfang.ttf")
# ProgressBar
self.pb = QProgressBar()
self.pb.setRange(0, 100)
self.pb.setValue(0)
# 初始化界面
self.setupUi()
......@@ -86,45 +200,58 @@ class APP_Image2Doc(QtWidgets.QWidget):
self.downloadModels(URLs_EN)
self.downloadModels(URLs_CN)
self.structure_sys_en = self.initPredictor('EN')
self.structure_sys_cn = self.initPredictor('CN')
# 初始化模型
predictors = {
'EN': self.initPredictor('EN'),
'CN': self.initPredictor('CN'),
}
# 设置工作进程
self._thread = Worker(predictors, self.save_pdf, self.vis_font_path)
self._thread.progressBarValue.connect(self.handleProgressBarSingal)
self._thread.endsignal.connect(self.handleEndsignalSignal)
self._thread.finished.connect(QObject.deleteLater)
self.time_start = 0 # save start time
def setupUi(self):
self.setObjectName("MainWindow")
self.setWindowTitle(__APPNAME__ + " " + __VERSION__)
layout = QtWidgets.QGridLayout()
layout = QGridLayout()
openFileButton = QtWidgets.QPushButton("打开文件")
openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png")))
layout.addWidget(openFileButton, 0, 0, 1, 1)
openFileButton.clicked.connect(self.openFileSlot)
self.openFileButton = QPushButton("打开文件")
self.openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png")))
layout.addWidget(self.openFileButton, 0, 0, 1, 1)
self.openFileButton.clicked.connect(self.handleOpenFileSignal)
# screenShotButton = QtWidgets.QPushButton("截图识别")
# screenShotButton = QPushButton("截图识别")
# layout.addWidget(screenShotButton, 0, 1, 1, 1)
# screenShotButton.clicked.connect(self.screenShotSlot)
# screenShotButton.setEnabled(False) # temporarily disenble
startCNShotButton = QtWidgets.QPushButton("中文转换")
startCNShotButton.setIcon(QIcon(QPixmap("./icons/chinese.png")))
layout.addWidget(startCNShotButton, 0, 1, 1, 1)
startCNShotButton.clicked.connect(
functools.partial(self.startSlot, 'CN'))
startENButton = QtWidgets.QPushButton("英文转换")
startENButton.setIcon(QIcon(QPixmap("./icons/english.png")))
layout.addWidget(startENButton, 0, 2, 1, 1)
startENButton.clicked.connect(
functools.partial(self.startSlot, 'EN'))
showResultButton = QtWidgets.QPushButton("显示结果")
showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png")))
layout.addWidget(showResultButton, 0, 3, 1, 1)
showResultButton.clicked.connect(self.showResultSlot)
self.pb = QtWidgets.QLabel(
self.pb_text.format(0, 0))
layout.addWidget(self.pb, 1, 0, 1, 4)
self.startCNButton = QPushButton("中文转换")
self.startCNButton.setIcon(QIcon(QPixmap("./icons/chinese.png")))
layout.addWidget(self.startCNButton, 0, 1, 1, 1)
self.startCNButton.clicked.connect(
functools.partial(self.handleStartSignal, 'CN'))
self.startENButton = QPushButton("英文转换")
self.startENButton.setIcon(QIcon(QPixmap("./icons/english.png")))
layout.addWidget(self.startENButton, 0, 2, 1, 1)
self.startENButton.clicked.connect(
functools.partial(self.handleStartSignal, 'EN'))
self.showResultButton = QPushButton("显示结果")
self.showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png")))
layout.addWidget(self.showResultButton, 0, 3, 1, 1)
self.showResultButton.clicked.connect(self.handleShowResultSignal)
# ProgressBar
layout.addWidget(self.pb, 2, 0, 1, 4)
# time estimate label
self.timeEstLabel = QLabel(
("Time Left: --"))
layout.addWidget(self.timeEstLabel, 3, 0, 1, 4)
self.setLayout(layout)
......@@ -138,7 +265,7 @@ class APP_Image2Doc(QtWidgets.QWidget):
'model.pdiparams.info',
'model.pdmodel'
]
model_path = os.path.join(here, 'inference')
model_path = os.path.join(root, 'inference')
os.makedirs(model_path, exist_ok=True)
# download and unzip models
......@@ -184,51 +311,52 @@ class APP_Image2Doc(QtWidgets.QWidget):
args.ocr = True
args.recovery = True
args.save_pdf = self.save_pdf
args.table_char_dict_path = os.path.join(here,
args.table_char_dict_path = os.path.join(root,
"ppocr", "utils", "dict", "table_structure_dict.txt")
if lang == 'EN':
args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
args.det_model_dir = os.path.join(root, # 此处从这里找到模型存放位置
"inference", "en_PP-OCRv3_det_infer")
args.rec_model_dir = os.path.join(here,
args.rec_model_dir = os.path.join(root,
"inference", "en_PP-OCRv3_rec_infer")
args.table_model_dir = os.path.join(here,
args.table_model_dir = os.path.join(root,
"inference", "en_ppstructure_mobile_v2.0_SLANet_infer")
args.output = os.path.join(here, "output") # 结果保存路径
args.layout_model_dir = os.path.join(here,
args.output = os.path.join(root, "output") # 结果保存路径
args.layout_model_dir = os.path.join(root,
"inference", "picodet_lcnet_x1_0_fgd_layout_infer")
lang_dict = DICT_EN
elif lang == 'CN':
args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
args.det_model_dir = os.path.join(root, # 此处从这里找到模型存放位置
"inference", "cn_PP-OCRv3_det_infer")
args.rec_model_dir = os.path.join(here,
args.rec_model_dir = os.path.join(root,
"inference", "cn_PP-OCRv3_rec_infer")
args.table_model_dir = os.path.join(here,
args.table_model_dir = os.path.join(root,
"inference", "cn_ppstructure_mobile_v2.0_SLANet_infer")
args.output = os.path.join(here, "output") # 结果保存路径
args.layout_model_dir = os.path.join(here,
args.output = os.path.join(root, "output") # 结果保存路径
args.layout_model_dir = os.path.join(root,
"inference", "picodet_lcnet_x1_0_fgd_layout_cdla_infer")
lang_dict = DICT_CN
else:
raise ValueError("Unsupported language")
args.rec_char_dict_path = os.path.join(here,
args.rec_char_dict_path = os.path.join(root,
"ppocr", "utils",
lang_dict['rec_char_dict_path'])
args.layout_dict_path = os.path.join(here,
args.layout_dict_path = os.path.join(root,
"ppocr", "utils", "dict", "layout_dict",
lang_dict['layout_dict_path'])
# init predictor
return StructureSystem(args)
def openFileSlot(self):
def handleOpenFileSignal(self):
'''
可以多选图像文件
'''
selectedFiles = QtWidgets.QFileDialog.getOpenFileNames(self,
selectedFiles = QFileDialog.getOpenFileNames(self,
"多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0]
if len(selectedFiles) > 0:
self.imagePaths = selectedFiles
self.screenShot = None # discard screenshot temp image
self.updateProgressBar(len(selectedFiles), 0)
self.pb.setRange(0, len(self.imagePaths))
self.pb.setValue(0)
def screenShotSlot(self):
'''
......@@ -239,109 +367,73 @@ class APP_Image2Doc(QtWidgets.QWidget):
if self.screenShotWg.captureImage:
self.screenShot = self.screenShotWg.captureImage
self.imagePaths.clear() # discard openfile temp list
self.updateProgressBar(1, 0)
self.pb.setRange(0, 1)
self.pb.setValue(0)
def startSlot(self, lang):
def handleStartSignal(self, lang):
if self.screenShot: # for screenShot
img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
image = QImageToCvMat(self.screenShot)
self.predictAndSave(image, img_name, lang)
# update Progress Bar
self.updateProgressBar(1, 1)
QtWidgets.QMessageBox.information(self,
self.pb.setValue(1)
QMessageBox.information(self,
u'Information', "文档提取完成")
elif len(self.imagePaths) > 0 : # for image file selection
# Must set image path list and language before start
self.output_dir = os.path.join(
os.path.dirname(self.imagePaths[0]), "output") # output_dir shold be same as imagepath
os.makedirs(self.output_dir, exist_ok=True)
for i, image_file in enumerate(self.imagePaths):
if os.path.basename(image_file)[-3:] in ['pdf']:
import fitz
from PIL import Image
imgs = []
with fitz.open(image_file) as pdf:
for pg in range(0, pdf.pageCount):
page = pdf[pg]
mat = fitz.Matrix(2, 2)
pm = page.getPixmap(matrix=mat, alpha=False)
# if width or height > 2000 pixels, don't enlarge the image
if pm.width > 2000 or pm.height > 2000:
pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
else:
img = cv2.imread(image_file)
if img is None:
print("error in loading image:{}".format(image_file))
continue
imgs = [img]
img_name = os.path.basename(image_file).split('.')[0]
os.makedirs(os.path.join(self.output_dir, img_name), exist_ok=True)
self.predictAndSave(imgs, img_name, lang)
# update Progress Bar
self.updateProgressBar(len(self.imagePaths), i+1)
QtWidgets.QMessageBox.information(self,
u'Information', "文档提取完成")
self._thread.setOutputDir(self.output_dir)
self._thread.setImagePath(self.imagePaths)
self._thread.setLang(lang)
# disenble buttons
self.openFileButton.setEnabled(False)
self.startCNButton.setEnabled(False)
self.startENButton.setEnabled(False)
# 启动工作进程
self._thread.start()
self.time_start = time.time() # log start time
QMessageBox.information(self,
u'Information', "开始转换")
else:
print('empty input')
def predictAndSave(self, imgs, img_name, lang):
all_res = []
for index, img in enumerate(imgs):
if lang == 'EN':
res, time_dict = self.structure_sys_en(img)
elif lang == 'CN':
res, time_dict = self.structure_sys_cn(img)
# save output
save_structure_res(res, self.output_dir, img_name)
draw_img = draw_structure_result(img, res, self.vis_font_path)
img_save_path = os.path.join(self.output_dir, img_name, 'show_{}.jpg'.format(index))
if res != []:
cv2.imwrite(img_save_path, draw_img)
# recovery
h, w, _ = img.shape
res = sorted_layout_boxes(res, w)
all_res += res
QMessageBox.warning(self,
u'Information', "请选择要识别的文件或截图")
try:
convert_info_docx(img, all_res, self.output_dir, img_name, self.save_pdf)
except Exception as ex:
QtWidgets.QMessageBox.information(self,
u'Information', "error in layout recovery image:{}, err msg: {}".format(
img_name, ex))
print('result save to {}'.format(self.output_dir))
def showResultSlot(self):
def handleShowResultSignal(self):
if self.output_dir is None:
return
if os.path.exists(self.output_dir):
if platform.system() == 'Windows':
os.startfile(self.output_dir)
else:
os.system('open ' + os.path.normpath(self.lastOpenDir))
os.system('open ' + os.path.normpath(self.output_dir))
else:
QtWidgets.QMessageBox.information(self,
QMessageBox.information(self,
u'Information', "输出文件不存在")
def updateProgressBar(self, loaded, finished):
self.pb.setText(
self.pb_text.format(loaded, finished))
def handleProgressBarSingal(self, i):
    """Update the progress bar and the remaining-time estimate.

    Args:
        i: Number of files converted so far, as emitted by the worker.
    """
    self.pb.setValue(i)
    if i <= 0:
        # Nothing has finished yet, so no average time per file exists
        # (dividing by zero would raise); leave the estimate label as it is.
        return

    # calculate time left of recognition
    lenbar = self.pb.maximum()
    # Use average time to prevent time fluctuations
    avg_time = (time.time() - self.time_start) / i
    # Clamp at zero so a count above the maximum never shows a negative time.
    remaining = max(avg_time * (lenbar - i), 0)
    # Remove microseconds
    time_left = str(datetime.timedelta(seconds=remaining)).split(".")[0]
    self.timeEstLabel.setText(f"Time Left: {time_left}")  # show time left
def handleEndsignalSignal(self):
    """Re-enable the open/start buttons and announce the end of conversion."""
    # enable buttons
    for button in (self.openFileButton, self.startCNButton,
                   self.startENButton):
        button.setEnabled(True)
    QMessageBox.information(self, u'Information', "转换结束")
def main():
app = QtWidgets.QApplication(sys.argv)
app = QApplication(sys.argv)
window = APP_Image2Doc() # 创建对象
window.show() # 全屏显示窗口
QtWidgets.QApplication.processEvents()
QApplication.processEvents()
sys.exit(app.exec())
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册