提交 373d1c94 编写于 作者: qq_25193841's avatar qq_25193841

add icons

上级 7261f4ae
# PDF2WORD
PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系统实现的PDF转换word应用程序,提供可直接安装的exe,方便windows用户运行
<div align="center">
<img src="./doc/imgs_results/PP-OCRv3/en/en_4.png" width="800">
</div>
## 1.使用
### 应用程序
1. 下载与安装:针对Windows用户,根据[软件下载]()一节下载软件后,运行 `pdf2word.exe` 。若您下载的是lite版本,安装过程中会在线下载环境依赖、模型等必要资源,安装时间较长,请确保网络畅通。serve版本打包了相关依赖,安装时间较短,可按需下载。
2. 转换:由于PP-Structure根据中英文数据分别进行适配,在转换相应文件时可**根据文档语言进行相应选择**
### 脚本运行
首次运行需要将
```
python pdf2word.py
```
## 2.自行打包
PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `ppstructure` 文件夹下运行下方指令
```
cd ./
mv ./ppstructure/pdf2word .. -r
python GenEXE.py
```
## 3.软件下载
如需获取已打包程序,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群免费获取20G OCR学习大礼包,内含OCR场景应用集合(包含数码管、液晶屏、车牌、高精度SVTR模型等7个垂类模型)、《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料
此差异已折叠。
import sys
import tarfile
import os
import time
import functools
import cv2
import platform
import numpy as np
from qtpy import QtWidgets
from qtpy.QtGui import QImage, QPixmap, QIcon
from ppstructure.predict_system import StructureSystem, save_structure_res
from ppstructure.utility import parse_args, draw_structure_result
from ppocr.utils.network import download_with_progressbar
from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx
from ScreenShotWidget import ScreenShotWidget
__APPNAME__ = "Image2Doc"
__VERSION__ = "0.0.2"
here = os.path.dirname(os.path.abspath(__file__))
URLs_EN = {
# 下载超英文轻量级PP-OCRv3模型的检测模型并解压
"en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar",
# 下载英文轻量级PP-OCRv3模型的识别模型并解压
"en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar",
# 下载超轻量级英文表格英文模型并解压
"en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
# 英文版面分析模型
"picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar",
}
DICT_EN = {
"rec_char_dict_path": "en_dict.txt",
"layout_dict_path": "layout_publaynet_dict.txt",
}
URLs_CN = {
# 下载超中文轻量级PP-OCRv3模型的检测模型并解压
"cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar",
# 下载中文轻量级PP-OCRv3模型的识别模型并解压
"cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar",
# 下载超轻量级英文表格英文模型并解压
"cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar",
# 中文版面分析模型
"picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar",
}
DICT_CN = {
"rec_char_dict_path": "ppocr_keys_v1.txt",
"layout_dict_path": "layout_cdla_dict.txt",
}
def QImageToCvMat(incomingImage):
'''
Converts a QImage into an opencv MAT format
'''
incomingImage = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888)
width = incomingImage.width()
height = incomingImage.height()
ptr = incomingImage.bits()
ptr.setsize(height * width * 4)
arr = np.frombuffer(ptr, np.uint8).reshape((height, width, 4))
return arr
class APP_Image2Doc(QtWidgets.QWidget):
def __init__(self):
super(QtWidgets.QWidget, self).__init__()
self.pb = None # 进度条
self.pb_text = "已载入: {} / 已转换: {}"
self.imagePaths = []
# self.resultPath = os.path.join(here, "output")
self.screenShotWg = ScreenShotWidget()
self.screenShot = None
self.save_pdf = False
self.vis_font_path = os.path.join(here,
"doc", "fonts", "simfang.ttf")
# 初始化界面
self.setupUi()
# 下载模型
self.downloadModels(URLs_EN)
self.downloadModels(URLs_CN)
self.structure_sys_en = self.initPredictor('EN')
self.structure_sys_cn = self.initPredictor('CN')
def setupUi(self):
self.setObjectName("MainWindow")
self.setWindowTitle(__APPNAME__ + " " + __VERSION__)
layout = QtWidgets.QGridLayout()
openFileButton = QtWidgets.QPushButton("打开文件")
openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png")))
layout.addWidget(openFileButton, 0, 0, 1, 1)
openFileButton.clicked.connect(self.openFileSlot)
# screenShotButton = QtWidgets.QPushButton("截图识别")
# layout.addWidget(screenShotButton, 0, 1, 1, 1)
# screenShotButton.clicked.connect(self.screenShotSlot)
# screenShotButton.setEnabled(False) # temporarily disenble
startCNShotButton = QtWidgets.QPushButton("中文转换")
startCNShotButton.setIcon(QIcon(QPixmap("./icons/chinese.png")))
layout.addWidget(startCNShotButton, 0, 1, 1, 1)
startCNShotButton.clicked.connect(
functools.partial(self.startSlot, 'CN'))
startENButton = QtWidgets.QPushButton("英文转换")
startENButton.setIcon(QIcon(QPixmap("./icons/english.png")))
layout.addWidget(startENButton, 0, 2, 1, 1)
startENButton.clicked.connect(
functools.partial(self.startSlot, 'EN'))
showResultButton = QtWidgets.QPushButton("显示结果")
showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png")))
layout.addWidget(showResultButton, 0, 3, 1, 1)
showResultButton.clicked.connect(self.showResultSlot)
self.pb = QtWidgets.QLabel(
self.pb_text.format(0, 0))
layout.addWidget(self.pb, 1, 0, 1, 4)
self.setLayout(layout)
def downloadModels(self, URLs):
# using custom model
tar_file_name_list = [
'inference.pdiparams',
'inference.pdiparams.info',
'inference.pdmodel',
'model.pdiparams',
'model.pdiparams.info',
'model.pdmodel'
]
model_path = os.path.join(here, 'inference')
os.makedirs(model_path, exist_ok=True)
# download and unzip models
for name in URLs.keys():
url = URLs[name]
print("Try downloading file: {}".format(url))
tarname = url.split('/')[-1]
tarpath = os.path.join(model_path, tarname)
if os.path.exists(tarpath):
print("File have already exist. skip")
else:
try:
download_with_progressbar(url, tarpath)
except Exception as e:
print("Error occurred when downloading file, error message:")
print(e)
# unzip model tar
try:
with tarfile.open(tarpath, 'r') as tarObj:
storage_dir = os.path.join(model_path, name)
os.makedirs(storage_dir, exist_ok=True)
for member in tarObj.getmembers():
filename = None
for tar_file_name in tar_file_name_list:
if tar_file_name in member.name:
filename = tar_file_name
if filename is None:
continue
file = tarObj.extractfile(member)
with open(
os.path.join(storage_dir, filename),
'wb') as f:
f.write(file.read())
except Exception as e:
print("Error occurred when unziping file, error message:")
print(e)
def initPredictor(self, lang='EN'):
# init predictor args
args = parse_args()
args.table_max_len = 488
args.ocr = True
args.recovery = True
args.save_pdf = self.save_pdf
args.table_char_dict_path = os.path.join(here,
"ppocr", "utils", "dict", "table_structure_dict.txt")
if lang == 'EN':
args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
"inference", "en_PP-OCRv3_det_infer")
args.rec_model_dir = os.path.join(here,
"inference", "en_PP-OCRv3_rec_infer")
args.table_model_dir = os.path.join(here,
"inference", "en_ppstructure_mobile_v2.0_SLANet_infer")
args.output = os.path.join(here, "output") # 结果保存路径
args.layout_model_dir = os.path.join(here,
"inference", "picodet_lcnet_x1_0_fgd_layout_infer")
lang_dict = DICT_EN
elif lang == 'CN':
args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置
"inference", "cn_PP-OCRv3_det_infer")
args.rec_model_dir = os.path.join(here,
"inference", "cn_PP-OCRv3_rec_infer")
args.table_model_dir = os.path.join(here,
"inference", "cn_ppstructure_mobile_v2.0_SLANet_infer")
args.output = os.path.join(here, "output") # 结果保存路径
args.layout_model_dir = os.path.join(here,
"inference", "picodet_lcnet_x1_0_fgd_layout_cdla_infer")
lang_dict = DICT_CN
else:
raise ValueError("Unsupported language")
args.rec_char_dict_path = os.path.join(here,
"ppocr", "utils",
lang_dict['rec_char_dict_path'])
args.layout_dict_path = os.path.join(here,
"ppocr", "utils", "dict", "layout_dict",
lang_dict['layout_dict_path'])
# init predictor
return StructureSystem(args)
def openFileSlot(self):
'''
可以多选图像文件
'''
selectedFiles = QtWidgets.QFileDialog.getOpenFileNames(self,
"多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0]
if len(selectedFiles) > 0:
self.imagePaths = selectedFiles
self.screenShot = None # discard screenshot temp image
self.updateProgressBar(len(selectedFiles), 0)
def screenShotSlot(self):
'''
选定图像文件和截图的转换过程只能同时进行一个
截图只能同时转换一个
'''
self.screenShotWg.start()
if self.screenShotWg.captureImage:
self.screenShot = self.screenShotWg.captureImage
self.imagePaths.clear() # discard openfile temp list
self.updateProgressBar(1, 0)
def startSlot(self, lang):
if self.screenShot: # for screenShot
img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime())
image = QImageToCvMat(self.screenShot)
self.predictAndSave(image, img_name, lang)
# update Progress Bar
self.updateProgressBar(1, 1)
QtWidgets.QMessageBox.information(self,
u'Information', "文档提取完成")
elif len(self.imagePaths) > 0 : # for image file selection
self.output_dir = os.path.join(
os.path.dirname(self.imagePaths[0]), "output") # output_dir shold be same as imagepath
os.makedirs(self.output_dir, exist_ok=True)
for i, image_file in enumerate(self.imagePaths):
if os.path.basename(image_file)[-3:] in ['pdf']:
import fitz
from PIL import Image
imgs = []
with fitz.open(image_file) as pdf:
for pg in range(0, pdf.pageCount):
page = pdf[pg]
mat = fitz.Matrix(2, 2)
pm = page.getPixmap(matrix=mat, alpha=False)
# if width or height > 2000 pixels, don't enlarge the image
if pm.width > 2000 or pm.height > 2000:
pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False)
img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples)
img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
imgs.append(img)
else:
img = cv2.imread(image_file)
if img is None:
print("error in loading image:{}".format(image_file))
continue
imgs = [img]
img_name = os.path.basename(image_file).split('.')[0]
os.makedirs(os.path.join(self.output_dir, img_name), exist_ok=True)
self.predictAndSave(imgs, img_name, lang)
# update Progress Bar
self.updateProgressBar(len(self.imagePaths), i+1)
QtWidgets.QMessageBox.information(self,
u'Information', "文档提取完成")
else:
print('empty input')
def predictAndSave(self, imgs, img_name, lang):
all_res = []
for index, img in enumerate(imgs):
if lang == 'EN':
res, time_dict = self.structure_sys_en(img)
elif lang == 'CN':
res, time_dict = self.structure_sys_cn(img)
# save output
save_structure_res(res, self.output_dir, img_name)
draw_img = draw_structure_result(img, res, self.vis_font_path)
img_save_path = os.path.join(self.output_dir, img_name, 'show_{}.jpg'.format(index))
if res != []:
cv2.imwrite(img_save_path, draw_img)
# recovery
h, w, _ = img.shape
res = sorted_layout_boxes(res, w)
all_res += res
try:
convert_info_docx(img, all_res, self.output_dir, img_name, self.save_pdf)
except Exception as ex:
QtWidgets.QMessageBox.information(self,
u'Information', "error in layout recovery image:{}, err msg: {}".format(
img_name, ex))
print('result save to {}'.format(self.output_dir))
def showResultSlot(self):
if os.path.exists(self.output_dir):
if platform.system() == 'Windows':
os.startfile(self.output_dir)
else:
os.system('open ' + os.path.normpath(self.lastOpenDir))
else:
QtWidgets.QMessageBox.information(self,
u'Information', "输出文件不存在")
def updateProgressBar(self, loaded, finished):
self.pb.setText(
self.pb_text.format(loaded, finished))
def main():
app = QtWidgets.QApplication(sys.argv)
window = APP_Image2Doc() # 创建对象
window.show() # 全屏显示窗口
QtWidgets.QApplication.processEvents()
sys.exit(app.exec())
if __name__ == "__main__":
main()
\ No newline at end of file
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册