diff --git a/image2doc.py b/image2doc.py deleted file mode 100644 index 700126e0249cdbf7df415f7e8a07931b1241c4bf..0000000000000000000000000000000000000000 --- a/image2doc.py +++ /dev/null @@ -1,349 +0,0 @@ -import sys -import tarfile -import os -import time -import functools -import cv2 -import platform -import numpy as np -from qtpy import QtWidgets -from qtpy.QtGui import QImage, QPixmap, QIcon - -from ppstructure.predict_system import StructureSystem, save_structure_res -from ppstructure.utility import parse_args, draw_structure_result -from ppocr.utils.network import download_with_progressbar -from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx -from ScreenShotWidget import ScreenShotWidget - -__APPNAME__ = "Image2Doc" -__VERSION__ = "0.0.2" -here = os.path.dirname(os.path.abspath(__file__)) -URLs_EN = { - # 下载超英文轻量级PP-OCRv3模型的检测模型并解压 - "en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar", - # 下载英文轻量级PP-OCRv3模型的识别模型并解压 - "en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar", - # 下载超轻量级英文表格英文模型并解压 - "en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", - # 英文版面分析模型 - "picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", -} -DICT_EN = { - "rec_char_dict_path": "en_dict.txt", - "layout_dict_path": "layout_publaynet_dict.txt", -} - -URLs_CN = { - # 下载超中文轻量级PP-OCRv3模型的检测模型并解压 - "cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar", - # 下载中文轻量级PP-OCRv3模型的识别模型并解压 - "cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar", - # 下载超轻量级英文表格英文模型并解压 - "cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", - # 中文版面分析模型 - "picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar", -} -DICT_CN = { - "rec_char_dict_path": "ppocr_keys_v1.txt", - "layout_dict_path": "layout_cdla_dict.txt", -} - - -def QImageToCvMat(incomingImage): - ''' - Converts a QImage into an opencv MAT format - ''' - - incomingImage = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888) - - width = incomingImage.width() - height = incomingImage.height() - - ptr = incomingImage.bits() - ptr.setsize(height * width * 4) - arr = np.frombuffer(ptr, np.uint8).reshape((height, width, 4)) - return arr - - -class APP_Image2Doc(QtWidgets.QWidget): - def __init__(self): - super(QtWidgets.QWidget, self).__init__() - self.pb = None # 进度条 - self.pb_text = "已载入: {} / 已转换: {}" - self.imagePaths = [] - # self.resultPath = os.path.join(here, "output") - self.screenShotWg = ScreenShotWidget() - self.screenShot = None - self.save_pdf = False - - self.vis_font_path = os.path.join(here, - "doc", "fonts", "simfang.ttf") - - # 初始化界面 - self.setupUi() - - # 下载模型 - self.downloadModels(URLs_EN) - self.downloadModels(URLs_CN) - - self.structure_sys_en = self.initPredictor('EN') - self.structure_sys_cn = self.initPredictor('CN') - - def setupUi(self): - self.setObjectName("MainWindow") - self.setWindowTitle(__APPNAME__ + " " + __VERSION__) - - layout = QtWidgets.QGridLayout() - - openFileButton = QtWidgets.QPushButton("打开文件") - openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png"))) - layout.addWidget(openFileButton, 0, 0, 1, 1) - openFileButton.clicked.connect(self.openFileSlot) - - # screenShotButton = QtWidgets.QPushButton("截图识别") - # layout.addWidget(screenShotButton, 0, 1, 1, 1) - # screenShotButton.clicked.connect(self.screenShotSlot) - # screenShotButton.setEnabled(False) # temporarily disenble - - startCNShotButton = QtWidgets.QPushButton("中文转换") - startCNShotButton.setIcon(QIcon(QPixmap("./icons/chinese.png"))) - layout.addWidget(startCNShotButton, 0, 1, 1, 1) - startCNShotButton.clicked.connect( - functools.partial(self.startSlot, 'CN')) - - startENButton = QtWidgets.QPushButton("英文转换") - startENButton.setIcon(QIcon(QPixmap("./icons/english.png"))) - layout.addWidget(startENButton, 0, 2, 1, 1) - startENButton.clicked.connect( - functools.partial(self.startSlot, 'EN')) - - showResultButton = QtWidgets.QPushButton("显示结果") - showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png"))) - layout.addWidget(showResultButton, 0, 3, 1, 1) - showResultButton.clicked.connect(self.showResultSlot) - - self.pb = QtWidgets.QLabel( - self.pb_text.format(0, 0)) - layout.addWidget(self.pb, 1, 0, 1, 4) - - self.setLayout(layout) - - def downloadModels(self, URLs): - # using custom model - tar_file_name_list = [ - 'inference.pdiparams', - 'inference.pdiparams.info', - 'inference.pdmodel', - 'model.pdiparams', - 'model.pdiparams.info', - 'model.pdmodel' - ] - model_path = os.path.join(here, 'inference') - os.makedirs(model_path, exist_ok=True) - - # download and unzip models - for name in URLs.keys(): - url = URLs[name] - print("Try downloading file: {}".format(url)) - tarname = url.split('/')[-1] - tarpath = os.path.join(model_path, tarname) - if os.path.exists(tarpath): - print("File have already exist. skip") - else: - try: - download_with_progressbar(url, tarpath) - except Exception as e: - print("Error occurred when downloading file, error message:") - print(e) - - # unzip model tar - try: - with tarfile.open(tarpath, 'r') as tarObj: - storage_dir = os.path.join(model_path, name) - os.makedirs(storage_dir, exist_ok=True) - for member in tarObj.getmembers(): - filename = None - for tar_file_name in tar_file_name_list: - if tar_file_name in member.name: - filename = tar_file_name - if filename is None: - continue - file = tarObj.extractfile(member) - with open( - os.path.join(storage_dir, filename), - 'wb') as f: - f.write(file.read()) - except Exception as e: - print("Error occurred when unziping file, error message:") - print(e) - - def initPredictor(self, lang='EN'): - # init predictor args - args = parse_args() - args.table_max_len = 488 - args.ocr = True - args.recovery = True - args.save_pdf = self.save_pdf - args.table_char_dict_path = os.path.join(here, - "ppocr", "utils", "dict", "table_structure_dict.txt") - if lang == 'EN': - args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置 - "inference", "en_PP-OCRv3_det_infer") - args.rec_model_dir = os.path.join(here, - "inference", "en_PP-OCRv3_rec_infer") - args.table_model_dir = os.path.join(here, - "inference", "en_ppstructure_mobile_v2.0_SLANet_infer") - args.output = os.path.join(here, "output") # 结果保存路径 - args.layout_model_dir = os.path.join(here, - "inference", "picodet_lcnet_x1_0_fgd_layout_infer") - lang_dict = DICT_EN - elif lang == 'CN': - args.det_model_dir = os.path.join(here, # 此处从这里找到模型存放位置 - "inference", "cn_PP-OCRv3_det_infer") - args.rec_model_dir = os.path.join(here, - "inference", "cn_PP-OCRv3_rec_infer") - args.table_model_dir = os.path.join(here, - "inference", "cn_ppstructure_mobile_v2.0_SLANet_infer") - args.output = os.path.join(here, "output") # 结果保存路径 - args.layout_model_dir = os.path.join(here, - "inference", "picodet_lcnet_x1_0_fgd_layout_cdla_infer") - lang_dict = DICT_CN - else: - raise ValueError("Unsupported language") - args.rec_char_dict_path = os.path.join(here, - "ppocr", "utils", - lang_dict['rec_char_dict_path']) - args.layout_dict_path = os.path.join(here, - "ppocr", "utils", "dict", "layout_dict", - lang_dict['layout_dict_path']) - # init predictor - return StructureSystem(args) - - def openFileSlot(self): - ''' - 可以多选图像文件 - ''' - selectedFiles = QtWidgets.QFileDialog.getOpenFileNames(self, - "多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0] - if len(selectedFiles) > 0: - self.imagePaths = selectedFiles - self.screenShot = None # discard screenshot temp image - self.updateProgressBar(len(selectedFiles), 0) - - def screenShotSlot(self): - ''' - 选定图像文件和截图的转换过程只能同时进行一个 - 截图只能同时转换一个 - ''' - self.screenShotWg.start() - if self.screenShotWg.captureImage: - self.screenShot = self.screenShotWg.captureImage - self.imagePaths.clear() # discard openfile temp list - self.updateProgressBar(1, 0) - - def startSlot(self, lang): - if self.screenShot: # for screenShot - img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime()) - image = QImageToCvMat(self.screenShot) - self.predictAndSave(image, img_name, lang) - # update Progress Bar - self.updateProgressBar(1, 1) - QtWidgets.QMessageBox.information(self, - u'Information', "文档提取完成") - elif len(self.imagePaths) > 0 : # for image file selection - self.output_dir = os.path.join( - os.path.dirname(self.imagePaths[0]), "output") # output_dir shold be same as imagepath - os.makedirs(self.output_dir, exist_ok=True) - for i, image_file in enumerate(self.imagePaths): - if os.path.basename(image_file)[-3:] in ['pdf']: - import fitz - from PIL import Image - imgs = [] - with fitz.open(image_file) as pdf: - for pg in range(0, pdf.pageCount): - page = pdf[pg] - mat = fitz.Matrix(2, 2) - pm = page.getPixmap(matrix=mat, alpha=False) - - # if width or height > 2000 pixels, don't enlarge the image - if pm.width > 2000 or pm.height > 2000: - pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False) - - img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) - img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) - imgs.append(img) - - else: - img = cv2.imread(image_file) - if img is None: - print("error in loading image:{}".format(image_file)) - continue - imgs = [img] - - img_name = os.path.basename(image_file).split('.')[0] - os.makedirs(os.path.join(self.output_dir, img_name), exist_ok=True) - self.predictAndSave(imgs, img_name, lang) - - # update Progress Bar - self.updateProgressBar(len(self.imagePaths), i+1) - QtWidgets.QMessageBox.information(self, - u'Information', "文档提取完成") - else: - print('empty input') - - def predictAndSave(self, imgs, img_name, lang): - all_res = [] - for index, img in enumerate(imgs): - if lang == 'EN': - res, time_dict = self.structure_sys_en(img) - elif lang == 'CN': - res, time_dict = self.structure_sys_cn(img) - - # save output - save_structure_res(res, self.output_dir, img_name) - draw_img = draw_structure_result(img, res, self.vis_font_path) - img_save_path = os.path.join(self.output_dir, img_name, 'show_{}.jpg'.format(index)) - if res != []: - cv2.imwrite(img_save_path, draw_img) - - # recovery - h, w, _ = img.shape - res = sorted_layout_boxes(res, w) - all_res += res - - try: - convert_info_docx(img, all_res, self.output_dir, img_name, self.save_pdf) - except Exception as ex: - QtWidgets.QMessageBox.information(self, - u'Information', "error in layout recovery image:{}, err msg: {}".format( - img_name, ex)) - - print('result save to {}'.format(self.output_dir)) - - def showResultSlot(self): - if os.path.exists(self.output_dir): - if platform.system() == 'Windows': - os.startfile(self.output_dir) - else: - os.system('open ' + os.path.normpath(self.lastOpenDir)) - else: - QtWidgets.QMessageBox.information(self, - u'Information', "输出文件不存在") - - def updateProgressBar(self, loaded, finished): - self.pb.setText( - self.pb_text.format(loaded, finished)) - - -def main(): - app = QtWidgets.QApplication(sys.argv) - - window = APP_Image2Doc() # 创建对象 - window.show() # 全屏显示窗口 - - QtWidgets.QApplication.processEvents() - sys.exit(app.exec()) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/icons/chinese.png b/ppstructure/pdf2word/icons/chinese.png similarity index 100% rename from icons/chinese.png rename to ppstructure/pdf2word/icons/chinese.png diff --git a/icons/english.png b/ppstructure/pdf2word/icons/english.png similarity index 100% rename from icons/english.png rename to ppstructure/pdf2word/icons/english.png diff --git a/icons/folder-open.png b/ppstructure/pdf2word/icons/folder-open.png similarity index 100% rename from icons/folder-open.png rename to ppstructure/pdf2word/icons/folder-open.png diff --git a/icons/folder-plus.png b/ppstructure/pdf2word/icons/folder-plus.png similarity index 100% rename from icons/folder-plus.png rename to ppstructure/pdf2word/icons/folder-plus.png diff --git a/PDF2WORD.md b/ppstructure/pdf2word/pdf2word.md similarity index 74% rename from PDF2WORD.md rename to ppstructure/pdf2word/pdf2word.md index 382a0febee57a21ea0b77d95360feadc54ddae60..8d69d60748c2a3b39282b75ce28f32f3089b589c 100644 --- a/PDF2WORD.md +++ b/ppstructure/pdf2word/pdf2word.md @@ -1,11 +1,12 @@ # PDF2WORD -PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系统实现的PDF转换word应用程序,提供可直接安装的exe,方便windows用户运行 +PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析模型实现的PDF转换Word应用程序,提供可直接安装的exe,方便windows用户运行
- +
+ ## 1.使用 ### 应用程序 @@ -16,17 +17,15 @@ PDF2WORD是PaddleOCR社区开发者@whj 基于PP-Structure智能文档分析系 ### 脚本运行 -首次运行需要将 +首次运行需要将切换路径到 `/ppstructure/pdf2word` ,然后运行代码 ``` python pdf2word.py ``` - - ## 2.自行打包 -PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `ppstructure` 文件夹下运行下方指令 +PDF2WORD应用程序通过[QPT](https://github.com/QPT-Family/QPT)工具打包实现,若您修改了界面代码需要重新打包,请在 `PaddleOCR` 文件夹下运行下方指令 ``` cd ./ @@ -34,9 +33,11 @@ mv ./ppstructure/pdf2word .. -r python GenEXE.py ``` - - ## 3.软件下载 如需获取已打包程序,可以扫描下方二维码,关注公众号填写问卷后,加入PaddleOCR官方交流群免费获取20G OCR学习大礼包,内含OCR场景应用集合(包含数码管、液晶屏、车牌、高精度SVTR模型等7个垂类模型)、《动手学OCR》电子书、课程回放视频、前沿论文等重磅资料 +
+ +
+ diff --git a/ppstructure/pdf2word/pdf2word.py b/ppstructure/pdf2word/pdf2word.py new file mode 100644 index 0000000000000000000000000000000000000000..add8a6f40501de6a9053fb4fb78c6463b3047966 --- /dev/null +++ b/ppstructure/pdf2word/pdf2word.py @@ -0,0 +1,441 @@ +import sys +import tarfile +import os +import time +import datetime +import functools +import cv2 +import platform +import numpy as np +from qtpy.QtWidgets import QApplication, QWidget, QPushButton, QProgressBar, \ + QGridLayout, QMessageBox, QLabel, QFileDialog +from qtpy.QtCore import Signal, QThread, QObject +from qtpy.QtGui import QImage, QPixmap, QIcon + +file = os.path.dirname(os.path.abspath(__file__)) +root = os.path.abspath(os.path.join(file, '../../')) +sys.path.append(file) +sys.path.insert(0, root) + +from ppstructure.predict_system import StructureSystem, save_structure_res +from ppstructure.utility import parse_args, draw_structure_result +from ppocr.utils.network import download_with_progressbar +from ppstructure.recovery.recovery_to_doc import sorted_layout_boxes, convert_info_docx +from ScreenShotWidget import ScreenShotWidget + +__APPNAME__ = "pdf2word" +__VERSION__ = "0.1.1" + +URLs_EN = { + # 下载超英文轻量级PP-OCRv3模型的检测模型并解压 + "en_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar", + # 下载英文轻量级PP-OCRv3模型的识别模型并解压 + "en_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar", + # 下载超轻量级英文表格英文模型并解压 + "en_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", + # 英文版面分析模型 + "picodet_lcnet_x1_0_fgd_layout_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tar", +} +DICT_EN = { + "rec_char_dict_path": "en_dict.txt", + "layout_dict_path": "layout_publaynet_dict.txt", +} + +URLs_CN = { + # 下载超中文轻量级PP-OCRv3模型的检测模型并解压 + "cn_PP-OCRv3_det_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar", + # 下载中文轻量级PP-OCRv3模型的识别模型并解压 + "cn_PP-OCRv3_rec_infer": "https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar", + # 下载超轻量级英文表格英文模型并解压 + "cn_ppstructure_mobile_v2.0_SLANet_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tar", + # 中文版面分析模型 + "picodet_lcnet_x1_0_fgd_layout_cdla_infer": "https://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tar", +} +DICT_CN = { + "rec_char_dict_path": "ppocr_keys_v1.txt", + "layout_dict_path": "layout_cdla_dict.txt", +} + + + +def QImageToCvMat(incomingImage) -> np.array: + ''' + Converts a QImage into an opencv MAT format + ''' + + incomingImage = incomingImage.convertToFormat(QImage.Format.Format_RGBA8888) + + width = incomingImage.width() + height = incomingImage.height() + + ptr = incomingImage.bits() + ptr.setsize(height * width * 4) + arr = np.frombuffer(ptr, np.uint8).reshape((height, width, 4)) + return arr + + +def readImage(image_file) -> list: + if os.path.basename(image_file)[-3:] in ['pdf']: + import fitz + from PIL import Image + imgs = [] + with fitz.open(image_file) as pdf: + for pg in range(0, pdf.pageCount): + page = pdf[pg] + mat = fitz.Matrix(2, 2) + pm = page.getPixmap(matrix=mat, alpha=False) + + # if width or height > 2000 pixels, don't enlarge the image + if pm.width > 2000 or pm.height > 2000: + pm = page.getPixmap(matrix=fitz.Matrix(1, 1), alpha=False) + + img = Image.frombytes("RGB", [pm.width, pm.height], pm.samples) + img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) + imgs.append(img) + else: + img = cv2.imread(image_file, cv2.IMREAD_COLOR) + if img is not None: + imgs = [img] + + return imgs + + +class Worker(QThread): + progressBarValue = Signal(int) + endsignal = Signal() + loopFlag = True + + def __init__(self, predictors, save_pdf, vis_font_path): + super(Worker, self).__init__() + self.predictors = predictors + self.save_pdf = save_pdf + self.vis_font_path = vis_font_path + self.lang = 'EN' + self.imagePaths = [] + self.outputDir = None + self.setStackSize(1024*1024) + + def setImagePath(self, imagePaths): + self.imagePaths = imagePaths + + def setLang(self, lang): + self.lang = lang + + def setOutputDir(self, outputDir): + self.outputDir = outputDir + + def predictAndSave(self, imgs, img_name): + all_res = [] + for index, img in enumerate(imgs): + res, time_dict = self.predictors[self.lang](img) + + # save output + save_structure_res(res, self.outputDir, img_name) + draw_img = draw_structure_result(img, res, self.vis_font_path) + img_save_path = os.path.join(self.outputDir, img_name, 'show_{}.jpg'.format(index)) + if res != []: + cv2.imwrite(img_save_path, draw_img) + + # recovery + h, w, _ = img.shape + res = sorted_layout_boxes(res, w) + all_res += res + + try: + convert_info_docx(img, all_res, self.outputDir, img_name, self.save_pdf) + except Exception as ex: + print(self, + "error in layout recovery image:{}, err msg: {}".format( + img_name, ex)) + + print('result save to {}'.format(self.outputDir)) + + def run(self): + try: + findex = 0 + os.makedirs(self.outputDir, exist_ok=True) + for i, image_file in enumerate(self.imagePaths): + if self.loopFlag == True: + imgs = readImage(image_file) + if len(imgs) == 0: + continue + img_name = os.path.basename(image_file).split('.')[0] + os.makedirs(os.path.join(self.outputDir, img_name), exist_ok=True) + self.predictAndSave(imgs, img_name) + findex += 1 + self.progressBarValue.emit(findex) + else: + break + self.endsignal.emit() + self.exec() + except Exception as e: + print(e) + raise + + +class APP_Image2Doc(QWidget): + def __init__(self): + super().__init__() + self.setFixedHeight(90) + self.setFixedWidth(400) + + # settings + self.imagePaths = [] + self.screenShotWg = ScreenShotWidget() + self.screenShot = None + self.save_pdf = False + self.output_dir = None + self.vis_font_path = os.path.join(root, + "doc", "fonts", "simfang.ttf") + + # ProgressBar + self.pb = QProgressBar() + self.pb.setRange(0, 100) + self.pb.setValue(0) + + # 初始化界面 + self.setupUi() + + # 下载模型 + self.downloadModels(URLs_EN) + self.downloadModels(URLs_CN) + + # 初始化模型 + predictors = { + 'EN': self.initPredictor('EN'), + 'CN': self.initPredictor('CN'), + } + + # 设置工作进程 + self._thread = Worker(predictors, self.save_pdf, self.vis_font_path) + self._thread.progressBarValue.connect(self.handleProgressBarSingal) + self._thread.endsignal.connect(self.handleEndsignalSignal) + self._thread.finished.connect(QObject.deleteLater) + self.time_start = 0 # save start time + + def setupUi(self): + self.setObjectName("MainWindow") + self.setWindowTitle(__APPNAME__ + " " + __VERSION__) + + layout = QGridLayout() + + self.openFileButton = QPushButton("打开文件") + self.openFileButton.setIcon(QIcon(QPixmap("./icons/folder-plus.png"))) + layout.addWidget(self.openFileButton, 0, 0, 1, 1) + self.openFileButton.clicked.connect(self.handleOpenFileSignal) + + # screenShotButton = QPushButton("截图识别") + # layout.addWidget(screenShotButton, 0, 1, 1, 1) + # screenShotButton.clicked.connect(self.screenShotSlot) + # screenShotButton.setEnabled(False) # temporarily disenble + + self.startCNButton = QPushButton("中文转换") + self.startCNButton.setIcon(QIcon(QPixmap("./icons/chinese.png"))) + layout.addWidget(self.startCNButton, 0, 1, 1, 1) + self.startCNButton.clicked.connect( + functools.partial(self.handleStartSignal, 'CN')) + + self.startENButton = QPushButton("英文转换") + self.startENButton.setIcon(QIcon(QPixmap("./icons/english.png"))) + layout.addWidget(self.startENButton, 0, 2, 1, 1) + self.startENButton.clicked.connect( + functools.partial(self.handleStartSignal, 'EN')) + + self.showResultButton = QPushButton("显示结果") + self.showResultButton.setIcon(QIcon(QPixmap("./icons/folder-open.png"))) + layout.addWidget(self.showResultButton, 0, 3, 1, 1) + self.showResultButton.clicked.connect(self.handleShowResultSignal) + + # ProgressBar + layout.addWidget(self.pb, 2, 0, 1, 4) + # time estimate label + self.timeEstLabel = QLabel( + ("Time Left: --")) + layout.addWidget(self.timeEstLabel, 3, 0, 1, 4) + + self.setLayout(layout) + + def downloadModels(self, URLs): + # using custom model + tar_file_name_list = [ + 'inference.pdiparams', + 'inference.pdiparams.info', + 'inference.pdmodel', + 'model.pdiparams', + 'model.pdiparams.info', + 'model.pdmodel' + ] + model_path = os.path.join(root, 'inference') + os.makedirs(model_path, exist_ok=True) + + # download and unzip models + for name in URLs.keys(): + url = URLs[name] + print("Try downloading file: {}".format(url)) + tarname = url.split('/')[-1] + tarpath = os.path.join(model_path, tarname) + if os.path.exists(tarpath): + print("File have already exist. skip") + else: + try: + download_with_progressbar(url, tarpath) + except Exception as e: + print("Error occurred when downloading file, error message:") + print(e) + + # unzip model tar + try: + with tarfile.open(tarpath, 'r') as tarObj: + storage_dir = os.path.join(model_path, name) + os.makedirs(storage_dir, exist_ok=True) + for member in tarObj.getmembers(): + filename = None + for tar_file_name in tar_file_name_list: + if tar_file_name in member.name: + filename = tar_file_name + if filename is None: + continue + file = tarObj.extractfile(member) + with open( + os.path.join(storage_dir, filename), + 'wb') as f: + f.write(file.read()) + except Exception as e: + print("Error occurred when unziping file, error message:") + print(e) + + def initPredictor(self, lang='EN'): + # init predictor args + args = parse_args() + args.table_max_len = 488 + args.ocr = True + args.recovery = True + args.save_pdf = self.save_pdf + args.table_char_dict_path = os.path.join(root, + "ppocr", "utils", "dict", "table_structure_dict.txt") + if lang == 'EN': + args.det_model_dir = os.path.join(root, # 此处从这里找到模型存放位置 + "inference", "en_PP-OCRv3_det_infer") + args.rec_model_dir = os.path.join(root, + "inference", "en_PP-OCRv3_rec_infer") + args.table_model_dir = os.path.join(root, + "inference", "en_ppstructure_mobile_v2.0_SLANet_infer") + args.output = os.path.join(root, "output") # 结果保存路径 + args.layout_model_dir = os.path.join(root, + "inference", "picodet_lcnet_x1_0_fgd_layout_infer") + lang_dict = DICT_EN + elif lang == 'CN': + args.det_model_dir = os.path.join(root, # 此处从这里找到模型存放位置 + "inference", "cn_PP-OCRv3_det_infer") + args.rec_model_dir = os.path.join(root, + "inference", "cn_PP-OCRv3_rec_infer") + args.table_model_dir = os.path.join(root, + "inference", "cn_ppstructure_mobile_v2.0_SLANet_infer") + args.output = os.path.join(root, "output") # 结果保存路径 + args.layout_model_dir = os.path.join(root, + "inference", "picodet_lcnet_x1_0_fgd_layout_cdla_infer") + lang_dict = DICT_CN + else: + raise ValueError("Unsupported language") + args.rec_char_dict_path = os.path.join(root, + "ppocr", "utils", + lang_dict['rec_char_dict_path']) + args.layout_dict_path = os.path.join(root, + "ppocr", "utils", "dict", "layout_dict", + lang_dict['layout_dict_path']) + # init predictor + return StructureSystem(args) + + def handleOpenFileSignal(self): + ''' + 可以多选图像文件 + ''' + selectedFiles = QFileDialog.getOpenFileNames(self, + "多文件选择", "/", "图片文件 (*.png *.jpeg *.jpg *.bmp *.pdf)")[0] + if len(selectedFiles) > 0: + self.imagePaths = selectedFiles + self.screenShot = None # discard screenshot temp image + self.pb.setRange(0, len(self.imagePaths)) + self.pb.setValue(0) + + def screenShotSlot(self): + ''' + 选定图像文件和截图的转换过程只能同时进行一个 + 截图只能同时转换一个 + ''' + self.screenShotWg.start() + if self.screenShotWg.captureImage: + self.screenShot = self.screenShotWg.captureImage + self.imagePaths.clear() # discard openfile temp list + self.pb.setRange(0, 1) + self.pb.setValue(0) + + def handleStartSignal(self, lang): + if self.screenShot: # for screenShot + img_name = 'screenshot_' + time.strftime("%Y%m%d%H%M%S", time.localtime()) + image = QImageToCvMat(self.screenShot) + self.predictAndSave(image, img_name, lang) + # update Progress Bar + self.pb.setValue(1) + QMessageBox.information(self, + u'Information', "文档提取完成") + elif len(self.imagePaths) > 0 : # for image file selection + # Must set image path list and language before start + self.output_dir = os.path.join( + os.path.dirname(self.imagePaths[0]), "output") # output_dir shold be same as imagepath + self._thread.setOutputDir(self.output_dir) + self._thread.setImagePath(self.imagePaths) + self._thread.setLang(lang) + # disenble buttons + self.openFileButton.setEnabled(False) + self.startCNButton.setEnabled(False) + self.startENButton.setEnabled(False) + # 启动工作进程 + self._thread.start() + self.time_start = time.time() # log start time + QMessageBox.information(self, + u'Information', "开始转换") + else: + QMessageBox.warning(self, + u'Information', "请选择要识别的文件或截图") + + def handleShowResultSignal(self): + if self.output_dir is None: + return + if os.path.exists(self.output_dir): + if platform.system() == 'Windows': + os.startfile(self.output_dir) + else: + os.system('open ' + os.path.normpath(self.output_dir)) + else: + QMessageBox.information(self, + u'Information', "输出文件不存在") + + def handleProgressBarSingal(self, i): + self.pb.setValue(i) + # calculate time left of recognition + lenbar = self.pb.maximum() + avg_time = (time.time() - self.time_start) / i # Use average time to prevent time fluctuations + time_left = str(datetime.timedelta(seconds=avg_time * (lenbar - i))).split(".")[0] # Remove microseconds + self.timeEstLabel.setText(f"Time Left: {time_left}") # show time left + + def handleEndsignalSignal(self): + # enble buttons + self.openFileButton.setEnabled(True) + self.startCNButton.setEnabled(True) + self.startENButton.setEnabled(True) + QMessageBox.information(self, u'Information', "转换结束") + + +def main(): + app = QApplication(sys.argv) + + window = APP_Image2Doc() # 创建对象 + window.show() # 全屏显示窗口 + + QApplication.processEvents() + sys.exit(app.exec()) + + +if __name__ == "__main__": + main() \ No newline at end of file