diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md
index 7a936543277e2e1d5687681ac78eca96f5f3f400..9375559f3b10baaa52d034badaee40214b7c31ff 100644
--- a/PPOCRLabel/README.md
+++ b/PPOCRLabel/README.md
@@ -207,6 +207,24 @@ For some data that are difficult to recognize, the recognition results will not
     pip install opencv-contrib-python-headless==4.2.0.32
     ```
+### Dataset division
+
+- Enter the following command in the terminal to execute the dataset division script:
+  ```
+  cd ./PPOCRLabel # Change the directory to the PPOCRLabel folder
+  python gen_ocr_train_val_test.py --trainValTestRatio 6:2:2 --labelRootPath ../train_data/label --detRootPath ../train_data/det --recRootPath ../train_data/rec
+  ```
+
+- Parameter Description:
+
+  trainValTestRatio is the ratio in which the images are split among the training, validation, and test sets. Set it to match your data; the default is 6:2:2.
+
+  labelRootPath is the path to the dataset annotated with PPOCRLabel. The default is ../train_data/label.
+
+  detRootPath is the path where the text detection dataset split from the PPOCRLabel-annotated data is stored. The default is ../train_data/det.
+
+  recRootPath is the path where the character recognition dataset split from the PPOCRLabel-annotated data is stored. The default is ../train_data/rec.
+
 ### Related
 1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
\ No newline at end of file
diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md
index 17bb95c08267bfac5e2cecde2e1d88c4a7cb1b60..7abf2f5d8b193b895e1993a8e1628d29c371df0a 100644
--- a/PPOCRLabel/README_ch.md
+++ b/PPOCRLabel/README_ch.md
@@ -193,7 +193,23 @@ PPOCRLabel支持三种导出方式:
     ```
     pip install opencv-contrib-python-headless==4.2.0.32
     ```
+### 数据集划分
+
+- 在终端中输入以下命令执行数据集划分脚本:
+  ```
+  cd ./PPOCRLabel # 将目录切换到PPOCRLabel文件夹下
+  python gen_ocr_train_val_test.py --trainValTestRatio 6:2:2 --labelRootPath ../train_data/label --detRootPath ../train_data/det --recRootPath ../train_data/rec
+  ```
+- 参数说明:
+
+  trainValTestRatio是训练集、验证集、测试集的图像数量划分比例,根据你的实际情况设定,默认是6:2:2
+
+  labelRootPath是PPOCRLabel标注的数据集存放路径,默认是../train_data/label
+
+  detRootPath是根据PPOCRLabel标注的数据集划分后的文本检测数据集存放的路径,默认是../train_data/det
+
+  recRootPath是根据PPOCRLabel标注的数据集划分后的字符识别数据集存放的路径,默认是../train_data/rec
+
+
 ### 4. 参考资料
 1.[Tzutalin. LabelImg. Git code (2015)](https://github.com/tzutalin/labelImg)
diff --git a/PPOCRLabel/gen_ocr_train_val_test.py b/PPOCRLabel/gen_ocr_train_val_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..64cba612ae267835dd47aedc2b0356c9df462038
--- /dev/null
+++ b/PPOCRLabel/gen_ocr_train_val_test.py
@@ -0,0 +1,147 @@
+# coding:utf8
+import os
+import shutil
+import random
+import argparse
+
+
+# 删除划分的训练集、验证集、测试集文件夹,重新创建一个空的文件夹
+def isCreateOrDeleteFolder(path, flag):
+    flagPath = os.path.join(path, flag)
+
+    if os.path.exists(flagPath):
+        shutil.rmtree(flagPath)
+
+    os.makedirs(flagPath)
+    flagAbsPath = os.path.abspath(flagPath)
+    return flagAbsPath
+
+
+def splitTrainVal(root, dir, absTrainRootPath, absValRootPath, absTestRootPath, trainTxt, valTxt, testTxt, flag):
+    # 按照指定的比例划分训练集、验证集、测试集
+    labelPath = os.path.join(root, dir)
+    labelAbsPath = os.path.abspath(labelPath)
+
+    if flag == "det":
+        labelFilePath = os.path.join(labelAbsPath, args.detLabelFileName)
+    elif flag == "rec":
+        labelFilePath = os.path.join(labelAbsPath, args.recLabelFileName)
+
+    with open(labelFilePath, "r", encoding="UTF-8") as labelFileRead:
+        labelFileContent = labelFileRead.readlines()
+    random.shuffle(labelFileContent)
+    labelRecordLen = len(labelFileContent)
+
+    for index, labelRecordInfo in enumerate(labelFileContent):
+        imageRelativePath = labelRecordInfo.split('\t')[0]
+        imageLabel = labelRecordInfo.split('\t')[1]
+        imageName = os.path.basename(imageRelativePath)
+
+        if flag == "det":
+            imagePath = os.path.join(labelAbsPath, imageName)
+        elif flag == "rec":
+            # 使用os.path.join拼接,避免硬编码的Windows路径分隔符
+            imagePath = os.path.join(labelAbsPath, args.recImageDirName, imageName)
+
+        # 按预设的比例划分训练集、验证集、测试集
+        trainValTestRatio = args.trainValTestRatio.split(":")
+        trainRatio = float(trainValTestRatio[0]) / 10
+        valRatio = trainRatio + float(trainValTestRatio[1]) / 10
+        curRatio = index / labelRecordLen
+
+        if curRatio < trainRatio:
+            imageCopyPath = os.path.join(absTrainRootPath, imageName)
+            shutil.copy(imagePath, imageCopyPath)
+            trainTxt.write("{}\t{}".format(imageCopyPath, imageLabel))
+        elif curRatio < valRatio:
+            imageCopyPath = os.path.join(absValRootPath, imageName)
+            shutil.copy(imagePath, imageCopyPath)
+            valTxt.write("{}\t{}".format(imageCopyPath, imageLabel))
+        else:
+            imageCopyPath = os.path.join(absTestRootPath, imageName)
+            shutil.copy(imagePath, imageCopyPath)
+            testTxt.write("{}\t{}".format(imageCopyPath, imageLabel))
+
+
+# 删掉存在的文件
+def removeFile(path):
+    if os.path.exists(path):
+        os.remove(path)
+
+
+def genDetRecTrainVal(args):
+    detAbsTrainRootPath = isCreateOrDeleteFolder(args.detRootPath, "train")
+    detAbsValRootPath = isCreateOrDeleteFolder(args.detRootPath, "val")
+    detAbsTestRootPath = isCreateOrDeleteFolder(args.detRootPath, "test")
+    recAbsTrainRootPath = isCreateOrDeleteFolder(args.recRootPath, "train")
+    recAbsValRootPath = isCreateOrDeleteFolder(args.recRootPath, "val")
+    recAbsTestRootPath = isCreateOrDeleteFolder(args.recRootPath, "test")
+
+    removeFile(os.path.join(args.detRootPath, "train.txt"))
+    removeFile(os.path.join(args.detRootPath, "val.txt"))
+    removeFile(os.path.join(args.detRootPath, "test.txt"))
+    removeFile(os.path.join(args.recRootPath, "train.txt"))
+    removeFile(os.path.join(args.recRootPath, "val.txt"))
+    removeFile(os.path.join(args.recRootPath, "test.txt"))
+
+    detTrainTxt = open(os.path.join(args.detRootPath, "train.txt"), "a", encoding="UTF-8")
+    detValTxt = open(os.path.join(args.detRootPath, "val.txt"), "a", encoding="UTF-8")
+    detTestTxt =
open(os.path.join(args.detRootPath, "test.txt"), "a", encoding="UTF-8") + recTrainTxt = open(os.path.join(args.recRootPath, "train.txt"), "a", encoding="UTF-8") + recValTxt = open(os.path.join(args.recRootPath, "val.txt"), "a", encoding="UTF-8") + recTestTxt = open(os.path.join(args.recRootPath, "test.txt"), "a", encoding="UTF-8") + + for root, dirs, files in os.walk(args.labelRootPath): + for dir in dirs: + splitTrainVal(root, dir, detAbsTrainRootPath, detAbsValRootPath, detAbsTestRootPath, detTrainTxt, detValTxt, + detTestTxt, "det") + splitTrainVal(root, dir, recAbsTrainRootPath, recAbsValRootPath, recAbsTestRootPath, recTrainTxt, recValTxt, + recTestTxt, "rec") + break + + +if __name__ == "__main__": + # 功能描述:分别划分检测和识别的训练集、验证集、测试集 + # 说明:可以根据自己的路径和需求调整参数,图像数据往往多人合作分批标注,每一批图像数据放在一个文件夹内用PPOCRLabel进行标注, + # 如此会有多个标注好的图像文件夹汇总并划分训练集、验证集、测试集的需求 + parser = argparse.ArgumentParser() + parser.add_argument( + "--trainValTestRatio", + type=str, + default="6:2:2", + help="ratio of trainset:valset:testset") + parser.add_argument( + "--labelRootPath", + type=str, + default="../train_data/label", + help="path to the dataset marked by ppocrlabel, E.g, dataset folder named 1,2,3..." + ) + parser.add_argument( + "--detRootPath", + type=str, + default="../train_data/det", + help="the path where the divided detection dataset is placed") + parser.add_argument( + "--recRootPath", + type=str, + default="../train_data/rec", + help="the path where the divided recognition dataset is placed" + ) + parser.add_argument( + "--detLabelFileName", + type=str, + default="Label.txt", + help="the name of the detection annotation file") + parser.add_argument( + "--recLabelFileName", + type=str, + default="rec_gt.txt", + help="the name of the recognition annotation file" + ) + parser.add_argument( + "--recImageDirName", + type=str, + default="crop_img", + help="the name of the folder where the cropped recognition dataset is located" + ) + args = parser.parse_args() + genDetRecTrainVal(args) diff --git a/deploy/paddle2onnx/readme.md b/deploy/paddle2onnx/readme.md new file mode 100644 index 0000000000000000000000000000000000000000..782cffce9e5611b38da5bc002670a8282c08cc19 --- /dev/null +++ b/deploy/paddle2onnx/readme.md @@ -0,0 +1,76 @@ +# paddle2onnx 模型转化与预测 + +本章节介绍 PaddleOCR 模型如何转化为 ONNX 模型,并基于 ONNX 引擎预测。 + +## 1. 环境准备 + +需要准备 Paddle2ONNX 模型转化环境,和 ONNX 模型预测环境 + +### Paddle2ONNX + +Paddle2ONNX 支持将 PaddlePaddle 模型格式转化到 ONNX 模型格式,算子目前稳定支持导出 ONNX Opset 9~11,部分Paddle算子支持更低的ONNX Opset转换。 +更多细节可参考 [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/README_zh.md) + +- 安装 Paddle2ONNX +``` +python3.7 -m pip install paddle2onnx +``` + +- 安装 ONNX +``` +# 建议安装 1.4.0 版本,可根据环境更换版本号 +python3.7 -m pip install onnxruntime==1.4.0 +``` + +## 2. 模型转换 + + +- Paddle 模型下载 + +有两种方式获取Paddle静态图模型:在 [model_list](../../doc/doc_ch/models_list.md) 中下载PaddleOCR提供的预测模型; +参考[模型导出说明](../../doc/doc_ch/inference.md#训练模型转inference模型)把训练好的权重转为 inference_model。 + +以 ppocr 检测模型为例: + +``` +wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar +cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd .. 
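+# 说明(编者注,假设项):解压后 ./inference/ch_ppocr_mobile_v2.0_det_infer/ 目录下
+# 应包含 inference.pdmodel 与 inference.pdiparams 两个文件,供下文的转换命令使用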
+``` + +- 模型转换 + +使用 Paddle2ONNX 将Paddle静态图模型转换为ONNX模型格式: + +``` +paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \ +--model_filename=inference.pdmodel \ +--params_filename=inference.pdiparams \ +--save_file=./inference/det_mobile_onnx/model.onnx \ +--opset_version=10 \ +--enable_onnx_checker=True +``` + +执行完毕后,ONNX 模型会被保存在 `./inference/det_mobile_onnx/` 路径下 + +* 注意:以下几个模型暂不支持转换为 ONNX 模型: +NRTR、SAR、RARE、SRN + +## 3. onnx 预测 + +以检测模型为例,使用 ONNX 预测可执行如下命令: + +``` +python3.7 ../../tools/infer/predict_det.py --use_gpu=False --use_onnx=True \ +--det_model_dir=./inference/det_mobile_onnx/model.onnx \ +--image_dir=../../doc/imgs/1.jpg +``` + +执行命令后在终端会打印出预测的检测框坐标,并在 `./inference_results/` 下保存可视化结果。 + +``` +root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296], [379, 294], [387, 669], [353, 671]]] +The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289 +The visualized image saved in ./inference_results/det_res_1.jpg +``` + +* 注意:ONNX暂时不支持变长预测,需要将输入resize到固定输入,预测结果可能与直接使用Paddle预测有细微不同。 diff --git a/deploy/pdserving/ocr_cpp_client.py b/deploy/pdserving/ocr_cpp_client.py new file mode 100755 index 0000000000000000000000000000000000000000..2baa7565ac78b9551c788c7b36457bce38828eb5 --- /dev/null +++ b/deploy/pdserving/ocr_cpp_client.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import Client +import sys +import numpy as np +import base64 +import os +import cv2 +from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor +from paddle_serving_app.reader import Div, Normalize, Transpose +from ocr_reader import OCRReader + +client = Client() +# TODO:load_client need to load more than one client model. +# this need to figure out some details. 
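+# Usage (as invoked from test_tipc/test_serving.sh):
+#     python ocr_cpp_client.py ppocr_det_mobile_2.0_client/ ppocr_rec_mobile_2.0_client/
+# i.e. sys.argv[1:] carries the det/rec client config dirs passed to load_client_config.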
+client.load_client_config(sys.argv[1:])
+client.connect(["127.0.0.1:9293"])
+
+test_img_dir = "test_img/"
+
+ocr_reader = OCRReader(char_dict_path="../../ppocr/utils/ppocr_keys_v1.txt")
+
+
+def cv2_to_base64(image):
+    return base64.b64encode(image).decode('utf8')
+
+
+for img_file in os.listdir(test_img_dir):
+    with open(os.path.join(test_img_dir, img_file), 'rb') as file:
+        image_data = file.read()
+    image = cv2_to_base64(image_data)
+    res_list = []
+    fetch_map = client.predict(
+        feed={"x": image}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True)
+    print("fetch map:", fetch_map)
+    one_batch_res = ocr_reader.postprocess(fetch_map, with_score=True)
+    for res in one_batch_res:
+        res_list.append(res[0])
+    res = {"res": str(res_list)}
+    print(res)
diff --git a/deploy/pdserving/pipeline_http_client.py b/deploy/pdserving/pipeline_http_client.py
index 0befe2f6144d18e24fb3f72ed1d919fd8cd7d5a4..61d13178220118eaf53c51723a9ef65201373ffb 100644
--- a/deploy/pdserving/pipeline_http_client.py
+++ b/deploy/pdserving/pipeline_http_client.py
@@ -18,13 +18,19 @@ import json
 import base64
 import os
+import argparse
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()
+
 def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')
 url = "http://127.0.0.1:9998/ocr/prediction"
-test_img_dir = "../../doc/imgs/"
+test_img_dir = args.image_dir
+
 for idx, img_file in enumerate(os.listdir(test_img_dir)):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data1 = file.read()
@@ -36,5 +42,4 @@ for idx, img_file in enumerate(os.listdir(test_img_dir)):
     r = requests.post(url=url, data=json.dumps(data))
     print(r.json())
-test_img_dir = "../../doc/imgs/"
 print("==> total number of test imgs: ", len(os.listdir(test_img_dir)))
diff --git a/deploy/pdserving/pipeline_rpc_client.py b/deploy/pdserving/pipeline_rpc_client.py
index 79f898faf37f946cdbf4a87d4d62c8b1f9d5c93b..4dcb1ad5f533729e344809e99951b59fb2908537 100644
--- a/deploy/pdserving/pipeline_rpc_client.py
+++ b/deploy/pdserving/pipeline_rpc_client.py
@@ -30,7 +30,12 @@ def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')
-test_img_dir = "imgs/"
+import argparse
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()
+test_img_dir = args.image_dir
+
 for img_file in os.listdir(test_img_dir):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data = file.read()
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index af883de86c798babe6ca1616710c0e13546e1045..1ffb8f340305249d5f2c8aeb2fef37dc8f590eff 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -68,4 +68,5 @@ PaddleOCR基于动态图开源的文本识别算法列表:
 |NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |
 |SAR|Resnet31| 87.2% | rec_r31_sar | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
 |SEED| Aster_Resnet | 85.2% | rec_resnet_stn_bilstm_att | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar)|
+
 PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。
diff --git a/doc/doc_ch/detection.md b/doc/doc_ch/detection.md
index
166e15fb03b604b63f47b95304ac06c1f4ae9dd2..8db64664f6ff560450a5ee99d708313c931989fc 100644 --- a/doc/doc_ch/detection.md +++ b/doc/doc_ch/detection.md @@ -98,7 +98,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/ -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained # 多机多卡训练,通过 --ips 参数设置使用的机器IP地址,通过 --gpus 参数设置使用的GPU ID -python3 -m paddle.distributed.launch --ips="10.21.226.181,10.21.226.133" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \ +python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \ -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained ``` diff --git a/doc/doc_en/detection_en.md b/doc/doc_en/detection_en.md index 68c5691cda9f78c7f805c0f0ecdf82f00534de72..948733e16cebea2ce819367a863948434ece5ae5 100644 --- a/doc/doc_en/detection_en.md +++ b/doc/doc_en/detection_en.md @@ -101,7 +101,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs # multi-Node, multi-GPU training # Set the IPs of your nodes used by the '--ips' parameter. Set the GPU ID used by the '--gpus' parameter. -python3 -m paddle.distributed.launch --ips="10.21.226.181,10.21.226.133" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \ +python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \ -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained ``` **Note:** For multi-Node multi-GPU training, you need to replace the `ips` value in the preceding command with the address of your machine, and the machines must be able to ping each other. The command for viewing the IP address of the machine is `ifconfig`. 
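+
+For example, a quick reachability check before launching (a sketch; the IPs are placeholders):
+```
+# run on each node: confirm the peer machines respond
+ping -c 1 xx.xx.xx.xx
+# confirm this node's own IP is the one listed in --ips
+ifconfig
+```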
diff --git a/requirements.txt b/requirements.txt index 6758a59bad20f6ffa271766fc4d0df5ebf4c7a4b..0c87c5c95069a2699f5a3a50320c883c6118ffe7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ shapely -scikit-image==0.18.3 +scikit-image imgaug==0.4.0 pyclipper lmdb diff --git a/test_tipc/configs/amp_ppocr_det_mobile_params.txt b/test_tipc/configs/amp_ppocr_det_mobile_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c9978753e663c7b466a55d70657f515c12df18b --- /dev/null +++ b/test_tipc/configs/amp_ppocr_det_mobile_params.txt @@ -0,0 +1,110 @@ +===========================train_params=========================== +model_name:ocr_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:amp +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train|pact_train|fpgm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +===========================cpp_infer_params=========================== +use_opencv:True +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_quant:False +inference:./deploy/cpp_infer/build/ppocr det +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +===========================serving_params=========================== +model_name:ocr_det +python:python3.7 +trans_model:-m paddle_serving_client.convert +--dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/ +--model_filename:inference.pdmodel +--params_filename:inference.pdiparams +--serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/ +--serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/ +serving_dir:./deploy/pdserving +web_service:web_service_det.py 
--config=config.yml --opt op.det.concurrency=1 +op.det.local_service_conf.devices:null|0 +op.det.local_service_conf.use_mkldnn:True|False +op.det.local_service_conf.thread_num:1|6 +op.det.local_service_conf.use_trt:False|True +op.det.local_service_conf.precision:fp32|fp16|int8 +pipline:pipeline_http_client.py --image_dir=../../doc/imgs +===========================kl_quant_params=========================== +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null +===========================lite_params=========================== +inference:./ocr_db_crnn det +infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb +--cpu_threads:1|4 +--batch_size:1 +--power_mode:LITE_POWER_HIGH|LITE_POWER_LOW +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/fleet_ppocr_det_mobile_params.txt b/test_tipc/configs/fleet_ppocr_det_mobile_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..99278845e43f1a56239b508e49c1670f5bc77922 --- /dev/null +++ b/test_tipc/configs/fleet_ppocr_det_mobile_params.txt @@ -0,0 +1,110 @@ +===========================train_params=========================== +model_name:ocr_det +python:python3.7 +gpu_list:xx.xx.xx.xx,xx.xx.xx.xx;0,1 +Global.use_gpu:True|True +Global.auto_cast:null|amp +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train|pact_train|fpgm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 
+--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +===========================cpp_infer_params=========================== +use_opencv:True +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_quant:False +inference:./deploy/cpp_infer/build/ppocr det +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +===========================serving_params=========================== +model_name:ocr_det +python:python3.7 +trans_model:-m paddle_serving_client.convert +--dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/ +--model_filename:inference.pdmodel +--params_filename:inference.pdiparams +--serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/ +--serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/ +serving_dir:./deploy/pdserving +web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1 +op.det.local_service_conf.devices:null|0 +op.det.local_service_conf.use_mkldnn:True|False +op.det.local_service_conf.thread_num:1|6 +op.det.local_service_conf.use_trt:False|True +op.det.local_service_conf.precision:fp32|fp16|int8 +pipline:pipeline_http_client.py --image_dir=../../doc/imgs +===========================kl_quant_params=========================== +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null +===========================lite_params=========================== +inference:./ocr_db_crnn det +infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb +--cpu_threads:1|4 +--batch_size:1 +--power_mode:LITE_POWER_HIGH|LITE_POWER_LOW +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/jeston_ppocr_det_mobile_params.txt b/test_tipc/configs/jeston_ppocr_det_mobile_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..7e2bba2462bc4517f13da35a6a3bb527275bb85f --- /dev/null +++ b/test_tipc/configs/jeston_ppocr_det_mobile_params.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ocr_det +python:python +gpu_list:null +Global.use_gpu:null +Global.auto_cast:null +Global.epoch_num:null +Global.save_model_dir:null +Train.loader.batch_size_per_card:null +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:null +null:null +## +trainer:null +norm_train:null +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model:null +norm_export:null +quant_export:null +fpgm_export:null 
+distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/ch_ppocr_mobile_v2.0_det_infer +infer_export:null +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp16|fp32 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/mac_ppocr_det_mobile_params.txt b/test_tipc/configs/mac_ppocr_det_mobile_params.txt index b0415c9a1f79837866812d1e545ad8fd09fb681d..0200e2954948e5aeb719aa43e8a88d70c2af506d 100644 --- a/test_tipc/configs/mac_ppocr_det_mobile_params.txt +++ b/test_tipc/configs/mac_ppocr_det_mobile_params.txt @@ -80,7 +80,8 @@ op.det.local_service_conf.use_mkldnn:True|False op.det.local_service_conf.thread_num:1|6 op.det.local_service_conf.use_trt:False|True op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs +pipline:pipeline_http_client.py|pipeline_rpc_client.py +--image_dir=../../doc/imgs ===========================kl_quant_params=========================== infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o diff --git a/test_tipc/configs/ppocr_det_mobile_params.txt b/test_tipc/configs/ppocr_det_mobile_params.txt index 3442627613b20b687566ce9e84d7404c4a836e83..0ccf77d1f91bde4e0ef191d11debb5818dcd49e0 100644 --- a/test_tipc/configs/ppocr_det_mobile_params.txt +++ b/test_tipc/configs/ppocr_det_mobile_params.txt @@ -1,9 +1,9 @@ ===========================train_params=========================== model_name:ocr_det python:python3.7 -gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1 -Global.use_gpu:True|True|True -Global.auto_cast:fp32|amp +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 @@ -80,7 +80,8 @@ op.det.local_service_conf.use_mkldnn:True|False op.det.local_service_conf.thread_num:1|6 op.det.local_service_conf.use_trt:False|True op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs +pipline:pipeline_rpc_client.py|pipeline_http_client.py +--image_dir:../../doc/imgs ===========================kl_quant_params=========================== infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o @@ -108,3 +109,15 @@ infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2 --config_dir:./config.txt --rec_dict_dir:./ppocr_keys_v1.txt --benchmark:True +===========================paddle2onnx_params=========================== +2onnx: paddle2onnx +--model_dir:./inference/ch_ppocr_mobile_v2.0_det_infer/ +--model_filename:inference.pdmodel +--params_filename:inference.pdiparams +--save_file:./inference/det_mobile_onnx/model.onnx +--opset_version:10 +--enable_onnx_checker:True +inference:tools/infer/predict_det.py +--use_gpu:False +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ \ No newline at end of file diff --git a/test_tipc/configs/ppocr_det_server_params.txt b/test_tipc/configs/ppocr_det_server_params.txt index bba4ef44f769ed16671ead55a0eba6ee986aaaaa..f688fffac8824b0608ea6b6cec0683c70feb659e 100644 --- 
a/test_tipc/configs/ppocr_det_server_params.txt +++ b/test_tipc/configs/ppocr_det_server_params.txt @@ -80,4 +80,5 @@ op.det.local_service_conf.use_mkldnn:True|False op.det.local_service_conf.thread_num:1|6 op.det.local_service_conf.use_trt:False|True op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs +pipline:pipeline_http_client.py|pipeline_rpc_client.py +--image_dir=../../doc/imgs diff --git a/test_tipc/configs/ppocr_rec_mobile_params.txt b/test_tipc/configs/ppocr_rec_mobile_params.txt index f3f3a54e14e042693d28559e487852a079f77bdd..3177d19cf6cf7759e13e5597492f3bd7fcea78ff 100644 --- a/test_tipc/configs/ppocr_rec_mobile_params.txt +++ b/test_tipc/configs/ppocr_rec_mobile_params.txt @@ -80,4 +80,5 @@ op.rec.local_service_conf.use_mkldnn:True|False op.rec.local_service_conf.thread_num:1|6 op.rec.local_service_conf.use_trt:False|True op.rec.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en +pipline:pipeline_http_client.py|pipeline_rpc_client.py +--image_dir=../../doc/imgs_words_en diff --git a/test_tipc/configs/ppocr_rec_server_params.txt b/test_tipc/configs/ppocr_rec_server_params.txt index 77961e8e651e0d770dae64860cc129aa2d50dcf2..3bc1dcce2c7103f2180c19551e8f5379e5524476 100644 --- a/test_tipc/configs/ppocr_rec_server_params.txt +++ b/test_tipc/configs/ppocr_rec_server_params.txt @@ -80,4 +80,5 @@ op.rec.local_service_conf.use_mkldnn:True|False op.rec.local_service_conf.thread_num:1|6 op.rec.local_service_conf.use_trt:False|True op.rec.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en +pipline:pipeline_http_client.py|pipeline_rpc_client.py +--image_dir=../../doc/imgs_words_en diff --git a/test_tipc/configs/win_ppocr_det_mobile_params.txt b/test_tipc/configs/win_ppocr_det_mobile_params.txt index 5a532ceb307fe87174dc6b46fbde236405f59ff5..0f4faee4b32925b4d0780ece6838c176238c7000 100644 --- a/test_tipc/configs/win_ppocr_det_mobile_params.txt +++ b/test_tipc/configs/win_ppocr_det_mobile_params.txt @@ -80,7 +80,8 @@ op.det.local_service_conf.use_mkldnn:True|False op.det.local_service_conf.thread_num:1|6 op.det.local_service_conf.use_trt:False|True op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py --image_dir=../../doc/imgs +pipline:pipeline_http_client.py|pipeline_rpc_client.py +--image_dir=../../doc/imgs ===========================kl_quant_params=========================== infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o diff --git a/test_tipc/docs/jeston_test_train_inference_python.md b/test_tipc/docs/jeston_test_train_inference_python.md new file mode 100644 index 0000000000000000000000000000000000000000..3c0524df21df84fefdbbfd8e691766c9c542dff5 --- /dev/null +++ b/test_tipc/docs/jeston_test_train_inference_python.md @@ -0,0 +1,118 @@ +# Jeston端基础训练预测功能测试 + +Jeston端基础训练预测功能测试的主程序为`test_train_inference_python.sh`,由于Jeston端CPU较差,Jeston只需要测试TIPC关于GPU和TensorRT预测推理的部分即可。 + +## 1. 测试结论汇总 + +- 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型`和`量化模型`,这两类模型对应的预测功能汇总如下: + +| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | +| ---- | ---- | ---- | :----: | :----: | :----: | +| 正常模型 | GPU | 1/6 | fp32/fp16 | - | - | +| 量化模型 | GPU | 1/6 | int8 | - | - | + + +## 2. 
测试流程
+
+环境准备只需要Python环境即可,安装PaddlePaddle等依赖参考下述文档。
+
+### 2.1 安装依赖
+- 安装PaddlePaddle >= 2.0
+- 安装PaddleOCR依赖
+  ```
+  pip install -r ../requirements.txt
+  ```
+- 安装autolog(规范化日志输出工具)
+  ```
+  git clone https://github.com/LDOUBLEV/AutoLog
+  cd AutoLog
+  pip install -r requirements.txt
+  python setup.py bdist_wheel
+  pip install ./dist/auto_log-1.0.0-py3-none-any.whl
+  cd ../
+  ```
+- 安装PaddleSlim (可选)
+  ```
+  # 如果要测试量化、裁剪等功能,需要安装PaddleSlim
+  pip install paddleslim
+  ```
+
+
+### 2.2 功能测试
+
+先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。
+
+`test_train_inference_python.sh`包含5种[运行模式](./test_train_inference_python.md),在Jeston端,仅需要测试预测推理的模式即可:
+
+```
+- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
+```
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/jeston_ppocr_det_mobile_params.txt 'whole_infer'
+# 用法1:
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/jeston_ppocr_det_mobile_params.txt 'whole_infer'
+# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/jeston_ppocr_det_mobile_params.txt 'whole_infer' '1'
+```
+
+运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如`whole_infer`模式下,会运行inference的链条,因此,在`test_tipc/output`文件夹有以下文件:
+```
+test_tipc/output/
+|- results_python.log    # 运行指令状态的日志
+|- python_infer_gpu_usetensorrt_True_precision_fp32_batchsize_1.log  # GPU上开启TensorRT,batch_size=1条件下的预测运行日志
+......
+```
+
+其中`results_python.log`中包含了每条指令的运行状态,如果运行成功会输出:
+```
+Run successfully with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !
+Run successfully with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=True --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log 2>&1 !
+Run successfully with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=True --precision=fp16 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log 2>&1 !
+```
+如果运行失败,会输出:
+```
+Run failed with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !
+Run failed with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=True --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log 2>&1 !
+Run failed with command - python tools/infer/predict_det.py --use_gpu=True --use_tensorrt=True --precision=fp16 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/python_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log 2>&1 !
+```
+可以很方便地根据`results_python.log`中的内容判定哪一个指令运行错误。
+
+### 2.3 精度测试
+
+使用compare_results.py脚本比较模型预测的结果是否符合预期,主要步骤包括:
+- 提取日志中的预测坐标;
+- 从本地文件中提取保存好的坐标结果;
+- 比较上述两个结果是否符合精度预期,误差大于设置阈值时会报错。
+
+#### 使用方式
+运行命令:
+```shell
+python test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.txt --log_file=./test_tipc/output/python_*.log --atol=1e-3 --rtol=1e-3
+```
+
+参数介绍:
+- gt_file: 指向事先保存好的预测结果路径,支持*.txt结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/result/文件夹下
+- log_file: 指向运行test_tipc/test_train_inference_python.sh脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持python_infer_*.log格式传入
+- atol: 设置的绝对误差
+- rtol: 设置的相对误差
+
+#### 运行结果
+
+正常运行效果如下:
+```
+Assert allclose passed! The results of python_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log and ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are consistent!
+```
+
+出现不一致结果时的运行输出:
+```
+......
+Traceback (most recent call last):
+  File "test_tipc/compare_results.py", line 140, in <module>
+    format(filename, gt_filename))
+ValueError: The results of python_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log and the results of ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are inconsistent!
+```
+
+
+## 3. 更多教程
+本文档为功能测试用,更丰富的训练预测使用教程请参考:
+[模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)
+[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
diff --git a/test_tipc/docs/test_inference_cpp.md b/test_tipc/docs/test_inference_cpp.md
index 24655d96ba1acaadd489019ec260999c981107de..cd757a895bb957e498fda61cf52d2132d660ca8f 100644
--- a/test_tipc/docs/test_inference_cpp.md
+++ b/test_tipc/docs/test_inference_cpp.md
@@ -14,6 +14,8 @@ C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于
 | 量化模型 | CPU | 1/6 | - | int8 | 支持 |
 ## 2. 测试流程
+运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
+
 ### 2.1 功能测试
 先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```test_tipc/output```目录下生成`cpp_infer_*.log`后缀的日志文件。
@@ -26,6 +28,32 @@ bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params
 bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt '1'
 ```
+运行预测指令后,在`test_tipc/output`文件夹下自动会保存运行日志,包括以下文件:
+
+```shell
+test_tipc/output/
+|- results_cpp.log    # 运行指令状态的日志
+|- cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为1,测试batch_size=1条件下的预测运行日志
+|- cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log # CPU上不开启Mkldnn,线程数设置为6,测试batch_size=1条件下的预测运行日志
+|- cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log # GPU上不开启TensorRT,测试batch_size=1的fp32精度预测日志
+|- cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log # GPU上开启TensorRT,测试batch_size=1的fp16精度预测日志
+......
+```
+其中results_cpp.log中包含了每条指令的运行状态,如果运行成功会输出:
+
+```
+Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=False --enable_mkldnn=False --cpu_threads=6 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_cpu_usemkldnn_False_threads_6_precision_fp32_batchsize_1.log 2>&1 !
+Run successfully with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=False --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1 !
+......
+```
+如果运行失败,会输出:
+```
+Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp32 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp32_batchsize_1.log 2>&1 !
+Run failed with command - ./deploy/cpp_infer/build/ppocr det --use_gpu=True --use_tensorrt=True --precision=fp16 --det_model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --rec_batch_num=1 --image_dir=./inference/ch_det_data_50/all-sum-510/ --benchmark=True > ./test_tipc/output/cpp_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log 2>&1 !
+......
+```
+可以很方便地根据results_cpp.log中的内容判定哪一个指令运行错误。
+
 ### 2.2 精度测试
diff --git a/test_tipc/docs/test_lite.md b/test_tipc/docs/test_lite.md
index 021d54e5609349944fb9c0caff52fa9ed48ede8d..01ae0cb4b471f1219f88ffa9e2c11d50765233d3 100644
--- a/test_tipc/docs/test_lite.md
+++ b/test_tipc/docs/test_lite.md
@@ -20,6 +20,7 @@ Lite预测功能测试的主程序为`test_lite.sh`,可以测试基于Lite预
 ## 2. 测试流程
+运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
 ### 2.1 功能测试
diff --git a/test_tipc/docs/test_paddle2onnx.md b/test_tipc/docs/test_paddle2onnx.md
new file mode 100644
index 0000000000000000000000000000000000000000..5d784c5e93c3a93d00c256004de582dcbf357c45
--- /dev/null
+++ b/test_tipc/docs/test_paddle2onnx.md
@@ -0,0 +1,47 @@
+# Paddle2onnx预测功能测试
+
+Paddle2ONNX预测功能测试的主程序为`test_paddle2onnx.sh`,可以测试Paddle2ONNX的模型转化功能,并验证正确性。
+
+## 1. 测试结论汇总
+
+基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`,这两类模型对应的Paddle2ONNX预测功能汇总如下:
+
+| 模型类型 | device |
+| ---- | ---- |
+| 正常模型 | GPU |
+| 正常模型 | CPU |
+| 量化模型 | GPU |
+| 量化模型 | CPU |
+
+## 2. 测试流程
+### 2.1 功能测试
+先运行`prepare.sh`准备数据和模型,然后运行`test_paddle2onnx.sh`进行测试,最终在```test_tipc/output```目录下生成`paddle2onnx_infer_*.log`后缀的日志文件。
+
+```shell
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "paddle2onnx_infer"
+
+# 用法:
+bash test_tipc/test_paddle2onnx.sh ./test_tipc/configs/ppocr_det_mobile_params.txt
+```
+
+#### 运行结果
+
+各测试的运行情况会打印在 `test_tipc/output/results_paddle2onnx.log` 中:
+运行成功时会输出:
+
+```
+Run successfully with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
+Run successfully with command - python test_tipc/onnx_inference/predict_det.py --use_gpu=False --image_dir=./inference/ch_det_data_50/all-sum-510/ --det_model_dir=./inference/det_mobile_onnx/model.onnx 2>&1 !
+```
+
+运行失败时会输出:
+
+```
+Run failed with command - paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ --model_filename=inference.pdmodel --params_filename=inference.pdiparams --save_file=./inference/det_mobile_onnx/model.onnx --opset_version=10 --enable_onnx_checker=True!
+...
+```
+
+
+## 3.
更多教程 + +本文档为功能测试用,更详细的Paddle2onnx预测使用教程请参考:[Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) diff --git a/test_tipc/docs/test_serving.md b/test_tipc/docs/test_serving.md index fb0848bfb5e37e4b0af39fa9bb2b13b4046c9a50..f63d6c7107ce92807c53d81a22a582b09178a712 100644 --- a/test_tipc/docs/test_serving.md +++ b/test_tipc/docs/test_serving.md @@ -4,7 +4,7 @@ PaddleServing预测功能测试的主程序为`test_serving.sh`,可以测试 ## 1. 测试结论汇总 -基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`,这两类模型对应的C++预测功能汇总如下: +基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`,这两类模型对应的Serving预测功能汇总如下: | 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | | ---- | ---- | ---- | :----: | :----: | :----: | @@ -14,6 +14,8 @@ PaddleServing预测功能测试的主程序为`test_serving.sh`,可以测试 | 量化模型 | CPU | 1/6 | - | int8 | 支持 | ## 2. 测试流程 +运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。 + ### 2.1 功能测试 先运行`prepare.sh`准备数据和模型,然后运行`test_serving.sh`进行测试,最终在```test_tipc/output```目录下生成`serving_infer_*.log`后缀的日志文件。 diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index 5376a699dfc82b908a7fefe21fd81630c2f82dd4..9028e67d093112d23cc7c5d9da10d185f1db9b5b 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -32,7 +32,7 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho ## 2. 测试流程 -运行环境配置请参考[文档](./install.md)的内容配置tipc的运行环境。 +运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。 ### 2.1 安装依赖 - 安装PaddlePaddle >= 2.0 diff --git a/test_tipc/docs/win_test_train_inference_python.md b/test_tipc/docs/win_test_train_inference_python.md index e8395d00447770a8312b0ef9991b3f23587d0fa3..b1bb5470de36d14071ed86e29bebf29b104289dd 100644 --- a/test_tipc/docs/win_test_train_inference_python.md +++ b/test_tipc/docs/win_test_train_inference_python.md @@ -23,7 +23,7 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt ## 2. 
测试流程
-运行环境配置请参考[文档](./install.md)的内容配置tipc的运行环境。
+运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。
 另外,由于Windows上和linux的路径管理方式不同,可以在win上安装gitbash终端,在gitbash中执行指令的方式和在linux端执行指令方式相同,更方便tipc测试。gitbash[下载链接](https://git-scm.com/download/win)。
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index c9a38e01c93f9aac6cd06f4207f23f9c9f451c71..f3ad242538a9471af237c804eae343da06e2b9dd 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -201,3 +201,20 @@ if [ ${MODE} = "lite_infer" ];then
     tar -cf test_lite.tar ./test_lite && cp test_lite.tar ${current_dir} && cd ${current_dir}
 fi
+
+if [ ${MODE} = "paddle2onnx_infer" ];then
+    # prepare paddle2onnx env
+    python_name=$(func_parser_value "${lines[2]}")
+    ${python_name} -m pip install paddle2onnx
+    ${python_name} -m pip install onnxruntime==1.4.0
+    # wget model
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
+    # wget data
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
+    wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
+    cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && tar xf rec_inference.tar && cd ../
+
+fi
diff --git a/test_tipc/test_lite.sh b/test_tipc/test_lite.sh
index 832003ba302fe86995e20029cdb019e72d9ce162..1fd9d3c7186207922c436e7981622c707a56596f 100644
--- a/test_tipc/test_lite.sh
+++ b/test_tipc/test_lite.sh
@@ -3,7 +3,7 @@ source ./common_func.sh
 export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
 FILENAME=$1
-dataline=$(awk 'NR==101, NR==110{print}' $FILENAME)
+dataline=$(awk 'NR==102, NR==111{print}' $FILENAME)
 echo $dataline
 # parser params
 IFS=$'\n'
diff --git a/test_tipc/test_paddle2onnx.sh b/test_tipc/test_paddle2onnx.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5dc6e65ec81e6b8674877fc686c8b3650ce93a59
--- /dev/null
+++ b/test_tipc/test_paddle2onnx.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+source test_tipc/common_func.sh
+
+FILENAME=$1
+
+dataline=$(cat ${FILENAME})
+lines=(${dataline})
+# common params
+model_name=$(func_parser_value "${lines[1]}")
+python=$(func_parser_value "${lines[2]}")
+
+
+# parser params
+dataline=$(awk 'NR==111, NR==123{print}' $FILENAME)
+IFS=$'\n'
+lines=(${dataline})
+
+# parser paddle2onnx
+paddle2onnx_cmd=$(func_parser_value "${lines[1]}")
+infer_model_dir_key=$(func_parser_key "${lines[2]}")
+infer_model_dir_value=$(func_parser_value "${lines[2]}")
+model_filename_key=$(func_parser_key "${lines[3]}")
+model_filename_value=$(func_parser_value "${lines[3]}")
+params_filename_key=$(func_parser_key "${lines[4]}")
+params_filename_value=$(func_parser_value "${lines[4]}")
+save_file_key=$(func_parser_key "${lines[5]}")
+save_file_value=$(func_parser_value "${lines[5]}")
+opset_version_key=$(func_parser_key "${lines[6]}")
+opset_version_value=$(func_parser_value "${lines[6]}")
+enable_onnx_checker_key=$(func_parser_key "${lines[7]}")
+enable_onnx_checker_value=$(func_parser_value "${lines[7]}")
+# parser onnx inference
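+# (note: lines[1]..[7] above correspond to the entries of the paddle2onnx_params
+#  block in the config file: 2onnx, --model_dir, --model_filename, --params_filename,
+#  --save_file, --opset_version, --enable_onnx_checker; lines[8]..[11] below are the
+#  onnx inference entries)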
+inference_py=$(func_parser_value "${lines[8]}")
+use_gpu_key=$(func_parser_key "${lines[9]}")
+use_gpu_value=$(func_parser_value "${lines[9]}")
+det_model_key=$(func_parser_key "${lines[10]}")
+image_dir_key=$(func_parser_key "${lines[11]}")
+image_dir_value=$(func_parser_value "${lines[11]}")
+
+
+LOG_PATH="./test_tipc/output"
+mkdir -p ./test_tipc/output
+status_log="${LOG_PATH}/results_paddle2onnx.log"
+
+
+function func_paddle2onnx(){
+    IFS='|'
+    _script=$1
+
+    # paddle2onnx
+    _save_log_path="${LOG_PATH}/paddle2onnx_infer_cpu.log"
+    set_dirname=$(func_set_params "${infer_model_dir_key}" "${infer_model_dir_value}")
+    set_model_filename=$(func_set_params "${model_filename_key}" "${model_filename_value}")
+    set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
+    set_save_model=$(func_set_params "${save_file_key}" "${save_file_value}")
+    set_opset_version=$(func_set_params "${opset_version_key}" "${opset_version_value}")
+    set_enable_onnx_checker=$(func_set_params "${enable_onnx_checker_key}" "${enable_onnx_checker_value}")
+    trans_model_cmd="${paddle2onnx_cmd} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_save_model} ${set_opset_version} ${set_enable_onnx_checker}"
+    eval $trans_model_cmd
+    last_status=${PIPESTATUS[0]}
+    status_check $last_status "${trans_model_cmd}" "${status_log}"
+    # python inference
+    set_gpu=$(func_set_params "${use_gpu_key}" "${use_gpu_value}")
+    set_model_dir=$(func_set_params "${det_model_key}" "${save_file_value}")
+    set_img_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
+    infer_model_cmd="${python} ${inference_py} ${set_gpu} ${set_img_dir} ${set_model_dir} --use_onnx=True > ${_save_log_path} 2>&1 "
+    eval $infer_model_cmd
+    last_status=${PIPESTATUS[0]}
+    status_check $last_status "${infer_model_cmd}" "${status_log}"
+}
+
+
+echo "################### run test ###################"
+
+export Count=0
+IFS="|"
+func_paddle2onnx
\ No newline at end of file
diff --git a/test_tipc/test_serving.sh b/test_tipc/test_serving.sh
index be7b594c3848c423937c59336ce3bf686f8f228d..9b1e90ed6116f32e232657e30277a747a70904c7 100644
--- a/test_tipc/test_serving.sh
+++ b/test_tipc/test_serving.sh
@@ -2,7 +2,7 @@ source test_tipc/common_func.sh
 FILENAME=$1
-dataline=$(awk 'NR==67, NR==83{print}' $FILENAME)
+dataline=$(awk 'NR==67, NR==84{print}' $FILENAME)
 # parser params
 IFS=$'\n'
@@ -35,6 +35,8 @@ web_use_trt_list=$(func_parser_value "${lines[14]}")
 web_precision_key=$(func_parser_key "${lines[15]}")
 web_precision_list=$(func_parser_value "${lines[15]}")
 pipeline_py=$(func_parser_value "${lines[16]}")
+image_dir_key=$(func_parser_key "${lines[17]}")
+image_dir_value=$(func_parser_value "${lines[17]}")
 LOG_PATH="../../test_tipc/output"
 mkdir -p ./test_tipc/output
@@ -51,67 +53,98 @@ function func_serving(){
     set_params_filename=$(func_set_params "${params_filename_key}" "${params_filename_value}")
     set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}")
     set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}")
+    set_image_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
     trans_model_cmd="${python} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}"
     eval $trans_model_cmd
     cd ${serving_dir_value}
     echo $PWD
     unset https_proxy
     unset http_proxy
-    for use_gpu in ${web_use_gpu_list[*]}; do
-        echo ${ues_gpu}
-        if [ ${use_gpu} = "null" ]; then
-            for use_mkldnn in ${web_use_mkldnn_list[*]}; do
-                if [ ${use_mkldnn} = "False" ]; then
= "False" ]; then - continue - fi - for threads in ${web_cpu_threads_list[*]}; do - _save_log_path="${LOG_PATH}/server_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_1.log" - set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}") - web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &" - eval $web_service_cmd - sleep 2s - pipeline_cmd="${python} ${pipeline_py} > ${_save_log_path} 2>&1 " - eval $pipeline_cmd - last_status=${PIPESTATUS[0]} - eval "cat ${_save_log_path}" - status_check $last_status "${pipeline_cmd}" "${status_log}" - PID=$! - kill $PID - sleep 2s - ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9 - done - done - elif [ ${use_gpu} = "0" ]; then - for use_trt in ${web_use_trt_list[*]}; do - for precision in ${web_precision_list[*]}; do - if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then - continue - fi - if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then - continue - fi - if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then - continue - fi - _save_log_path="${LOG_PATH}/server_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_1.log" - set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}") - set_precision=$(func_set_params "${web_precision_key}" "${precision}") - web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & " + for python in ${python[*]}; do + if [ ${python} = "cpp"]; then + for use_gpu in ${web_use_gpu_list[*]}; do + if [ ${use_gpu} = "null" ]; then + web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293" eval $web_service_cmd sleep 2s - pipeline_cmd="${python} ${pipeline_py} > ${_save_log_path} 2>&1" + _save_log_path="${LOG_PATH}/server_infer_cpp_cpu_pipeline_usemkldnn_False_threads_4_batchsize_1.log" + pipeline_cmd="${python} ocr_cpp_client.py ppocr_det_mobile_2.0_client/ ppocr_rec_mobile_2.0_client/" eval $pipeline_cmd - last_status=${PIPESTATUS[0]} - eval "cat ${_save_log_path}" status_check $last_status "${pipeline_cmd}" "${status_log}" - PID=$! - kill $PID sleep 2s ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9 - done + else + web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293 --gpu_id=0" + eval $web_service_cmd + sleep 2s + _save_log_path="${LOG_PATH}/server_infer_cpp_cpu_pipeline_usemkldnn_False_threads_4_batchsize_1.log" + pipeline_cmd="${python} ocr_cpp_client.py ppocr_det_mobile_2.0_client/ ppocr_rec_mobile_2.0_client/" + eval $pipeline_cmd + status_check $last_status "${pipeline_cmd}" "${status_log}" + sleep 2s + ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9 + fi done else - echo "Does not support hardware other than CPU and GPU Currently!" 
+            # python serving
+            for use_gpu in ${web_use_gpu_list[*]}; do
+                echo ${use_gpu}
+                if [ ${use_gpu} = "null" ]; then
+                    for use_mkldnn in ${web_use_mkldnn_list[*]}; do
+                        if [ ${use_mkldnn} = "False" ]; then
+                            continue
+                        fi
+                        for threads in ${web_cpu_threads_list[*]}; do
+                            set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}")
+                            web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &"
+                            eval $web_service_cmd
+                            sleep 2s
+                            for pipeline in ${pipeline_py[*]}; do
+                                _save_log_path="${LOG_PATH}/server_infer_cpu_${pipeline%_client*}_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_1.log"
+                                pipeline_cmd="${python} ${pipeline} ${set_image_dir} > ${_save_log_path} 2>&1 "
+                                eval $pipeline_cmd
+                                last_status=${PIPESTATUS[0]}
+                                eval "cat ${_save_log_path}"
+                                status_check $last_status "${pipeline_cmd}" "${status_log}"
+                                sleep 2s
+                            done
+                            ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
+                        done
+                    done
+                elif [ ${use_gpu} = "0" ]; then
+                    for use_trt in ${web_use_trt_list[*]}; do
+                        for precision in ${web_precision_list[*]}; do
+                            if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
+                                continue
+                            fi
+                            if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
+                                continue
+                            fi
+                            if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
+                                continue
+                            fi
+                            set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}")
+                            set_precision=$(func_set_params "${web_precision_key}" "${precision}")
+                            web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & "
+                            eval $web_service_cmd
+
+                            sleep 2s
+                            for pipeline in ${pipeline_py[*]}; do
+                                _save_log_path="${LOG_PATH}/server_infer_gpu_${pipeline%_client*}_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
+                                pipeline_cmd="${python} ${pipeline} ${set_image_dir} > ${_save_log_path} 2>&1"
+                                eval $pipeline_cmd
+                                last_status=${PIPESTATUS[0]}
+                                eval "cat ${_save_log_path}"
+                                status_check $last_status "${pipeline_cmd}" "${status_log}"
+                                sleep 2s
+                            done
+                            ps ux | grep -E 'web_service|pipeline' | awk '{print $2}' | xargs kill -s 9
+                        done
+                    done
+                else
+                    echo "Does not support hardware other than CPU and GPU Currently!"
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index a9be33ea45cc56b5478e9135451849e25888f8d1..0d4e182b2832f65cec08beffe99055603b90982b 100644
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -90,7 +90,7 @@ infer_value1=$(func_parser_value "${lines[50]}")
 
 # parser klquant_infer
 if [ ${MODE} = "klquant_whole_infer" ]; then
-    dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
+    dataline=$(awk 'NR==85, NR==101{print}' $FILENAME)
     lines=(${dataline})
     # parser inference model
     infer_model_dir_list=$(func_parser_value "${lines[1]}")
@@ -316,7 +316,7 @@ else
         elif [ ${#ips} -le 26 ];then  # train with multi-gpu
             cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
         else     # train with multi-machine
-            cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${set_use_gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
+            cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
         fi
     # run train
     eval "unset CUDA_VISIBLE_DEVICES"
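A note on the klquant_whole_infer fix above: awk's 'NR==85, NR==101' is a range
pattern, and the comma is required; 'NR==85 NR==101' does not select the
intended line range. For readers who do not use awk, a rough Python equivalent
of the extraction (a hypothetical helper, not part of the test scripts):

    def extract_lines(path, start=85, end=101):
        # Same effect as: awk 'NR==85, NR==101{print}' path
        with open(path, "r", encoding="utf-8") as f:
            return [ln.rstrip("\n") for i, ln in enumerate(f, 1) if start <= i <= end]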
diff --git a/tools/infer/predict_cls.py b/tools/infer/predict_cls.py
index 1c68494861e60b4aaef541a4e247071944cf420c..a25cac2600e67667badc76c648c1fcda12981a0f 100755
--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -47,6 +47,7 @@ class TextClassifier(object):
         self.postprocess_op = build_post_process(postprocess_params)
         self.predictor, self.input_tensor, self.output_tensors, _ = \
             utility.create_predictor(args, 'cls', logger)
+        self.use_onnx = args.use_onnx
 
     def resize_norm_img(self, img):
         imgC, imgH, imgW = self.cls_image_shape
@@ -100,10 +101,16 @@ class TextClassifier(object):
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
-            self.input_tensor.copy_from_cpu(norm_img_batch)
-            self.predictor.run()
-            prob_out = self.output_tensors[0].copy_to_cpu()
-            self.predictor.try_shrink_memory()
+            if self.use_onnx:
+                input_dict = {}
+                input_dict[self.input_tensor.name] = norm_img_batch
+                outputs = self.predictor.run(self.output_tensors, input_dict)
+                prob_out = outputs[0]
+            else:
+                self.input_tensor.copy_from_cpu(norm_img_batch)
+                self.predictor.run()
+                prob_out = self.output_tensors[0].copy_to_cpu()
+                self.predictor.try_shrink_memory()
             cls_result = self.postprocess_op(prob_out)
             elapse += time.time() - starttime
             for rno in range(len(cls_result)):
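The use_onnx branch added above drives an onnxruntime.InferenceSession, whose
run(output_names, input_feed) call already returns numpy arrays, so no
copy_to_cpu step is needed. A minimal standalone sketch of the same call
pattern (the model path and input shape here are illustrative assumptions):

    import numpy as np
    import onnxruntime as ort

    sess = ort.InferenceSession("cls.onnx")              # hypothetical exported model
    input_name = sess.get_inputs()[0].name
    batch = np.zeros((1, 3, 48, 192), dtype=np.float32)  # assumed NCHW batch
    prob_out = sess.run(None, {input_name: batch})[0]    # None fetches all outputs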
diff --git a/tools/infer/predict_det.py b/tools/infer/predict_det.py
index b24ad2bbb504caf1f262b4e47625348ce32d6fce..5dfe8d648f06f6382e8e101a6002f7f1b7441323 100755
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -38,6 +38,7 @@ class TextDetector(object):
     def __init__(self, args):
         self.args = args
         self.det_algorithm = args.det_algorithm
+        self.use_onnx = args.use_onnx
         pre_process_list = [{
             'DetResizeForTest': {
                 'limit_side_len': args.det_limit_side_len,
@@ -100,7 +101,12 @@
         else:
             logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
             sys.exit(0)
-
+        if self.use_onnx:
+            pre_process_list[0] = {
+                'DetResizeForTest': {
+                    'image_shape': [640, 640]
+                }
+            }
         self.preprocess_op = create_operators(pre_process_list)
         self.postprocess_op = build_post_process(postprocess_params)
         self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
@@ -198,15 +204,19 @@
         if self.args.benchmark:
             self.autolog.times.stamp()
-
-        self.input_tensor.copy_from_cpu(img)
-        self.predictor.run()
-        outputs = []
-        for output_tensor in self.output_tensors:
-            output = output_tensor.copy_to_cpu()
-            outputs.append(output)
-        if self.args.benchmark:
-            self.autolog.times.stamp()
+        if self.use_onnx:
+            input_dict = {}
+            input_dict[self.input_tensor.name] = img
+            outputs = self.predictor.run(self.output_tensors, input_dict)
+        else:
+            self.input_tensor.copy_from_cpu(img)
+            self.predictor.run()
+            outputs = []
+            for output_tensor in self.output_tensors:
+                output = output_tensor.copy_to_cpu()
+                outputs.append(output)
+            if self.args.benchmark:
+                self.autolog.times.stamp()
         preds = {}
         if self.det_algorithm == "EAST":
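The detector change pins DetResizeForTest to image_shape [640, 640] on the ONNX
path, since the exported detection graph is assumed to take a fixed-size input.
A rough standalone sketch of such fixed-size preprocessing (the mean/std values
follow PaddleOCR's usual detection normalization and should be treated as
assumptions here):

    import cv2
    import numpy as np

    def det_preprocess_640(img):
        # Fixed 640x640 resize for a statically shaped ONNX detection model.
        img = cv2.resize(img, (640, 640)).astype("float32") / 255.0
        mean = np.array([0.485, 0.456, 0.406], dtype="float32")
        std = np.array([0.229, 0.224, 0.225], dtype="float32")
        img = (img - mean) / std
        return img.transpose(2, 0, 1)[None]  # HWC -> NCHW with batch dim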
diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py
index 5029d6059346a00062418d8d1b6cb029b0110643..08b87f36b0670e98e54c3f38c9328fe9462a6d0f 100755
--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -38,6 +38,7 @@ class TextE2E(object):
     def __init__(self, args):
         self.args = args
         self.e2e_algorithm = args.e2e_algorithm
+        self.use_onnx = args.use_onnx
         pre_process_list = [{
             'E2EResizeForTest': {}
         }, {
@@ -106,21 +107,31 @@
         img = img.copy()
         starttime = time.time()
-        self.input_tensor.copy_from_cpu(img)
-        self.predictor.run()
-        outputs = []
-        for output_tensor in self.output_tensors:
-            output = output_tensor.copy_to_cpu()
-            outputs.append(output)
-
-        preds = {}
-        if self.e2e_algorithm == 'PGNet':
+        if self.use_onnx:
+            input_dict = {}
+            input_dict[self.input_tensor.name] = img
+            outputs = self.predictor.run(self.output_tensors, input_dict)
+            preds = {}
             preds['f_border'] = outputs[0]
             preds['f_char'] = outputs[1]
             preds['f_direction'] = outputs[2]
             preds['f_score'] = outputs[3]
         else:
-            raise NotImplementedError
+            self.input_tensor.copy_from_cpu(img)
+            self.predictor.run()
+            outputs = []
+            for output_tensor in self.output_tensors:
+                output = output_tensor.copy_to_cpu()
+                outputs.append(output)
+
+            preds = {}
+            if self.e2e_algorithm == 'PGNet':
+                preds['f_border'] = outputs[0]
+                preds['f_char'] = outputs[1]
+                preds['f_direction'] = outputs[2]
+                preds['f_score'] = outputs[3]
+            else:
+                raise NotImplementedError
         post_result = self.postprocess_op(preds, shape_list)
         points, strs = post_result['points'], post_result['texts']
         dt_boxes = self.filter_tag_det_res_only_clip(points, ori_im.shape)
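The ONNX branch in predict_e2e.py indexes outputs[0..3] as f_border, f_char,
f_direction and f_score, i.e. it relies on the output order of the exported
PGNet graph. If that order is ever uncertain, mapping outputs by name is a
safer pattern; a small sketch under that assumption (tensor names depend on
how the model was exported):

    # sess is an onnxruntime.InferenceSession, img the preprocessed batch.
    outputs = sess.run(None, {sess.get_inputs()[0].name: img})
    by_name = {meta.name: arr for meta, arr in zip(sess.get_outputs(), outputs)}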
"nearest_interp_v2_28.tmp_0": [1, 64, 400, 400], - "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400] - } - opt_pact_shape = { - "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160], - "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160], - "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160] - } - min_input_shape.update(min_pact_shape) - max_input_shape.update(max_pact_shape) - opt_input_shape.update(opt_pact_shape) - elif mode == "rec": - min_input_shape = {"x": [1, 3, 32, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]} - elif mode == "cls": - min_input_shape = {"x": [1, 3, 48, 10]} - max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]} - opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]} + model_file_path = model_dir + "/inference.pdmodel" + params_file_path = model_dir + "/inference.pdiparams" + if not os.path.exists(model_file_path): + raise ValueError("not find model file path {}".format( + model_file_path)) + if not os.path.exists(params_file_path): + raise ValueError("not find params file path {}".format( + params_file_path)) + + config = inference.Config(model_file_path, params_file_path) + + if hasattr(args, 'precision'): + if args.precision == "fp16" and args.use_tensorrt: + precision = inference.PrecisionType.Half + elif args.precision == "int8": + precision = inference.PrecisionType.Int8 + else: + precision = inference.PrecisionType.Float32 else: - min_input_shape = {"x": [1, 3, 10, 10]} - max_input_shape = {"x": [1, 3, 1000, 1000]} - opt_input_shape = {"x": [1, 3, 500, 500]} - config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, - opt_input_shape) + precision = inference.PrecisionType.Float32 + + if args.use_gpu: + gpu_id = get_infer_gpuid() + if gpu_id is None: + logger.warning( + "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jeston." 
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 41a3c0f14b6378751a367a3709ad7943ee981a4e..58170e393cdc9d8441408a89c84aa6f88d683db3 100755
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -121,6 +121,7 @@
     parser.add_argument("--save_log_path", type=str, default="./log_output/")
     parser.add_argument("--show_log", type=str2bool, default=True)
+    parser.add_argument("--use_onnx", type=str2bool, default=False)
     return parser
 
 
@@ -144,152 +145,163 @@ def create_predictor(args, mode, logger):
     if model_dir is None:
         logger.info("not find {} model file path {}".format(mode, model_dir))
         sys.exit(0)
-    model_file_path = model_dir + "/inference.pdmodel"
-    params_file_path = model_dir + "/inference.pdiparams"
-    if not os.path.exists(model_file_path):
-        raise ValueError("not find model file path {}".format(model_file_path))
-    if not os.path.exists(params_file_path):
-        raise ValueError("not find params file path {}".format(
-            params_file_path))
-
-    config = inference.Config(model_file_path, params_file_path)
-
-    if hasattr(args, 'precision'):
-        if args.precision == "fp16" and args.use_tensorrt:
-            precision = inference.PrecisionType.Half
-        elif args.precision == "int8":
-            precision = inference.PrecisionType.Int8
-        else:
-            precision = inference.PrecisionType.Float32
+    if args.use_onnx:
+        import onnxruntime as ort
+        model_file_path = model_dir
+        if not os.path.exists(model_file_path):
+            raise ValueError("not find model file path {}".format(
+                model_file_path))
+        sess = ort.InferenceSession(model_file_path)
+        return sess, sess.get_inputs()[0], None, None
+
     else:
-        precision = inference.PrecisionType.Float32
-
-    if args.use_gpu:
-        gpu_id = get_infer_gpuid()
-        if gpu_id is None:
-            raise ValueError(
-                "Not found GPU in current device. Please check your device or set args.use_gpu as False"
-            )
-        config.enable_use_gpu(args.gpu_mem, 0)
-        if args.use_tensorrt:
-            config.enable_tensorrt_engine(
-                precision_mode=precision,
-                max_batch_size=args.max_batch_size,
-                min_subgraph_size=args.min_subgraph_size)
-            # skip the minmum trt subgraph
-            if mode == "det":
-                min_input_shape = {
-                    "x": [1, 3, 50, 50],
-                    "conv2d_92.tmp_0": [1, 120, 20, 20],
-                    "conv2d_91.tmp_0": [1, 24, 10, 10],
-                    "conv2d_59.tmp_0": [1, 96, 20, 20],
-                    "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
-                    "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
-                    "conv2d_124.tmp_0": [1, 256, 20, 20],
-                    "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
-                    "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
-                    "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
-                    "elementwise_add_7": [1, 56, 2, 2],
-                    "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
-                }
-                max_input_shape = {
-                    "x": [1, 3, 2000, 2000],
-                    "conv2d_92.tmp_0": [1, 120, 400, 400],
-                    "conv2d_91.tmp_0": [1, 24, 200, 200],
-                    "conv2d_59.tmp_0": [1, 96, 400, 400],
-                    "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
-                    "conv2d_124.tmp_0": [1, 256, 400, 400],
-                    "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
-                    "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
-                    "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
-                    "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
-                    "elementwise_add_7": [1, 56, 400, 400],
-                    "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
-                }
-                opt_input_shape = {
-                    "x": [1, 3, 640, 640],
-                    "conv2d_92.tmp_0": [1, 120, 160, 160],
-                    "conv2d_91.tmp_0": [1, 24, 80, 80],
-                    "conv2d_59.tmp_0": [1, 96, 160, 160],
-                    "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
-                    "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
-                    "conv2d_124.tmp_0": [1, 256, 160, 160],
-                    "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
-                    "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
-                    "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
-                    "elementwise_add_7": [1, 56, 40, 40],
-                    "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
-                }
-                min_pact_shape = {
-                    "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
-                    "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
-                    "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
-                    "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
-                }
-                max_pact_shape = {
-                    "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
-                    "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
-                    "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
-                    "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
-                }
-                opt_pact_shape = {
-                    "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
-                    "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
-                    "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
-                    "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
-                }
-                min_input_shape.update(min_pact_shape)
-                max_input_shape.update(max_pact_shape)
-                opt_input_shape.update(opt_pact_shape)
-            elif mode == "rec":
-                min_input_shape = {"x": [1, 3, 32, 10]}
-                max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
-                opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
-            elif mode == "cls":
-                min_input_shape = {"x": [1, 3, 48, 10]}
-                max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
-                opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
+        model_file_path = model_dir + "/inference.pdmodel"
+        params_file_path = model_dir + "/inference.pdiparams"
+        if not os.path.exists(model_file_path):
+            raise ValueError("not find model file path {}".format(
+                model_file_path))
+        if not os.path.exists(params_file_path):
+            raise ValueError("not find params file path {}".format(
+                params_file_path))
+
+        config = inference.Config(model_file_path, params_file_path)
+
+        if hasattr(args, 'precision'):
+            if args.precision == "fp16" and args.use_tensorrt:
+                precision = inference.PrecisionType.Half
+            elif args.precision == "int8":
+                precision = inference.PrecisionType.Int8
+            else:
+                precision = inference.PrecisionType.Float32
         else:
-                min_input_shape = {"x": [1, 3, 10, 10]}
-                max_input_shape = {"x": [1, 3, 1000, 1000]}
-                opt_input_shape = {"x": [1, 3, 500, 500]}
-            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
-                                              opt_input_shape)
+            precision = inference.PrecisionType.Float32
+
+        if args.use_gpu:
+            gpu_id = get_infer_gpuid()
+            if gpu_id is None:
+                logger.warning(
+                    "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if running on Jetson."
+                )
+            config.enable_use_gpu(args.gpu_mem, 0)
+            if args.use_tensorrt:
+                config.enable_tensorrt_engine(
+                    precision_mode=precision,
+                    max_batch_size=args.max_batch_size,
+                    min_subgraph_size=args.min_subgraph_size)
+                # skip the minimum trt subgraph
+                if mode == "det":
+                    min_input_shape = {
+                        "x": [1, 3, 50, 50],
+                        "conv2d_92.tmp_0": [1, 120, 20, 20],
+                        "conv2d_91.tmp_0": [1, 24, 10, 10],
+                        "conv2d_59.tmp_0": [1, 96, 20, 20],
+                        "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
+                        "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
+                        "conv2d_124.tmp_0": [1, 256, 20, 20],
+                        "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
+                        "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
+                        "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
+                        "elementwise_add_7": [1, 56, 2, 2],
+                        "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
+                    }
+                    max_input_shape = {
+                        "x": [1, 3, 2000, 2000],
+                        "conv2d_92.tmp_0": [1, 120, 400, 400],
+                        "conv2d_91.tmp_0": [1, 24, 200, 200],
+                        "conv2d_59.tmp_0": [1, 96, 400, 400],
+                        "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
+                        "conv2d_124.tmp_0": [1, 256, 400, 400],
+                        "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
+                        "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
+                        "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
+                        "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
+                        "elementwise_add_7": [1, 56, 400, 400],
+                        "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
+                    }
+                    opt_input_shape = {
+                        "x": [1, 3, 640, 640],
+                        "conv2d_92.tmp_0": [1, 120, 160, 160],
+                        "conv2d_91.tmp_0": [1, 24, 80, 80],
+                        "conv2d_59.tmp_0": [1, 96, 160, 160],
+                        "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
+                        "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
+                        "conv2d_124.tmp_0": [1, 256, 160, 160],
+                        "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
+                        "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
+                        "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
+                        "elementwise_add_7": [1, 56, 40, 40],
+                        "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
+                    }
+                    min_pact_shape = {
+                        "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
+                        "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
+                        "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
+                        "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
+                    }
+                    max_pact_shape = {
+                        "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
+                        "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
+                        "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
+                        "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
+                    }
+                    opt_pact_shape = {
+                        "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
+                        "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
+                        "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
+                        "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
+                    }
+                    min_input_shape.update(min_pact_shape)
+                    max_input_shape.update(max_pact_shape)
+                    opt_input_shape.update(opt_pact_shape)
+                elif mode == "rec":
+                    min_input_shape = {"x": [1, 3, 32, 10]}
+                    max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
+                    opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+                elif mode == "cls":
+                    min_input_shape = {"x": [1, 3, 48, 10]}
+                    max_input_shape = {"x": [args.rec_batch_num, 3, 48, 2000]}
+                    opt_input_shape = {"x": [args.rec_batch_num, 3, 48, 320]}
+                else:
+                    min_input_shape = {"x": [1, 3, 10, 10]}
+                    max_input_shape = {"x": [1, 3, 1000, 1000]}
+                    opt_input_shape = {"x": [1, 3, 500, 500]}
+                config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+                                                  opt_input_shape)
-    else:
-        config.disable_gpu()
-        if hasattr(args, "cpu_threads"):
-            config.set_cpu_math_library_num_threads(args.cpu_threads)
         else:
-            # default cpu threads as 10
-            config.set_cpu_math_library_num_threads(10)
-        if args.enable_mkldnn:
-            # cache 10 different shapes for mkldnn to avoid memory leak
-            config.set_mkldnn_cache_capacity(10)
-            config.enable_mkldnn()
-            if args.precision == "fp16":
-                config.enable_mkldnn_bfloat16()
-    # enable memory optim
-    config.enable_memory_optim()
-    config.disable_glog_info()
-
-    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
-    if mode == 'table':
-        config.delete_pass("fc_fuse_pass")  # not supported for table
-    config.switch_use_feed_fetch_ops(False)
-    config.switch_ir_optim(True)
-
-    # create predictor
-    predictor = inference.create_predictor(config)
-    input_names = predictor.get_input_names()
-    for name in input_names:
-        input_tensor = predictor.get_input_handle(name)
-    output_names = predictor.get_output_names()
-    output_tensors = []
-    for output_name in output_names:
-        output_tensor = predictor.get_output_handle(output_name)
-        output_tensors.append(output_tensor)
-    return predictor, input_tensor, output_tensors, config
+            config.disable_gpu()
+            if hasattr(args, "cpu_threads"):
+                config.set_cpu_math_library_num_threads(args.cpu_threads)
+            else:
+                # default cpu threads as 10
+                config.set_cpu_math_library_num_threads(10)
+            if args.enable_mkldnn:
+                # cache 10 different shapes for mkldnn to avoid memory leak
+                config.set_mkldnn_cache_capacity(10)
+                config.enable_mkldnn()
+                if args.precision == "fp16":
+                    config.enable_mkldnn_bfloat16()
+        # enable memory optim
+        config.enable_memory_optim()
+        config.disable_glog_info()
+
+        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+        if mode == 'table':
+            config.delete_pass("fc_fuse_pass")  # not supported for table
+        config.switch_use_feed_fetch_ops(False)
+        config.switch_ir_optim(True)
+
+        # create predictor
+        predictor = inference.create_predictor(config)
+        input_names = predictor.get_input_names()
+        for name in input_names:
+            input_tensor = predictor.get_input_handle(name)
+        output_names = predictor.get_output_names()
+        output_tensors = []
+        for output_name in output_names:
+            output_tensor = predictor.get_output_handle(output_name)
+            output_tensors.append(output_tensor)
+        return predictor, input_tensor, output_tensors, config
 
 
 def get_infer_gpuid():
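With --use_onnx set, create_predictor now returns (sess, sess.get_inputs()[0],
None, None): the onnxruntime session takes the predictor's place, the first
graph input stands in for the input tensor, and the output-tensor and config
slots are unused. Note that model_dir is then expected to point at the .onnx
file itself (for example, a model exported with paddle2onnx) rather than at a
directory. A hedged sketch of how a caller can treat both backends uniformly
(variable names are illustrative):

    predictor, input_tensor, output_tensors, _ = create_predictor(args, 'det', logger)
    if args.use_onnx:
        # onnxruntime path: feed by name; outputs come back as numpy arrays.
        outputs = predictor.run(output_tensors, {input_tensor.name: img})
    else:
        # Paddle Inference path: explicit host<->device copies around run().
        input_tensor.copy_from_cpu(img)
        predictor.run()
        outputs = [t.copy_to_cpu() for t in output_tensors]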