diff --git a/benchmark/run_benchmark_det.sh b/benchmark/run_benchmark_det.sh
index 46144b43a09baf787216728eabaab5f8548fa924..54263e953f3f758b318df147d34ee942a247ed18 100644
--- a/benchmark/run_benchmark_det.sh
+++ b/benchmark/run_benchmark_det.sh
@@ -5,28 +5,36 @@ set -xe
 function _set_params(){
     run_mode=${1:-"sp"}            # single-card "sp" | multi-card "mp"
     batch_size=${2:-"64"}
-    fp_item=${3:-"fp32"}           # fp32|fp16
-    max_iter=${4:-"10"}            # optional; lower it to stop training early
-    model_name=${5:-"model_name"}
+    fp_item=${3:-"fp32"}           # fp32|fp16
+    max_epoch=${4:-"10"}           # optional; lower it to stop training early
+    model_item=${5:-"model_item"}
     run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR is set later by QA
-
+# parameters required for log parsing
+    base_batch_size=${batch_size}
+    mission_name="OCR"
+    direction_id="0"
+    ips_unit="images/sec"
+    skip_steps=2       # number of steps to skip when parsing the log; the first few steps of some models are slow (required)
+    keyword="ips:"     # keyword that identifies the data lines in the log (required)
+    index="1"
+    model_name=${model_item}_bs${batch_size}_${fp_item}  # model_item matches the yml file name; model_name is used for display on the dashboard frontend
 # no changes needed below this line
     device=${CUDA_VISIBLE_DEVICES//,/ }
     arr=(${device})
     num_gpu_devices=${#arr[*]}
-    log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+    log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
 }

 function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
     echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
-    train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
+    train_cmd="-c configs/det/${model_item}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_epoch} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
     case ${run_mode} in
     sp)
-        train_cmd="python3.7 tools/train.py "${train_cmd}""
+        train_cmd="python tools/train.py "${train_cmd}""
         ;;
     mp)
-        train_cmd="python3.7 -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
+        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
         ;;
     *) echo "choose run_mode(sp or mp)"; exit 1;
     esac
@@ -46,17 +54,7 @@ function _train(){
     fi
 }

-function _analysis_log(){
-    analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${run_mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_size} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec"
-    eval $analysis_cmd
-}
-
-function _kill_process(){
-    kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`
-}
-
-
+source ${BENCHMARK_ROOT}/scripts/run_model.sh  # parses benchmark-compliant logs with analysis.py; during joint debugging it can be downloaded from the benchmark repo: https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh; comment this line out if you only want the training log, and re-enable it before submitting
 _set_params $@
-_train
-_analysis_log
-_kill_process
\ No newline at end of file
+#_train  # uncomment to produce the training log only, without parsing
+_run     # defined in run_model.sh; it calls _train internally; comment it out if you only want the training log, and re-enable it before submitting
diff --git a/benchmark/run_det.sh b/benchmark/run_det.sh
index 68109b3ab2c3b8b61a0c90b4b31fd855c1ba2d46..be0c141f7ee168d10eebb6efb57158d18ed02f72 100644
--- a/benchmark/run_det.sh
+++ b/benchmark/run_det.sh
@@ -1,25 +1,35 @@
+#!/bin/bash
 # Script for stably reproducible performance numbers. By default it runs with py37 inside the standard docker image: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7, paddle=2.1.2, py=37
 # working directory: ./PaddleOCR
 # 1. install the dependencies of this model (note any enabled optimization strategies here)
-python3.7 -m pip install -r requirements.txt
+log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
+python -m pip install -r requirements.txt
 # 2. download the data and pretrained models this model needs
 wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
 wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams
+wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams
 # 3. run all models in a batch (if that is inconvenient, steps 1 and 2 must be moved into each single model)
 model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
 fp_item_list=(fp32)
-bs_list=(8 16)
 for model_mode in ${model_mode_list[@]}; do
     for fp_item in ${fp_item_list[@]}; do
+        if [ ${model_mode} == "det_r50_vd_east" ]; then
+            bs_list=(16)
+        else
+            bs_list=(8 16)
+        fi
        for bs_item in ${bs_list[@]}; do
            echo "index is speed, 1gpus, begin, ${model_name}"
            run_mode=sp
-            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode}  #  (5min)
+            log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 1 ${model_mode} 2>&1 | tee ${log_path}/${log_name}_speed_1gpus  #  (5min)
            sleep 60
            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
            run_mode=mp
-            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode}
+            log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} 2>&1 | tee ${log_path}/${log_name}_speed_8gpus8p
            sleep 60
            done
      done
done
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
similarity index 100%
rename from configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml
rename to configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_distill.yml
similarity index 100%
rename from configs/det/ch_PP-OCRv2/ch_PP-OCR_det_distill.yml
rename to configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_distill.yml
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml
similarity index 100%
rename from configs/det/ch_PP-OCRv2/ch_PP-OCR_det_dml.yml
rename to configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_dml.yml
diff --git a/configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml b/configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_student.yml
similarity index 100%
rename from configs/det/ch_PP-OCRv2/ch_PP-OCR_det_student.yml
rename to configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_student.yml
diff --git a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
index 7161203035b2324c7afc56b2b0c743428558a098..5be96969fd0d4912a4ff2a09c9d181b1b17d633e 100644
--- a/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
+++ b/configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_enhanced_ctc_loss.yml
@@ -62,8 +62,7 @@ Loss:
           weight: 0.05
           num_classes: 6625
           feat_dim: 96
-          init_center: false
-          center_file_path: "./train_center.pkl"
+          center_file_path:
   # you can also try to add ace loss on your own dataset
   #     - ACELoss:
   #         weight: 0.1
diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md
index
f88d021d0a050aeecf859981cc2de1cee8f3a2c0..92ef70b642dc1eaed9e694b1ae756f76ee548703 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -34,10 +34,10 @@ PaddleOCR模型部署。 * 首先需要从opencv官网上下载在Linux环境下源码编译的包,以opencv3.4.7为例,下载命令如下。 -``` +```bash cd deploy/cpp_infer -wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz -tar -xf 3.4.7.tar.gz +wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz +tar -xf opencv-3.4.7.tar.gz ``` 最终可以在当前目录下看到`opencv-3.4.7/`的文件夹。 @@ -45,12 +45,13 @@ tar -xf 3.4.7.tar.gz * 编译opencv,设置opencv源码路径(`root_path`)以及安装路径(`install_path`)。进入opencv源码路径下,按照下面的方式进行编译。 ```shell -root_path=your_opencv_root_path +root_path="your_opencv_root_path" install_path=${root_path}/opencv3 +build_dir=${root_path}/build -rm -rf build -mkdir build -cd build +rm -rf ${build_dir} +mkdir ${build_dir} +cd ${build_dir} cmake .. \ -DCMAKE_INSTALL_PREFIX=${install_path} \ @@ -74,6 +75,11 @@ make -j make install ``` +也可以直接修改`tools/build_opencv.sh`的内容,然后直接运行下面的命令进行编译。 + +```shell +sh tools/build_opencv.sh +``` 其中`root_path`为下载的opencv源码路径,`install_path`为opencv的安装路径,`make install`完成之后,会在该文件夹下生成opencv头文件和库文件,用于后面的OCR代码编译。 @@ -233,12 +239,12 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir --image_dir=../../doc/imgs/12.jpg ``` -更多参数如下: +更多支持的可调节参数解释如下: - 通用参数 |参数名称|类型|默认参数|意义| -| --- | --- | --- | --- | +| :---: | :---: | :---: | :---: | |use_gpu|bool|false|是否使用GPU| |gpu_id|int|0|GPU id,使用GPU时有效| |gpu_mem|int|4000|申请的GPU内存| @@ -248,7 +254,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir - 检测模型相关 |参数名称|类型|默认参数|意义| -| --- | --- | --- | --- | +| :---: | :---: | :---: | :---: | |det_model_dir|string|-|检测模型inference model地址| |max_side_len|int|960|输入图像长宽大于960时,等比例缩放图像,使得图像最长边为960| |det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显| @@ -260,7 +266,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir - 方向分类器相关 |参数名称|类型|默认参数|意义| -| --- | --- | --- | --- | +| :---: | :---: | :---: | :---: | |use_angle_cls|bool|false|是否使用方向分类器| |cls_model_dir|string|-|方向分类器inference model地址| |cls_thresh|float|0.9|方向分类器的得分阈值| @@ -268,7 +274,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir - 识别模型相关 |参数名称|类型|默认参数|意义| -| --- | --- | --- | --- | +| :---: | :---: | :---: | :---: | |rec_model_dir|string|-|识别模型inference model地址| |char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件| diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index 48de51ae726e662f48d465b8489a494448dafac1..fd6d953de1f9168da734d6c5eda945c670cfce37 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -17,10 +17,10 @@ PaddleOCR model deployment. * First of all, you need to download the source code compiled package in the Linux environment from the opencv official website. Taking opencv3.4.7 as an example, the download command is as follows. -``` +```bash cd deploy/cpp_infer -wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz -tar -xf 3.4.7.tar.gz +wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz +tar -xf opencv-3.4.7.tar.gz ``` Finally, you can see the folder of `opencv-3.4.7/` in the current directory. diff --git a/deploy/cpp_infer/tools/build_opencv.sh b/deploy/cpp_infer/tools/build_opencv.sh new file mode 100644 index 0000000000000000000000000000000000000000..9f5d556c1101023fd5bef15eab505b673b25e6ee --- /dev/null +++ b/deploy/cpp_infer/tools/build_opencv.sh @@ -0,0 +1,28 @@ +root_path="/paddle/PaddleOCR/deploy/cpp_infer/opencv-3.4.7" +install_path=${root_path}/opencv3 +build_dir=${root_path}/build + +rm -rf ${build_dir} +mkdir ${build_dir} +cd ${build_dir} + +cmake .. 
\
+    -DCMAKE_INSTALL_PREFIX=${install_path} \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DBUILD_SHARED_LIBS=OFF \
+    -DWITH_IPP=OFF \
+    -DBUILD_IPP_IW=OFF \
+    -DWITH_LAPACK=OFF \
+    -DWITH_EIGEN=OFF \
+    -DCMAKE_INSTALL_LIBDIR=lib64 \
+    -DWITH_ZLIB=ON \
+    -DBUILD_ZLIB=ON \
+    -DWITH_JPEG=ON \
+    -DBUILD_JPEG=ON \
+    -DWITH_PNG=ON \
+    -DBUILD_PNG=ON \
+    -DWITH_TIFF=ON \
+    -DBUILD_TIFF=ON
+
+make -j
+make install
diff --git a/deploy/lite/ocr_db_crnn.cc b/deploy/lite/ocr_db_crnn.cc
index 011d4adbeb65732f12c263ddfec94afb84bf5969..1ffbbacb74545b0bbea4957e25b6235225bad02b 100644
--- a/deploy/lite/ocr_db_crnn.cc
+++ b/deploy/lite/ocr_db_crnn.cc
@@ -172,7 +172,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
   cv::Mat resize_img;

   int index = 0;
+
+  std::vector<double> time_info = {0, 0, 0};
   for (int i = boxes.size() - 1; i >= 0; i--) {
+    auto preprocess_start = std::chrono::steady_clock::now();
     crop_img = GetRotateCropImage(srcimg, boxes[i]);
     if (use_direction_classify >= 1) {
       crop_img = RunClsModel(crop_img, predictor_cls);
@@ -191,7 +194,9 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
     auto *data0 = input_tensor0->mutable_data<float>();

     NeonMeanScale(dimg, data0, resize_img.rows * resize_img.cols, mean, scale);
+    auto preprocess_end = std::chrono::steady_clock::now();
     //// Run CRNN predictor
+    auto inference_start = std::chrono::steady_clock::now();
     predictor_crnn->Run();

     // Get output and run postprocess
@@ -199,8 +204,10 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
         std::move(predictor_crnn->GetOutput(0)));
     auto *predict_batch = output_tensor0->data<float>();
     auto predict_shape = output_tensor0->shape();
+    auto inference_end = std::chrono::steady_clock::now();

     // ctc decode
+    auto postprocess_start = std::chrono::steady_clock::now();
     std::string str_res;
     int argmax_idx;
     int last_index = 0;
@@ -224,7 +231,20 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
     score /= count;
     rec_text.push_back(str_res);
     rec_text_score.push_back(score);
+    auto postprocess_end = std::chrono::steady_clock::now();
+
+    std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+    time_info[0] += double(preprocess_diff.count() * 1000);
+    std::chrono::duration<float> inference_diff = inference_end - inference_start;
+    time_info[1] += double(inference_diff.count() * 1000);
+    std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+    time_info[2] += double(postprocess_diff.count() * 1000);
   }
+
+  times->push_back(time_info[0]);
+  times->push_back(time_info[1]);
+  times->push_back(time_info[2]);
 }

 std::vector<std::vector<std::vector<int>>>
@@ -312,7 +332,6 @@ std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, int num_threads)
   config.set_model_from_file(model_file);
   config.set_threads(num_threads);
-
   std::shared_ptr<PaddlePredictor> predictor = CreatePaddlePredictor<MobileConfig>(config);
   return predictor;
@@ -434,6 +453,9 @@ void system(char **argv){
   auto rec_predictor = loadModel(rec_model_file, std::stoi(num_threads));
   auto cls_predictor = loadModel(cls_model_file, std::stoi(num_threads));

+  std::vector<double> det_time_info = {0, 0, 0};
+  std::vector<double> rec_time_info = {0, 0, 0};
+
   for (int i = 0; i < cv_all_img_names.size(); ++i) {
     std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
     cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
@@ -459,8 +481,38 @@ void system(char **argv){
     //// print recognized text
     for (int i = 0; i < rec_text.size(); i++) {
       std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
-                << std::endl;
+                << std::endl;
+    }
+
+    det_time_info[0] += det_times[0];
+    det_time_info[1] += det_times[1];
+    det_time_info[2] += det_times[2];
+    rec_time_info[0] += rec_times[0];
+    rec_time_info[1] += rec_times[1];
+    rec_time_info[2] += rec_times[2];
+  }
+  if (strcmp(argv[12], "True") == 0) {
+    AutoLogger autolog_det(det_model_file,
+                           runtime_device,
+                           std::stoi(num_threads),
+                           std::stoi(batchsize),
+                           "dynamic",
+                           precision,
+                           det_time_info,
+                           cv_all_img_names.size());
+    AutoLogger autolog_rec(rec_model_file,
+                           runtime_device,
+                           std::stoi(num_threads),
+                           std::stoi(batchsize),
+                           "dynamic",
+                           precision,
+                           rec_time_info,
+                           cv_all_img_names.size());
+
+    autolog_det.report();
+    std::cout << std::endl;
+    autolog_rec.report();
   }
 }
@@ -503,15 +555,15 @@ void det(int argc, char **argv) {

   auto img_vis = Visualization(srcimg, boxes);
   std::cout << boxes.size() << " bboxes have detected:" << std::endl;

-  // for (int i=0; i<boxes.size(); i++){
diff --git a/doc/doc_ch/inference.md b/doc/doc_ch/inference.md
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
 ## 一、训练模型转inference模型
@@ -394,3 +396,127 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
 执行命令后,识别结果图像如下:

 ![](../imgs_results/img_10_east_starnet.jpg)
+
+
+
+
+## 六、参数解释
+
+更多关于预测过程的参数解释如下所示。
+
+* 全局信息
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| image_dir | str | 无,必须显式指定 | 图像或者文件夹路径 |
+| vis_font_path | str | "./doc/fonts/simfang.ttf" | 用于可视化的字体路径 |
+| drop_score | float | 0.5 | 识别得分小于该值的结果会被丢弃,不会作为返回结果 |
+| use_pdserving | bool | False | 是否使用Paddle Serving进行预测 |
+| warmup | bool | False | 是否开启warmup,在统计预测耗时的时候,可以使用这种方法 |
+| draw_img_save_dir | str | "./inference_results" | 系统串联预测OCR结果的保存文件夹 |
+| save_crop_res | bool | False | 是否保存OCR的识别文本图像 |
+| crop_res_save_dir | str | "./output" | 保存OCR识别出来的文本图像路径 |
+| use_mp | bool | False | 是否开启多进程预测 |
+| total_process_num | int | 6 | 开启的进程数,`use_mp`为`True`时生效 |
+| process_id | int | 0 | 当前进程的id号,无需自己修改 |
+| benchmark | bool | False | 是否开启benchmark,对预测速度、显存占用等进行统计 |
+| save_log_path | str | "./log_output/" | 开启`benchmark`时,日志结果的保存文件夹 |
+| show_log | bool | True | 是否显示预测中的日志信息 |
+| use_onnx | bool | False | 是否开启onnx预测 |
+
+
+* 预测引擎相关
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| use_gpu | bool | True | 是否使用GPU进行预测 |
+| ir_optim | bool | True | 是否对计算图进行分析与优化,开启后可以加速预测过程 |
+| use_tensorrt | bool | False | 是否开启tensorrt |
+| min_subgraph_size | int | 15 | tensorrt中最小子图size,当子图的size大于该值时,才会尝试对该子图使用trt engine计算 |
+| precision | str | fp32 | 预测的精度,支持`fp32`, `fp16`, `int8` 3种输入 |
+| enable_mkldnn | bool | True | 是否开启mkldnn |
+| cpu_threads | int | 10 | 开启mkldnn时,cpu预测的线程数 |
+
+* 文本检测模型相关
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| det_algorithm | str | "DB" | 文本检测算法名称,目前支持`DB`, `EAST`, `SAST`, `PSE` |
+| det_model_dir | str | xx | 检测inference模型路径 |
+| det_limit_side_len | int | 960 | 检测的图像边长限制 |
+| det_limit_type | str | "max" | 检测的边长限制类型,目前支持`min`, `max`,`min`表示保证图像最短边不小于`det_limit_side_len`,`max`表示保证图像最长边不大于`det_limit_side_len` |
+
+其中,DB算法相关参数如下
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| det_db_thresh | float | 0.3 | DB输出的概率图中,得分大于该阈值的像素点才会被认为是文字像素点 |
+| det_db_box_thresh | float | 0.6 | 检测结果边框内,所有像素点的平均得分大于该阈值时,该结果会被认为是文字区域 |
+| det_db_unclip_ratio | float | 1.5 | `Vatti clipping`算法的扩张系数,使用该方法对文字区域进行扩张 |
+| max_batch_size | int | 10 | 预测的batch size |
+| use_dilation | bool | False | 是否对分割结果进行膨胀以获取更优检测效果 |
+| det_db_score_mode | str | "fast" | DB的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 |
+
+EAST算法相关参数如下
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| det_east_score_thresh | float | 0.8 | EAST后处理中score map的阈值 |
+| det_east_cover_thresh | float | 0.1 | EAST后处理中文本框的平均得分阈值 |
+| det_east_nms_thresh | float | 0.2 | EAST后处理中nms的阈值 |
+
+SAST算法相关参数如下
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| det_sast_score_thresh | float | 0.5 | SAST后处理中的得分阈值 |
+| det_sast_nms_thresh | float | 0.5 | SAST后处理中nms的阈值 |
+| det_sast_polygon | bool | False | 是否多边形检测,弯曲文本场景(如Total-Text)设置为True |
+
+PSE算法相关参数如下
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| det_pse_thresh | float | 0.0 | 对输出图做二值化的阈值 |
+| det_pse_box_thresh | float | 0.85 | 对box进行过滤的阈值,低于此阈值的丢弃 |
+| det_pse_min_area | float | 16 | box的最小面积,低于此阈值的丢弃 |
+| det_pse_box_type | str | "box" | 返回框的类型,box:四点坐标,poly:弯曲文本的所有点坐标 |
+| det_pse_scale | int | 1 | 输入图像相对于进入后处理的图的比例,如`640*640`的图像,网络输出为`160*160`,scale为2的情况下,进入后处理的图片shape为`320*320`。这个值调大可以加快后处理速度,但是会带来精度的下降 |
+
+* 文本识别模型相关
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| rec_algorithm | str | "CRNN" | 文本识别算法名称,目前支持`CRNN`, `SRN`, `RARE`, `NRTR`, `SAR` |
+| rec_model_dir | str | 无,如果使用识别模型,该项是必填项 | 识别inference模型路径 |
+| rec_image_shape | list | [3, 32, 320] | 识别时的图像尺寸 |
+| rec_batch_num | int | 6 | 识别的batch size |
+| max_text_length | int | 25 | 识别结果最大长度,在`SRN`中有效 |
+| rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | 识别的字符字典文件 |
+| use_space_char | bool | True | 是否包含空格,如果为`True`,则会在字符字典最后补充`空格`字符 |
+
+
+* 端到端文本检测与识别模型相关
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| e2e_algorithm | str | "PGNet" | 端到端算法名称,目前支持`PGNet` |
+| e2e_model_dir | str | 无,如果使用端到端模型,该项是必填项 | 端到端模型inference模型路径 |
+| e2e_limit_side_len | int | 768 | 端到端的输入图像边长限制 |
+| e2e_limit_type | str | "max" | 端到端的边长限制类型,目前支持`min`, `max`,`min`表示保证图像最短边不小于`e2e_limit_side_len`,`max`表示保证图像最长边不大于`e2e_limit_side_len` |
+| e2e_pgnet_score_thresh | float | 0.5 | 端到端得分阈值,小于该阈值的结果会被丢弃 |
+| e2e_char_dict_path | str | "./ppocr/utils/ic15_dict.txt" | 识别的字典文件路径 |
+| e2e_pgnet_valid_set | str | "totaltext" | 验证集名称,目前支持`totaltext`, `partvgg`,不同数据集对应的后处理方式不同,与训练过程保持一致即可 |
+| e2e_pgnet_mode | str | "fast" | PGNet的检测结果得分计算方法,支持`fast`和`slow`,`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分,`slow`是根据原始polygon内的所有像素计算平均得分,计算速度相对较慢一些,但是更加准确一些。 |
+
+
+* 方向分类器模型相关
+
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+| use_angle_cls | bool | False | 是否使用方向分类器 |
+| cls_model_dir | str | 无,如果需要使用,则必须显式指定路径 | 方向分类器inference模型路径 |
+| cls_image_shape | list | [3, 48, 192] | 预测尺度 |
+| label_list | list | ['0', '180'] | class id对应的角度值 |
+| cls_batch_num | int | 6 | 方向分类器预测的batch size |
+| cls_thresh | float | 0.9 | 预测阈值,模型预测结果为180度,且得分大于该阈值时,认为最终预测结果为180度,需要翻转 |
diff --git a/doc/doc_ch/models_list.md b/doc/doc_ch/models_list.md
index dabbde72f88fe72a8188623b7b0b549480e9f1f3..6843ffdc19d5bde205124c30f1d0a5fc2144ce99 100644
--- a/doc/doc_ch/models_list.md
+++ b/doc/doc_ch/models_list.md
@@ -33,8 +33,8 @@ PaddleOCR提供的可下载模型包括`推理模型`、`训练模型`、`预训
 |模型名称|模型简介|配置文件|推理模型大小|下载地址|
 | --- | --- | --- | --- | --- |
-|ch_PP-OCRv2_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml)| 3M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
-|ch_PP-OCRv2_det|【最新】原始超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
+|ch_PP-OCRv2_det_slim|【最新】slim量化+蒸馏版超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)| 3M |[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
+|ch_PP-OCRv2_det|【最新】原始超轻量模型,支持中英文、多语种文本检测|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
 |ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)| 2.6M |[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar)|
 |ch_ppocr_mobile_v2.0_det|原始超轻量模型,支持中英文、多语种文本检测|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|
 |ch_ppocr_server_v2.0_det|通用模型,支持中英文、多语种文本检测,比超轻量模型更大,但效果更好|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)|
diff --git a/doc/doc_ch/pgnet.md b/doc/doc_ch/pgnet.md
index 9aa7f255e54ce8dec3a20d475cccb71847d95cc7..0aee58ec1aca24d06305c47569fdf156df6ee874 100644
--- a/doc/doc_ch/pgnet.md
+++ b/doc/doc_ch/pgnet.md
@@ -66,13 +66,13 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/e2e_server_pgnetA_infer.
 ### 单张图像或者图像集合预测
 ```bash
 # 预测image_dir指定的单张图像
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"

 # 预测image_dir指定的图像集合
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext"

 # 如果想使用CPU进行预测,需设置use_gpu参数为False
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True --use_gpu=False
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext" --use_gpu=False
 ```
 ### 可视化结果
 可视化文本检测结果默认保存到./inference_results文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
@@ -167,9 +167,9 @@ python3 tools/infer_e2e.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.infer_img=
 wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar
 python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e
 ```
-**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`**,可以执行如下命令:
+**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`以及`--e2e_pgnet_valid_set="partvgg"`**,可以执行如下命令:
 ```
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=False
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="partvgg"
 ```
 可视化文本检测结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:

@@ -178,9 +178,9 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
 #### (2). 弯曲文本检测模型(Total-Text)
 对于弯曲文本样例

-**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_polygon=True`,**可以执行如下命令:
+**PGNet端到端模型推理,需要设置参数`--e2e_algorithm="PGNet"`,同时,还需要增加参数`--e2e_pgnet_valid_set="totaltext"`,**可以执行如下命令:
 ```
-python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True
+python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="totaltext"
 ```
 可视化文本端到端结果默认保存到`./inference_results`文件夹里面,结果文件的名称前缀为'e2e_res'。结果示例如下:
diff --git a/doc/doc_en/models_list_en.md b/doc/doc_en/models_list_en.md
index dbb4860279cd25a888a6bfaf64dbe1952ef54470..e3cf251c3439ba009a1de0ba48f7c0aa10b117c4 100644
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -29,8 +29,8 @@ Relationship of the above models is as follows.
 |model name|description|config|model size|download|
 | --- | --- | --- | --- | --- |
-|ch_PP-OCRv2_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml)| 3M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
-|ch_PP-OCRv2_det|[New] Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
+|ch_PP-OCRv2_det_slim|[New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)| 3M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
+|ch_PP-OCRv2_det|[New] Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
 |ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|2.6M |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar)|
 |ch_ppocr_mobile_v2.0_det|Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_det_mv3_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml)|3M|[inference
model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)| |ch_ppocr_server_v2.0_det|General model, which is larger than the lightweight model, but achieved better performance|[ch_det_res18_db_v2.0.yml](../../configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml)|47M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar)| diff --git a/doc/doc_en/pgnet_en.md b/doc/doc_en/pgnet_en.md index d2c6b30248ebad920c41ca53ee38cce828dddb8c..e176a1260c734974e2dad843faeb3e5532176629 100644 --- a/doc/doc_en/pgnet_en.md +++ b/doc/doc_en/pgnet_en.md @@ -59,13 +59,13 @@ After decompression, there should be the following file structure: ### Single image or image set prediction ```bash # Prediction single image specified by image_dir -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext" # Prediction the collection of images specified by image_dir -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_valid_set="totaltext" # If you want to use CPU for prediction, you need to set use_gpu parameter is false -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --e2e_pgnet_polygon=True --use_gpu=False +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e_server_pgnetA_infer/" --use_gpu=False --e2e_pgnet_valid_set="totaltext" ``` ### Visualization results The visualized end-to-end results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. 
Examples of results are as follows: @@ -166,9 +166,9 @@ First, convert the model saved in the PGNet end-to-end training process into an wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar && tar xf en_server_pgnetA.tar python3 tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./en_server_pgnetA/best_accuracy Global.load_static_weights=False Global.save_inference_dir=./inference/e2e ``` -**For PGNet quadrangle end-to-end model inference, you need to set the parameter `--e2e_algorithm="PGNet"`**, run the following command: +**For PGNet quadrangle end-to-end model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_valid_set="partvgg"`**, run the following command: ``` -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=False +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img_10.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="partvgg" ``` The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows: @@ -176,9 +176,9 @@ The visualized text detection results are saved to the `./inference_results` fol #### (2). Curved text detection model (Total-Text) For the curved text example, we use the same model as the quadrilateral -**For PGNet end-to-end curved text detection model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_polygon=True`**, run the following command: +**For PGNet end-to-end curved text detection model inference, you need to set the parameter `--e2e_algorithm="PGNet"` and `--e2e_pgnet_valid_set="totaltext"`**, run the following command: ``` -python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_polygon=True +python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/imgs_en/img623.jpg" --e2e_model_dir="./inference/e2e/" --e2e_pgnet_valid_set="totaltext" ``` The visualized text detection results are saved to the `./inference_results` folder by default, and the name of the result file is prefixed with 'e2e_res'. Examples of results are as follows: diff --git a/paddleocr.py b/paddleocr.py index 028cfcc1faae3d9cca7d756b55213c030c7496de..733c83d1b4faa23212e7186148a5a9e1154ba891 100644 --- a/paddleocr.py +++ b/paddleocr.py @@ -42,7 +42,7 @@ __all__ = [ ] SUPPORT_DET_MODEL = ['DB'] -VERSION = '2.3.0.1' +VERSION = '2.3.0.2' SUPPORT_REC_MODEL = ['CRNN'] BASE_DIR = os.path.expanduser("~/.paddleocr/") diff --git a/ppocr/losses/center_loss.py b/ppocr/losses/center_loss.py index f8c57fdd5c9b3f0dec5c3d0a811e5532abd2e45a..f62b8af373ed065a2fedbfa182f9692d6decefda 100644 --- a/ppocr/losses/center_loss.py +++ b/ppocr/losses/center_loss.py @@ -30,21 +30,17 @@ class CenterLoss(nn.Layer): Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016. 
""" - def __init__(self, - num_classes=6625, - feat_dim=96, - init_center=False, - center_file_path=None): + def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None): super().__init__() self.num_classes = num_classes self.feat_dim = feat_dim self.centers = paddle.randn( shape=[self.num_classes, self.feat_dim]).astype("float64") - if init_center: + if center_file_path is not None: assert os.path.exists( center_file_path - ), f"center path({center_file_path}) must exist when init_center is set as True." + ), f"center path({center_file_path}) must exist when it is not None." with open(center_file_path, 'rb') as f: char_dict = pickle.load(f) for key in char_dict.keys(): diff --git a/ppocr/modeling/backbones/__init__.py b/ppocr/modeling/backbones/__init__.py index 169eb821f110d4a212068ebab4d46d636e241307..66b507fd24158ddf64d68dd7392f828a2e17c399 100755 --- a/ppocr/modeling/backbones/__init__.py +++ b/ppocr/modeling/backbones/__init__.py @@ -16,7 +16,7 @@ __all__ = ["build_backbone"] def build_backbone(config, model_type): - if model_type == "det": + if model_type == "det" or model_type == "table": from .det_mobilenet_v3 import MobileNetV3 from .det_resnet_vd import ResNet from .det_resnet_vd_sast import ResNet_SAST @@ -36,10 +36,6 @@ def build_backbone(config, model_type): elif model_type == "e2e": from .e2e_resnet_vd_pg import ResNet support_dict = ["ResNet"] - elif model_type == "table": - from .table_resnet_vd import ResNet - from .table_mobilenet_v3 import MobileNetV3 - support_dict = ["ResNet", "MobileNetV3"] else: raise NotImplementedError diff --git a/ppocr/modeling/backbones/rec_mobilenet_v3.py b/ppocr/modeling/backbones/rec_mobilenet_v3.py index c5dcfdd5a3ad1f2c356f488a89e0f1e660a4a832..917e000d94ea01ce0057e08c1f4839240561a368 100644 --- a/ppocr/modeling/backbones/rec_mobilenet_v3.py +++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py @@ -26,8 +26,10 @@ class MobileNetV3(nn.Layer): scale=0.5, large_stride=None, small_stride=None, + disable_se=False, **kwargs): super(MobileNetV3, self).__init__() + self.disable_se = disable_se if small_stride is None: small_stride = [2, 2, 2, 2] if large_stride is None: @@ -101,6 +103,7 @@ class MobileNetV3(nn.Layer): block_list = [] inplanes = make_divisible(inplanes * scale) for (k, exp, c, se, nl, s) in cfg: + se = se and not self.disable_se block_list.append( ResidualUnit( in_channels=inplanes, diff --git a/ppocr/modeling/backbones/table_mobilenet_v3.py b/ppocr/modeling/backbones/table_mobilenet_v3.py deleted file mode 100644 index daa87f976038d8d5eeafadceb869b9232ba22cd9..0000000000000000000000000000000000000000 --- a/ppocr/modeling/backbones/table_mobilenet_v3.py +++ /dev/null @@ -1,287 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import nn -import paddle.nn.functional as F -from paddle import ParamAttr - -__all__ = ['MobileNetV3'] - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class MobileNetV3(nn.Layer): - def __init__(self, - in_channels=3, - model_name='large', - scale=0.5, - disable_se=False, - **kwargs): - """ - the MobilenetV3 backbone network for detection module. - Args: - params(dict): the super parameters for build network - """ - super(MobileNetV3, self).__init__() - - self.disable_se = disable_se - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, 'relu', 1], - [3, 64, 24, False, 'relu', 2], - [3, 72, 24, False, 'relu', 1], - [5, 72, 40, True, 'relu', 2], - [5, 120, 40, True, 'relu', 1], - [5, 120, 40, True, 'relu', 1], - [3, 240, 80, False, 'hardswish', 2], - [3, 200, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 184, 80, False, 'hardswish', 1], - [3, 480, 112, True, 'hardswish', 1], - [3, 672, 112, True, 'hardswish', 1], - [5, 672, 160, True, 'hardswish', 2], - [5, 960, 160, True, 'hardswish', 1], - [5, 960, 160, True, 'hardswish', 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, 'relu', 2], - [3, 72, 24, False, 'relu', 2], - [3, 88, 24, False, 'relu', 1], - [5, 96, 40, True, 'hardswish', 2], - [5, 240, 40, True, 'hardswish', 1], - [5, 240, 40, True, 'hardswish', 1], - [5, 120, 48, True, 'hardswish', 1], - [5, 144, 48, True, 'hardswish', 1], - [5, 288, 96, True, 'hardswish', 2], - [5, 576, 96, True, 'hardswish', 1], - [5, 576, 96, True, 'hardswish', 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError("mode[" + model_name + - "_model] is not implemented!") - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert scale in supported_scale, \ - "supported scale are {} but input scale is {}".format(supported_scale, scale) - inplanes = 16 - # conv1 - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act='hardswish', - name='conv1') - - self.stages = [] - self.out_channels = [] - block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for (k, exp, c, se, nl, s) in cfg: - se = se and not self.disable_se - start_idx = 2 if model_name == 'large' else 0 - if s == 2 and i > start_idx: - self.out_channels.append(inplanes) - self.stages.append(nn.Sequential(*block_list)) - block_list = [] - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2))) - inplanes = make_divisible(scale * c) - i += 1 - block_list.append( - ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act='hardswish', - name='conv_last')) - self.stages.append(nn.Sequential(*block_list)) - self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) - for i, stage in enumerate(self.stages): - self.add_sublayer(sublayer=stage, name="stage{}".format(i)) - - def 
forward(self, x): - x = self.conv(x) - out_list = [] - for stage in self.stages: - x = stage(x) - out_list.append(x) - return out_list - - -class ConvBNLayer(nn.Layer): - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.act = act - self.conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - weight_attr=ParamAttr(name=name + '_weights'), - bias_attr=False) - - self.bn = nn.BatchNorm( - num_channels=out_channels, - act=None, - param_attr=ParamAttr(name=name + "_bn_scale"), - bias_attr=ParamAttr(name=name + "_bn_offset"), - moving_mean_name=name + "_bn_mean", - moving_variance_name=name + "_bn_variance") - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = F.hardswish(x) - else: - print("The activation function({}) is selected incorrectly.". - format(self.act)) - exit() - return x - - -class ResidualUnit(nn.Layer): - def __init__(self, - in_channels, - mid_channels, - out_channels, - kernel_size, - stride, - use_se, - act=None, - name=''): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_channels == out_channels - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand") - self.bottleneck_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=int((kernel_size - 1) // 2), - groups=mid_channels, - if_act=True, - act=act, - name=name + "_depthwise") - if self.if_se: - self.mid_se = SEModule(mid_channels, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear") - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = paddle.add(inputs, x) - return x - - -class SEModule(nn.Layer): - def __init__(self, in_channels, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2D(1) - self.conv1 = nn.Conv2D( - in_channels=in_channels, - out_channels=in_channels // reduction, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name=name + "_1_weights"), - bias_attr=ParamAttr(name=name + "_1_offset")) - self.conv2 = nn.Conv2D( - in_channels=in_channels // reduction, - out_channels=in_channels, - kernel_size=1, - stride=1, - padding=0, - weight_attr=ParamAttr(name + "_2_weights"), - bias_attr=ParamAttr(name=name + "_2_offset")) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5) - return inputs * outputs \ No newline at end of file diff --git a/ppocr/modeling/backbones/table_resnet_vd.py b/ppocr/modeling/backbones/table_resnet_vd.py deleted file mode 100644 index 1c07c2684eec8d0c4a445cc88c543bfe1da9c864..0000000000000000000000000000000000000000 --- a/ppocr/modeling/backbones/table_resnet_vd.py +++ /dev/null @@ -1,280 
+0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle -from paddle import ParamAttr -import paddle.nn as nn -import paddle.nn.functional as F - -__all__ = ["ResNet"] - - -class ConvBNLayer(nn.Layer): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - is_vd_mode=False, - act=None, - name=None, ): - super(ConvBNLayer, self).__init__() - - self.is_vd_mode = is_vd_mode - self._pool2d_avg = nn.AvgPool2D( - kernel_size=2, stride=2, padding=0, ceil_mode=True) - self._conv = nn.Conv2D( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - weight_attr=ParamAttr(name=name + "_weights"), - bias_attr=False) - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - self._batch_norm = nn.BatchNorm( - out_channels, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - def forward(self, inputs): - if self.is_vd_mode: - inputs = self._pool2d_avg(inputs) - y = self._conv(inputs) - y = self._batch_norm(y) - return y - - -class BottleneckBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2b") - self.conv2 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels * 4, - kernel_size=1, - act=None, - name=name + "_branch2c") - - if not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels * 4, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv2) - y = F.relu(y) - return y - - -class BasicBlock(nn.Layer): - def __init__(self, - in_channels, - out_channels, - stride, - shortcut=True, - if_first=False, - name=None): - super(BasicBlock, self).__init__() - self.stride = stride - self.conv0 = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=3, - stride=stride, - act='relu', - name=name + "_branch2a") - self.conv1 = ConvBNLayer( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=3, - act=None, - name=name + "_branch2b") - - if 
not shortcut: - self.short = ConvBNLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - is_vd_mode=False if if_first else True, - name=name + "_branch1") - - self.shortcut = shortcut - - def forward(self, inputs): - y = self.conv0(inputs) - conv1 = self.conv1(y) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - y = paddle.add(x=short, y=conv1) - y = F.relu(y) - return y - - -class ResNet(nn.Layer): - def __init__(self, in_channels=3, layers=50, **kwargs): - super(ResNet, self).__init__() - - self.layers = layers - supported_layers = [18, 34, 50, 101, 152, 200] - assert layers in supported_layers, \ - "supported layers are {} but input layer is {}".format( - supported_layers, layers) - - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - elif layers == 200: - depth = [3, 12, 48, 3] - num_channels = [64, 256, 512, - 1024] if layers >= 50 else [64, 64, 128, 256] - num_filters = [64, 128, 256, 512] - - self.conv1_1 = ConvBNLayer( - in_channels=in_channels, - out_channels=32, - kernel_size=3, - stride=2, - act='relu', - name="conv1_1") - self.conv1_2 = ConvBNLayer( - in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - act='relu', - name="conv1_2") - self.conv1_3 = ConvBNLayer( - in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - act='relu', - name="conv1_3") - self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) - - self.stages = [] - self.out_channels = [] - if layers >= 50: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - if layers in [101, 152] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" - else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - bottleneck_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BottleneckBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block] * 4, - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(bottleneck_block) - self.out_channels.append(num_filters[block] * 4) - self.stages.append(nn.Sequential(*block_list)) - else: - for block in range(len(depth)): - block_list = [] - shortcut = False - for i in range(depth[block]): - conv_name = "res" + str(block + 2) + chr(97 + i) - basic_block = self.add_sublayer( - 'bb_%d_%d' % (block, i), - BasicBlock( - in_channels=num_channels[block] - if i == 0 else num_filters[block], - out_channels=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - shortcut=shortcut, - if_first=block == i == 0, - name=conv_name)) - shortcut = True - block_list.append(basic_block) - self.out_channels.append(num_filters[block]) - self.stages.append(nn.Sequential(*block_list)) - - def forward(self, inputs): - y = self.conv1_1(inputs) - y = self.conv1_2(y) - y = self.conv1_3(y) - y = self.pool2d_max(y) - out = [] - for block in self.stages: - y = block(y) - out.append(y) - return out diff --git a/ppocr/modeling/heads/rec_att_head.py b/ppocr/modeling/heads/rec_att_head.py index 6d77e42eb5def579052687ab6fdc265159311884..ab8b119fe08ac79a6b98a449bb117da018df2ff3 100644 --- a/ppocr/modeling/heads/rec_att_head.py +++ b/ppocr/modeling/heads/rec_att_head.py @@ -53,7 +53,6 @@ class 
AttentionHead(nn.Layer):
                 output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
             output = paddle.concat(output_hiddens, axis=1)
             probs = self.generator(output)
-
         else:
             targets = paddle.zeros(shape=[batch_size], dtype="int32")
             probs = None
@@ -75,6 +74,7 @@ class AttentionHead(nn.Layer):
                         probs_step, axis=1)], axis=1)
                 next_input = probs_step.argmax(axis=1)
                 targets = next_input
+
         if not self.training:
             probs = paddle.nn.functional.softmax(probs, axis=2)
         return probs
diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py
index 4db34f7b4833c1c9b2901c68899bfb294b5843c4..043bb56b8a526c12b2e0799bf41e128c6499c1fc 100644
--- a/ppocr/modeling/transforms/tps_spatial_transformer.py
+++ b/ppocr/modeling/transforms/tps_spatial_transformer.py
@@ -53,7 +53,7 @@ def compute_partial_repr(input_points, control_points):
                                                                             1]
     repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist)
     # fix numerical error for 0 * log(0), substitute all nan with 0
-    mask = repr_matrix != repr_matrix
+    mask = np.array(repr_matrix != repr_matrix)
     repr_matrix[mask] = 0
     return repr_matrix
diff --git a/ppocr/postprocess/east_postprocess.py b/ppocr/postprocess/east_postprocess.py
index ceee727aa3df052041aee925c6c856773c8a288e..c194c81c6911aac0f9210109c37b76b44532e9c4 100755
--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -29,6 +29,7 @@ class EASTPostProcess(object):
     """
     The post process for EAST.
     """
+
     def __init__(self,
                  score_thresh=0.8,
                  cover_thresh=0.1,
@@ -38,11 +39,6 @@ class EASTPostProcess(object):
         self.score_thresh = score_thresh
         self.cover_thresh = cover_thresh
         self.nms_thresh = nms_thresh
-
-        # c++ la-nms is faster, but only support python 3.5
-        self.is_python35 = False
-        if sys.version_info.major == 3 and sys.version_info.minor == 5:
-            self.is_python35 = True

     def restore_rectangle_quad(self, origin, geometry):
         """
@@ -64,6 +60,7 @@ class EASTPostProcess(object):
         """
         restore text boxes from score map and geo map
         """
+
         score_map = score_map[0]
         geo_map = np.swapaxes(geo_map, 1, 0)
         geo_map = np.swapaxes(geo_map, 1, 2)
@@ -79,10 +76,14 @@ class EASTPostProcess(object):
         boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
         boxes[:, :8] = text_box_restored.reshape((-1, 8))
         boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        if self.is_python35:
+
+        try:
             import lanms
             boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
-        else:
+        except Exception:
+            print(
+                'You should install lanms (`pip3 install lanms-nova`) to speed up nms_locality.'
+            )
             boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
         if boxes.shape[0] == 0:
             return []
@@ -139,4 +140,4 @@ class EASTPostProcess(object):
                 continue
             boxes_norm.append(box)
         dt_boxes_list.append({'points': np.array(boxes_norm)})
-        return dt_boxes_list
\ No newline at end of file
+        return dt_boxes_list
diff --git a/ppocr/utils/save_load.py b/ppocr/utils/save_load.py
index 702f3e9770d0572e9128357bfd6b39199566a959..f6013a406634ed110ea5af613a5f31e56ce90ead 100644
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -54,14 +54,37 @@ def load_model(config, model, optimizer=None):
     pretrained_model = global_config.get('pretrained_model')
     best_model_dict = {}
     if checkpoints:
-        if checkpoints.endswith('pdparams'):
+        if checkpoints.endswith('.pdparams'):
             checkpoints = checkpoints.replace('.pdparams', '')
-        assert os.path.exists(checkpoints + ".pdopt"), \
-            f"The {checkpoints}.pdopt does not exists!"
-        load_pretrained_params(model, checkpoints)
-        optim_dict = paddle.load(checkpoints + '.pdopt')
+        assert os.path.exists(checkpoints + ".pdparams"), \
+            "The {}.pdparams does not exist!".format(checkpoints)
+
+        # load params from trained model
+        params = paddle.load(checkpoints + '.pdparams')
+        state_dict = model.state_dict()
+        new_state_dict = {}
+        for key, value in state_dict.items():
+            if key not in params:
+                logger.warning("{} not in loaded params {} !".format(
+                    key, params.keys()))
+                continue
+            pre_value = params[key]
+            if list(value.shape) == list(pre_value.shape):
+                new_state_dict[key] = pre_value
+            else:
+                logger.warning(
+                    "The shape of model params {} {} not matched with loaded params shape {} !".
+                    format(key, value.shape, pre_value.shape))
+        model.set_state_dict(new_state_dict)
+
         if optimizer is not None:
-            optimizer.set_state_dict(optim_dict)
+            if os.path.exists(checkpoints + '.pdopt'):
+                optim_dict = paddle.load(checkpoints + '.pdopt')
+                optimizer.set_state_dict(optim_dict)
+            else:
+                logger.warning(
+                    "{}.pdopt does not exist, so the optimizer params are not loaded".
+                    format(checkpoints))

         if os.path.exists(checkpoints + '.states'):
             with open(checkpoints + '.states', 'rb') as f:
@@ -80,10 +103,10 @@

 def load_pretrained_params(model, path):
     logger = get_logger()
-    if path.endswith('pdparams'):
+    if path.endswith('.pdparams'):
         path = path.replace('.pdparams', '')
     assert os.path.exists(path + ".pdparams"), \
-        f"The {path}.pdparams does not exists!"
+        "The {}.pdparams does not exist!".format(path)

     params = paddle.load(path + '.pdparams')
     state_dict = model.state_dict()
@@ -92,11 +115,11 @@ def load_pretrained_params(model, path):
         if list(state_dict[k1].shape) == list(params[k2].shape):
             new_state_dict[k1] = params[k2]
         else:
-            logger.info(
-                f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
-            )
+            logger.warning(
+                "The shape of model params {} {} not matched with loaded params {} {} !".
+                format(k1, state_dict[k1].shape, k2, params[k2].shape))
     model.set_state_dict(new_state_dict)
-    logger.info(f"load pretrain successful from {path}")
+    logger.info("load pretrain successful from {}".format(path))
     return model
diff --git a/ppstructure/vqa/README.md b/ppstructure/vqa/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8d117fdeb16e1c0e90bf6ec89924e414fc764249
--- /dev/null
+++ b/ppstructure/vqa/README.md
@@ -0,0 +1,182 @@
+# 视觉问答(VQA)
+
+VQA主要特性如下:
+
+- 集成[LayoutXLM](https://arxiv.org/pdf/2104.08836.pdf)模型以及PP-OCR预测引擎。
+- 支持基于多模态方法的语义实体识别 (Semantic Entity Recognition, SER) 以及关系抽取 (Relation Extraction, RE) 任务。基于 SER 任务,可以完成对图像中的文本识别与分类;基于 RE 任务,可以完成对图像中的文本内容的关系提取(比如判断问答对)。
+- 支持SER任务与OCR引擎联合的端到端系统预测与评估。
+- 支持SER任务和RE任务的自定义训练。
+
+
+本项目是 [LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding](https://arxiv.org/pdf/2104.08836.pdf) 在 Paddle 2.2 上的开源实现,
+包含了在 [XFUND数据集](https://github.com/doc-analysis/XFUND) 上的微调代码。
+
+## 1. 效果演示
+
+**注意:** 测试图片来源于XFUND数据集。
+
+### 1.1 SER
+
+<div align="center">
+    <img src="images/result_ser/zh_val_0_ser.jpg">
+</div>
+<div align="center">
+    <img src="images/result_ser/zh_val_42_ser.jpg">
+</div>
+ +其中不同颜色的框表示不同的类别,对于XFUN数据集,有`QUESTION`, `ANSWER`, `HEADER` 3种类别,在OCR检测框的左上方也标出了对应的类别和OCR识别结果。 + + +### 1.2 RE + +* Coming soon! + + + +## 2. 安装 + +### 2.1 安装依赖 + +- **(1) 安装PaddlePaddle** + +```bash +pip3 install --upgrade pip + +# GPU安装 +python3 -m pip install paddlepaddle-gpu==2.2 -i https://mirror.baidu.com/pypi/simple + +# CPU安装 +python3 -m pip install paddlepaddle==2.2 -i https://mirror.baidu.com/pypi/simple + +``` +更多需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。 + + +### 2.2 安装PaddleOCR(包含 PP-OCR 和 VQA ) + +- **(1)pip快速安装PaddleOCR whl包(仅预测)** + +```bash +pip install "paddleocr>=2.2" # 推荐使用2.2+版本 +``` + +- **(2)下载VQA源码(预测+训练)** + +```bash +【推荐】git clone https://github.com/PaddlePaddle/PaddleOCR + +# 如果因为网络问题无法pull成功,也可选择使用码云上的托管: +git clone https://gitee.com/paddlepaddle/PaddleOCR + +# 注:码云托管代码可能无法实时同步本github项目更新,存在3~5天延时,请优先使用推荐方式。 +``` + +- **(3)安装PaddleNLP** + +```bash +# 需要使用PaddleNLP最新的代码版本进行安装 +git clone https://github.com/PaddlePaddle/PaddleNLP -b develop +cd PaddleNLP +pip install -e . +``` + + +- **(4)安装VQA的`requirements`** + +```bash +pip install -r requirements.txt +``` + +## 3. 使用 + + +### 3.1 数据和预训练模型准备 + +处理好的XFUN中文数据集下载地址:[https://paddleocr.bj.bcebos.com/dataset/XFUND.tar](https://paddleocr.bj.bcebos.com/dataset/XFUND.tar)。 + + +下载并解压该数据集,解压后将数据集放置在当前目录下。 + +```shell +wget https://paddleocr.bj.bcebos.com/dataset/XFUND.tar +``` + +如果希望转换XFUN中其他语言的数据集,可以参考[XFUN数据转换脚本](helper/trans_xfun_data.py)。 + +如果希望直接体验预测过程,可以下载我们提供的SER预训练模型,跳过训练过程,直接预测即可。 + +* SER任务预训练模型下载链接:[链接](https://paddleocr.bj.bcebos.com/pplayout/PP-Layout_v1.0_ser_pretrained.tar) +* RE任务预训练模型下载链接:coming soon! + + +### 3.2 SER任务 + +* 启动训练 + +```shell +python train_ser.py \ + --model_name_or_path "layoutxlm-base-uncased" \ + --train_data_dir "XFUND/zh_train/image" \ + --train_label_path "XFUND/zh_train/xfun_normalize_train.json" \ + --eval_data_dir "XFUND/zh_val/image" \ + --eval_label_path "XFUND/zh_val/xfun_normalize_val.json" \ + --num_train_epochs 200 \ + --eval_steps 10 \ + --save_steps 500 \ + --output_dir "./output/ser/" \ + --learning_rate 5e-5 \ + --warmup_steps 50 \ + --evaluate_during_training \ + --seed 2048 +``` + +最终会打印出`precision`, `recall`, `f1`等指标,如下所示。 + +``` +best metrics: {'loss': 1.066644651549203, 'precision': 0.8770182068017863, 'recall': 0.9361936193619362, 'f1': 0.9056402979780063} +``` + +模型和训练日志会保存在`./output/ser/`文件夹中。 + +* 使用评估集合中提供的OCR识别结果进行预测 + +```shell +export CUDA_VISIBLE_DEVICES=0 +python3.7 infer_ser.py \ + --model_name_or_path "./PP-Layout_v1.0_ser_pretrained/" \ + --output_dir "output_res/" \ + --infer_imgs "XFUND/zh_val/image/" \ + --ocr_json_path "XFUND/zh_val/xfun_normalize_val.json" +``` + +最终会在`output_res`目录下保存预测结果可视化图像以及预测结果文本文件,文件名为`infer_results.txt`。 + +* 使用`OCR引擎 + SER`串联结果 + +```shell +export CUDA_VISIBLE_DEVICES=0 +python3.7 infer_ser_e2e.py \ + --model_name_or_path "./output/PP-Layout_v1.0_ser_pretrained/" \ + --max_seq_length 512 \ + --output_dir "output_res_e2e/" +``` + +* 对`OCR引擎 + SER`预测系统进行端到端评估 + +```shell +export CUDA_VISIBLE_DEVICES=0 +python helper/eval_with_label_end2end.py --gt_json_path XFUND/zh_val/xfun_normalize_val.json --pred_json_path output_res/infer_results.txt +``` + + +3.3 RE任务 + +coming soon! 
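+Back to the SER predictions from Section 3.2: each line of `infer_results.txt` is the image path and a JSON string separated by a tab, which is also the format consumed by `helper/eval_with_label_end2end.py` as `--pred_json_path`. Below is a minimal parsing sketch (field names follow the output written by `infer_ser.py`; the path assumes the default `--output_dir` from Section 3.2):
+
+```python
+import json
+
+# each line: "<img_path>\t{"ocr_info": [...]}"
+with open("output_res/infer_results.txt", "r") as fin:
+    for line in fin:
+        img_path, info = line.strip().split("\t")
+        for item in json.loads(info)["ocr_info"]:
+            # "pred" is the SER label, "text" the OCR result,
+            # "bbox" the [x1, y1, x2, y2] box
+            print(img_path, item["pred"], item["text"], item["bbox"])
+```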
+ + +## 参考链接 + +- LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding, https://arxiv.org/pdf/2104.08836.pdf +- microsoft/unilm/layoutxlm, https://github.com/microsoft/unilm/tree/master/layoutxlm +- XFUND dataset, https://github.com/doc-analysis/XFUND diff --git a/ppstructure/vqa/helper/eval_with_label_end2end.py b/ppstructure/vqa/helper/eval_with_label_end2end.py new file mode 100644 index 0000000000000000000000000000000000000000..c8dd3e0ad437e51e21ebc53daeec9fdf9aa76b63 --- /dev/null +++ b/ppstructure/vqa/helper/eval_with_label_end2end.py @@ -0,0 +1,262 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import sys +# import Polygon +import shapely +from shapely.geometry import Polygon +import numpy as np +from collections import defaultdict +import operator +import editdistance +import argparse +import json +import copy + + +def parse_ser_results_fp(fp, fp_type="gt", ignore_background=True): + # img/zh_val_0.jpg { + # "height": 3508, + # "width": 2480, + # "ocr_info": [ + # {"text": "Maribyrnong", "label": "other", "bbox": [1958, 144, 2184, 198]}, + # {"text": "CITYCOUNCIL", "label": "other", "bbox": [2052, 183, 2171, 214]}, + # ] + assert fp_type in ["gt", "pred"] + key = "label" if fp_type == "gt" else "pred" + res_dict = dict() + with open(fp, "r") as fin: + lines = fin.readlines() + + for _, line in enumerate(lines): + img_path, info = line.strip().split("\t") + # get key + image_name = os.path.basename(img_path) + res_dict[image_name] = [] + # get infos + json_info = json.loads(info) + for single_ocr_info in json_info["ocr_info"]: + label = single_ocr_info[key].upper() + if label in ["O", "OTHERS", "OTHER"]: + label = "O" + if ignore_background and label == "O": + continue + single_ocr_info["label"] = label + res_dict[image_name].append(copy.deepcopy(single_ocr_info)) + return res_dict + + +def polygon_from_str(polygon_points): + """ + Create a shapely polygon object from gt or dt line. + """ + polygon_points = np.array(polygon_points).reshape(4, 2) + polygon = Polygon(polygon_points).convex_hull + return polygon + + +def polygon_iou(poly1, poly2): + """ + Intersection over union between two shapely polygons. 
+ """ + if not poly1.intersects( + poly2): # this test is fast and can accelerate calculation + iou = 0 + else: + try: + inter_area = poly1.intersection(poly2).area + union_area = poly1.area + poly2.area - inter_area + iou = float(inter_area) / union_area + except shapely.geos.TopologicalError: + # except Exception as e: + # print(e) + print('shapely.geos.TopologicalError occured, iou set to 0') + iou = 0 + return iou + + +def ed(args, str1, str2): + if args.ignore_space: + str1 = str1.replace(" ", "") + str2 = str2.replace(" ", "") + if args.ignore_case: + str1 = str1.lower() + str2 = str2.lower() + return editdistance.eval(str1, str2) + + +def convert_bbox_to_polygon(bbox): + """ + bbox : [x1, y1, x2, y2] + output: [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] + """ + xmin, ymin, xmax, ymax = bbox + poly = [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]] + return poly + + +def eval_e2e(args): + # gt + gt_results = parse_ser_results_fp(args.gt_json_path, "gt", + args.ignore_background) + # pred + dt_results = parse_ser_results_fp(args.pred_json_path, "pred", + args.ignore_background) + assert set(gt_results.keys()) == set(dt_results.keys()) + + iou_thresh = args.iou_thres + num_gt_chars = 0 + gt_count = 0 + dt_count = 0 + hit = 0 + ed_sum = 0 + + for img_name in gt_results: + gt_info = gt_results[img_name] + gt_count += len(gt_info) + + dt_info = dt_results[img_name] + dt_count += len(dt_info) + + dt_match = [False] * len(dt_info) + gt_match = [False] * len(gt_info) + + all_ious = defaultdict(tuple) + # gt: {text, label, bbox or poly} + for index_gt, gt in enumerate(gt_info): + if "poly" not in gt: + gt["poly"] = convert_bbox_to_polygon(gt["bbox"]) + gt_poly = polygon_from_str(gt["poly"]) + for index_dt, dt in enumerate(dt_info): + if "poly" not in dt: + dt["poly"] = convert_bbox_to_polygon(dt["bbox"]) + dt_poly = polygon_from_str(dt["poly"]) + iou = polygon_iou(dt_poly, gt_poly) + if iou >= iou_thresh: + all_ious[(index_gt, index_dt)] = iou + sorted_ious = sorted( + all_ious.items(), key=operator.itemgetter(1), reverse=True) + sorted_gt_dt_pairs = [item[0] for item in sorted_ious] + + # matched gt and dt + for gt_dt_pair in sorted_gt_dt_pairs: + index_gt, index_dt = gt_dt_pair + if gt_match[index_gt] == False and dt_match[index_dt] == False: + gt_match[index_gt] = True + dt_match[index_dt] = True + # ocr rec results + gt_text = gt_info[index_gt]["text"] + dt_text = dt_info[index_dt]["text"] + + # ser results + gt_label = gt_info[index_gt]["label"] + dt_label = dt_info[index_dt]["pred"] + + if True: # ignore_masks[index_gt] == '0': + ed_sum += ed(args, gt_text, dt_text) + num_gt_chars += len(gt_text) + if gt_text == dt_text: + if args.ignore_ser_prediction or gt_label == dt_label: + hit += 1 + +# unmatched dt + for tindex, dt_match_flag in enumerate(dt_match): + if dt_match_flag == False: + dt_text = dt_info[tindex]["text"] + gt_text = "" + ed_sum += ed(args, dt_text, gt_text) + +# unmatched gt + for tindex, gt_match_flag in enumerate(gt_match): + if gt_match_flag == False: + dt_text = "" + gt_text = gt_info[tindex]["text"] + ed_sum += ed(args, gt_text, dt_text) + num_gt_chars += len(gt_text) + + eps = 1e-9 + print("config: ", args) + print('hit, dt_count, gt_count', hit, dt_count, gt_count) + precision = hit / (dt_count + eps) + recall = hit / (gt_count + eps) + fmeasure = 2.0 * precision * recall / (precision + recall + eps) + avg_edit_dist_img = ed_sum / len(gt_results) + avg_edit_dist_field = ed_sum / (gt_count + eps) + character_acc = 1 - ed_sum / (num_gt_chars + eps) + + 
print('character_acc: %.2f' % (character_acc * 100) + "%") + print('avg_edit_dist_field: %.2f' % (avg_edit_dist_field)) + print('avg_edit_dist_img: %.2f' % (avg_edit_dist_img)) + print('precision: %.2f' % (precision * 100) + "%") + print('recall: %.2f' % (recall * 100) + "%") + print('fmeasure: %.2f' % (fmeasure * 100) + "%") + + return + + +def parse_args(): + """ + """ + + def str2bool(v): + return v.lower() in ("true", "t", "1") + + parser = argparse.ArgumentParser() + ## Required parameters + parser.add_argument( + "--gt_json_path", + default=None, + type=str, + required=True, ) + parser.add_argument( + "--pred_json_path", + default=None, + type=str, + required=True, ) + + parser.add_argument("--iou_thres", default=0.5, type=float) + + parser.add_argument( + "--ignore_case", + default=False, + type=str2bool, + help="whether to do lower case for the strs") + + parser.add_argument( + "--ignore_space", + default=True, + type=str2bool, + help="whether to ignore space") + + parser.add_argument( + "--ignore_background", + default=True, + type=str2bool, + help="whether to ignore other label") + + parser.add_argument( + "--ignore_ser_prediction", + default=False, + type=str2bool, + help="whether to ignore ocr pred results") + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + eval_e2e(args) diff --git a/ppstructure/vqa/helper/trans_xfun_data.py b/ppstructure/vqa/helper/trans_xfun_data.py new file mode 100644 index 0000000000000000000000000000000000000000..b5ebd5dfbd8addda0701a7cfd2387133f7a8776b --- /dev/null +++ b/ppstructure/vqa/helper/trans_xfun_data.py @@ -0,0 +1,52 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
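+
+# Convert a raw XFUND annotation file (one JSON object holding every document)
+# into the normalized format used by this project: one line per image,
+# "<image fname>\t{height, width, ocr_info}".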
+ +import json + + +def transfer_xfun_data(json_path=None, output_file=None): + with open(json_path, "r") as fin: + lines = fin.readlines() + + json_info = json.loads(lines[0]) + documents = json_info["documents"] + label_info = {} + with open(output_file, "w") as fout: + for idx, document in enumerate(documents): + img_info = document["img"] + document = document["document"] + image_path = img_info["fname"] + + label_info["height"] = img_info["height"] + label_info["width"] = img_info["width"] + + label_info["ocr_info"] = [] + + for doc in document: + label_info["ocr_info"].append({ + "text": doc["text"], + "label": doc["label"], + "bbox": doc["box"], + "id": doc["id"], + "linking": doc["linking"], + "words": doc["words"] + }) + + fout.write(image_path + "\t" + json.dumps( + label_info, ensure_ascii=False) + "\n") + + print("===ok====") + + +transfer_xfun_data("./xfun/zh.val.json", "./xfun_normalize_val.json") diff --git a/ppstructure/vqa/images/input/zh_val_0.jpg b/ppstructure/vqa/images/input/zh_val_0.jpg new file mode 100644 index 0000000000000000000000000000000000000000..479b60bcd3a859b187ce5325dfc381c1b87ee27f Binary files /dev/null and b/ppstructure/vqa/images/input/zh_val_0.jpg differ diff --git a/ppstructure/vqa/images/input/zh_val_42.jpg b/ppstructure/vqa/images/input/zh_val_42.jpg new file mode 100644 index 0000000000000000000000000000000000000000..42151bdd94929ede9da1a63ce8d9339971094a46 Binary files /dev/null and b/ppstructure/vqa/images/input/zh_val_42.jpg differ diff --git a/ppstructure/vqa/images/result_ser/zh_val_0_ser.jpg b/ppstructure/vqa/images/result_ser/zh_val_0_ser.jpg new file mode 100644 index 0000000000000000000000000000000000000000..22ba9a6f1b7652ca9ce6848093c7a39affb4886b Binary files /dev/null and b/ppstructure/vqa/images/result_ser/zh_val_0_ser.jpg differ diff --git a/ppstructure/vqa/images/result_ser/zh_val_42_ser.jpg b/ppstructure/vqa/images/result_ser/zh_val_42_ser.jpg new file mode 100644 index 0000000000000000000000000000000000000000..951864e5f35a987ff241f276c8da523d8c8eeaf3 Binary files /dev/null and b/ppstructure/vqa/images/result_ser/zh_val_42_ser.jpg differ diff --git a/ppstructure/vqa/infer_ser.py b/ppstructure/vqa/infer_ser.py new file mode 100644 index 0000000000000000000000000000000000000000..4ad220094a26b330555fbe9122a46fb56e64fe1e --- /dev/null +++ b/ppstructure/vqa/infer_ser.py @@ -0,0 +1,279 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
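+
+# SER inference with pre-computed OCR results: --ocr_json_path supplies the
+# text and boxes, LayoutXLM classifies every token, and the merged predictions
+# are written to <output_dir>/infer_results.txt together with one
+# visualization image per input.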
+ +import os +import sys +import json +import cv2 +import numpy as np +from copy import deepcopy + +import paddle + +# relative reference +from utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps +from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification + + +def pad_sentences(tokenizer, + encoded_inputs, + max_seq_len=512, + pad_to_max_seq_len=True, + return_attention_mask=True, + return_token_type_ids=True, + return_overflowing_tokens=False, + return_special_tokens_mask=False): + # Padding with larger size, reshape is carried out + max_seq_len = ( + len(encoded_inputs["input_ids"]) // max_seq_len + 1) * max_seq_len + + needs_to_be_padded = pad_to_max_seq_len and \ + max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len + + if needs_to_be_padded: + difference = max_seq_len - len(encoded_inputs["input_ids"]) + if tokenizer.padding_side == 'right': + if return_attention_mask: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + [0] * difference + if return_token_type_ids: + encoded_inputs["token_type_ids"] = ( + encoded_inputs["token_type_ids"] + + [tokenizer.pad_token_type_id] * difference) + if return_special_tokens_mask: + encoded_inputs["special_tokens_mask"] = encoded_inputs[ + "special_tokens_mask"] + [1] * difference + encoded_inputs["input_ids"] = encoded_inputs[ + "input_ids"] + [tokenizer.pad_token_id] * difference + encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0] + ] * difference + else: + assert False, f"padding_side of tokenizer just supports [\"right\"] but got {tokenizer.padding_side}" + else: + if return_attention_mask: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + + return encoded_inputs + + +def split_page(encoded_inputs, max_seq_len=512): + """ + truncate is often used in training process + """ + for key in encoded_inputs: + encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key]) + if encoded_inputs[key].ndim <= 1: # for input_ids, att_mask and so on + encoded_inputs[key] = encoded_inputs[key].reshape([-1, max_seq_len]) + else: # for bbox + encoded_inputs[key] = encoded_inputs[key].reshape( + [-1, max_seq_len, 4]) + return encoded_inputs + + +def preprocess( + tokenizer, + ori_img, + ocr_info, + img_size=(224, 224), + pad_token_label_id=-100, + max_seq_len=512, + add_special_ids=False, + return_attention_mask=True, ): + ocr_info = deepcopy(ocr_info) + height = ori_img.shape[0] + width = ori_img.shape[1] + + img = cv2.resize(ori_img, + (224, 224)).transpose([2, 0, 1]).astype(np.float32) + + segment_offset_id = [] + words_list = [] + bbox_list = [] + input_ids_list = [] + token_type_ids_list = [] + + for info in ocr_info: + # x1, y1, x2, y2 + bbox = info["bbox"] + bbox[0] = int(bbox[0] * 1000.0 / width) + bbox[2] = int(bbox[2] * 1000.0 / width) + bbox[1] = int(bbox[1] * 1000.0 / height) + bbox[3] = int(bbox[3] * 1000.0 / height) + + text = info["text"] + encode_res = tokenizer.encode( + text, pad_to_max_seq_len=False, return_attention_mask=True) + + if not add_special_ids: + # TODO: use tok.all_special_ids to remove + encode_res["input_ids"] = encode_res["input_ids"][1:-1] + encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1] + encode_res["attention_mask"] = encode_res["attention_mask"][1:-1] + + input_ids_list.extend(encode_res["input_ids"]) + token_type_ids_list.extend(encode_res["token_type_ids"]) + bbox_list.extend([bbox] * len(encode_res["input_ids"])) + words_list.append(text) + 
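+        # record the running subtoken count for this OCR box so token-level
+        # predictions can later be sliced back per box
+        # (see merge_preds_list_with_ocr_info)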
segment_offset_id.append(len(input_ids_list)) + + encoded_inputs = { + "input_ids": input_ids_list, + "token_type_ids": token_type_ids_list, + "bbox": bbox_list, + "attention_mask": [1] * len(input_ids_list), + } + + encoded_inputs = pad_sentences( + tokenizer, + encoded_inputs, + max_seq_len=max_seq_len, + return_attention_mask=return_attention_mask) + + encoded_inputs = split_page(encoded_inputs) + + fake_bs = encoded_inputs["input_ids"].shape[0] + + encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand( + [fake_bs] + list(img.shape)) + + encoded_inputs["segment_offset_id"] = segment_offset_id + + return encoded_inputs + + +def postprocess(attention_mask, preds, label_map_path): + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + preds = np.argmax(preds, axis=2) + + _, label_map = get_bio_label_maps(label_map_path) + + preds_list = [[] for _ in range(preds.shape[0])] + + # keep batch info + for i in range(preds.shape[0]): + for j in range(preds.shape[1]): + if attention_mask[i][j] == 1: + preds_list[i].append(label_map[preds[i][j]]) + + return preds_list + + +def merge_preds_list_with_ocr_info(label_map_path, ocr_info, segment_offset_id, + preds_list): + # must ensure the preds_list is generated from the same image + preds = [p for pred in preds_list for p in pred] + label2id_map, _ = get_bio_label_maps(label_map_path) + for key in label2id_map: + if key.startswith("I-"): + label2id_map[key] = label2id_map["B" + key[1:]] + + id2label_map = dict() + for key in label2id_map: + val = label2id_map[key] + if key == "O": + id2label_map[val] = key + if key.startswith("B-") or key.startswith("I-"): + id2label_map[val] = key[2:] + else: + id2label_map[val] = key + + for idx in range(len(segment_offset_id)): + if idx == 0: + start_id = 0 + else: + start_id = segment_offset_id[idx - 1] + + end_id = segment_offset_id[idx] + + curr_pred = preds[start_id:end_id] + curr_pred = [label2id_map[p] for p in curr_pred] + + if len(curr_pred) <= 0: + pred_id = 0 + else: + counts = np.bincount(curr_pred) + pred_id = np.argmax(counts) + ocr_info[idx]["pred_id"] = int(pred_id) + ocr_info[idx]["pred"] = id2label_map[pred_id] + return ocr_info + + +@paddle.no_grad() +def infer(args): + os.makedirs(args.output_dir, exist_ok=True) + + # init token and model + tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path) + # model = LayoutXLMModel.from_pretrained(args.model_name_or_path) + model = LayoutXLMForTokenClassification.from_pretrained( + args.model_name_or_path) + model.eval() + + # load ocr results json + ocr_results = dict() + with open(args.ocr_json_path, "r") as fin: + lines = fin.readlines() + for line in lines: + img_name, json_info = line.split("\t") + ocr_results[os.path.basename(img_name)] = json.loads(json_info) + + # get infer img list + infer_imgs = get_image_file_list(args.infer_imgs) + + # loop for infer + with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout: + for idx, img_path in enumerate(infer_imgs): + print("process: [{}/{}]".format(idx, len(infer_imgs), img_path)) + + img = cv2.imread(img_path) + + ocr_info = ocr_results[os.path.basename(img_path)]["ocr_info"] + inputs = preprocess( + tokenizer=tokenizer, + ori_img=img, + ocr_info=ocr_info, + max_seq_len=args.max_seq_length) + + outputs = model( + input_ids=inputs["input_ids"], + bbox=inputs["bbox"], + image=inputs["image"], + token_type_ids=inputs["token_type_ids"], + attention_mask=inputs["attention_mask"]) + + preds = outputs[0] + preds = postprocess(inputs["attention_mask"], 
preds, + args.label_map_path) + ocr_info = merge_preds_list_with_ocr_info( + args.label_map_path, ocr_info, inputs["segment_offset_id"], + preds) + + fout.write(img_path + "\t" + json.dumps( + { + "ocr_info": ocr_info, + }, ensure_ascii=False) + "\n") + + img_res = draw_ser_results(img, ocr_info) + cv2.imwrite( + os.path.join(args.output_dir, os.path.basename(img_path)), + img_res) + + return + + +if __name__ == "__main__": + args = parse_args() + infer(args) diff --git a/ppstructure/vqa/infer_ser_e2e.py b/ppstructure/vqa/infer_ser_e2e.py new file mode 100644 index 0000000000000000000000000000000000000000..da027a140bdb4fa12a40d423998d94e438a7cd11 --- /dev/null +++ b/ppstructure/vqa/infer_ser_e2e.py @@ -0,0 +1,121 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import sys +import json +import cv2 +import numpy as np +from copy import deepcopy +from PIL import Image + +import paddle +from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification + +# relative reference +from utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps, build_ocr_engine + +from utils import pad_sentences, split_page, preprocess, postprocess, merge_preds_list_with_ocr_info + + +def trans_poly_to_bbox(poly): + x1 = np.min([p[0] for p in poly]) + x2 = np.max([p[0] for p in poly]) + y1 = np.min([p[1] for p in poly]) + y2 = np.max([p[1] for p in poly]) + return [x1, y1, x2, y2] + + +def parse_ocr_info_for_ser(ocr_result): + ocr_info = [] + for res in ocr_result: + ocr_info.append({ + "text": res[1][0], + "bbox": trans_poly_to_bbox(res[0]), + "poly": res[0], + }) + return ocr_info + + +@paddle.no_grad() +def infer(args): + os.makedirs(args.output_dir, exist_ok=True) + + # init token and model + tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path) + model = LayoutXLMForTokenClassification.from_pretrained( + args.model_name_or_path) + model.eval() + + label2id_map, id2label_map = get_bio_label_maps(args.label_map_path) + label2id_map_for_draw = dict() + for key in label2id_map: + if key.startswith("I-"): + label2id_map_for_draw[key] = label2id_map["B" + key[1:]] + else: + label2id_map_for_draw[key] = label2id_map[key] + + # get infer img list + infer_imgs = get_image_file_list(args.infer_imgs) + + ocr_engine = build_ocr_engine(args.ocr_rec_model_dir, + args.ocr_det_model_dir) + + # loop for infer + with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout: + for idx, img_path in enumerate(infer_imgs): + print("process: [{}/{}]".format(idx, len(infer_imgs), img_path)) + + img = cv2.imread(img_path) + + ocr_result = ocr_engine.ocr(img_path, cls=False) + + ocr_info = parse_ocr_info_for_ser(ocr_result) + + inputs = preprocess( + tokenizer=tokenizer, + ori_img=img, + ocr_info=ocr_info, + max_seq_len=args.max_seq_length) + + outputs = model( + input_ids=inputs["input_ids"], + bbox=inputs["bbox"], + image=inputs["image"], + 
token_type_ids=inputs["token_type_ids"], + attention_mask=inputs["attention_mask"]) + + preds = outputs[0] + preds = postprocess(inputs["attention_mask"], preds, id2label_map) + ocr_info = merge_preds_list_with_ocr_info( + ocr_info, inputs["segment_offset_id"], preds, + label2id_map_for_draw) + + fout.write(img_path + "\t" + json.dumps( + { + "ocr_info": ocr_info, + }, ensure_ascii=False) + "\n") + + img_res = draw_ser_results(img, ocr_info) + cv2.imwrite( + os.path.join(args.output_dir, + os.path.splitext(os.path.basename(img_path))[0] + + "_ser.jpg"), img_res) + + return + + +if __name__ == "__main__": + args = parse_args() + infer(args) diff --git a/ppstructure/vqa/labels/labels_ser.txt b/ppstructure/vqa/labels/labels_ser.txt new file mode 100644 index 0000000000000000000000000000000000000000..508e48112412f62538baf0c78bcf99ec8945196e --- /dev/null +++ b/ppstructure/vqa/labels/labels_ser.txt @@ -0,0 +1,3 @@ +QUESTION +ANSWER +HEADER diff --git a/ppstructure/vqa/requirements.txt b/ppstructure/vqa/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c259fadc395335b336cb0ecdb5aa6bca48631987 --- /dev/null +++ b/ppstructure/vqa/requirements.txt @@ -0,0 +1,2 @@ +sentencepiece +yacs diff --git a/ppstructure/vqa/train_ser.py b/ppstructure/vqa/train_ser.py new file mode 100644 index 0000000000000000000000000000000000000000..90ca69d93fd22983533fcacd639bbd64dc3e11ec --- /dev/null +++ b/ppstructure/vqa/train_ser.py @@ -0,0 +1,313 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
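+
+# Fine-tune LayoutXLM for Semantic Entity Recognition (SER) on XFUND-format
+# data; entity-level precision/recall/F1 come from seqeval, and the checkpoint
+# with the best F1 is kept under <output_dir>/best_model.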
+ +import sys +import os +import random +import copy +import logging + +import argparse +import paddle +import numpy as np +from seqeval.metrics import classification_report, f1_score, precision_score, recall_score +from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification +from xfun import XFUNDataset +from utils import parse_args +from utils import get_bio_label_maps + +logger = logging.getLogger(__name__) + + +def set_seed(args): + random.seed(args.seed) + np.random.seed(args.seed) + paddle.seed(args.seed) + + +def train(args): + os.makedirs(args.output_dir, exist_ok=True) + logging.basicConfig( + filename=os.path.join(args.output_dir, "train.log") + if paddle.distributed.get_rank() == 0 else None, + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO + if paddle.distributed.get_rank() == 0 else logging.WARN, ) + + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + logger.addHandler(ch) + + label2id_map, id2label_map = get_bio_label_maps(args.label_map_path) + pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index + + # dist mode + if paddle.distributed.get_world_size() > 1: + paddle.distributed.init_parallel_env() + + tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path) + base_model = LayoutXLMModel.from_pretrained(args.model_name_or_path) + model = LayoutXLMForTokenClassification( + base_model, num_classes=len(label2id_map), dropout=None) + + # dist mode + if paddle.distributed.get_world_size() > 1: + model = paddle.DataParallel(model) + + train_dataset = XFUNDataset( + tokenizer, + data_dir=args.train_data_dir, + label_path=args.train_label_path, + label2id_map=label2id_map, + img_size=(224, 224), + pad_token_label_id=pad_token_label_id, + contains_re=False, + add_special_ids=False, + return_attention_mask=True, + load_mode='all') + + train_sampler = paddle.io.DistributedBatchSampler( + train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True) + + args.train_batch_size = args.per_gpu_train_batch_size * max( + 1, paddle.distributed.get_world_size()) + + train_dataloader = paddle.io.DataLoader( + train_dataset, + batch_sampler=train_sampler, + num_workers=0, + use_shared_memory=True, + collate_fn=None, ) + + t_total = len(train_dataloader) * args.num_train_epochs + + # build linear decay with warmup lr sch + lr_scheduler = paddle.optimizer.lr.PolynomialDecay( + learning_rate=args.learning_rate, + decay_steps=t_total, + end_lr=0.0, + power=1.0) + if args.warmup_steps > 0: + lr_scheduler = paddle.optimizer.lr.LinearWarmup( + lr_scheduler, + args.warmup_steps, + start_lr=0, + end_lr=args.learning_rate, ) + + optimizer = paddle.optimizer.AdamW( + learning_rate=lr_scheduler, + parameters=model.parameters(), + epsilon=args.adam_epsilon, + weight_decay=args.weight_decay) + + # Train! + logger.info("***** Running training *****") + logger.info(" Num examples = %d", len(train_dataset)) + logger.info(" Num Epochs = %d", args.num_train_epochs) + logger.info(" Instantaneous batch size per GPU = %d", + args.per_gpu_train_batch_size) + logger.info( + " Total train batch size (w. 
parallel, distributed) = %d", + args.train_batch_size * paddle.distributed.get_world_size(), ) + logger.info(" Total optimization steps = %d", t_total) + + global_step = 0 + tr_loss = 0.0 + set_seed(args) + best_metrics = None + + for epoch_id in range(args.num_train_epochs): + for step, batch in enumerate(train_dataloader): + model.train() + outputs = model(**batch) + # model outputs are always tuple in ppnlp (see doc) + loss = outputs[0] + loss = loss.mean() + logger.info( + "[epoch {}/{}][iter: {}/{}] lr: {:.5f}, train loss: {:.5f}, ". + format(epoch_id, args.num_train_epochs, step, + len(train_dataloader), + lr_scheduler.get_lr(), loss.numpy()[0])) + + loss.backward() + tr_loss += loss.item() + optimizer.step() + lr_scheduler.step() # Update learning rate schedule + optimizer.clear_grad() + global_step += 1 + + if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and + global_step % args.eval_steps == 0): + # Log metrics + # Only evaluate when single GPU otherwise metrics may not average well + if paddle.distributed.get_rank( + ) == 0 and args.evaluate_during_training: + results, _ = evaluate( + args, + model, + tokenizer, + label2id_map, + id2label_map, + pad_token_label_id, ) + + if best_metrics is None or results["f1"] >= best_metrics[ + "f1"]: + best_metrics = copy.deepcopy(results) + output_dir = os.path.join(args.output_dir, "best_model") + os.makedirs(output_dir, exist_ok=True) + if paddle.distributed.get_rank() == 0: + model.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) + paddle.save( + args, + os.path.join(output_dir, "training_args.bin")) + logger.info("Saving model checkpoint to %s", + output_dir) + + logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format( + epoch_id, args.num_train_epochs, step, + len(train_dataloader), results)) + if best_metrics is not None: + logger.info("best metrics: {}".format(best_metrics)) + + if paddle.distributed.get_rank( + ) == 0 and args.save_steps > 0 and global_step % args.save_steps == 0: + # Save model checkpoint + output_dir = os.path.join(args.output_dir, + "checkpoint-{}".format(global_step)) + os.makedirs(output_dir, exist_ok=True) + if paddle.distributed.get_rank() == 0: + model.save_pretrained(output_dir) + tokenizer.save_pretrained(output_dir) + paddle.save(args, + os.path.join(output_dir, "training_args.bin")) + logger.info("Saving model checkpoint to %s", output_dir) + + return global_step, tr_loss / global_step + + +def evaluate(args, + model, + tokenizer, + label2id_map, + id2label_map, + pad_token_label_id, + prefix=""): + eval_dataset = XFUNDataset( + tokenizer, + data_dir=args.eval_data_dir, + label_path=args.eval_label_path, + label2id_map=label2id_map, + img_size=(224, 224), + pad_token_label_id=pad_token_label_id, + contains_re=False, + add_special_ids=False, + return_attention_mask=True, + load_mode='all') + + args.eval_batch_size = args.per_gpu_eval_batch_size * max( + 1, paddle.distributed.get_world_size()) + + eval_dataloader = paddle.io.DataLoader( + eval_dataset, + batch_size=args.eval_batch_size, + num_workers=0, + use_shared_memory=True, + collate_fn=None, ) + + # Eval! 
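+    # logits of all batches are concatenated and argmax-ed per token below;
+    # positions labeled pad_token_label_id are skipped so that padding does
+    # not influence the seqeval metrics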
+ logger.info("***** Running evaluation %s *****", prefix) + logger.info(" Num examples = %d", len(eval_dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + eval_loss = 0.0 + nb_eval_steps = 0 + preds = None + out_label_ids = None + model.eval() + for idx, batch in enumerate(eval_dataloader): + with paddle.no_grad(): + outputs = model(**batch) + tmp_eval_loss, logits = outputs[:2] + + tmp_eval_loss = tmp_eval_loss.mean() + + if paddle.distributed.get_rank() == 0: + logger.info("[Eval]process: {}/{}, loss: {:.5f}".format( + idx, len(eval_dataloader), tmp_eval_loss.numpy()[0])) + + eval_loss += tmp_eval_loss.item() + nb_eval_steps += 1 + if preds is None: + preds = logits.numpy() + out_label_ids = batch["labels"].numpy() + else: + preds = np.append(preds, logits.numpy(), axis=0) + out_label_ids = np.append( + out_label_ids, batch["labels"].numpy(), axis=0) + + eval_loss = eval_loss / nb_eval_steps + preds = np.argmax(preds, axis=2) + + # label_map = {i: label.upper() for i, label in enumerate(labels)} + + out_label_list = [[] for _ in range(out_label_ids.shape[0])] + preds_list = [[] for _ in range(out_label_ids.shape[0])] + + for i in range(out_label_ids.shape[0]): + for j in range(out_label_ids.shape[1]): + if out_label_ids[i, j] != pad_token_label_id: + out_label_list[i].append(id2label_map[out_label_ids[i][j]]) + preds_list[i].append(id2label_map[preds[i][j]]) + + results = { + "loss": eval_loss, + "precision": precision_score(out_label_list, preds_list), + "recall": recall_score(out_label_list, preds_list), + "f1": f1_score(out_label_list, preds_list), + } + + with open(os.path.join(args.output_dir, "test_gt.txt"), "w") as fout: + for lbl in out_label_list: + for l in lbl: + fout.write(l + "\t") + fout.write("\n") + with open(os.path.join(args.output_dir, "test_pred.txt"), "w") as fout: + for lbl in preds_list: + for l in lbl: + fout.write(l + "\t") + fout.write("\n") + + report = classification_report(out_label_list, preds_list) + logger.info("\n" + report) + + logger.info("***** Eval results %s *****", prefix) + for key in sorted(results.keys()): + logger.info(" %s = %s", key, str(results[key])) + + return results, preds_list + + +def print_arguments(args): + """print arguments""" + print('----------- Configuration Arguments -----------') + for arg, value in sorted(vars(args).items()): + print('%s: %s' % (arg, value)) + print('------------------------------------------------') + + +if __name__ == "__main__": + args = parse_args() + print_arguments(args) + train(args) diff --git a/ppstructure/vqa/utils.py b/ppstructure/vqa/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a4ac1e77d37d0a662294480a393c2f67e7f4cc64 --- /dev/null +++ b/ppstructure/vqa/utils.py @@ -0,0 +1,328 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
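+
+# Shared helpers for the VQA/SER pipeline: BIO label-map construction, image
+# file listing, visualization of SER results, tokenizer padding/paging, and
+# mapping token-level predictions back to OCR segments.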
+ +import os +import argparse +import cv2 +import random +import numpy as np +import imghdr +from copy import deepcopy + +import paddle + +from PIL import Image, ImageDraw, ImageFont + +from paddleocr import PaddleOCR + + +def get_bio_label_maps(label_map_path): + with open(label_map_path, "r") as fin: + lines = fin.readlines() + lines = [line.strip() for line in lines] + if "O" not in lines: + lines.insert(0, "O") + labels = [] + for line in lines: + if line == "O": + labels.append("O") + else: + labels.append("B-" + line) + labels.append("I-" + line) + label2id_map = {label: idx for idx, label in enumerate(labels)} + id2label_map = {idx: label for idx, label in enumerate(labels)} + return label2id_map, id2label_map + + +def get_image_file_list(img_file): + imgs_lists = [] + if img_file is None or not os.path.exists(img_file): + raise Exception("not found any img file in {}".format(img_file)) + + img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'} + if os.path.isfile(img_file) and imghdr.what(img_file) in img_end: + imgs_lists.append(img_file) + elif os.path.isdir(img_file): + for single_file in os.listdir(img_file): + file_path = os.path.join(img_file, single_file) + if os.path.isfile(file_path) and imghdr.what(file_path) in img_end: + imgs_lists.append(file_path) + if len(imgs_lists) == 0: + raise Exception("not found any img file in {}".format(img_file)) + imgs_lists = sorted(imgs_lists) + return imgs_lists + + +def draw_ser_results(image, + ocr_results, + font_path="../doc/fonts/simfang.ttf", + font_size=18): + np.random.seed(0) + color = (np.random.permutation(range(255)), + np.random.permutation(range(255)), + np.random.permutation(range(255))) + color_map = { + idx: (color[0][idx], color[1][idx], color[2][idx]) + for idx in range(1, 255) + } + if isinstance(image, np.ndarray): + image = Image.fromarray(image) + img_new = image.copy() + draw = ImageDraw.Draw(img_new) + + font = ImageFont.truetype(font_path, font_size, encoding="utf-8") + + for ocr_info in ocr_results: + if ocr_info["pred_id"] not in color_map: + continue + color = color_map[ocr_info["pred_id"]] + + # draw ocr results outline + bbox = ocr_info["bbox"] + bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3])) + draw.rectangle(bbox, fill=color) + + # draw ocr results + text = "{}: {}".format(ocr_info["pred"], ocr_info["text"]) + start_y = max(0, bbox[0][1] - font_size) + tw = font.getsize(text)[0] + draw.rectangle( + [(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1, + start_y + font_size)], + fill=(0, 0, 255)) + draw.text( + (bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font) + + img_new = Image.blend(image, img_new, 0.5) + return np.array(img_new) + + +def build_ocr_engine(rec_model_dir, det_model_dir): + ocr_engine = PaddleOCR( + rec_model_dir=rec_model_dir, + det_model_dir=det_model_dir, + use_angle_cls=False) + return ocr_engine + + +# pad sentences +def pad_sentences(tokenizer, + encoded_inputs, + max_seq_len=512, + pad_to_max_seq_len=True, + return_attention_mask=True, + return_token_type_ids=True, + return_overflowing_tokens=False, + return_special_tokens_mask=False): + # Padding with larger size, reshape is carried out + max_seq_len = ( + len(encoded_inputs["input_ids"]) // max_seq_len + 1) * max_seq_len + + needs_to_be_padded = pad_to_max_seq_len and \ + max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len + + if needs_to_be_padded: + difference = max_seq_len - len(encoded_inputs["input_ids"]) + if tokenizer.padding_side == 'right': + if return_attention_mask: + 
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + [0] * difference + if return_token_type_ids: + encoded_inputs["token_type_ids"] = ( + encoded_inputs["token_type_ids"] + + [tokenizer.pad_token_type_id] * difference) + if return_special_tokens_mask: + encoded_inputs["special_tokens_mask"] = encoded_inputs[ + "special_tokens_mask"] + [1] * difference + encoded_inputs["input_ids"] = encoded_inputs[ + "input_ids"] + [tokenizer.pad_token_id] * difference + encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0] + ] * difference + else: + if return_attention_mask: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + + return encoded_inputs + + +def split_page(encoded_inputs, max_seq_len=512): + """ + truncate is often used in training process + """ + for key in encoded_inputs: + encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key]) + if encoded_inputs[key].ndim <= 1: # for input_ids, att_mask and so on + encoded_inputs[key] = encoded_inputs[key].reshape([-1, max_seq_len]) + else: # for bbox + encoded_inputs[key] = encoded_inputs[key].reshape( + [-1, max_seq_len, 4]) + return encoded_inputs + + +def preprocess( + tokenizer, + ori_img, + ocr_info, + img_size=(224, 224), + pad_token_label_id=-100, + max_seq_len=512, + add_special_ids=False, + return_attention_mask=True, ): + ocr_info = deepcopy(ocr_info) + height = ori_img.shape[0] + width = ori_img.shape[1] + + img = cv2.resize(ori_img, + (224, 224)).transpose([2, 0, 1]).astype(np.float32) + + segment_offset_id = [] + words_list = [] + bbox_list = [] + input_ids_list = [] + token_type_ids_list = [] + + for info in ocr_info: + # x1, y1, x2, y2 + bbox = info["bbox"] + bbox[0] = int(bbox[0] * 1000.0 / width) + bbox[2] = int(bbox[2] * 1000.0 / width) + bbox[1] = int(bbox[1] * 1000.0 / height) + bbox[3] = int(bbox[3] * 1000.0 / height) + + text = info["text"] + encode_res = tokenizer.encode( + text, pad_to_max_seq_len=False, return_attention_mask=True) + + if not add_special_ids: + # TODO: use tok.all_special_ids to remove + encode_res["input_ids"] = encode_res["input_ids"][1:-1] + encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1] + encode_res["attention_mask"] = encode_res["attention_mask"][1:-1] + + input_ids_list.extend(encode_res["input_ids"]) + token_type_ids_list.extend(encode_res["token_type_ids"]) + bbox_list.extend([bbox] * len(encode_res["input_ids"])) + words_list.append(text) + segment_offset_id.append(len(input_ids_list)) + + encoded_inputs = { + "input_ids": input_ids_list, + "token_type_ids": token_type_ids_list, + "bbox": bbox_list, + "attention_mask": [1] * len(input_ids_list), + } + + encoded_inputs = pad_sentences( + tokenizer, + encoded_inputs, + max_seq_len=max_seq_len, + return_attention_mask=return_attention_mask) + + encoded_inputs = split_page(encoded_inputs) + + fake_bs = encoded_inputs["input_ids"].shape[0] + + encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand( + [fake_bs] + list(img.shape)) + + encoded_inputs["segment_offset_id"] = segment_offset_id + + return encoded_inputs + + +def postprocess(attention_mask, preds, id2label_map): + if isinstance(preds, paddle.Tensor): + preds = preds.numpy() + preds = np.argmax(preds, axis=2) + + preds_list = [[] for _ in range(preds.shape[0])] + + # keep batch info + for i in range(preds.shape[0]): + for j in range(preds.shape[1]): + if attention_mask[i][j] == 1: + preds_list[i].append(id2label_map[preds[i][j]]) + + return preds_list + + +def merge_preds_list_with_ocr_info(ocr_info, 
segment_offset_id, preds_list, + label2id_map_for_draw): + # must ensure the preds_list is generated from the same image + preds = [p for pred in preds_list for p in pred] + + id2label_map = dict() + for key in label2id_map_for_draw: + val = label2id_map_for_draw[key] + if key == "O": + id2label_map[val] = key + if key.startswith("B-") or key.startswith("I-"): + id2label_map[val] = key[2:] + else: + id2label_map[val] = key + + for idx in range(len(segment_offset_id)): + if idx == 0: + start_id = 0 + else: + start_id = segment_offset_id[idx - 1] + + end_id = segment_offset_id[idx] + + curr_pred = preds[start_id:end_id] + curr_pred = [label2id_map_for_draw[p] for p in curr_pred] + + if len(curr_pred) <= 0: + pred_id = 0 + else: + counts = np.bincount(curr_pred) + pred_id = np.argmax(counts) + ocr_info[idx]["pred_id"] = int(pred_id) + ocr_info[idx]["pred"] = id2label_map[int(pred_id)] + return ocr_info + + +def parse_args(): + parser = argparse.ArgumentParser() + # Required parameters + # yapf: disable + parser.add_argument("--model_name_or_path", default=None, type=str, required=True,) + parser.add_argument("--train_data_dir", default=None, type=str, required=False,) + parser.add_argument("--train_label_path", default=None, type=str, required=False,) + parser.add_argument("--eval_data_dir", default=None, type=str, required=False,) + parser.add_argument("--eval_label_path", default=None, type=str, required=False,) + parser.add_argument("--output_dir", default=None, type=str, required=True,) + parser.add_argument("--max_seq_length", default=512, type=int,) + parser.add_argument("--evaluate_during_training", action="store_true",) + parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.",) + parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for eval.",) + parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.",) + parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.",) + parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.",) + parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.",) + parser.add_argument("--num_train_epochs", default=3, type=int, help="Total number of training epochs to perform.",) + parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.",) + parser.add_argument("--eval_steps", type=int, default=10, help="eval every X updates steps.",) + parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.",) + parser.add_argument("--seed", type=int, default=2048, help="random seed for initialization",) + + parser.add_argument("--ocr_rec_model_dir", default=None, type=str, ) + parser.add_argument("--ocr_det_model_dir", default=None, type=str, ) + parser.add_argument("--label_map_path", default="./labels/labels_ser.txt", type=str, required=False, ) + parser.add_argument("--infer_imgs", default=None, type=str, required=False) + parser.add_argument("--ocr_json_path", default=None, type=str, required=False, help="ocr prediction results") + # yapf: enable + args = parser.parse_args() + return args diff --git a/ppstructure/vqa/xfun.py b/ppstructure/vqa/xfun.py new file mode 100644 index 0000000000000000000000000000000000000000..d62cdb5da5514280b62687d80d345ede9484ee90 --- /dev/null +++ b/ppstructure/vqa/xfun.py @@ -0,0 +1,442 
@@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import cv2 +import numpy as np +import paddle +import copy +from paddle.io import Dataset + +__all__ = ["XFUNDataset"] + + +class XFUNDataset(Dataset): + """ + Example: + print("=====begin to build dataset=====") + from paddlenlp.transformers import LayoutXLMTokenizer + tokenizer = LayoutXLMTokenizer.from_pretrained("/paddle/models/transformers/layoutxlm-base-paddle/") + tok_res = tokenizer.tokenize("Maribyrnong") + # res = tokenizer.convert_ids_to_tokens(val_data["input_ids"][0]) + dataset = XfunDatasetForSer( + tokenizer, + data_dir="./zh.val/", + label_path="zh.val/xfun_normalize_val.json", + img_size=(224,224)) + print(len(dataset)) + + data = dataset[0] + print(data.keys()) + print("input_ids: ", data["input_ids"]) + print("labels: ", data["labels"]) + print("token_type_ids: ", data["token_type_ids"]) + print("words_list: ", data["words_list"]) + print("image shape: ", data["image"].shape) + """ + + def __init__(self, + tokenizer, + data_dir, + label_path, + contains_re=False, + label2id_map=None, + img_size=(224, 224), + pad_token_label_id=None, + add_special_ids=False, + return_attention_mask=True, + load_mode='all', + max_seq_len=512): + super().__init__() + self.tokenizer = tokenizer + self.data_dir = data_dir + self.label_path = label_path + self.contains_re = contains_re + self.label2id_map = label2id_map + self.img_size = img_size + self.pad_token_label_id = pad_token_label_id + self.add_special_ids = add_special_ids + self.return_attention_mask = return_attention_mask + self.load_mode = load_mode + self.max_seq_len = max_seq_len + + if self.pad_token_label_id is None: + self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index + + self.all_lines = self.read_all_lines() + + self.entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2} + self.return_keys = { + 'bbox': 'np', + 'input_ids': 'np', + 'labels': 'np', + 'attention_mask': 'np', + 'image': 'np', + 'token_type_ids': 'np', + 'entities': 'dict', + 'relations': 'dict', + } + + if load_mode == "all": + self.encoded_inputs_all = self._parse_label_file_all() + + def pad_sentences(self, + encoded_inputs, + max_seq_len=512, + pad_to_max_seq_len=True, + return_attention_mask=True, + return_token_type_ids=True, + truncation_strategy="longest_first", + return_overflowing_tokens=False, + return_special_tokens_mask=False): + # Padding + needs_to_be_padded = pad_to_max_seq_len and \ + max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len + + if needs_to_be_padded: + difference = max_seq_len - len(encoded_inputs["input_ids"]) + if self.tokenizer.padding_side == 'right': + if return_attention_mask: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + [0] * difference + if return_token_type_ids: + encoded_inputs["token_type_ids"] = ( + encoded_inputs["token_type_ids"] + + [self.tokenizer.pad_token_type_id] * difference) + if return_special_tokens_mask: 
+ encoded_inputs["special_tokens_mask"] = encoded_inputs[ + "special_tokens_mask"] + [1] * difference + encoded_inputs["input_ids"] = encoded_inputs[ + "input_ids"] + [self.tokenizer.pad_token_id] * difference + encoded_inputs["labels"] = encoded_inputs[ + "labels"] + [self.pad_token_label_id] * difference + encoded_inputs["bbox"] = encoded_inputs[ + "bbox"] + [[0, 0, 0, 0]] * difference + elif self.tokenizer.padding_side == 'left': + if return_attention_mask: + encoded_inputs["attention_mask"] = [0] * difference + [ + 1 + ] * len(encoded_inputs["input_ids"]) + if return_token_type_ids: + encoded_inputs["token_type_ids"] = ( + [self.tokenizer.pad_token_type_id] * difference + + encoded_inputs["token_type_ids"]) + if return_special_tokens_mask: + encoded_inputs["special_tokens_mask"] = [ + 1 + ] * difference + encoded_inputs["special_tokens_mask"] + encoded_inputs["input_ids"] = [ + self.tokenizer.pad_token_id + ] * difference + encoded_inputs["input_ids"] + encoded_inputs["labels"] = [ + self.pad_token_label_id + ] * difference + encoded_inputs["labels"] + encoded_inputs["bbox"] = [ + [0, 0, 0, 0] + ] * difference + encoded_inputs["bbox"] + else: + if return_attention_mask: + encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[ + "input_ids"]) + + return encoded_inputs + + def truncate_inputs(self, encoded_inputs, max_seq_len=512): + for key in encoded_inputs: + if key == "sample_id": + continue + length = min(len(encoded_inputs[key]), max_seq_len) + encoded_inputs[key] = encoded_inputs[key][:length] + return encoded_inputs + + def read_all_lines(self, ): + with open(self.label_path, "r") as fin: + lines = fin.readlines() + return lines + + def _parse_label_file_all(self): + """ + parse all samples + """ + encoded_inputs_all = [] + for line in self.all_lines: + encoded_inputs_all.extend(self._parse_label_file(line)) + return encoded_inputs_all + + def _parse_label_file(self, line): + """ + parse single sample + """ + + image_name, info_str = line.split("\t") + image_path = os.path.join(self.data_dir, image_name) + + def add_imgge_path(x): + x['image_path'] = image_path + return x + + encoded_inputs = self._read_encoded_inputs_sample(info_str) + if self.contains_re: + encoded_inputs = self._chunk_re(encoded_inputs) + else: + encoded_inputs = self._chunk_ser(encoded_inputs) + encoded_inputs = list(map(add_imgge_path, encoded_inputs)) + return encoded_inputs + + def _read_encoded_inputs_sample(self, info_str): + """ + parse label info + """ + # read text info + info_dict = json.loads(info_str) + height = info_dict["height"] + width = info_dict["width"] + + words_list = [] + bbox_list = [] + input_ids_list = [] + token_type_ids_list = [] + gt_label_list = [] + + if self.contains_re: + # for re + entities = [] + relations = [] + id2label = {} + entity_id_to_index_map = {} + empty_entity = set() + for info in info_dict["ocr_info"]: + if self.contains_re: + # for re + if len(info["text"]) == 0: + empty_entity.add(info["id"]) + continue + id2label[info["id"]] = info["label"] + relations.extend([tuple(sorted(l)) for l in info["linking"]]) + + # x1, y1, x2, y2 + bbox = info["bbox"] + label = info["label"] + bbox[0] = int(bbox[0] * 1000.0 / width) + bbox[2] = int(bbox[2] * 1000.0 / width) + bbox[1] = int(bbox[1] * 1000.0 / height) + bbox[3] = int(bbox[3] * 1000.0 / height) + + text = info["text"] + encode_res = self.tokenizer.encode( + text, pad_to_max_seq_len=False, return_attention_mask=True) + + gt_label = [] + if not self.add_special_ids: + # TODO: use tok.all_special_ids to remove + 
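+                # drop the special tokens at the first and last positions so
+                # the BIO labels built below align 1:1 with the remaining
+                # subtokens of this text segment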
encode_res["input_ids"] = encode_res["input_ids"][1:-1] + encode_res["token_type_ids"] = encode_res["token_type_ids"][1: + -1] + encode_res["attention_mask"] = encode_res["attention_mask"][1: + -1] + if label.lower() == "other": + gt_label.extend([0] * len(encode_res["input_ids"])) + else: + gt_label.append(self.label2id_map[("b-" + label).upper()]) + gt_label.extend([self.label2id_map[("i-" + label).upper()]] * + (len(encode_res["input_ids"]) - 1)) + if self.contains_re: + if gt_label[0] != self.label2id_map["O"]: + entity_id_to_index_map[info["id"]] = len(entities) + entities.append({ + "start": len(input_ids_list), + "end": + len(input_ids_list) + len(encode_res["input_ids"]), + "label": label.upper(), + }) + input_ids_list.extend(encode_res["input_ids"]) + token_type_ids_list.extend(encode_res["token_type_ids"]) + bbox_list.extend([bbox] * len(encode_res["input_ids"])) + gt_label_list.extend(gt_label) + words_list.append(text) + + encoded_inputs = { + "input_ids": input_ids_list, + "labels": gt_label_list, + "token_type_ids": token_type_ids_list, + "bbox": bbox_list, + "attention_mask": [1] * len(input_ids_list), + # "words_list": words_list, + } + encoded_inputs = self.pad_sentences( + encoded_inputs, + max_seq_len=self.max_seq_len, + return_attention_mask=self.return_attention_mask) + encoded_inputs = self.truncate_inputs(encoded_inputs) + + if self.contains_re: + relations = self._relations(entities, relations, id2label, + empty_entity, entity_id_to_index_map) + encoded_inputs['relations'] = relations + encoded_inputs['entities'] = entities + return encoded_inputs + + def _chunk_ser(self, encoded_inputs): + encoded_inputs_all = [] + seq_len = len(encoded_inputs['input_ids']) + chunk_size = 512 + for chunk_id, index in enumerate(range(0, seq_len, chunk_size)): + chunk_beg = index + chunk_end = min(index + chunk_size, seq_len) + encoded_inputs_example = {} + for key in encoded_inputs: + encoded_inputs_example[key] = encoded_inputs[key][chunk_beg: + chunk_end] + + encoded_inputs_all.append(encoded_inputs_example) + return encoded_inputs_all + + def _chunk_re(self, encoded_inputs): + # prepare data + entities = encoded_inputs.pop('entities') + relations = encoded_inputs.pop('relations') + encoded_inputs_all = [] + chunk_size = 512 + for chunk_id, index in enumerate( + range(0, len(encoded_inputs["input_ids"]), chunk_size)): + item = {} + for k in encoded_inputs: + item[k] = encoded_inputs[k][index:index + chunk_size] + + # select entity in current chunk + entities_in_this_span = [] + global_to_local_map = {} # + for entity_id, entity in enumerate(entities): + if (index <= entity["start"] < index + chunk_size and + index <= entity["end"] < index + chunk_size): + entity["start"] = entity["start"] - index + entity["end"] = entity["end"] - index + global_to_local_map[entity_id] = len(entities_in_this_span) + entities_in_this_span.append(entity) + + # select relations in current chunk + relations_in_this_span = [] + for relation in relations: + if (index <= relation["start_index"] < index + chunk_size and + index <= relation["end_index"] < index + chunk_size): + relations_in_this_span.append({ + "head": global_to_local_map[relation["head"]], + "tail": global_to_local_map[relation["tail"]], + "start_index": relation["start_index"] - index, + "end_index": relation["end_index"] - index, + }) + item.update({ + "entities": reformat(entities_in_this_span), + "relations": reformat(relations_in_this_span), + }) + item['entities']['label'] = [ + self.entities_labels[x] for x in 
item['entities']['label'] + ] + encoded_inputs_all.append(item) + return encoded_inputs_all + + def _relations(self, entities, relations, id2label, empty_entity, + entity_id_to_index_map): + """ + build relations + """ + relations = list(set(relations)) + relations = [ + rel for rel in relations + if rel[0] not in empty_entity and rel[1] not in empty_entity + ] + kv_relations = [] + for rel in relations: + pair = [id2label[rel[0]], id2label[rel[1]]] + if pair == ["question", "answer"]: + kv_relations.append({ + "head": entity_id_to_index_map[rel[0]], + "tail": entity_id_to_index_map[rel[1]] + }) + elif pair == ["answer", "question"]: + kv_relations.append({ + "head": entity_id_to_index_map[rel[1]], + "tail": entity_id_to_index_map[rel[0]] + }) + else: + continue + relations = sorted( + [{ + "head": rel["head"], + "tail": rel["tail"], + "start_index": get_relation_span(rel, entities)[0], + "end_index": get_relation_span(rel, entities)[1], + } for rel in kv_relations], + key=lambda x: x["head"], ) + return relations + + def load_img(self, image_path): + # read img + img = cv2.imread(image_path) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + resize_h, resize_w = self.img_size + im_shape = img.shape[0:2] + im_scale_y = resize_h / im_shape[0] + im_scale_x = resize_w / im_shape[1] + img_new = cv2.resize( + img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2) + mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :] + std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :] + img_new = img_new / 255.0 + img_new -= mean + img_new /= std + img = img_new.transpose((2, 0, 1)) + return img + + def __getitem__(self, idx): + if self.load_mode == "all": + data = copy.deepcopy(self.encoded_inputs_all[idx]) + else: + data = self._parse_label_file(self.all_lines[idx])[0] + + image_path = data.pop('image_path') + data["image"] = self.load_img(image_path) + + return_data = {} + for k, v in data.items(): + if k in self.return_keys: + if self.return_keys[k] == 'np': + v = np.array(v) + return_data[k] = v + return return_data + + def __len__(self, ): + if self.load_mode == "all": + return len(self.encoded_inputs_all) + else: + return len(self.all_lines) + + +def get_relation_span(rel, entities): + bound = [] + for entity_index in [rel["head"], rel["tail"]]: + bound.append(entities[entity_index]["start"]) + bound.append(entities[entity_index]["end"]) + return min(bound), max(bound) + + +def reformat(data): + new_data = {} + for item in data: + for k, v in item.items(): + if k not in new_data: + new_data[k] = [] + new_data[k].append(v) + return new_data diff --git a/test_tipc/common_func.sh b/test_tipc/common_func.sh index 3f0fa66b77ff50b23b1e83dea506580f549f8ecf..85dfe217253bb1d4c8b92f17d26f138121a2a198 100644 --- a/test_tipc/common_func.sh +++ b/test_tipc/common_func.sh @@ -30,6 +30,7 @@ function func_set_params(){ function func_parser_params(){ strs=$1 + MODE=$2 IFS=":" array=(${strs}) key=${array[0]} diff --git a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..fcac6e3984cf3fd45fec9f7b736f794289278b25 --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,19 @@ +===========================ch_PP-OCRv2=========================== +model_name:ch_PP-OCRv2 +python:python3.7 +infer_model:./inference/ch_PP-OCRv2_det_infer/ 
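
A side note on the dataset code above: _read_encoded_inputs_sample rescales every OCR box onto a 0-1000 grid (the LayoutLM coordinate convention) and expands each segment's class into one B- tag followed by I- tags, one label per subword token. A minimal, self-contained sketch of just those two steps, with toy values (this label2id_map is hypothetical, not the dataset's real mapping):

# Sketch of the 1000-grid bbox normalization and BIO tag expansion above.
def normalize_bbox(bbox, width, height):
    # x1, y1, x2, y2 -> integers on a 0-1000 grid
    x1, y1, x2, y2 = bbox
    return [int(x1 * 1000.0 / width), int(y1 * 1000.0 / height),
            int(x2 * 1000.0 / width), int(y2 * 1000.0 / height)]

def bio_labels(label, num_tokens, label2id_map):
    # "other" maps to class 0; any entity label becomes one B- tag plus I- tags
    if label.lower() == "other":
        return [0] * num_tokens
    return ([label2id_map[("b-" + label).upper()]] +
            [label2id_map[("i-" + label).upper()]] * (num_tokens - 1))

label2id_map = {"B-QUESTION": 1, "I-QUESTION": 2, "B-ANSWER": 3, "I-ANSWER": 4}
print(normalize_bbox([10, 20, 110, 60], width=1000, height=500))  # [10, 40, 110, 120]
print(bio_labels("question", 3, label2id_map))                    # [1, 2, 2]
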
+infer_export:null +infer_quant:False +inference:tools/infer/predict_system.py +--use_gpu:False|True +--enable_mkldnn:False|True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--rec_model_dir:./inference/ch_PP-OCRv2_rec_infer/ +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..d03ee1e8b0493ab0f3ca2b8c4bb55b6b0cfecdcc --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn system +runtime_device:ARM_CPU +det_infer_model:ch_PP-OCRv2_det_infer|ch_PP-OCRv2_det_slim_quant_infer +rec_infer_model:ch_PP-OCRv2_rec_infer|ch_PP-OCRv2_rec_slim_quant_infer +cls_infer_model:ch_ppocr_mobile_v2.0_cls_infer|ch_ppocr_mobile_v2.0_cls_slim_infer +--cpu_threads:1|4 +--det_batch_size:1 +--rec_batch_size:1 +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt new file mode 100644 index 0000000000000000000000000000000000000000..a9fcafbf17878ec7ea42691359aa6ae36ccd522e --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn system +runtime_device:ARM_GPU_OPENCL +det_infer_model:ch_PP-OCRv2_det_infer|ch_PP-OCRv2_det_slim_quant_infer +rec_infer_model:ch_PP-OCRv2_rec_infer|ch_PP-OCRv2_rec_slim_quant_infer +cls_infer_model:ch_ppocr_mobile_v2.0_cls_infer|ch_ppocr_mobile_v2.0_cls_slim_infer +--cpu_threads:1|4 +--det_batch_size:1 +--rec_batch_size:1 +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..37ad2380e35014ad2dd8cc5467a04f958505947a --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn det +runtime_device:ARM_CPU +det_infer_model:ch_PP-OCRv2_det_infer|ch_PP-OCRv2_det_slim_quant_infer +null:null +null:null +--cpu_threads:1|4 +--det_batch_size:1 +null:null +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +null:null +--benchmark:True \ No newline at end of file diff --git a/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt b/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt new file mode 100644 index 0000000000000000000000000000000000000000..a0b5569a631d03230a1a318c95f16f488ebb8b7d --- /dev/null +++ 
b/test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn det +runtime_device:ARM_GPU_OPENCL +det_infer_model:ch_PP-OCRv2_det_infer|ch_PP-OCRv2_det_slim_quant_infer +null:null +null:null +--cpu_threads:1|4 +--det_batch_size:1 +null:null +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +null:null +--benchmark:True diff --git a/test_tipc/configs/ppocrv2_det_mobile/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt similarity index 77% rename from test_tipc/configs/ppocrv2_det_mobile/train_infer_python.txt rename to test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt index 423cb979f0d35a62324958b36dd9d115211c19d4..9520ede3acd33b0e12300ee2de1b715605c9a0eb 100644 --- a/test_tipc/configs/ppocrv2_det_mobile/train_infer_python.txt +++ b/test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt @@ -1,20 +1,20 @@ ===========================train_params=========================== -model_name:PPOCRv2_ocr_det +model_name:ch_PPOCRv2_det python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:fp32 -Global.epoch_num:lite_train_infer=1|whole_train_infer=500 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4 +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## trainer:norm_train|pact_train -norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o -pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o +pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o fpgm_train:null distill_train:null null:null @@ -27,8 +27,8 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o -quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o +quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o fpgm_export: distill_export:null export1:null diff --git a/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..1aad65b687992155133ed11533a14f642510361d --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,21 @@ +===========================kl_quant_params=========================== +model_name:PPOCRv2_ocr_det_kl +python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null +infer_model:./inference/ch_PP-OCRv2_det_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o +infer_quant:True 
+inference:tools/infer/predict_det.py +--use_gpu:False|True +--enable_mkldnn:True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..b567c08185e084384c3883f1d602cec3f312ea53 --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2_det_PACT/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:PPOCRv2_ocr_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o +fpgm_export: +distill_export:null +export1:null +export2:null +inference_dir:Student +infer_model:./inference/ch_PP-OCRv2_det_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml b/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml new file mode 100644 index 0000000000000000000000000000000000000000..6c63af6ae8d62e098dec35d5918291291f654e32 --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml @@ -0,0 +1,159 @@ +Global: + debug: false + use_gpu: true + epoch_num: 800 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_pp-OCRv2_distillation + save_epoch_step: 3 + eval_batch_step: [0, 2000] + cal_metric_during_train: true + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: false + infer_img: doc/imgs_words/ch/word_1.jpg + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: false + use_space_char: true + distributed: true + save_res_path: ./output/rec/predicts_pp-OCRv2_distillation.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Piecewise + decay_epochs : [700, 800] + values : [0.001, 0.0001] + warmup_epoch: 5 + regularizer: + name: L2 + factor: 2.0e-05 + +Architecture: + model_type: &model_type "rec" + name: DistillationModel + algorithm: Distillation + Models: + Teacher: + pretrained: + freeze_params: false + return_all_feats: true + model_type: *model_type + algorithm: CRNN + Transform: + Backbone: + name: 
MobileNetV1Enhance + scale: 0.5 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 64 + Head: + name: CTCHead + mid_channels: 96 + fc_decay: 0.00002 + Student: + pretrained: + freeze_params: false + return_all_feats: true + model_type: *model_type + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV1Enhance + scale: 0.5 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 64 + Head: + name: CTCHead + mid_channels: 96 + fc_decay: 0.00002 + + +Loss: + name: CombinedLoss + loss_config_list: + - DistillationCTCLoss: + weight: 1.0 + model_name_list: ["Student", "Teacher"] + key: head_out + - DistillationDMLLoss: + weight: 1.0 + act: "softmax" + use_log: true + model_name_pairs: + - ["Student", "Teacher"] + key: head_out + - DistillationDistanceLoss: + weight: 1.0 + mode: "l2" + model_name_pairs: + - ["Student", "Teacher"] + key: backbone_out + +PostProcess: + name: DistillationCTCLabelDecode + model_name: ["Student", "Teacher"] + key: head_out + +Metric: + name: DistillationMetric + base_metric_name: RecMetric + main_indicator: acc + key: "Student" + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: + - ./train_data/ic15_data/rec_gt_train.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - RecAug: + - CTCLabelEncode: + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: true + batch_size_per_card: 128 + drop_last: true + num_sections: 1 + num_workers: 8 +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: + - ./train_data/ic15_data/rec_gt_test.txt + transforms: + - DecodeImage: + img_mode: BGR + channel_first: false + - CTCLabelEncode: + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: + - image + - label + - length + loader: + shuffle: false + drop_last: false + batch_size_per_card: 128 + num_workers: 8 diff --git a/test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..b61dc8bbe36ac5b21ec5f3561d39997f992d6c58 --- /dev/null +++ b/test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt @@ -0,0 +1,53 @@ +===========================train_params=========================== +model_name:PPOCRv2_ocr_rec +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o +quant_export: +fpgm_export: +distill_export:null +export1:null +export2:null +inference_dir:Student +infer_model:./inference/ch_PP-OCRv2_rec_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py 
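
The CombinedLoss configured above sums three weighted terms: a CTC loss on each head, a DML (mutual-learning) loss between the two heads' softmax outputs, and an L2 distance between backbone features. A framework-free sketch of how such a weighted combination reduces to one training scalar; the numpy stand-ins and values below are illustrative, not PaddleOCR's implementations:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def dml_loss(logits_s, logits_t):
    # roughly: symmetric KL between the two heads (act: softmax in the yml)
    p, q = softmax(logits_s), softmax(logits_t)
    kl = lambda a, b: np.sum(a * (np.log(a + 1e-12) - np.log(b + 1e-12)), axis=-1)
    return float(np.mean(kl(p, q) + kl(q, p)) / 2)

def distance_loss(feat_s, feat_t):
    # mode "l2" on backbone_out
    return float(np.mean((feat_s - feat_t) ** 2))

ctc_student, ctc_teacher = 0.8, 0.5          # placeholders for the two CTC terms
logits_s, logits_t = np.random.randn(2, 8, 10), np.random.randn(2, 8, 10)
feat_s, feat_t = np.random.randn(2, 64), np.random.randn(2, 64)

total = (1.0 * (ctc_student + ctc_teacher)       # DistillationCTCLoss, weight 1.0
         + 1.0 * dml_loss(logits_s, logits_t)    # DistillationDMLLoss, weight 1.0
         + 1.0 * distance_loss(feat_s, feat_t))  # DistillationDistanceLoss, weight 1.0
print(total)
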
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+
+
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..083a3ae26e726e290ffde4095821cbf3c40f7178
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,21 @@
+===========================kl_quant_params===========================
+model_name:PPOCRv2_ocr_rec_kl
+python:python3.7
+Global.pretrained_model:null
+Global.save_inference_dir:null
+infer_model:./inference/ch_PP-OCRv2_rec_infer/
+infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+infer_quant:True
+inference:tools/infer/predict_rec.py
+--use_gpu:False|True
+--enable_mkldnn:False|True
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:True
+--precision:int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+null:null
diff --git a/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..914c1bc7575dfee3309493b9110afe8b9cb7e59b
--- /dev/null
+++ b/test_tipc/configs/ch_PP-OCRv2_rec_PACT/train_infer_python.txt
@@ -0,0 +1,53 @@
+===========================train_params===========================
+model_name:PPOCRv2_ocr_rec_pact
+python:python3.7
+gpu_list:0|0,1
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./inference/rec_inference
+null:null
+##
+trainer:pact_train
+norm_train:null
+pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:null
+quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
+fpgm_export:
+distill_export:null
+export1:null
+export2:null
+inference_dir:Student
+infer_model:./inference/ch_PP-OCRv2_rec_infer/
+infer_export:null
+infer_quant:True
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1|6
+--use_tensorrt:False|True
+--precision:fp32|fp16|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+
+
diff --git a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92ac3e9d37460a7f299f5cc2929a9bcaabdc34ef
--- /dev/null
+++ 
b/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ocr_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:fpgm_train +norm_train:null +pact_train:null +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:null +quant_export:null +fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a46f0cf09dcf2bb812910f0cf322dda0749b87c --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,19 @@ +===========================ch_ppocr_mobile_v2.0=========================== +model_name:ch_ppocr_mobile_v2.0 +python:python3.7 +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_system.py +--use_gpu:False|True +--enable_mkldnn:False|True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--rec_model_dir:./inference/ch_ppocr_mobile_v2.0_rec_infer/ +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..95943683d6e8aa8f4765335fb9f03ad7688eedc7 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn system +runtime_device:ARM_CPU +det_infer_model:ch_ppocr_mobile_v2.0_det_infer|ch_ppocr_db_mobile_v2.0_det_quant_infer +rec_infer_model:ch_ppocr_mobile_v2.0_rec_infer|ch_ppocr_mobile_v2.0_rec_slim_infer +cls_infer_model:ch_ppocr_mobile_v2.0_cls_infer|ch_ppocr_mobile_v2.0_cls_slim_infer +--cpu_threads:1|4 
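
Each of these test_tipc txt files is a positional key:value table that the shell harness reads line by line (func_parser_params and friends in test_tipc/common_func.sh); a `|` separates alternatives to sweep and `null` marks an unused slot. A rough Python equivalent of that per-line parsing, for illustration only:

# Rough Python equivalent of the key:value parsing done by the shell harness.
def parse_tipc_line(line):
    key, _, value = line.strip().partition(":")
    if value == "null":
        return key, []                 # unused slot
    return key, value.split("|")       # '|' separates alternatives to sweep

for raw in ["--cpu_threads:1|6", "--precision:fp32|fp16|int8", "null:null"]:
    print(parse_tipc_line(raw))
# ('--cpu_threads', ['1', '6'])
# ('--precision', ['fp32', 'fp16', 'int8'])
# ('null', [])
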
+--det_batch_size:1 +--rec_batch_size:1 +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt new file mode 100644 index 0000000000000000000000000000000000000000..c13b9bee471ecb7fb5bfa56a5236f2a5bd9af98d --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn system +runtime_device:ARM_GPU_OPENCL +det_infer_model:ch_ppocr_mobile_v2.0_det_infer|ch_ppocr_db_mobile_v2.0_det_quant_infer +rec_infer_model:ch_ppocr_mobile_v2.0_rec_infer|ch_ppocr_mobile_v2.0_rec_slim_infer +cls_infer_model:ch_ppocr_mobile_v2.0_cls_infer|ch_ppocr_mobile_v2.0_cls_slim_infer +--cpu_threads:1|4 +--det_batch_size:1 +--rec_batch_size:1 +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt +--benchmark:True diff --git a/test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml similarity index 100% rename from test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/det_mv3_db.yml diff --git a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt diff --git a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_python_jetson.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt similarity index 100% rename from test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_python_jetson.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt diff --git a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt similarity index 65% rename from test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt index af71ce3b870e27f9dc046f00a53f266950f6f112..e83534b540e25a1bcbabdc44e9dea035bf5f406c 100644 --- a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt @@ -1,12 +1,13 @@ ===========================lite_params=========================== inference:./ocr_db_crnn det -infer_model:ch_PP-OCRv2_det_infer|ch_PP-OCRv2_det_slim_quant_infer runtime_device:ARM_CPU +det_infer_model:ch_ppocr_mobile_v2.0_det_infer|ch_ppocr_db_mobile_v2.0_det_quant_infer +null:null +null:null --cpu_threads:1|4 --det_batch_size:1 ---rec_batch_size:1 ---system_batch_size:1 +null:null 
--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ --config_dir:./config.txt ---rec_dict_dir:./ppocr_keys_v1.txt +null:null --benchmark:True diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ce8aec5b3db06a1ccb29f25d20d75c6a9adca85 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt @@ -0,0 +1,13 @@ +===========================lite_params=========================== +inference:./ocr_db_crnn det +runtime_device:ARM_GPU_OPENCL +det_infer_model:ch_ppocr_mobile_v2.0_det_infer|ch_ppocr_db_mobile_v2.0_det_quant_infer +null:null +null:null +--cpu_threads:1|4 +--det_batch_size:1 +null:null +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/ +--config_dir:./config.txt +null:null +--benchmark:True diff --git a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt diff --git a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt similarity index 96% rename from test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt index a19c8ee3355b010b55d1dbf16aa0e21940ba546c..6e5cecf632a42294006cffdf4cf3a466a326260b 100644 --- a/test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt @@ -15,4 +15,4 @@ op.det.local_service_conf.thread_num:1|6 op.det.local_service_conf.use_trt:False|True op.det.local_service_conf.precision:fp32|fp16|int8 pipline:pipeline_rpc_client.py|pipeline_http_client.py ---image_dir:../../doc/imgs \ No newline at end of file +--image_dir:../../doc/imgs diff --git a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt similarity index 59% rename from test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt index 8171a5aea94d29da88de2606afe8dc27b0f6512a..977312f2a49e76d92e4edc11f8f0d3ecf866999a 100644 --- a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt @@ -1,10 +1,10 @@ ===========================train_params=========================== -model_name:ocr_det +model_name:ch_ppocr_mobile_v2.0_det python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True -Global.auto_cast:amp -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.auto_cast:null 
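
Global.auto_cast is what these diffs keep flipping between amp and null: it decides whether the trainer wraps the forward pass in Paddle's mixed-precision context. Roughly, and only as a hedged sketch (the exact wiring in tools/program.py may differ, and AMP is only effective on GPU):

import paddle

# Sketch of what Global.auto_cast:amp toggles (simplified).
model = paddle.nn.Linear(8, 2)
opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())
scaler = paddle.amp.GradScaler(init_loss_scaling=1024)
x = paddle.randn([4, 8])

with paddle.amp.auto_cast():    # entered only when auto_cast is 'amp'
    loss = model(x).mean()
scaled = scaler.scale(loss)     # loss scaling guards against fp16 underflow
scaled.backward()
scaler.minimize(opt, scaled)
opt.clear_grad()
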
+Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null @@ -12,10 +12,10 @@ train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## -trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +trainer:norm_train +norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null distill_train:null null:null null:null @@ -27,9 +27,9 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +quant_export:null +fpgm_export:null distill_export:null export1:null export2:null @@ -48,4 +48,4 @@ inference:tools/infer/predict_det.py --image_dir:./inference/ch_det_data_50/all-sum-510/ null:null --benchmark:True -null:null +null:null \ No newline at end of file diff --git a/test_tipc/configs/ppocr_det_mobile/train_linux_dcu_normal_normal_infer_python_dcu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_dcu_normal_normal_infer_python_dcu.txt similarity index 100% rename from test_tipc/configs/ppocr_det_mobile/train_linux_dcu_normal_normal_infer_python_dcu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_dcu_normal_normal_infer_python_dcu.txt diff --git a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt similarity index 76% rename from test_tipc/configs/ppocr_det_mobile/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt index 6f5aa58b47154fa4d9f555e906c0e16086612628..bfb71b781039493b12875ed4e99c8cd004a2e295 100644 --- a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt @@ -13,9 +13,9 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +norm_train:tools/train.py -c 
test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy distill_train:null null:null null:null @@ -27,9 +27,9 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o +norm_export:tools/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o distill_export:null export1:null export2:null diff --git a/test_tipc/configs/ppocr_det_mobile/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt similarity index 98% rename from test_tipc/configs/ppocr_det_mobile/train_infer_python.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 17cc924a5bb4f06b8585547ddb9dcc2e5614e854..ff1c7432df75f78fd6c45d995f50a9642d44637c 100644 --- a/test_tipc/configs/ppocr_det_mobile/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -3,7 +3,7 @@ model_name:ocr_det python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True -Global.auto_cast:null +Global.auto_cast:amp Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 Global.save_model_dir:./output/ Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 @@ -48,4 +48,4 @@ inference:tools/infer/predict_det.py --image_dir:./inference/ch_det_data_50/all-sum-510/ null:null --benchmark:True -null:null \ No newline at end of file +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..014dad5fc9d87c08a0725f57127f8bf2cb248be3 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ocr_det +python:python +gpu_list:-1 +Global.use_gpu:False +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null +distill_train:null 
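
The fpgm_train entries run FPGM pruning through sensitivity_anal.py: filters whose weights sit closest to the geometric median of their layer are treated as redundant and pruned first. The criterion itself is easy to state in numpy; this is a simplified sketch of the ranking, not PaddleSlim's implementation:

import numpy as np

# FPGM criterion, simplified: a filter close to all the others (near the
# geometric median of the layer) carries little unique information.
def fpgm_scores(weight):               # weight: [out_channels, in_c, kh, kw]
    flat = weight.reshape(weight.shape[0], -1)
    # sum of distances from each filter to every other filter
    dists = np.linalg.norm(flat[:, None, :] - flat[None, :, :], axis=-1)
    return dists.sum(axis=1)           # low score -> prune first

w = np.random.randn(16, 8, 3, 3)
prune_idx = np.argsort(fpgm_scores(w))[:4]   # e.g. drop the 4 most redundant filters
print(prune_idx)
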
+null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:False +--enable_mkldnn:False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:fp32 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..6a63b39d976c0e9693deec097c37eb0ff212d8af --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:ocr_det +python:python +gpu_list:0 +Global.use_gpu:True +Global.auto_cast:fp32|amp +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null + diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..1039dcad06d63bb1fc1a47b7cc4760cd8d75ed63 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,21 @@ 
+===========================kl_quant_params=========================== +model_name:ch_ppocr_mobile_v2.0_det_KL +python:python3.7 +Global.pretrained_model:null +Global.save_inference_dir:null +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_det.py +--use_gpu:False|True +--enable_mkldnn:True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea74b7fe762aea9e47b18b3c31015524c22b088c --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt @@ -0,0 +1,17 @@ +===========================kl_quant_params=========================== +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_det.py +--use_gpu:False +--enable_mkldnn:False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..ea74b7fe762aea9e47b18b3c31015524c22b088c --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt @@ -0,0 +1,17 @@ +===========================kl_quant_params=========================== +infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ +infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +infer_quant:True +inference:tools/infer/predict_det.py +--use_gpu:False +--enable_mkldnn:False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a6c6568584250d269acfe63aef43ef66410fd99 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det_PACT/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ocr_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## 
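
The *_KL configs drive quant_kl.py, i.e. post-training quantization where the int8 clipping threshold for each activation tensor is chosen to minimize the KL divergence between the original distribution and its quantized version. A condensed sketch of that calibration search, after the classic TensorRT recipe rather than PaddleSlim's exact code:

import numpy as np

# Condensed KL calibration: pick the clip threshold whose quantized histogram
# diverges least from the original activation histogram.
def kl_div(p, q):
    p, q = p / p.sum(), q / q.sum()
    mask = p > 0
    return np.sum(p[mask] * np.log(p[mask] / np.maximum(q[mask], 1e-12)))

def kl_threshold(acts, bins=2048, levels=128):
    hist, edges = np.histogram(np.abs(acts), bins=bins)
    best, best_i = np.inf, levels
    for i in range(levels, bins + 1):
        ref = hist[:i].astype(np.float64).copy()
        ref[-1] += hist[i:].sum()            # fold the clipped tail into the last bin
        # requantize the first i bins down to `levels` buckets and expand back
        chunks = np.array_split(ref, levels)
        quant = np.concatenate([np.full(len(c), c.mean()) for c in chunks])
        d = kl_div(ref, quant)
        if d < best:
            best, best_i = d, i
    return edges[best_i]                     # activations are clipped here

acts = np.random.laplace(scale=0.5, size=100_000)
print(kl_threshold(acts))
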
+trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/ppocr_rec_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_rec_mobile/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_rec/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt diff --git a/test_tipc/configs/ppocr_rec_mobile/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_rec_mobile/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_mobile_v2.0_rec/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..c93b83e5dcab1aab56ea5fa1a178e3dc7ec3c2e4 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_infer=2|whole_train_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_infer=128|whole_train_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/rec/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c configs/rec/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c configs/rec/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py 
--rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" --rec_algorithm="RARE" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..ee42dbfd0c80667ae5c3da4ee6df6416e1908388 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,102 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + disable_se: True + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..a968573d4410f3d474788cb5f6ab414c5d02aae3 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_FPGM +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:null 
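
These rec configs all end in a CTCHead plus CTCLabelDecode: the network emits a per-timestep distribution over the characters plus a blank, and decoding collapses repeats and strips blanks. Greedy decoding fits in a few lines; the alphabet below is a made-up stand-in for ppocr_keys_v1.txt:

import numpy as np

# Greedy CTC decoding: argmax per timestep, merge repeats, drop the blank.
def ctc_greedy_decode(logits, charset, blank=0):
    ids = logits.argmax(axis=-1)
    out, prev = [], blank
    for i in ids:
        if i != blank and i != prev:
            out.append(charset[i - 1])   # index 0 is reserved for the blank
        prev = i
    return "".join(out)

charset = "abc"                          # toy alphabet
T, C = 8, len(charset) + 1               # timesteps x (chars + blank)
logits = np.random.randn(T, C)
print(ctc_greedy_decode(logits, charset))
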
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/ic15_data/test/word_1.png
+null:null
+##
+trainer:fpgm_train
+norm_train:null
+pact_train:null
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model=./pretrain_models/ch_ppocr_mobile_v2.0_rec_train/best_accuracy
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params===========================
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:null
+quant_export:null
+fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_FPGM/rec_chinese_lite_train_v2.0.yml -o
+distill_export:null
+export1:null
+export2:null
+inference_dir:null
+train_model:null
+infer_export:null
+infer_quant:False
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
\ No newline at end of file
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
new file mode 100644
index 0000000000000000000000000000000000000000..92f33c58c9e97347e53b778bde5a21472b769f36
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
@@ -0,0 +1,21 @@
+===========================kl_quant_params===========================
+model_name:ch_ppocr_mobile_v2.0_rec_KL
+python:python3.7
+Global.pretrained_model:null
+Global.save_inference_dir:null
+infer_model:./inference/ch_ppocr_mobile_v2.0_rec_infer/
+infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml -o
+infer_quant:True
+inference:tools/infer/predict_rec.py
+--use_gpu:False|True
+--enable_mkldnn:True
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:int8
+--rec_model_dir:
+--image_dir:./inference/rec_inference
+null:null
+--benchmark:True
+null:null
+null:null
diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531
--- /dev/null
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_KL/rec_chinese_lite_train_v2.0.yml
@@ -0,0 +1,101 @@
+Global:
+  use_gpu: true
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec_chinese_lite_v2.0
+  save_epoch_step: 3
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  max_text_length: 25
+  infer_mode: 
False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml new file mode 100644 index 0000000000000000000000000000000000000000..b06dafe7fdc01eadeee51e70dfa4e8c675bda531 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: true + epoch_num: 500 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec_chinese_lite_v2.0 + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: ppocr/utils/ppocr_keys_v1.txt + max_text_length: 25 + infer_mode: False + use_space_char: True + save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Cosine + learning_rate: 0.001 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: small + small_stride: [1, 2, 2, 2] + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 48 + Head: + name: CTCHead + fc_decay: 0.00001 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - RecAug: + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 
'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: train_data/ic15_data + label_file_list: ["train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 320] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..7bbdd58ae13eca00623123cf2ca39d3b76daa72a --- /dev/null +++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_mobile_v2.0_rec_PACT +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.checkpoints:null +train_model_name:latest +train_infer_img_dir:./train_data/ic15_data/test/word_1.png +null:null +## +trainer:pact_train +norm_train:null +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.checkpoints: +norm_export:null +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_ppocr_mobile_v2.0_rec_PACT/rec_chinese_lite_train_v2.0.yml -o +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:null +infer_export:null +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ppocr_keys_v1.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt new file mode 100644 index 0000000000000000000000000000000000000000..92d7031e884d10df3a5c98bf675d64d63b3cb335 --- /dev/null +++ b/test_tipc/configs/ch_ppocr_server_v2.0/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt @@ -0,0 +1,19 @@ +===========================ch_ppocr_server_v2.0=========================== +model_name:ch_ppocr_server_v2.0 +python:python3.7 +infer_model:./inference/ch_ppocr_server_v2.0_det_infer/ +infer_export:null +infer_quant:True +inference:tools/infer/predict_system.py +--use_gpu:False|True +--enable_mkldnn:False|True +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False +--precision:fp32
+--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/ +--benchmark:True +null:null +null:null diff --git a/test_tipc/configs/ppocr_det_server/det_r50_vd_db.yml b/test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml similarity index 100% rename from test_tipc/configs/ppocr_det_server/det_r50_vd_db.yml rename to test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml diff --git a/test_tipc/configs/ppocr_det_server/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_det_server/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt rename to test_tipc/configs/ch_ppocr_server_v2.0_det/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt diff --git a/test_tipc/configs/ppocr_det_server/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_det_server/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_server_v2.0_det/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt diff --git a/test_tipc/configs/ppocr_det_server/train_infer_python.txt b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt similarity index 89% rename from test_tipc/configs/ppocr_det_server/train_infer_python.txt rename to test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt index 35aa1886808cb0a9edf6f7db230954080bdb53eb..bea918a7f366548056d7d62a5785353a4e689d01 100644 --- a/test_tipc/configs/ppocr_det_server/train_infer_python.txt +++ b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt @@ -1,12 +1,12 @@ ===========================train_params=========================== -model_name:ocr_server_det +model_name:ch_ppocr_server_v2.0_det python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null -Global.epoch_num:lite_train_infer=2|whole_train_infer=300 +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4 +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ diff --git a/test_tipc/configs/ppocr_rec_server/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_rec_server/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt rename to test_tipc/configs/ch_ppocr_server_v2.0_rec/model_linux_gpu_normal_normal_paddle2onnx_python_linux_cpu.txt diff --git a/test_tipc/configs/ppocr_rec_server/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt similarity index 100% rename from test_tipc/configs/ppocr_rec_server/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt rename to test_tipc/configs/ch_ppocr_server_v2.0_rec/model_linux_gpu_normal_normal_serving_python_linux_gpu_cpu.txt diff --git
a/test_tipc/configs/ppocr_rec_server/rec_icdar15_r34_train.yml b/test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml similarity index 100% rename from test_tipc/configs/ppocr_rec_server/rec_icdar15_r34_train.yml rename to test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml diff --git a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..f10985a91902716968660af5188473e4f1a7ae3d --- /dev/null +++ b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:ch_ppocr_server_v2.0_rec +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..de9df227722c8813fc8a21757b118875157a8a56 --- /dev/null +++ b/test_tipc/configs/det_mv3_db_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_mv3_db_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== 
+Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/det_mv3_db_v2.0_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml b/test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml new file mode 100644 index 0000000000000000000000000000000000000000..4ae32ab004684f040b5939be7f348d468b4b8024 --- /dev/null +++ b/test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml @@ -0,0 +1,109 @@ +Global: + use_gpu: true + epoch_num: 10000 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/east_mv3/ + save_epoch_step: 1000 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [4000, 5000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + save_res_path: ./output/det_east/predicts_east.txt + +Architecture: + model_type: det + algorithm: EAST + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: EASTFPN + model_name: small + Head: + name: EASTHead + model_name: small + +Loss: + name: EASTLoss + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + # name: Cosine + learning_rate: 0.001 + # warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: EASTPostProcess + score_thresh: 0.8 + cover_thresh: 0.1 + nms_thresh: 0.2 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - EASTProcessTrain: + image_shape: [512, 512] + background_ratio: 0.125 + min_crop_side_ratio: 0.1 + min_text_size: 10 + - KeepKeys: + keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 16 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 2400 + limit_type: max + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_east_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_east_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a3aced57aaf31bb54075d8ba3119d1626a2c58a --- /dev/null +++ b/test_tipc/configs/det_mv3_east_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_mv3_east_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_mv3_east/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_mv3_east_v2.0/det_mv3_east.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:EAST diff --git a/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..d37fdcfbb5b27404403674d99c1b8abe8cd65e85 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml @@ -0,0 +1,135 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_mv3_pse/ + save_epoch_step: 600 + # evaluation is run every 63 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: null + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: FPN + out_channels: 96 + Head: + name: PSEHead + hidden_dim: 96 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' +
factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 16 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..f9909027f10d9e9f96d65f9f5a1c5f3fd5c9e1c6 --- /dev/null +++ b/test_tipc/configs/det_mv3_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_mv3_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_mv3_pse/best_accuracy 
+infer_export:tools/export_model.py -c test_tipc/configs/det_mv3_pse_v2.0/det_mv3_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/configs/det_r50_db_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_db_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..72b5ede15d27a33477b5ecd8302aaccc0ab56413 --- /dev/null +++ b/test_tipc/configs/det_r50_db_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_db_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/det/det_r50_vd_db.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c configs/det/det_r50_vd_db.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/det/det_r50_vd_db.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/ch_ppocr_server_v2.0_det_train/best_accuracy +infer_export:tools/export_model.py -c configs/det/det_r50_vd_db.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +null:null \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml b/test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml new file mode 100644 index 0000000000000000000000000000000000000000..c6b6fc3ed79d0d717fe3dbd4cb9c8559ff8f07c4 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml @@ -0,0 +1,108 @@ +Global: + use_gpu: true + epoch_num: 10000 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/east_r50_vd/ + save_epoch_step: 1000 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [4000, 5000] + cal_metric_during_train: False + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + save_res_path: ./output/det_east/predicts_east.txt + +Architecture: + model_type: det + algorithm: EAST + Transform: + Backbone: + name: ResNet + layers: 50 + Neck: + name: EASTFPN + model_name: large + Head: + name: EASTHead + model_name: large + +Loss: + name: EASTLoss + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + # name: Cosine + learning_rate: 0.001 + # warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name:
EASTPostProcess + score_thresh: 0.8 + cover_thresh: 0.1 + nms_thresh: 0.2 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - EASTProcessTrain: + image_shape: [512, 512] + background_ratio: 0.125 + min_crop_side_ratio: 0.1 + min_text_size: 10 + - KeepKeys: + keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 2400 + limit_type: max + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..dfb376237ee35c277fcd86a88328c562d5c0429a --- /dev/null +++ b/test_tipc/configs/det_r50_vd_east_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_east_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_r50_vd_east/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_east_v2.0/det_r50_vd_east.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:EAST diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml
b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ebc4252718d5572837eac58061bf6f9eb35bf73 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml @@ -0,0 +1,134 @@ +Global: + use_gpu: true + epoch_num: 600 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/det_r50_vd_pse/ + save_epoch_step: 600 + # evaluation is run every 125 iterations + eval_batch_step: [ 0,1000 ] + cal_metric_during_train: False + pretrained_model: + checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_en/img_10.jpg + save_res_path: ./output/det_pse/predicts_pse.txt + +Architecture: + model_type: det + algorithm: PSE + Transform: + Backbone: + name: ResNet + layers: 50 + Neck: + name: FPN + out_channels: 256 + Head: + name: PSEHead + hidden_dim: 256 + out_channels: 7 + +Loss: + name: PSELoss + alpha: 0.7 + ohem_ratio: 3 + kernel_sample_mask: pred + reduction: none + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Step + learning_rate: 0.0001 + step_size: 200 + gamma: 0.1 + regularizer: + name: 'L2' + factor: 0.0005 + +PostProcess: + name: PSEPostProcess + thresh: 0 + box_thresh: 0.85 + min_area: 16 + box_type: box # 'box' or 'poly' + scale: 1 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - ColorJitter: + brightness: 0.12549019607843137 + saturation: 0.5 + - IaaAugment: + augmenter_args: + - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } } + - { 'type': Fliplr, 'args': { 'p': 0.5 } } + - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } } + - MakePseGt: + kernel_num: 7 + min_shrink_ratio: 0.4 + size: 640 + - RandomCropImgMask: + size: [ 640,640 ] + main_key: gt_text + crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] + - NormalizeImage: + scale: 1./255. + mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list + loader: + shuffle: True + drop_last: False + batch_size_per_card: 8 + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + ratio_list: [ 1.0 ] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + limit_side_len: 736 + limit_type: min + - NormalizeImage: + scale: 1./255. 
+ mean: [ 0.485, 0.456, 0.406 ] + std: [ 0.229, 0.224, 0.225 ] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 8 \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..5ab6d45d7c1eb5e3c17fd53a8c8c504812c1012c --- /dev/null +++ b/test_tipc/configs/det_r50_vd_pse_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_pse_v2.0 +python:python3.7 +gpu_list:0 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +train_model:./inference/det_r50_vd_pse/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_pse_v2.0/det_r50_vd_pse.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +--save_log_path:null +--benchmark:True +--det_algorithm:PSE diff --git a/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml b/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e9315d2488ad187eb12708d094c5be57cb48eac --- /dev/null +++ b/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml @@ -0,0 +1,111 @@ +Global: + use_gpu: true + epoch_num: 5000 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/sast_r50_vd_ic15/ + save_epoch_step: 1000 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [4000, 5000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt + + +Architecture: + model_type: det + algorithm: SAST + Transform: + Backbone: + name: ResNet_SAST + layers: 50 + Neck: + name: SASTFPN + with_cab: True + Head: + name: SASTHead + +Loss: + name: SASTLoss + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + # name: Cosine + learning_rate: 0.001 + # warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: SASTPostProcess + score_thresh: 0.5
+ sample_pts_num: 2 + nms_thresh: 0.2 + expand_scale: 1.0 + shrink_ratio_of_width: 0.3 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - SASTProcessTrain: + image_shape: [512, 512] + min_crop_side_ratio: 0.3 + min_crop_size: 24 + min_text_size: 4 + max_text_size: 512 + - KeepKeys: + keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 4 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/icdar2015/text_localization/ + label_file_list: + - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + resize_long: 1536 + - NormalizeImage: + scale: 1./255. + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 + diff --git a/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9f15dded4b920cb93b2180aeb9e14e93ebab5cc --- /dev/null +++ b/test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_sast_icdar15_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=5000 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/det_r50_vd_sast_icdar15_v2.0_train/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_icdar15_v2.0/det_r50_vd_sast_icdar2015.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml b/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml new file mode 100644 index 0000000000000000000000000000000000000000..ef2b8845588386f9722189b58db8eae346a40ca9 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml @@ -0,0 +1,108 @@ +Global: + use_gpu: true + epoch_num: 5000 + log_smooth_window: 20 + print_batch_step: 2 + save_model_dir: ./output/sast_r50_vd_tt/ + save_epoch_step: 1000 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [4000, 5000] + cal_metric_during_train: False + pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt + +Architecture: + model_type: det + algorithm: SAST + Transform: + Backbone: + name: ResNet_SAST + layers: 50 + Neck: + name: SASTFPN + with_cab: True + Head: + name: SASTHead + +Loss: + name: SASTLoss + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + # name: Cosine + learning_rate: 0.001 + # warmup_epoch: 0 + regularizer: + name: 'L2' + factor: 0 + +PostProcess: + name: SASTPostProcess + score_thresh: 0.5 + sample_pts_num: 6 + nms_thresh: 0.2 + expand_scale: 1.2 + shrink_ratio_of_width: 0.2 + +Metric: + name: DetMetric + main_indicator: hmean + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/total_text/train + label_file_list: [./train_data/total_text/train/train.txt] + ratio_list: [1.0] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - SASTProcessTrain: + image_shape: [512, 512] + min_crop_side_ratio: 0.3 + min_crop_size: 24 + min_text_size: 4 + max_text_size: 512 + - KeepKeys: + keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order + loader: + shuffle: True + drop_last: False + batch_size_per_card: 4 + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ + label_file_list: + - ./train_data/total_text/test/test.txt + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - DetLabelEncode: # Class handling label + - DetResizeForTest: + resize_long: 768 + - NormalizeImage: + scale: 1./255. 
+ mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + order: 'hwc' + - ToCHWImage: + - KeepKeys: + keep_keys: ['image', 'shape', 'polys', 'ignore_tags'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 1 # must be 1 + num_workers: 2 \ No newline at end of file diff --git a/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt b/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..602254f2f3b7eb6f5b1fc72fbaf212fbea43ca49 --- /dev/null +++ b/test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:det_r50_vd_sast_totaltext_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=5000 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml -o Global.pretrained_model=./pretrain_models/ResNet50_vd_ssld_pretrained +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/det_r50_vd_sast_totaltext_v2.0/best_accuracy +infer_export:tools/export_model.py -c test_tipc/configs/det_r50_vd_sast_totaltext_v2.0/det_r50_vd_sast_totaltext.yml -o +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/en_server_pgnetA/train_infer_python.txt b/test_tipc/configs/en_server_pgnetA/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..d70776998c4e326905920586e90f2833fe42e89b --- /dev/null +++ b/test_tipc/configs/en_server_pgnetA/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:en_server_pgnetA +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=14 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/total_text/test/rgb/ +null:null +## +trainer:norm_train +norm_train:tools/train.py -c configs/e2e/e2e_r50_vd_pg.yml -o Global.pretrained_model=./pretrain_models/en_server_pgnetA/best_accuracy +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## 
+===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +inference_dir:null +train_model:./inference/en_server_pgnetA/best_accuracy +infer_export:tools/export_model.py -c configs/e2e/e2e_r50_vd_pg.yml -o +infer_quant:False +inference:tools/infer/predict_e2e.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--e2e_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/test_tipc/configs/ppocr_det_mobile/train_linux_cpu_normal_normal_infer_python_mac.txt b/test_tipc/configs/ppocr_det_mobile/train_linux_cpu_normal_normal_infer_python_mac.txt deleted file mode 100644 index 0200e2954948e5aeb719aa43e8a88d70c2af506d..0000000000000000000000000000000000000000 --- a/test_tipc/configs/ppocr_det_mobile/train_linux_cpu_normal_normal_infer_python_mac.txt +++ /dev/null @@ -1,101 +0,0 @@ -===========================train_params=========================== -model_name:ocr_det -python:python -gpu_list:-1 -Global.use_gpu:False -Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 -Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 -Global.pretrained_model:null -train_model_name:latest -train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ -null:null -## -trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy -distill_train:null -null:null -null:null -## -===========================eval_params=========================== -eval:null -null:null -## -===========================infer_params=========================== -Global.save_inference_dir:./output/ -Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o -distill_export:null -export1:null -export2:null -inference_dir:null -train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy -infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o -infer_quant:False -inference:tools/infer/predict_det.py ---use_gpu:False ---enable_mkldnn:False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False ---precision:fp32 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -null:null -===========================cpp_infer_params=========================== -use_opencv:True -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_quant:False -inference:./deploy/cpp_infer/build/ppocr det ---use_gpu:False ---enable_mkldnn:False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False 
---precision:fp32 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -===========================serving_params=========================== -model_name:ocr_det -python:python3.7 -trans_model:-m paddle_serving_client.convert ---dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/ ---model_filename:inference.pdmodel ---params_filename:inference.pdiparams ---serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/ ---serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/ -serving_dir:./deploy/pdserving -web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1 -op.det.local_service_conf.devices:null|0 -op.det.local_service_conf.use_mkldnn:True|False -op.det.local_service_conf.thread_num:1|6 -op.det.local_service_conf.use_trt:False|True -op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py|pipeline_rpc_client.py ---image_dir=../../doc/imgs -===========================kl_quant_params=========================== -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o -infer_quant:True -inference:tools/infer/predict_det.py ---use_gpu:False ---enable_mkldnn:False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False ---precision:int8 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -null:null -null:null diff --git a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_normal_infer_python_windows.txt b/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_normal_infer_python_windows.txt deleted file mode 100644 index 0f4faee4b32925b4d0780ece6838c176238c7000..0000000000000000000000000000000000000000 --- a/test_tipc/configs/ppocr_det_mobile/train_linux_gpu_normal_normal_infer_python_windows.txt +++ /dev/null @@ -1,111 +0,0 @@ -===========================train_params=========================== -model_name:ocr_det -python:python -gpu_list:0 -Global.use_gpu:True -Global.auto_cast:fp32|amp -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 -Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 -Global.pretrained_model:null -train_model_name:latest -train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ -null:null -## -trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy -distill_train:null -null:null -null:null -## -===========================eval_params=========================== -eval:null -null:null -## -===========================infer_params=========================== -Global.save_inference_dir:./output/ -Global.pretrained_model: -norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o -distill_export:null -export1:null -export2:null -inference_dir:null -train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy -infer_export:tools/export_model.py 
-c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o -infer_quant:False -inference:tools/infer/predict_det.py ---use_gpu:True|False ---enable_mkldnn:True|False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False|True ---precision:fp32|fp16|int8 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -null:null -===========================cpp_infer_params=========================== -use_opencv:True -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_quant:False -inference:./deploy/cpp_infer/build/ppocr det ---use_gpu:True|False ---enable_mkldnn:True|False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False|True ---precision:fp32|fp16 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -===========================serving_params=========================== -model_name:ocr_det -python:python3.7 -trans_model:-m paddle_serving_client.convert ---dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/ ---model_filename:inference.pdmodel ---params_filename:inference.pdiparams ---serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/ ---serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/ -serving_dir:./deploy/pdserving -web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1 -op.det.local_service_conf.devices:null|0 -op.det.local_service_conf.use_mkldnn:True|False -op.det.local_service_conf.thread_num:1|6 -op.det.local_service_conf.use_trt:False|True -op.det.local_service_conf.precision:fp32|fp16|int8 -pipline:pipeline_http_client.py|pipeline_rpc_client.py ---image_dir=../../doc/imgs -===========================kl_quant_params=========================== -infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/ -infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o -infer_quant:True -inference:tools/infer/predict_det.py ---use_gpu:True|False ---enable_mkldnn:True|False ---cpu_threads:1|6 ---rec_batch_num:1 ---use_tensorrt:False|True ---precision:int8 ---det_model_dir: ---image_dir:./inference/ch_det_data_50/all-sum-510/ -null:null ---benchmark:True -null:null -null:null -===========================lite_params=========================== -inference:./ocr_db_crnn det -infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb ---cpu_threads:1|4 ---batch_size:1 ---power_mode:LITE_POWER_HIGH|LITE_POWER_LOW ---image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ---config_dir:./config.txt ---rec_dict_dir:./ppocr_keys_v1.txt ---benchmark:True diff --git a/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml new file mode 100644 index 0000000000000000000000000000000000000000..15119bb2a9de02c19684d21ad5a1859db94895ce --- /dev/null +++ b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml @@ -0,0 +1,103 @@ +Global: + use_gpu: True + epoch_num: 21 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/nrtr/ + save_epoch_step: 1 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: ppocr/utils/EN_symbol_dict.txt + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: 
./output/rec/predicts_nrtr.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.99 + clip_norm: 5.0 + lr: + name: Cosine + learning_rate: 0.0005 + warmup_epoch: 2 + regularizer: + name: 'L2' + factor: 0. + +Architecture: + model_type: rec + algorithm: NRTR + in_channels: 1 + Transform: + Backbone: + name: MTB + cnn_num: 2 + Head: + name: Transformer + d_model: 512 + num_encoder_layers: 6 + beam_size: -1 # When Beam size is greater than 0, it means to use beam search when evaluation. + + +Loss: + name: NRTRLoss + smoothing: True + +PostProcess: + name: NRTRLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - NRTRLabelEncode: # Class handling label + - NRTRRecResizeImg: + image_shape: [100, 32] + resize_type: PIL # PIL or OpenCV + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 512 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - NRTRLabelEncode: # Class handling label + - NRTRRecResizeImg: + image_shape: [100, 32] + resize_type: PIL # PIL or OpenCV + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 1 + use_shared_memory: False diff --git a/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..67630d858c7633daf8e1800b1ab10adb86e6c3bc --- /dev/null +++ b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_mtb_nrtr +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbol_dict.txt --rec_image_shape="1,32,100" --rec_algorithm="NRTR" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 
+--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..463e8d1d7d7f43ba7b48810e2a2e8552eb5e4fe3 --- /dev/null +++ b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,97 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/mv3_none_bilstm_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_mv3_none_bilstm_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 96 + Head: + name: CTCHead + fc_decay: 0 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..fdc39f1b851a4e05735744f878917a3dfcc1d405 --- /dev/null +++ b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_mv3_none_bilstm_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c 
test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..06a2b71efac771f6c765e07f5e8815dabfd76c81 --- /dev/null +++ b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,96 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/mv3_none_none_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_mv3_none_none_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: Rosetta + Transform: + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: SequenceEncoder + encoder_type: reshape + Head: + name: CTCHead + fc_decay: 0.0004 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + 
batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..9810689679903b4cedff253834a1a999c4e8a5f8 --- /dev/null +++ b/test_tipc/configs/rec_mv3_none_none_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_mv3_none_none_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mv3_none_none_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml new file mode 100644 index 0000000000000000000000000000000000000000..2b14c047d4645104fb9532a1b391072dc341f3b7 --- /dev/null +++ b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml @@ -0,0 +1,103 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/rec_mv3_tps_bilstm_att/ + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0.00001 + +Architecture: + model_type: rec + algorithm: RARE + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: small + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 96 + Head: + name: AttentionHead + hidden_size: 
96 + + +Loss: + name: AttentionLoss + +PostProcess: + name: AttnLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 1 diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..3791aa17b2b5a16565ab3456932e43fd77254472 --- /dev/null +++ b/test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_mv3_tps_bilstm_att_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_att_v2.0/rec_mv3_tps_bilstm_att.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" --rec_algorithm="RARE" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..e20ea00f335bbb7996689c6836c5cc2f355bcbf1 --- /dev/null +++ 
b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,101 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/mv3_tps_bilstm_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_mv3_tps_bilstm_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: STARNet + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: small + Backbone: + name: MobileNetV3 + scale: 0.5 + model_name: large + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 96 + Head: + name: CTCHead + fc_decay: 0.0004 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..33700ad696394ad9404a5424cddf93608220917a --- /dev/null +++ b/test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_mv3_tps_bilstm_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== 
+Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_mv3_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml b/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml new file mode 100644 index 0000000000000000000000000000000000000000..36bc3c5d12c55de574507cd613da772bbe0d2ced --- /dev/null +++ b/test_tipc/configs/rec_r31_sar/rec_r31_sar.yml @@ -0,0 +1,98 @@ +Global: + use_gpu: true + epoch_num: 5 + log_smooth_window: 20 + print_batch_step: 20 + save_model_dir: ./sar_rec + save_epoch_step: 1 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: + # for data or label process + character_dict_path: ppocr/utils/dict90.txt + max_text_length: 30 + infer_mode: False + use_space_char: False + rm_symbol: True + save_res_path: ./output/rec/predicts_sar.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + name: Piecewise + decay_epochs: [3, 4] + values: [0.001, 0.0001, 0.00001] + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: SAR + Transform: + Backbone: + name: ResNet31 + Head: + name: SARHead + +Loss: + name: SARLoss + +PostProcess: + name: SARLabelDecode + +Metric: + name: RecMetric + + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SARLabelEncode: # Class handling label + - SARRecResizeImg: + image_shape: [3, 48, 48, 160] # h:48 w:[48,160] + width_downsample_ratio: 0.25 + - KeepKeys: + keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 64 + drop_last: True + num_workers: 8 + use_shared_memory: False + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SARLabelEncode: # Class handling label + - SARRecResizeImg: + image_shape: [3, 48, 48, 160] + width_downsample_ratio: 0.25 + - KeepKeys: + keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 64 + num_workers: 4 + use_shared_memory: False + diff --git a/test_tipc/configs/rec_r31_sar/train_infer_python.txt b/test_tipc/configs/rec_r31_sar/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..5cc31b7b8b793e7c82f6676f1fec9a5e8b2393f4 --- /dev/null +++ b/test_tipc/configs/rec_r31_sar/train_infer_python.txt @@ -0,0 +1,52 @@ 
+===========================train_params=========================== +model_name:rec_r31_sar +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r31_sar/rec_r31_sar.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/dict90.txt --rec_image_shape="3,48,48,160" --rec_algorithm="SAR" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|fp16|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..3e9ea4865831462e7351b718944bfe8cb0f9812f --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,96 @@ +Global: + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/r34_vd_none_bilstm_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_r34_vd_none_bilstm_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: CRNN + Transform: + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 + Head: + name: CTCHead + fc_decay: 0 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + 
num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..857a212fe6f5e0bd9612b55841e748e6b4409061 --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_r34_vd_none_bilstm_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..ce15e27753c27ce318e65e17e2aeafd4f480817e --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,94 @@ +Global: + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/r34_vd_none_none_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: 
./output/rec/predicts_r34_vd_none_none_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: Rosetta + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: reshape + Head: + name: CTCHead + fc_decay: 0.0004 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..85804b7453729dad6d2e87d0efd1a053dd9a0aac --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_r34_vd_none_none_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_none_none_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference 
+--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml new file mode 100644 index 0000000000000000000000000000000000000000..5dd797b0ec742932ca7f85353b9ea4c5eb637edd --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml @@ -0,0 +1,102 @@ +Global: + use_gpu: True + epoch_num: 400 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/b3_rare_r34_none_gru/ + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_b3_rare_r34_none_gru.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0.00000 + +Architecture: + model_type: rec + algorithm: RARE + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: large + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 #96 + Head: + name: AttentionHead # AttentionHead + hidden_size: 256 # + l2_decay: 0.00001 + +Loss: + name: AttentionLoss + +PostProcess: + name: AttnLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - AttnLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 8 diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..e816868f33de7ca8794068e8498f6f7845df0324 --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_r34_vd_tps_bilstm_att_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train 
+norm_train:tools/train.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_att_v2.0/rec_r34_vd_tps_bilstm_att.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" --rec_algorithm="RARE" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml new file mode 100644 index 0000000000000000000000000000000000000000..2dcba81e5be4ade1a344307e3a56d4c41601eba5 --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml @@ -0,0 +1,100 @@ +Global: + use_gpu: true + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 10 + save_model_dir: ./output/rec/r34_vd_tps_bilstm_ctc/ + save_epoch_step: 3 + # evaluation is run every 2000 iterations + eval_batch_step: [0, 2000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words_en/word_10.png + # for data or label process + character_dict_path: + max_text_length: 25 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_r34_vd_tps_bilstm_ctc.txt + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + lr: + learning_rate: 0.0005 + regularizer: + name: 'L2' + factor: 0 + +Architecture: + model_type: rec + algorithm: STARNet + Transform: + name: TPS + num_fiducial: 20 + loc_lr: 0.1 + model_name: large + Backbone: + name: ResNet + layers: 34 + Neck: + name: SequenceEncoder + encoder_type: rnn + hidden_size: 256 + Head: + name: CTCHead + fc_decay: 0 + +Loss: + name: CTCLoss + +PostProcess: + name: CTCLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: True + batch_size_per_card: 256 + drop_last: True + num_workers: 8 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - CTCLabelEncode: # Class handling label + - RecResizeImg: + image_shape: [3, 
32, 100] + - KeepKeys: + keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order + loader: + shuffle: False + drop_last: False + batch_size_per_card: 256 + num_workers: 4 diff --git a/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb49ae5977208b2921f4a825b62afa7935f572f1 --- /dev/null +++ b/test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/train_infer_python.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:rec_r34_vd_tps_bilstm_ctc_v2.0 +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r34_vd_tps_bilstm_ctc_v2.0/rec_icdar15_train.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="3,32,100" +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null diff --git a/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml b/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml new file mode 100644 index 0000000000000000000000000000000000000000..41e525205d2b047934a69a8b41a5e7d776990097 --- /dev/null +++ b/test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml @@ -0,0 +1,108 @@ +Global: + use_gpu: True + epoch_num: 72 + log_smooth_window: 20 + print_batch_step: 5 + save_model_dir: ./output/rec/srn_new + save_epoch_step: 3 + # evaluation is run every 5000 iterations after the 4000th iteration + eval_batch_step: [0, 5000] + cal_metric_during_train: True + pretrained_model: + checkpoints: + save_inference_dir: + use_visualdl: False + infer_img: doc/imgs_words/ch/word_1.jpg + # for data or label process + character_dict_path: + max_text_length: 25 + num_heads: 8 + infer_mode: False + use_space_char: False + save_res_path: ./output/rec/predicts_srn.txt + + +Optimizer: + name: Adam + beta1: 0.9 + beta2: 0.999 + clip_norm: 10.0 + lr: + learning_rate: 0.0001 + +Architecture: + model_type: rec + algorithm: SRN + in_channels: 1 + Transform: + Backbone: + name: ResNetFPN + Head: + name: SRNHead + max_text_length: 25 + num_heads: 8 + num_encoder_TUs: 2 + 
num_decoder_TUs: 4 + hidden_dims: 512 + +Loss: + name: SRNLoss + +PostProcess: + name: SRNLabelDecode + +Metric: + name: RecMetric + main_indicator: acc + +Train: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data/ + label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SRNLabelEncode: # Class handling label + - SRNRecResizeImg: + image_shape: [1, 64, 256] + - KeepKeys: + keep_keys: ['image', + 'label', + 'length', + 'encoder_word_pos', + 'gsrm_word_pos', + 'gsrm_slf_attn_bias1', + 'gsrm_slf_attn_bias2'] # dataloader will return list in this order + loader: + shuffle: False + batch_size_per_card: 64 + drop_last: False + num_workers: 4 + +Eval: + dataset: + name: SimpleDataSet + data_dir: ./train_data/ic15_data + label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"] + transforms: + - DecodeImage: # load image + img_mode: BGR + channel_first: False + - SRNLabelEncode: # Class handling label + - SRNRecResizeImg: + image_shape: [1, 64, 256] + - KeepKeys: + keep_keys: ['image', + 'label', + 'length', + 'encoder_word_pos', + 'gsrm_word_pos', + 'gsrm_slf_attn_bias1', + 'gsrm_slf_attn_bias2'] + loader: + shuffle: False + drop_last: False + batch_size_per_card: 32 + num_workers: 4 diff --git a/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt b/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3549c635f267cdb0b494341e9f250669cd74bfe --- /dev/null +++ b/test_tipc/configs/rec_r50_fpn_vd_none_srn/train_infer_python.txt @@ -0,0 +1,52 @@ +===========================train_params=========================== +model_name:rec_r50_fpn_vd_none_srn +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:null +Global.epoch_num:lite_train_lite_infer=2|whole_train_whole_infer=300 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./inference/rec_inference +null:null +## +trainer:norm_train +norm_train:tools/train.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +pact_train:null +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:tools/eval.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +quant_export:null +fpgm_export:null +distill_export:null +export1:null +export2:null +## +infer_model:null +infer_export:tools/export_model.py -c test_tipc/configs/rec_r50_fpn_vd_none_srn/rec_r50_fpn_srn.yml -o +infer_quant:False +inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dict.txt --rec_image_shape="1,64,256" --rec_algorithm="SRN" --use_space_char=False +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1|6 +--use_tensorrt:True|False +--precision:fp32|int8 +--rec_model_dir: +--image_dir:./inference/rec_inference +--save_log_path:./test/output/ +--benchmark:True +null:null + diff --git a/test_tipc/docs/jeston_test_train_inference_python.md b/test_tipc/docs/jeston_test_train_inference_python.md index 
3c0524df21df84fefdbbfd8e691766c9c542dff5..d96505985ea8a291b3579acb2aaee1b3d66c1baa 100644
--- a/test_tipc/docs/jeston_test_train_inference_python.md
+++ b/test_tipc/docs/jeston_test_train_inference_python.md
@@ -1,6 +1,6 @@
 # Jetson-side basic training and inference functionality tests
 
-The main program for the Jetson-side basic training and inference functionality tests is `test_train_inference_python.sh`. Since the CPU on Jetson is weak, Jetson only needs to cover the GPU and TensorRT inference parts of TIPC.
+The main program for the Jetson-side basic training and inference functionality tests is `test_inference_inference.sh`. Since the CPU on Jetson is weak, Jetson only needs to cover the GPU and TensorRT inference parts of TIPC.
 
 ## 1. Summary of test results
 
@@ -40,21 +40,21 @@ The main program for the Jetson-side basic training and inference functionality
 ### 2.2 Functionality tests
 
-First run `prepare.sh` to prepare the data and models, then run `test_train_inference_python.sh` for testing; log files named `python_infer_*.log` are finally generated under the ```test_tipc/output``` directory.
+First run `prepare.sh` to prepare the data and models, then run `test_inference_inference.sh` for testing; log files named `python_infer_*.log` are finally generated under the ```test_tipc/output``` directory.
 
-`test_train_inference_python.sh` provides five [run modes](./test_train_inference_python.md); on Jetson, only the inference mode needs to be tested:
+`test_inference_inference.sh` has a single mode, `whole_infer`; on Jetson, only the inference mode needs to be tested:
 
 ```
 - Mode 3, whole_infer: no training; predict on the full dataset, walking through open-source model evaluation and dynamic-to-static export, and checking the inference model's prediction time and accuracy;
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer'
 # Usage 1:
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/jeston_ppocr_det_mobile_params.txt 'whole_infer'
+bash test_tipc/test_inference_inference.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer'
 # Usage 2: run prediction on a specified GPU card; the third argument is the GPU card id
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/jeston_ppocr_det_mobile_params.txt 'whole_infer' '1'
+bash test_tipc/test_inference_jeston.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_python_jetson.txt 'whole_infer' '1'
 ```
 
-After running the corresponding command, run logs are saved automatically under the `test_tipc/output` folder. For example, `lite_train_lite_infer` mode runs the training + inference chain, so the `test_tipc/output` folder contains the following files:
+After running the corresponding command, run logs are saved automatically under the `test_tipc/output` folder. For example, `whole_infer` mode runs the inference chain, so the `test_tipc/output` folder contains the following files:
 ```
 test_tipc/output/
 |- results_python.log    # log of the status of each executed command
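The test scripts append one status line per executed command, which makes a finished run easy to audit. A minimal sketch of such a check, assuming the standard TIPC status-line format (`Run successfully with command - ...` / `Run failed with command - ...`) shown in the Lite test docs later in this patch:

```shell
# Summarize a finished TIPC run from its status log
# (assumes the "Run successfully/failed with command - ..." line format)
grep -c "Run successfully" test_tipc/output/results_python.log
grep "Run failed" test_tipc/output/results_python.log && echo "Some TIPC commands failed"
```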
diff --git a/test_tipc/docs/mac_test_train_inference_python.md b/test_tipc/docs/mac_test_train_inference_python.md
index 7f088a489ff7988d97c7cb48973b13725b49a09f..ea6e0218b12b342347677ea512d3afd89053261a 100644
--- a/test_tipc/docs/mac_test_train_inference_python.md
+++ b/test_tipc/docs/mac_test_train_inference_python.md
@@ -57,35 +57,35 @@ Mac has no GPU, so environment preparation only requires a Python environment plus PaddlePaddle etc.
 ```shell
 # Unlike the Linux runs, the Mac tests use the new configuration file mac_ppocr_det_mobile_params.txt,
 # in which the GPU- and mkldnn-related test chains are removed by default
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'lite_train_lite_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'lite_train_lite_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_lite_infer'
 ```
 - Mode 2, lite_train_whole_infer: train with a small amount of data and predict on a certain amount of data, to verify that the trained model runs inference and that the inference speed is reasonable;
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'lite_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'lite_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'lite_train_whole_infer'
 ```
 - Mode 3, whole_infer: no training; predict on the full dataset, walking through open-source model evaluation and dynamic-to-static export, and checking the inference model's prediction time and accuracy;
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
 # Usage 1:
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer'
 # Usage 2: run prediction on a specified GPU card; the third argument is the GPU card id
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_infer' '1'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_infer' '1'
 ```
 - Mode 4, whole_train_whole_infer, CE: train on the full dataset and predict on the full dataset, verifying model training accuracy, prediction accuracy, and prediction speed; (running this mode on Mac is not recommended)
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_train_whole_infer'
-bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'whole_train_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_mac_cpu_normal_normal_infer_python_mac_cpu.txt 'whole_train_whole_infer'
 ```
 - Mode 5, klquant_whole_infer: test offline (KL) quantization;
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/mac_ppocr_det_mobile_params.txt 'klquant_whole_infer'
-bash test_tipc/test_train_inference_python.sh test_tipc/configs/mac_ppocr_det_mobile_params.txt 'klquant_whole_infer'
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_mac_cpu.txt 'klquant_whole_infer'
 ```
 
 After running the corresponding command, run logs are saved automatically under the `test_tipc/output` folder. For example, `lite_train_lite_infer` mode runs the training + inference chain, so the `test_tipc/output` folder contains the following files:
@@ -134,11 +134,19 @@ python test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.txt
 #### Run results
 
-A normal run looks like the figure below:
-
+A normal run produces output like:
+```
+Assert allclose passed! The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log and ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are consistent!
+```
 The output when the results are inconsistent:
-
+```
+......
+Traceback (most recent call last):
+  File "test_tipc/compare_results.py", line 140, in
+    format(filename, gt_filename))
+ValueError: The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log and the results of ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are inconsistent!
+```
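The comparison step can also be run by hand when debugging a single log. A minimal sketch: `--gt_file` is taken from the command above, while the `--log_file`, `--atol`, and `--rtol` flag names are assumptions about `compare_results.py`:

```shell
# Compare freshly produced inference logs against the stored ground-truth results
python test_tipc/compare_results.py \
    --gt_file=./test_tipc/results/python_*.txt \
    --log_file=./test_tipc/output/python_infer_*.log \
    --atol=1e-3 --rtol=1e-3
```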
 ## 3. More tutorials
diff --git a/test_tipc/docs/test_inference_cpp.md b/test_tipc/docs/test_inference_cpp.md
index 3c9599a32cd473a31670d3ee028a36bcf621df87..e662f4bacc0b69bd605a79dac0e36c99daac87d5 100644
--- a/test_tipc/docs/test_inference_cpp.md
+++ b/test_tipc/docs/test_inference_cpp.md
@@ -20,12 +20,12 @@ The main program for the C++ inference functionality tests is `test_inference_cpp.sh`, which can test inference based on
 First run `prepare.sh` to prepare the data and models, then run `test_inference_cpp.sh` for testing; log files with the `cpp_infer_*.log` suffix are finally generated under the ```test_tipc/output``` directory.
 
 ```shell
-bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt "cpp_infer"
 # Usage 1:
-bash test_tipc/test_inference_cpp.sh test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
 # Usage 2: run prediction on a specified GPU card; the third argument is the GPU card id
-bash test_tipc/test_inference_cpp.sh test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1'
+bash test_tipc/test_inference_cpp.sh test_tipc/configs/ch_ppocr_mobile_v2.0_det/model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt '1'
 ```
 
 After running the inference commands, run logs are saved automatically under the `test_tipc/output` folder, including the following files:
diff --git a/test_tipc/docs/test_lite_arm_cpp.md b/test_tipc/docs/test_lite_arm_cpp.md
new file mode 100644
index 0000000000000000000000000000000000000000..166b5981b6fcb6d4709a4636df0f1b1af57e1d66
--- /dev/null
+++ b/test_tipc/docs/test_lite_arm_cpp.md
@@ -0,0 +1,95 @@
+# Lite\_arm\_cpp inference functionality tests
+
+The main program for the Lite\_arm\_cpp inference functionality tests is `test_lite_arm_cpp.sh`, which tests a model's C++ inference on ARM based on the Paddle-Lite inference library.
+
+## 1. Summary of test results
+
+The Lite-side tests currently support the following combinations:
+
+**Field descriptions:**
+- Model type: normal model (FP32) and quantized model (INT8)
+- batch-size: 1 and 4
+- threads: 1 and 4
+- Number of predictors: single-predictor and multi-predictor prediction
+- Source of the inference library: downloaded or compiled
+- Test hardware: ARM\_CPU / ARM\_GPU_OPENCL
+
+| Model type | batch-size | threads | Predictors | Library source | Test hardware |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| normal model / quantized model | 1 | 1/4 | single/multi | download/compile | ARM\_CPU/ARM\_GPU_OPENCL |
+
+
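The table above is a cross product: each field combines with every value of the others to form one test case. A purely illustrative sketch of how the fields expand (loop values are taken from the field list; the echo format is not part of the test scripts):

```shell
# Enumerate the Lite test matrix described above (illustration only)
for model_type in FP32 INT8; do
  for threads in 1 4; do
    for batch_size in 1 4; do
      echo "case: model_type=${model_type} threads=${threads} batch_size=${batch_size}"
    done
  done
done
```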
+## 2. Test workflow
+Refer to the [installation docs](./install.md) to set up the TIPC runtime environment.
+
+### 2.1 Functionality tests
+
+First run `prepare_lite_cpp.sh`; it generates `test_lite.tar` in the current directory, containing the test data, the test models, and the executable used for prediction. Upload `test_lite.tar` to the phone under test, unpack it in the phone's terminal, enter the `test_lite` directory, and then run `test_lite_arm_cpp.sh`; log files with the `lite_*.log` suffix are finally generated under the `test_lite/output` directory.
+
+#### 2.1.1 Testing on ARM\_CPU
+
+```shell
+
+# Prepare the data, models, and Paddle-Lite inference library
+# Inference library obtained by download
+bash test_tipc/prepare_lite_cpp.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt download
+# Inference library obtained by compilation
+bash test_tipc/prepare_lite_cpp.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt compile
+
+# On the phone:
+bash test_lite_arm_cpp.sh model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt
+
+```
+
+#### 2.1.2 Testing on ARM\_GPU\_OPENCL
+
+```shell
+
+# Prepare the data, models, and Paddle-Lite inference library
+# Inference library obtained by download
+bash test_tipc/prepare_lite_cpp.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt download
+# Inference library obtained by compilation
+bash test_tipc/prepare_lite_cpp.sh ./test_tipc/configs/ch_PP-OCRv2_det/model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt compile
+
+# On the phone:
+bash test_lite_arm_cpp.sh model_linux_gpu_normal_normal_lite_cpp_arm_gpu_opencl.txt
+
+```
+
+
+**Note**:
+
+Running this project requires bash and similar commands, which the traditional adb approach cannot install well, so it is recommended to connect to the computer through a virtual terminal opened on the phone; see [connecting an Android phone to a computer with termux](./termux_for_android.md).
+
+### 2.2 Run results
+
+The status of each test is printed under `./output/`:
+On success the output is:
+
+```
+Run successfully with command - ./ocr_db_crnn det ch_PP-OCRv2_det_infer_opt.nb ARM_CPU FP32 1 1 ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True > ./output/lite_ch_PP-OCRv2_det_infer_opt.nb_runtime_device_ARM_CPU_precision_FP32_batchsize_1_threads_1.log 2>&1!
+Run successfully with command xxx
+...
+```
+
+On failure the output is:
+
+```
+Run failed with command - ./ocr_db_crnn det ch_PP-OCRv2_det_infer_opt.nb ARM_CPU FP32 1 1 ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True > ./output/lite_ch_PP-OCRv2_det_infer_opt.nb_runtime_device_ARM_CPU_precision_FP32_batchsize_1_threads_1.log 2>&1!
+Run failed with command xxx
+...
+```
+
+Under the ./output/ folder there is one log per tested configuration:
+
+
+
+In each log, autolog is called to print information such as the following:
+
+
+
+
+
+## 3. More tutorials
+
+This document covers functionality testing only; for a more detailed tutorial on Lite-side inference, see [Lite deployment](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md).
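The long command in the status lines above encodes the whole test configuration positionally. An annotated sketch of the same invocation; the meaning of each position is inferred from the field list in section 1 and from the log file name, so treat the labels as assumptions:

```shell
# Positional arguments of the Lite test binary, as inferred from the log above:
#   det                            mode (detection)
#   ch_PP-OCRv2_det_infer_opt.nb   optimized Paddle-Lite model
#   ARM_CPU                        runtime device
#   FP32                           precision
#   1 1                            thread count and batch size (order assumed from the log name)
#   ./test_data/...                image directory
#   ./config.txt                   runtime config file
#   True                           enable benchmark output
./ocr_db_crnn det ch_PP-OCRv2_det_infer_opt.nb ARM_CPU FP32 1 1 \
    ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True
```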
测试流程 -运行环境配置请参考[文档](./install.md)的内容配置TIPC的运行环境。 - -### 2.1 功能测试 - -先运行`prepare_lite.sh`,运行后会在当前路径下生成`test_lite.tar`,其中包含了测试数据、测试模型和用于预测的可执行文件。将`test_lite.tar`上传到被测试的手机上,在手机的终端解压该文件,进入`test_lite`目录中,然后运行`test_lite_arm_cpu_cpp.sh`进行测试,最终在`test_lite/output`目录下生成`lite_*.log`后缀的日志文件。 - -```shell - -# 数据和模型准备 -bash test_tipc/prepare_lite.sh ./test_tipc/configs/ppocr_det_mobile/model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt - -# 手机端测试: -bash test_lite_arm_cpu_cpp.sh model_linux_gpu_normal_normal_lite_cpp_arm_cpu.txt - -``` - -**注意**:由于运行该项目需要bash等命令,传统的adb方式不能很好的安装。所以此处推荐通在手机上开启虚拟终端的方式连接电脑,连接方式可以参考[安卓手机termux连接电脑](./termux_for_android.md)。 - -#### 运行结果 - -各测试的运行情况会打印在 `./output/` 中: -运行成功时会输出: - -``` -Run successfully with command - ./ocr_db_crnn det ch_PP-OCRv2_det_infer_opt.nb ARM_CPU FP32 1 1 ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True > ./output/lite_ch_PP-OCRv2_det_infer_opt.nb_runtime_device_ARM_CPU_precision_FP32_batchsize_1_threads_1.log 2>&1! -Run successfully with command xxx -... -``` - -运行失败时会输出: - -``` -Run failed with command - ./ocr_db_crnn det ch_PP-OCRv2_det_infer_opt.nb ARM_CPU FP32 1 1 ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True > ./output/lite_ch_PP-OCRv2_det_infer_opt.nb_runtime_device_ARM_CPU_precision_FP32_batchsize_1_threads_1.log 2>&1! -Run failed with command xxx -... -``` - -在./output/文件夹下,会存在如下日志,每一个日志都是不同配置下的log结果: - - - -在每一个log中,都会调用autolog打印如下信息: - - - - - -## 3. 更多教程 - -本文档为功能测试用,更详细的Lite端预测使用教程请参考:[Lite端部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)。 diff --git a/test_tipc/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md index 13caa2542a3b89c28106b22b5ef42e84e6ed0632..fa969cbe1b9b6fd524efdaad5002afcfcf40e119 100644 --- a/test_tipc/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -64,35 +64,35 @@ Linux端基础训练预测功能测试的主程序为`test_train_inference_pytho - 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'lite_train_lite_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'lite_train_lite_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' ``` - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'lite_train_whole_infer' -bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'lite_train_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_whole_infer' ``` - 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' # 用法1: -bash test_tipc/test_train_inference_python.sh 
../test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'whole_infer' +bash test_tipc/test_train_inference_python.sh ../test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'whole_infer' '1' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_infer' '1' ``` - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'whole_train_whole_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'whole_train_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'whole_train_whole_infer' ``` - 模式5:klquant_whole_infer,测试离线量化; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'klquant_whole_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile/train_infer_python.txt 'klquant_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt 'klquant_whole_infer' ``` 运行相应指令后,在`test_tipc/output`文件夹下自动会保存运行日志。如'lite_train_lite_infer'模式下,会运行训练+inference的链条,因此,在`test_tipc/output`文件夹有以下文件: diff --git a/test_tipc/docs/win_test_train_inference_python.md b/test_tipc/docs/win_test_train_inference_python.md index b1bb5470de36d14071ed86e29bebf29b104289dd..95585af0380be410c230a799b7d61e607de5f654 100644 --- a/test_tipc/docs/win_test_train_inference_python.md +++ b/test_tipc/docs/win_test_train_inference_python.md @@ -58,35 +58,35 @@ Windows端基础训练预测功能测试的主程序为`test_train_inference_pyt - 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'lite_train_lite_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'lite_train_lite_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_lite_infer' ``` - 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'lite_train_whole_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'lite_train_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'lite_train_whole_infer' ``` - 
模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' # 用法1: -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'whole_infer' '1' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_infer' '1' ``` - 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'whole_train_whole_infer' -bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'whole_train_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_windows_gpu_normal_normal_infer_python_windows_cpu_gpu.txt 'whole_train_whole_infer' ``` - 模式5:klquant_whole_infer,测试离线量化; ```shell -bash test_tipc/prepare.sh ./test_tipc/configs/win_ppocr_det_mobile_params.txt 'klquant_whole_infer' -bash test_tipc/test_train_inference_python.sh test_tipc/configs/win_ppocr_det_mobile_params.txt 'klquant_whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det_KL/model_linux_gpu_normal_normal_infer_python_windows_gpu_cpu.txt 'klquant_whole_infer' ``` @@ -138,11 +138,19 @@ python test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.txt #### 运行结果 -正常运行效果如下图: - +正常运行效果如下: +``` +Assert allclose passed! The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log and ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are consistent! +``` 出现不一致结果时的运行输出: - +``` +...... +Traceback (most recent call last): + File "test_tipc/compare_results.py", line 140, in + format(filename, gt_filename)) +ValueError: The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_1.log and the results of ./test_tipc/results/python_ppocr_det_mobile_results_fp32.txt are inconsistent! +``` ## 3. 
更多教程
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index 6e07bf8a1ac174b93f9abe141a00f32da169d9d5..8876157ef8f4b44b227c171d25bdfd1060007910 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -25,7 +25,7 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     # pretrain lite train data
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
-    if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
+    if [ ${model_name} == "ch_PPOCRv2_det" ]; then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
     fi
@@ -41,6 +41,31 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
     ln -s ./icdar2015_lite ./icdar2015
     cd ../
     cd ./inference && tar xf rec_inference.tar && cd ../
+    if [ ${model_name} == "en_server_pgnetA" ]; then
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf en_server_pgnetA.tar && cd ../
+        cd ./train_data && tar xf total_text_lite.tar && ln -s total_text_lite total_text && cd ../
+    fi
+    if [ ${model_name} == "det_r50_vd_sast_icdar15_v2.0" ] || [ ${model_name} == "det_r50_vd_sast_totaltext_v2.0" ]; then
+        wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/total_text_lite.tar --no-check-certificate
+        cd ./train_data && tar xf total_text_lite.tar && ln -s total_text_lite total_text && cd ../
+    fi
+    if [ ${model_name} == "det_mv3_db_v2.0" ]; then
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate
+        cd ./inference/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
+    fi
+    if [ ${model_name} == "det_r50_db_v2.0" ]; then
+        wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar --no-check-certificate
+        cd ./inference/ && tar xf det_r50_vd_db_v2.0_train.tar && cd ../
+    fi
+    if [ ${model_name} == "ch_ppocr_mobile_v2.0_rec_FPGM" ]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf ch_ppocr_mobile_v2.0_rec_train.tar && cd ../
+    fi
+
 elif [ ${MODE} = "whole_train_whole_infer" ];then
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     rm -rf ./train_data/icdar2015
@@ -48,10 +73,21 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then
     wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
     wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate
     cd ./train_data/ && tar xf icdar2015.tar && tar xf ic15_data.tar && cd ../
-    if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
+    if [ ${model_name} == "ch_PPOCRv2_det" ]; then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
     fi
+    if [ ${model_name} == "en_server_pgnetA" ]; then
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/total_text.tar --no-check-certificate
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf en_server_pgnetA.tar && cd ../
+        cd ./train_data && tar xf total_text.tar && cd ../
+    fi
+    if [ ${model_name} == "det_r50_vd_sast_totaltext_v2.0" ]; then
+        wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/total_text.tar --no-check-certificate
+        cd ./train_data && tar xf total_text.tar && cd ../
+    fi
 elif [ ${MODE} = "lite_train_whole_infer" ];then
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
     rm -rf ./train_data/icdar2015
@@ -61,65 +97,89 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
     cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
     ln -s ./icdar2015_infer ./icdar2015
     cd ../
-    if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
+    if [ ${model_name} == "ch_PPOCRv2_det" ]; then
         wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar --no-check-certificate
         cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
     fi
 elif [ ${MODE} = "whole_infer" ];then
-    if [ ${model_name} = "ocr_det" ]; then
+    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
+    if [ ${model_name} = "ch_ppocr_mobile_v2.0_det" ]; then
         eval_model_name="ch_ppocr_mobile_v2.0_det_train"
         rm -rf ./train_data/icdar2015
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
         cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd ../
-    elif [ ${model_name} = "ocr_server_det" ]; then
+    elif [ ${model_name} = "ch_ppocr_server_v2.0_det" ]; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         cd ./inference && tar xf ch_ppocr_server_v2.0_det_train.tar && tar xf ch_det_data_50.tar && cd ../
-    elif [ ${model_name} = "ocr_system_mobile" ]; then
+    elif [ ${model_name} = "ch_ppocr_mobile_v2.0" ]; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         wget -nc -P 
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../ - elif [ ${model_name} = "ocr_system_server" ]; then + elif [ ${model_name} = "ch_ppocr_server_v2.0" ]; then wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar --no-check-certificate cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../ - elif [ ${model_name} = "ocr_rec" ]; then - rm -rf ./train_data/ic15_data + elif [ ${model_name} = "ch_ppocr_mobile_v2.0_rec" ]; then eval_model_name="ch_ppocr_mobile_v2.0_rec_infer" wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../ - elif [ ${model_name} = "ocr_server_rec" ]; then - rm -rf ./train_data/ic15_data + elif [ ${model_name} = "ch_ppocr_server_v2.0_rec" ]; then eval_model_name="ch_ppocr_server_v2.0_rec_infer" wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../ fi - - elif [ ${model_name} = "PPOCRv2_ocr_det" ]; then + if [ ${model_name} = "ch_PPOCRv2_det" ]; then eval_model_name="ch_PP-OCRv2_det_infer" wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ fi - + if [ ${model_name} == "en_server_pgnetA" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/pgnet/en_server_pgnetA.tar --no-check-certificate + cd ./inference && tar xf en_server_pgnetA.tar && cd ../ + fi + if [ ${model_name} == "det_r50_vd_sast_icdar15_v2.0" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar --no-check-certificate + cd ./inference/ && tar xf det_r50_vd_sast_icdar15_v2.0_train.tar && cd ../ + fi + if [ ${model_name} == "det_mv3_db_v2.0" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar --no-check-certificate + cd ./inference/ && tar xf det_mv3_db_v2.0_train.tar && cd ../ + fi + if [ ${model_name} == "det_r50_db_v2.0" ]; then + wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar --no-check-certificate + cd ./inference/ && tar xf det_r50_vd_db_v2.0_train.tar && cd ../ + fi +fi if [ ${MODE} = "klquant_whole_infer" ]; then - if [ ${model_name} = "ocr_det" ]; then + wget -nc -P ./train_data/ 
https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar --no-check-certificate
+    cd ./train_data/ && tar xf icdar2015_lite.tar
+    ln -s ./icdar2015_lite ./icdar2015 && cd ../
+    if [ ${model_name} = "ch_ppocr_mobile_v2.0_det_KL" ]; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
     fi
-    if [ ${model_name} = "PPOCRv2_ocr_det" ]; then
+    if [ ${model_name} = "ch_PPOCRv2_det" ]; then
         eval_model_name="ch_PP-OCRv2_det_infer"
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar --no-check-certificate
         cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
     fi
+    if [ ${model_name} = "ch_ppocr_mobile_v2.0_rec_KL" ]; then
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate
+        cd ./train_data/ && tar xf ic15_data.tar && cd ../
+        cd ./inference && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf rec_inference.tar && cd ../
+    fi
 fi

 if [ ${MODE} = "cpp_infer" ];then
@@ -127,7 +187,7 @@ if [ ${MODE} = "cpp_infer" ];then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
         cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
-    elif [ ${model_name} = "ocr_rec" ]; then
+    elif [ ${model_name} = "ch_ppocr_mobile_v2.0_rec" ]; then
         wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar --no-check-certificate
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar --no-check-certificate
         cd ./inference && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf rec_inference.tar && cd ../
@@ -141,8 +201,11 @@ fi

 if [ ${MODE} = "serving_infer" ];then
     # prepare serving env
-    python_name=$(func_parser_value "${lines[2]}")
-    wget https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
+    python_name_list=$(func_parser_value "${lines[2]}")
+    IFS='|'
+    array=(${python_name_list})
+    python_name=${array[0]}
+    wget -nc https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
     ${python_name} -m pip install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
     ${python_name} -m pip install paddle_serving_client==0.6.1
     ${python_name} -m pip install paddle-serving-app==0.6.3
diff --git a/test_tipc/prepare_lite.sh b/test_tipc/prepare_lite.sh
deleted file mode 100644
index 6a08d96298592c829547df9fa30ef4149ddc5b00..0000000000000000000000000000000000000000
--- a/test_tipc/prepare_lite.sh
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/bin/bash
-source ./test_tipc/common_func.sh
-FILENAME=$1
-dataline=$(cat ${FILENAME})
-# parser 
params -IFS=$'\n' -lines=(${dataline}) -IFS=$'\n' -lite_model_list=$(func_parser_value "${lines[2]}") - -# prepare lite .nb model -pip install paddlelite==2.9 -current_dir=${PWD} -IFS="|" -model_path=./inference_models -for model in ${lite_model_list[*]}; do - inference_model_url=https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/${model}.tar - inference_model=${inference_model_url##*/} - wget -nc -P ${model_path} ${inference_model_url} - cd ${model_path} && tar -xf ${inference_model} && cd ../ - model_dir=${model_path}/${inference_model%.*} - model_file=${model_dir}/inference.pdmodel - param_file=${model_dir}/inference.pdiparams - paddle_lite_opt --model_dir=${model_dir} --model_file=${model_file} --param_file=${param_file} --valid_targets=arm --optimize_out=${model_dir}_opt -done - -# prepare test data -data_url=https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar -model_path=./inference_models -inference_model=${inference_model_url##*/} -data_file=${data_url##*/} -wget -nc -P ./inference_models ${inference_model_url} -wget -nc -P ./test_data ${data_url} -cd ./inference_models && tar -xf ${inference_model} && cd ../ -cd ./test_data && tar -xf ${data_file} && rm ${data_file} && cd ../ - -# prepare lite env -paddlelite_url=https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz -paddlelite_zipfile=$(echo $paddlelite_url | awk -F "/" '{print $NF}') -paddlelite_file=${paddlelite_zipfile:0:66} -wget ${paddlelite_url} && tar -xf ${paddlelite_zipfile} -mkdir -p ${paddlelite_file}/demo/cxx/ocr/test_lite -cp -r ${model_path}/*_opt.nb test_data ${paddlelite_file}/demo/cxx/ocr/test_lite -cp ppocr/utils/ppocr_keys_v1.txt deploy/lite/config.txt ${paddlelite_file}/demo/cxx/ocr/test_lite -cp -r ./deploy/lite/* ${paddlelite_file}/demo/cxx/ocr/ -cp ${paddlelite_file}/cxx/lib/libpaddle_light_api_shared.so ${paddlelite_file}/demo/cxx/ocr/test_lite -cp ${FILENAME} test_tipc/test_lite_arm_cpu_cpp.sh test_tipc/common_func.sh ${paddlelite_file}/demo/cxx/ocr/test_lite -cd ${paddlelite_file}/demo/cxx/ocr/ -git clone https://github.com/cuicheng01/AutoLog.git -make -j -sleep 1 -make -j -cp ocr_db_crnn test_lite && cp test_lite/libpaddle_light_api_shared.so test_lite/libc++_shared.so -tar -cf test_lite.tar ./test_lite && cp test_lite.tar ${current_dir} && cd ${current_dir} -rm -rf ${paddlelite_file}* && rm -rf ${model_path} diff --git a/test_tipc/prepare_lite_cpp.sh b/test_tipc/prepare_lite_cpp.sh new file mode 100644 index 0000000000000000000000000000000000000000..94af43c88d60aad706009c8bdd1e519a7b7e9acc --- /dev/null +++ b/test_tipc/prepare_lite_cpp.sh @@ -0,0 +1,107 @@ +#!/bin/bash +source ./test_tipc/common_func.sh +FILENAME=$1 +dataline=$(cat ${FILENAME}) +# parser params +IFS=$'\n' +lines=(${dataline}) +IFS=$'\n' +paddlelite_library_source=$2 + +inference_cmd=$(func_parser_value "${lines[1]}") +DEVICE=$(func_parser_value "${lines[2]}") +det_lite_model_list=$(func_parser_value "${lines[3]}") +rec_lite_model_list=$(func_parser_value "${lines[4]}") +cls_lite_model_list=$(func_parser_value "${lines[5]}") + +if [[ $inference_cmd =~ "det" ]]; then + lite_model_list=${det_lite_model_list} +elif [[ $inference_cmd =~ "rec" ]]; then + lite_model_list=(${rec_lite_model_list[*]} ${cls_lite_model_list[*]}) +elif [[ $inference_cmd =~ "system" ]]; then + lite_model_list=(${det_lite_model_list[*]} ${rec_lite_model_list[*]} ${cls_lite_model_list[*]}) +else + echo "inference_cmd is wrong, please check." 
+ exit 1 +fi + +if [ ${DEVICE} = "ARM_CPU" ]; then + valid_targets="arm" + paddlelite_library_url="https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10-rc/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz" + end_index="66" + compile_with_opencl="OFF" +elif [ ${DEVICE} = "ARM_GPU_OPENCL" ]; then + valid_targets="opencl" + paddlelite_library_url="https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10-rc/inference_lite_lib.armv8.clang.with_exception.with_extra.with_cv.opencl.tar.gz" + end_index="71" + compile_with_opencl="ON" +else + echo "DEVICE only support ARM_CPU, ARM_GPU_OPENCL." + exit 2 +fi + +# prepare paddlelite model +pip install paddlelite==2.10-rc +current_dir=${PWD} +IFS="|" +model_path=./inference_models + +for model in ${lite_model_list[*]}; do + if [[ $model =~ "PP-OCRv2" ]]; then + inference_model_url=https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/${model}.tar + elif [[ $model =~ "v2.0" ]]; then + inference_model_url=https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/${model}.tar + else + echo "Model is wrong, please check." + exit 3 + fi + inference_model=${inference_model_url##*/} + wget -nc -P ${model_path} ${inference_model_url} + cd ${model_path} && tar -xf ${inference_model} && cd ../ + model_dir=${model_path}/${inference_model%.*} + model_file=${model_dir}/inference.pdmodel + param_file=${model_dir}/inference.pdiparams + paddle_lite_opt --model_dir=${model_dir} --model_file=${model_file} --param_file=${param_file} --valid_targets=${valid_targets} --optimize_out=${model_dir}_opt +done + +# prepare test data +data_url=https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar +data_file=${data_url##*/} +wget -nc -P ./test_data ${data_url} +cd ./test_data && tar -xf ${data_file} && rm ${data_file} && cd ../ + +# prepare paddlelite predict library +if [[ ${paddlelite_library_source} = "download" ]]; then + paddlelite_library_zipfile=$(echo $paddlelite_library_url | awk -F "/" '{print $NF}') + paddlelite_library_file=${paddlelite_library_zipfile:0:${end_index}} + wget ${paddlelite_library_url} && tar -xf ${paddlelite_library_zipfile} + cd ${paddlelite_library_zipfile} +elif [[ ${paddlelite_library_source} = "compile" ]]; then + git clone -b release/v2.10 https://github.com/PaddlePaddle/Paddle-Lite.git + cd Paddle-Lite + ./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON --toolchain=clang --with_opencl=${compile_with_opencl} + cd ../ + cp -r Paddle-Lite/build.lite.android.armv8.clang/inference_lite_lib.android.armv8/ . 
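+    # 说明(补充注释):上面的 cp 将编译产物中的预测库目录拷贝到当前路径;
+    # 下一行记录该目录名,使 compile 分支与 download 分支后续能用同一个 ${paddlelite_library_file} 变量组织文件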
+ paddlelite_library_file=inference_lite_lib.android.armv8 +else + echo "paddlelite_library_source only support 'download' and 'compile'" + exit 3 +fi + +# organize the required files +mkdir -p ${paddlelite_library_file}/demo/cxx/ocr/test_lite +cp -r ${model_path}/*_opt.nb test_data ${paddlelite_library_file}/demo/cxx/ocr/test_lite +cp ppocr/utils/ppocr_keys_v1.txt deploy/lite/config.txt ${paddlelite_library_file}/demo/cxx/ocr/test_lite +cp -r ./deploy/lite/* ${paddlelite_library_file}/demo/cxx/ocr/ +cp ${paddlelite_library_file}/cxx/lib/libpaddle_light_api_shared.so ${paddlelite_library_file}/demo/cxx/ocr/test_lite +cp ${FILENAME} test_tipc/test_lite_arm_cpp.sh test_tipc/common_func.sh ${paddlelite_library_file}/demo/cxx/ocr/test_lite +cd ${paddlelite_library_file}/demo/cxx/ocr/ +git clone https://github.com/cuicheng01/AutoLog.git + +# compile and do some postprocess +make -j +sleep 1 +make -j +cp ocr_db_crnn test_lite && cp test_lite/libpaddle_light_api_shared.so test_lite/libc++_shared.so +tar -cf test_lite.tar ./test_lite && cp test_lite.tar ${current_dir} && cd ${current_dir} +rm -rf ${paddlelite_library_file}* && rm -rf ${model_path} diff --git a/test_tipc/readme.md b/test_tipc/readme.md index 9ab6f333d9dd74052dfd6eb71b60a8f257abd72d..8b2489f3445ddfa87c1e587d6da81992fdb90e64 100644 --- a/test_tipc/readme.md +++ b/test_tipc/readme.md @@ -1,9 +1,9 @@ -# 飞桨训推一体认证 +# 飞桨训推一体全流程(TIPC) ## 1. 简介 -飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的飞桨训推一体认证 (Training and Inference Pipeline Certification(TIPC)) 信息和测试工具,方便用户查阅每种模型的训练推理部署打通情况,并可以进行一键测试。 +飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的飞桨训推一体全流程(Training and Inference Pipeline Criterion(TIPC))信息和测试工具,方便用户查阅每种模型的训练推理部署打通情况,并可以进行一键测试。
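本节中的各测试脚本都会把配置文件逐行读入 `lines` 数组,再用 `test_tipc/common_func.sh` 提供的 `func_parser_key`/`func_parser_value` 按冒号切分取出键和值。下面是一个按这些脚本的调用方式写出的最小示意(函数体为推测,并非 common_func.sh 的原文,仅供理解配置文件的解析方式):

```shell
# 最小示意:按冒号切分 "key:value" 形式的配置行(推测实现,实际以 test_tipc/common_func.sh 为准)
function func_parser_key(){
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[0]}    # 冒号前的部分作为 key
}

function func_parser_value(){
    strs=$1
    IFS=":"
    array=(${strs})
    echo ${array[1]}    # 冒号后的部分作为 value
}

# 用法示意
line="use_gpu_list:True|False"
echo $(func_parser_key "${line}")      # 输出 use_gpu_list
echo $(func_parser_value "${line}")    # 输出 True|False
```

value 中再以 `|` 分隔多个取值,由各测试脚本自行展开遍历,这也是下文配置文件和脚本中大量出现 `IFS='|'` 的原因。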
@@ -23,14 +23,17 @@
 | 算法论文 | 模型名称 | 模型类型 | 基础<br>训练预测 | 更多<br>训练方式 | 模型压缩 | 其他预测部署 |
 | :--- | :--- | :----: | :--------: | :---- | :---- | :---- |
-| DB |ch_ppocr_mobile_v2.0_det | 检测 | 支持 | 多机多卡<br>混合精度 | FPGM裁剪<br>离线量化| Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| DB |ch_ppocr_server_v2.0_det | 检测 | 支持 | 多机多卡<br>混合精度 | FPGM裁剪<br>离线量化| Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_mobile_v2.0_det | 检测 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_mobile_v2.0_det_FPGM | 检测 | 支持 | 多机多卡<br>混合精度 | FPGM裁剪 | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_mobile_v2.0_det_PACT | 检测 | 支持 | 多机多卡<br>混合精度 | PACT量化 | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_mobile_v2.0_det_KL | 检测 | 支持 | 多机多卡<br>混合精度 | 离线量化| Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_server_v2.0_det | 检测 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++ |
 | DB |ch_PP-OCRv2_det | 检测 |
-| CRNN |ch_ppocr_mobile_v2.0_rec | 识别 | 支持 | 多机多卡<br>混合精度 | PACT量化<br>离线量化| Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| CRNN |ch_ppocr_server_v2.0_rec | 识别 | 支持 | 多机多卡<br>混合精度 | PACT量化<br>离线量化| Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| CRNN |ch_ppocr_mobile_v2.0_rec | 识别 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| CRNN |ch_ppocr_server_v2.0_rec | 识别 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++ |
 | CRNN |ch_PP-OCRv2_rec | 识别 |
 | PP-OCR |ch_ppocr_mobile_v2.0 | 检测+识别 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| PP-OCR |ch_ppocr_server_v2.0 | 检测+识别 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| PP-OCR |ch_ppocr_server_v2.0 | 检测+识别 | 支持 | 多机多卡<br>混合精度 | - | Paddle Inference: C++<br>Paddle Serving: Python, C++ |
 |PP-OCRv2|ch_PP-OCRv2 | 检测+识别 |
 | DB |det_mv3_db_v2.0 | 检测 |
 | DB |det_r50_vd_db_v2.0 | 检测 |
@@ -54,32 +54,31 @@

-## 3. 一键测试工具使用
+## 3. 测试工具简介
 ### 目录介绍

 ```shell
 test_tipc/
 ├── configs/  # 配置文件目录
-    ├── ppocr_det_mobile            # ppocr_det_mobile模型的测试配置文件目录
-        ├── det_mv3_db.yml          # 测试mobile版ppocr检测模型训练的yml文件
-        ├── train_infer_python.txt.txt  # 测试Linux上python训练预测(基础训练预测)的配置文件
+    ├── ch_ppocr_mobile_v2.0_det    # ch_ppocr_mobile_v2.0_det模型的测试配置文件目录
+        ├── train_infer_python.txt  # 测试Linux上python训练预测(基础训练预测)的配置文件
        ├── model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt   # 测试Linux上c++预测的配置文件
        ├── model_linux_gpu_normal_normal_infer_python_jetson.txt       # 测试Jetson上python预测的配置文件
        ├── train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt    # 测试Linux上多机多卡、混合精度训练和python预测的配置文件
        ├── ...
-    ├── ppocr_det_server            # ppocr_det_server模型的测试配置文件目录
+    ├── ch_ppocr_server_v2.0_det    # ch_ppocr_server_v2.0_det模型的测试配置文件目录
        ├── ...
-    ├── ppocr_rec_mobile            # ppocr_rec_mobile模型的测试配置文件目录
+    ├── ch_ppocr_mobile_v2.0_rec    # ch_ppocr_mobile_v2.0_rec模型的测试配置文件目录
        ├── ...
-    ├── ppocr_rec_server            # ppocr_rec_server模型的测试配置文件目录
+    ├── ch_ppocr_server_v2.0_rec    # ch_ppocr_server_v2.0_rec模型的测试配置文件目录
        ├── ...
    ├── ...
├── results/   # 预先保存的预测结果,用于和实际预测结果进行精度比对
-    ├── python_ppocr_det_mobile_results_fp32.txt  # 预存的mobile版ppocr检测模型python预测fp32精度的结果
-    ├── python_ppocr_det_mobile_results_fp16.txt  # 预存的mobile版ppocr检测模型python预测fp16精度的结果
-    ├── cpp_ppocr_det_mobile_results_fp32.txt  # 预存的mobile版ppocr检测模型c++预测的fp32精度的结果
-    ├── cpp_ppocr_det_mobile_results_fp16.txt  # 预存的mobile版ppocr检测模型c++预测的fp16精度的结果
-    ├── ...
+    ├── python_ppocr_det_mobile_results_fp32.txt   # 预存的mobile版ppocr检测模型python预测fp32精度的结果
+    ├── python_ppocr_det_mobile_results_fp16.txt   # 预存的mobile版ppocr检测模型python预测fp16精度的结果
+    ├── cpp_ppocr_det_mobile_results_fp32.txt      # 预存的mobile版ppocr检测模型c++预测的fp32精度的结果
+    ├── cpp_ppocr_det_mobile_results_fp16.txt      # 预存的mobile版ppocr检测模型c++预测的fp16精度的结果
+    ├── ...
├── prepare.sh                        # 完成test_*.sh运行所需要的数据和模型下载
├── test_train_inference_python.sh    # 测试python训练预测的主程序
├── test_inference_cpp.sh             # 测试c++预测的主程序
@@ -89,19 +91,9 @@ test_tipc/
└── readme.md                         # 使用文档
 ```

-### 配置文件命名规范
-在`configs`目录下,按模型名称划分为子目录,子目录中存放所有该模型测试需要用到的配置文件,配置文件的命名遵循如下规范:
-
-1. 基础训练预测配置简单命名为:`train_infer_python.txt`,表示**Linux环境下单机、不使用混合精度训练+python预测**,其完整命名对应`train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt`,由于本配置文件使用频率较高,这里进行了名称简化。
-
-2. 其他带训练配置命名格式为:`train_训练硬件环境(linux_gpu/linux_dcu/…)_是否多机(fleet/normal)_是否混合精度(amp/normal)_预测模式(infer/lite/serving/js)_语言(cpp/python/java)_预测硬件环境(linux_gpu/mac/jetson/opencl_arm_gpu/...).txt`。如,linux gpu下多机多卡+混合精度链条测试对应配置 `train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt`,linux dcu下基础训练预测对应配置 `train_linux_dcu_normal_normal_infer_python_dcu.txt`。
-
-3. 仅预测的配置(如serving、lite等)命名格式:`model_训练硬件环境(linux_gpu/linux_dcu/…)_是否多机(fleet/normal)_是否混合精度(amp/normal)_(infer/lite/serving/js)_语言(cpp/python/java)_预测硬件环境(linux_gpu/mac/jetson/opencl_arm_gpu/...).txt`,即,与2相比,仅第一个字段从train换为model,测试时模型直接下载获取,这里的“训练硬件环境”表示所测试的模型是在哪种环境下训练得到的。
+### 测试流程概述
-根据上述命名规范,可以直接从配置文件名看出对应的测试场景和功能。
-
-### 测试流程
-使用本工具,可以测试不同功能的支持情况,以及预测结果是否对齐,测试流程如下:
+使用本工具,可以测试不同功能的支持情况,以及预测结果是否对齐,测试流程概括如下:
@@ -110,18 +102,43 @@ test_tipc/ 2. 运行要测试的功能对应的测试脚本`test_*.sh`,产出log,由log可以看到不同配置是否运行成功; 3. 用`compare_results.py`对比log中的预测结果和预存在results目录下的结果,判断预测精度是否符合预期(在误差范围内)。 -其中,有4个测试主程序,功能如下: -- `test_train_inference_python.sh`:测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。 -- `test_inference_cpp.sh`:测试基于C++的模型推理。 -- `test_serving.sh`:测试基于Paddle Serving的服务化部署功能。 -- `test_lite_arm_cpu_cpp.sh`:测试基于Paddle-Lite的ARM CPU端c++预测部署功能。 -- `test_paddle2onnx.sh`:测试Paddle2ONNX的模型转化功能,并验证正确性。 +测试单项功能仅需两行命令,**如需测试不同模型/功能,替换配置文件即可**,命令格式如下: +```shell +# 功能:准备数据 +# 格式:bash + 运行脚本 + 参数1: 配置文件选择 + 参数2: 模式选择 +bash test_tipc/prepare.sh configs/[model_name]/[params_file_name] [Mode] + +# 功能:运行测试 +# 格式:bash + 运行脚本 + 参数1: 配置文件选择 + 参数2: 模式选择 +bash test_tipc/test_train_inference_python.sh configs/[model_name]/[params_file_name] [Mode] +``` + +例如,测试基本训练预测功能的`lite_train_lite_infer`模式,运行: +```shell +# 准备数据 +bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' +# 运行测试 +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer' +``` +关于本示例命令的更多信息可查看[基础训练预测使用文档](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/test_tipc/docs/test_train_inference_python.md#22-%E5%8A%9F%E8%83%BD%E6%B5%8B%E8%AF%95)。 + +### 配置文件命名规范 +在`configs`目录下,**按模型名称划分为子目录**,子目录中存放所有该模型测试需要用到的配置文件,配置文件的命名遵循如下规范: + +1. 基础训练预测配置简单命名为:`train_infer_python.txt`,表示**Linux环境下单机、不使用混合精度训练+python预测**,其完整命名对应`train_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt`,由于本配置文件使用频率较高,这里进行了名称简化。 + +2. 其他带训练配置命名格式为:`train_训练硬件环境(linux_gpu/linux_dcu/…)_是否多机(fleet/normal)_是否混合精度(amp/normal)_预测模式(infer/lite/serving/js)_语言(cpp/python/java)_预测硬件环境(linux_gpu/mac/jetson/opencl_arm_gpu/...).txt`。如,linux gpu下多机多卡+混合精度链条测试对应配置 `train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt`,linux dcu下基础训练预测对应配置 `train_linux_dcu_normal_normal_infer_python_linux_dcu.txt`。 + +3. 仅预测的配置(如serving、lite等)命名格式:`model_训练硬件环境(linux_gpu/linux_dcu/…)_是否多机(fleet/normal)_是否混合精度(amp/normal)_(infer/lite/serving/js)_语言(cpp/python/java)_预测硬件环境(linux_gpu/mac/jetson/opencl_arm_gpu/...).txt`,即,与2相比,仅第一个字段从train换为model,测试时模型直接下载获取,这里的“训练硬件环境”表示所测试的模型是在哪种环境下训练得到的。 + +**根据上述命名规范,可以直接从子目录名称和配置文件名找到需要测试的场景和功能对应的配置文件。** + -#### 更多教程 +## 4. 
开始测试

各功能测试中涉及混合精度、裁剪、量化等训练相关,及mkldnn、Tensorrt等多种预测相关参数配置,请点击下方相应链接了解更多细节和使用教程:
-[test_train_inference_python 使用](docs/test_train_inference_python.md)
-[test_inference_cpp 使用](docs/test_inference_cpp.md)
-[test_serving 使用](docs/test_serving.md)
-[test_lite_arm_cpu_cpp 使用](docs/test_lite_arm_cpu_cpp.md)
-[test_paddle2onnx 使用](docs/test_paddle2onnx.md)
+- [test_train_inference_python 使用](docs/test_train_inference_python.md) :测试基于Python的模型训练、评估、推理等基本功能,包括裁剪、量化、蒸馏。
+- [test_inference_cpp 使用](docs/test_inference_cpp.md):测试基于C++的模型推理。
+- [test_serving 使用](docs/test_serving.md):测试基于Paddle Serving的服务化部署功能。
+- [test_lite_arm_cpp 使用](docs/test_lite_arm_cpp.md):测试基于Paddle-Lite的ARM端c++预测部署功能。
+- [test_paddle2onnx 使用](docs/test_paddle2onnx.md):测试Paddle2ONNX的模型转化功能,并验证正确性。
diff --git a/test_tipc/test_inference_jeston.sh b/test_tipc/test_inference_jeston.sh
deleted file mode 100644
index 2fd76e1e9e7e8c7b52d0b6838cd15840a59fe5c4..0000000000000000000000000000000000000000
--- a/test_tipc/test_inference_jeston.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/bin/bash
-source test_tipc/common_func.sh
-source test_tipc/test_train_inference_python.sh
-
-FILENAME=$1
-# MODE be one of ['whole_infer']
-MODE=$2
-
-dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
-
-# parser params
-IFS=$'\n'
-lines=(${dataline})
-
-model_name=$(func_parser_value "${lines[1]}")
-python=$(func_parser_value "${lines[2]}")
-
-infer_model_dir_list=$(func_parser_value "${lines[3]}")
-infer_export_list=$(func_parser_value "${lines[4]}")
-infer_is_quant=$(func_parser_value "${lines[5]}")
-# parser inference
-inference_py=$(func_parser_value "${lines[6]}")
-use_gpu_key=$(func_parser_key "${lines[7]}")
-use_gpu_list=$(func_parser_value "${lines[7]}")
-use_mkldnn_key=$(func_parser_key "${lines[8]}")
-use_mkldnn_list=$(func_parser_value "${lines[8]}")
-cpu_threads_key=$(func_parser_key "${lines[9]}")
-cpu_threads_list=$(func_parser_value "${lines[9]}")
-batch_size_key=$(func_parser_key "${lines[10]}")
-batch_size_list=$(func_parser_value "${lines[10]}")
-use_trt_key=$(func_parser_key "${lines[11]}")
-use_trt_list=$(func_parser_value "${lines[11]}")
-precision_key=$(func_parser_key "${lines[12]}")
-precision_list=$(func_parser_value "${lines[12]}")
-infer_model_key=$(func_parser_key "${lines[13]}")
-image_dir_key=$(func_parser_key "${lines[14]}")
-infer_img_dir=$(func_parser_value "${lines[14]}")
-save_log_key=$(func_parser_key "${lines[15]}")
-benchmark_key=$(func_parser_key "${lines[16]}")
-benchmark_value=$(func_parser_value "${lines[16]}")
-infer_key1=$(func_parser_key "${lines[17]}")
-infer_value1=$(func_parser_value "${lines[17]}")
-
-
-LOG_PATH="./test_tipc/output"
-mkdir -p ${LOG_PATH}
-status_log="${LOG_PATH}/results_python.log"
-
-
-if [ ${MODE} = "whole_infer" ]; then
-    GPUID=$3
-    if [ ${#GPUID} -le 0 ];then
-        env=" "
-    else
-        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
-    fi
-    # set CUDA_VISIBLE_DEVICES
-    eval $env
-    export Count=0
-    IFS="|"
-    infer_run_exports=(${infer_export_list})
-    infer_quant_flag=(${infer_is_quant})
-    for infer_model in ${infer_model_dir_list[*]}; do
-        # run export
-        if [ ${infer_run_exports[Count]} != "null" ];then
-            save_infer_dir=$(dirname $infer_model)
-            set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
-            set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
-            export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
-            echo ${infer_run_exports[Count]}
-            echo $export_cmd
-            eval $export_cmd
-            status_export=$?
- status_check $status_export "${export_cmd}" "${status_log}" - else - save_infer_dir=${infer_model} - fi - #run inference - is_quant=${infer_quant_flag[Count]} - if [ ${MODE} = "klquant_infer" ]; then - is_quant="True" - fi - func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant} - Count=$(($Count + 1)) - done -fi - diff --git a/test_tipc/test_inference_python.sh b/test_tipc/test_inference_python.sh new file mode 100644 index 0000000000000000000000000000000000000000..72516e044ed8a23c660a4c4f486d19f22a584fb0 --- /dev/null +++ b/test_tipc/test_inference_python.sh @@ -0,0 +1,169 @@ +#!/bin/bash +source test_tipc/common_func.sh +#source test_tipc/test_train_inference_python.sh + +FILENAME=$1 +# MODE be one of ['whole_infer'] +MODE=$2 + +dataline=$(awk 'NR==1, NR==20{print}' $FILENAME) + +# parser params +IFS=$'\n' +lines=(${dataline}) + +model_name=$(func_parser_value "${lines[1]}") +python=$(func_parser_value "${lines[2]}") + +infer_model_dir_list=$(func_parser_value "${lines[3]}") +infer_export_list=$(func_parser_value "${lines[4]}") +infer_is_quant=$(func_parser_value "${lines[5]}") +# parser inference +inference_py=$(func_parser_value "${lines[6]}") +use_gpu_key=$(func_parser_key "${lines[7]}") +use_gpu_list=$(func_parser_value "${lines[7]}") +use_mkldnn_key=$(func_parser_key "${lines[8]}") +use_mkldnn_list=$(func_parser_value "${lines[8]}") +cpu_threads_key=$(func_parser_key "${lines[9]}") +cpu_threads_list=$(func_parser_value "${lines[9]}") +batch_size_key=$(func_parser_key "${lines[10]}") +batch_size_list=$(func_parser_value "${lines[10]}") +use_trt_key=$(func_parser_key "${lines[11]}") +use_trt_list=$(func_parser_value "${lines[11]}") +precision_key=$(func_parser_key "${lines[12]}") +precision_list=$(func_parser_value "${lines[12]}") +infer_model_key=$(func_parser_key "${lines[13]}") +image_dir_key=$(func_parser_key "${lines[14]}") +infer_img_dir=$(func_parser_value "${lines[14]}") +rec_model_key=$(func_parser_key "${lines[15]}") +rec_model_value=$(func_parser_value "${lines[15]}") +benchmark_key=$(func_parser_key "${lines[16]}") +benchmark_value=$(func_parser_value "${lines[16]}") +infer_key1=$(func_parser_key "${lines[17]}") +infer_value1=$(func_parser_value "${lines[17]}") + + + +LOG_PATH="./test_tipc/output" +mkdir -p ${LOG_PATH} +status_log="${LOG_PATH}/results_python.log" + + +function func_inference(){ + IFS='|' + _python=$1 + _script=$2 + _model_dir=$3 + _log_path=$4 + _img_dir=$5 + _flag_quant=$6 + # inference + for use_gpu in ${use_gpu_list[*]}; do + if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then + for use_mkldnn in ${use_mkldnn_list[*]}; do + if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then + continue + fi + for threads in ${cpu_threads_list[*]}; do + for batch_size in ${batch_size_list[*]}; do + for precision in ${precision_list[*]}; do + if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then + continue + fi # skip when enable fp16 but disable mkldnn + if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then + continue + fi # skip when quant model inference but precision is not int8 + set_precision=$(func_set_params "${precision_key}" "${precision}") + + _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log" + set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") + set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") + set_batchsize=$(func_set_params 
"${batch_size_key}" "${batch_size}") + set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}") + set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + set_infer_params0=$(func_set_params "${rec_model_key}" "${rec_model_value}") + set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 " + eval $command + last_status=${PIPESTATUS[0]} + eval "cat ${_save_log_path}" + status_check $last_status "${command}" "${status_log}" + done + done + done + done + elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then + for use_trt in ${use_trt_list[*]}; do + for precision in ${precision_list[*]}; do + if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then + continue + fi + if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then + continue + fi + if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then + continue + fi + for batch_size in ${batch_size_list[*]}; do + _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log" + set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}") + set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}") + set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}") + set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}") + set_precision=$(func_set_params "${precision_key}" "${precision}") + set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}") + set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}") + set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}") + command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 " + eval $command + last_status=${PIPESTATUS[0]} + eval "cat ${_save_log_path}" + status_check $last_status "${command}" "${status_log}" + + done + done + done + else + echo "Does not support hardware other than CPU and GPU Currently!" + fi + done +} + +if [ ${MODE} = "whole_infer" ]; then + GPUID=$3 + if [ ${#GPUID} -le 0 ];then + env=" " + else + env="export CUDA_VISIBLE_DEVICES=${GPUID}" + fi + # set CUDA_VISIBLE_DEVICES + eval $env + export Count=0 + IFS="|" + infer_run_exports=(${infer_export_list}) + infer_quant_flag=(${infer_is_quant}) + for infer_model in ${infer_model_dir_list[*]}; do + # run export + if [ ${infer_run_exports[Count]} != "null" ];then + save_infer_dir=$(dirname $infer_model) + set_export_weight=$(func_set_params "${export_weight}" "${infer_model}") + set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}") + export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}" + echo ${infer_run_exports[Count]} + eval $export_cmd + status_export=$? 
+            status_check $status_export "${export_cmd}" "${status_log}"
+        else
+            save_infer_dir=${infer_model}
+        fi
+        #run inference
+        is_quant=${infer_quant_flag[Count]}
+        if [ ${MODE} = "klquant_infer" ]; then
+            is_quant="True"
+        fi
+        func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
+        Count=$(($Count + 1))
+    done
+fi
+
+
diff --git a/test_tipc/test_lite_arm_cpp.sh b/test_tipc/test_lite_arm_cpp.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c071a236bb7ea35b86b32bfc3b22e87a5aabbb93
--- /dev/null
+++ b/test_tipc/test_lite_arm_cpp.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+source ./common_func.sh
+export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
+
+FILENAME=$1
+dataline=$(cat $FILENAME)
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# parser lite inference
+inference_cmd=$(func_parser_value "${lines[1]}")
+runtime_device=$(func_parser_value "${lines[2]}")
+det_model_list=$(func_parser_value "${lines[3]}")
+rec_model_list=$(func_parser_value "${lines[4]}")
+cls_model_list=$(func_parser_value "${lines[5]}")
+cpu_threads_list=$(func_parser_value "${lines[6]}")
+det_batch_size_list=$(func_parser_value "${lines[7]}")
+rec_batch_size_list=$(func_parser_value "${lines[8]}")
+infer_img_dir_list=$(func_parser_value "${lines[9]}")
+config_dir=$(func_parser_value "${lines[10]}")
+rec_dict_dir=$(func_parser_value "${lines[11]}")
+benchmark_value=$(func_parser_value "${lines[12]}")
+
+if [[ $inference_cmd =~ "det" ]]; then
+    lite_model_list=${det_model_list}
+elif [[ $inference_cmd =~ "rec" ]]; then
+    lite_model_list=(${rec_model_list[*]} ${cls_model_list[*]})
+elif [[ $inference_cmd =~ "system" ]]; then
+    lite_model_list=(${det_model_list[*]} ${rec_model_list[*]} ${cls_model_list[*]})
+else
+    echo "inference_cmd is wrong, please check."
+    exit 1
+fi
+
+LOG_PATH="./output"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results.log"
+
+
+function func_test_det(){
+    IFS='|'
+    _script=$1
+    _det_model=$2
+    _log_path=$3
+    _img_dir=$4
+    _config=$5
+    if [[ $_det_model =~ "slim" ]]; then
+        precision="INT8"
+    else
+        precision="FP32"
+    fi
+
+    # lite inference
+    for num_threads in ${cpu_threads_list[*]}; do
+        for det_batchsize in ${det_batch_size_list[*]}; do
+            _save_log_path="${_log_path}/lite_${_det_model}_runtime_device_${runtime_device}_precision_${precision}_det_batchsize_${det_batchsize}_threads_${num_threads}.log"
+            command="${_script} ${_det_model} ${runtime_device} ${precision} ${num_threads} ${det_batchsize} ${_img_dir} ${_config} ${benchmark_value} > ${_save_log_path} 2>&1"
+            eval ${command}
+            status_check $? "${command}" "${status_log}"
+        done
+    done
+}
+
+function func_test_rec(){
+    IFS='|'
+    _script=$1
+    _rec_model=$2
+    _cls_model=$3
+    _log_path=$4
+    _img_dir=$5
+    _config=$6
+    _rec_dict_dir=$7
+
+    if [[ $_rec_model =~ "slim" ]]; then
+        _precision="INT8"
+    else
+        _precision="FP32"
+    fi
+
+    # lite inference
+    for num_threads in ${cpu_threads_list[*]}; do
+        for rec_batchsize in ${rec_batch_size_list[*]}; do
+            _save_log_path="${_log_path}/lite_${_rec_model}_${_cls_model}_runtime_device_${runtime_device}_precision_${_precision}_rec_batchsize_${rec_batchsize}_threads_${num_threads}.log"
+            command="${_script} ${_rec_model} ${_cls_model} ${runtime_device} ${_precision} ${num_threads} ${rec_batchsize} ${_img_dir} ${_config} ${_rec_dict_dir} ${benchmark_value} > ${_save_log_path} 2>&1"
+            eval ${command}
+            status_check $? "${command}" "${status_log}"
+        done
+    done
+}
+
+function func_test_system(){
+    IFS='|'
+    _script=$1
+    _det_model=$2
+    _rec_model=$3
+    _cls_model=$4
+    _log_path=$5
+    _img_dir=$6
+    _config=$7
+    _rec_dict_dir=$8
+    if [[ $_det_model =~ "slim" ]]; then
+        _precision="INT8"
+    else
+        _precision="FP32"
+    fi
+
+    # lite inference
+    for num_threads in ${cpu_threads_list[*]}; do
+        for det_batchsize in ${det_batch_size_list[*]}; do
+            for rec_batchsize in ${rec_batch_size_list[*]}; do
+                _save_log_path="${_log_path}/lite_${_det_model}_${_rec_model}_${_cls_model}_runtime_device_${runtime_device}_precision_${_precision}_det_batchsize_${det_batchsize}_rec_batchsize_${rec_batchsize}_threads_${num_threads}.log"
+                command="${_script} ${_det_model} ${_rec_model} ${_cls_model} ${runtime_device} ${_precision} ${num_threads} ${det_batchsize} ${_img_dir} ${_config} ${_rec_dict_dir} ${benchmark_value} > ${_save_log_path} 2>&1"
+                eval ${command}
+                status_check $? "${command}" "${status_log}"
+            done
+        done
+    done
+}
+
+
+echo "################### run test ###################"
+
+if [[ $inference_cmd =~ "det" ]]; then
+    IFS="|"
+    det_model_list=(${det_model_list[*]})
+
+    for i in {0..1}; do
+        #run lite inference
+        for img_dir in ${infer_img_dir_list[*]}; do
+            func_test_det "${inference_cmd}" "${det_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${config_dir}"
+        done
+    done
+
+elif [[ $inference_cmd =~ "rec" ]]; then
+    IFS="|"
+    rec_model_list=(${rec_model_list[*]})
+    cls_model_list=(${cls_model_list[*]})
+
+    for i in {0..1}; do
+        #run lite inference
+        for img_dir in ${infer_img_dir_list[*]}; do
+            func_test_rec "${inference_cmd}" "${rec_model_list[i]}_opt.nb" "${cls_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${config_dir}" "${rec_dict_dir}"
+        done
+    done
+
+elif [[ $inference_cmd =~ "system" ]]; then
+    IFS="|"
+    det_model_list=(${det_model_list[*]})
+    rec_model_list=(${rec_model_list[*]})
+    cls_model_list=(${cls_model_list[*]})
+
+    for i in {0..1}; do
+        #run lite inference
+        for img_dir in ${infer_img_dir_list[*]}; do
+            func_test_system "${inference_cmd}" "${det_model_list[i]}_opt.nb" "${rec_model_list[i]}_opt.nb" "${cls_model_list[i]}_opt.nb" "${LOG_PATH}" "${img_dir}" "${config_dir}" "${rec_dict_dir}"
+        done
+    done
+fi
diff --git a/test_tipc/test_lite_arm_cpu_cpp.sh b/test_tipc/test_lite_arm_cpu_cpp.sh
deleted file mode 100644
index 04eebbd28a334f7ac7819f8ff55d7b3192f4b490..0000000000000000000000000000000000000000
--- a/test_tipc/test_lite_arm_cpu_cpp.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-source ./common_func.sh
-export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
-
-FILENAME=$1
-dataline=$(cat $FILENAME)
-# parser params
-IFS=$'\n'
-lines=(${dataline})
-
-# parser lite inference
-lite_inference_cmd=$(func_parser_value "${lines[1]}")
-lite_model_dir_list=$(func_parser_value "${lines[2]}")
-runtime_device=$(func_parser_value "${lines[3]}")
-lite_cpu_threads_list=$(func_parser_value "${lines[4]}")
-lite_batch_size_list=$(func_parser_value "${lines[5]}")
-lite_infer_img_dir_list=$(func_parser_value "${lines[8]}")
-lite_config_dir=$(func_parser_value "${lines[9]}")
-lite_rec_dict_dir=$(func_parser_value "${lines[10]}")
-lite_benchmark_value=$(func_parser_value "${lines[11]}")
-
-
-LOG_PATH="./output"
-mkdir -p ${LOG_PATH}
-status_log="${LOG_PATH}/results.log"
-
-
-function func_lite(){
-    IFS='|'
-    _script=$1
-    _lite_model=$2
-    _log_path=$3
-    _img_dir=$4
-    _config=$5
-    if [[ $lite_model =~ "slim" ]]; then
-        precision="INT8"
-    else
-        precision="FP32"
-    fi
-
-    # lite inference
-    for num_threads in 
${lite_cpu_threads_list[*]}; do - for batchsize in ${lite_batch_size_list[*]}; do - _save_log_path="${_log_path}/lite_${_lite_model}_runtime_device_${runtime_device}_precision_${precision}_batchsize_${batchsize}_threads_${num_threads}.log" - command="${_script} ${_lite_model} ${runtime_device} ${precision} ${num_threads} ${batchsize} ${_img_dir} ${_config} ${lite_benchmark_value} > ${_save_log_path} 2>&1" - eval ${command} - status_check $? "${command}" "${status_log}" - done - done -} - - -echo "################### run test ###################" -IFS="|" -for lite_model in ${lite_model_dir_list[*]}; do - #run lite inference - for img_dir in ${lite_infer_img_dir_list[*]}; do - func_lite "${lite_inference_cmd}" "${lite_model}_opt.nb" "${LOG_PATH}" "${img_dir}" "${lite_config_dir}" - done -done diff --git a/test_tipc/test_serving.sh b/test_tipc/test_serving.sh index c36935a60fecacea672fd932773a8fb0bdcd619b..1318d012d401c4f4e8540a5d0d227ea75f677004 100644 --- a/test_tipc/test_serving.sh +++ b/test_tipc/test_serving.sh @@ -10,7 +10,7 @@ lines=(${dataline}) # parser serving model_name=$(func_parser_value "${lines[1]}") -python=$(func_parser_value "${lines[2]}") +python_list=$(func_parser_value "${lines[2]}") trans_model_py=$(func_parser_value "${lines[3]}") infer_model_dir_key=$(func_parser_key "${lines[4]}") infer_model_dir_value=$(func_parser_value "${lines[4]}") @@ -54,14 +54,15 @@ function func_serving(){ set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}") set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}") set_image_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}") - trans_model_cmd="${python} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}" + python_list=(${python_list}) + trans_model_cmd="${python_list[0]} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}" eval $trans_model_cmd cd ${serving_dir_value} echo $PWD unset https_proxy unset http_proxy - for python in ${python[*]}; do - if [ ${python} = "cpp"]; then + for python in ${python_list[*]}; do + if [ ${python} = "cpp" ]; then for use_gpu in ${web_use_gpu_list[*]}; do if [ ${use_gpu} = "null" ]; then web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293" @@ -91,9 +92,6 @@ function func_serving(){ echo ${ues_gpu} if [ ${use_gpu} = "null" ]; then for use_mkldnn in ${web_use_mkldnn_list[*]}; do - if [ ${use_mkldnn} = "False" ]; then - continue - fi for threads in ${web_cpu_threads_list[*]}; do set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}") web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &" @@ -124,6 +122,9 @@ function func_serving(){ continue fi set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}") + if [ ${use_trt} = True ]; then + device_type=2 + fi set_precision=$(func_set_params "${web_precision_key}" "${precision}") web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & " eval $web_service_cmd diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh index c62b6274f8dcbc84d08900c5d228d78fd3c0de1a..0b0a4e4a75f5e978f64404b27a5f26594dbd484e 100644 --- a/test_tipc/test_train_inference_python.sh +++ 
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index c62b6274f8dcbc84d08900c5d228d78fd3c0de1a..0b0a4e4a75f5e978f64404b27a5f26594dbd484e 100644
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -20,10 +20,10 @@ train_use_gpu_value=$(func_parser_value "${lines[4]}")
 autocast_list=$(func_parser_value "${lines[5]}")
 autocast_key=$(func_parser_key "${lines[5]}")
 epoch_key=$(func_parser_key "${lines[6]}")
-epoch_num=$(func_parser_params "${lines[6]}")
+epoch_num=$(func_parser_params "${lines[6]}" "${MODE}")
 save_model_key=$(func_parser_key "${lines[7]}")
 train_batch_key=$(func_parser_key "${lines[8]}")
-train_batch_value=$(func_parser_params "${lines[8]}")
+train_batch_value=$(func_parser_params "${lines[8]}" "${MODE}")
 pretrain_model_key=$(func_parser_key "${lines[9]}")
 pretrain_model_value=$(func_parser_value "${lines[9]}")
 train_model_name=$(func_parser_value "${lines[10]}")
@@ -90,34 +90,39 @@ infer_value1=$(func_parser_value "${lines[50]}")
 
 # parser klquant_infer
 if [ ${MODE} = "klquant_whole_infer" ]; then
-    dataline=$(awk 'NR==85 NR==101{print}' $FILENAME)
+    dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
     lines=(${dataline})
+    model_name=$(func_parser_value "${lines[1]}")
+    python=$(func_parser_value "${lines[2]}")
+    export_weight=$(func_parser_key "${lines[3]}")
+    save_infer_key=$(func_parser_key "${lines[4]}")
     # parser inference model
-    infer_model_dir_list=$(func_parser_value "${lines[1]}")
-    infer_export_list=$(func_parser_value "${lines[2]}")
-    infer_is_quant=$(func_parser_value "${lines[3]}")
+    infer_model_dir_list=$(func_parser_value "${lines[5]}")
+    infer_export_list=$(func_parser_value "${lines[6]}")
+    infer_is_quant=$(func_parser_value "${lines[7]}")
     # parser inference
-    inference_py=$(func_parser_value "${lines[4]}")
-    use_gpu_key=$(func_parser_key "${lines[5]}")
-    use_gpu_list=$(func_parser_value "${lines[5]}")
-    use_mkldnn_key=$(func_parser_key "${lines[6]}")
-    use_mkldnn_list=$(func_parser_value "${lines[6]}")
-    cpu_threads_key=$(func_parser_key "${lines[7]}")
-    cpu_threads_list=$(func_parser_value "${lines[7]}")
-    batch_size_key=$(func_parser_key "${lines[8]}")
-    batch_size_list=$(func_parser_value "${lines[8]}")
-    use_trt_key=$(func_parser_key "${lines[9]}")
-    use_trt_list=$(func_parser_value "${lines[9]}")
-    precision_key=$(func_parser_key "${lines[10]}")
-    precision_list=$(func_parser_value "${lines[10]}")
-    infer_model_key=$(func_parser_key "${lines[11]}")
-    image_dir_key=$(func_parser_key "${lines[12]}")
-    infer_img_dir=$(func_parser_value "${lines[12]}")
-    save_log_key=$(func_parser_key "${lines[13]}")
-    benchmark_key=$(func_parser_key "${lines[14]}")
-    benchmark_value=$(func_parser_value "${lines[14]}")
-    infer_key1=$(func_parser_key "${lines[15]}")
-    infer_value1=$(func_parser_value "${lines[15]}")
+    inference_py=$(func_parser_value "${lines[8]}")
+    use_gpu_key=$(func_parser_key "${lines[9]}")
+    use_gpu_list=$(func_parser_value "${lines[9]}")
+    use_mkldnn_key=$(func_parser_key "${lines[10]}")
+    use_mkldnn_list=$(func_parser_value "${lines[10]}")
+    cpu_threads_key=$(func_parser_key "${lines[11]}")
+    cpu_threads_list=$(func_parser_value "${lines[11]}")
+    batch_size_key=$(func_parser_key "${lines[12]}")
+    batch_size_list=$(func_parser_value "${lines[12]}")
+    use_trt_key=$(func_parser_key "${lines[13]}")
+    use_trt_list=$(func_parser_value "${lines[13]}")
+    precision_key=$(func_parser_key "${lines[14]}")
+    precision_list=$(func_parser_value "${lines[14]}")
+    infer_model_key=$(func_parser_key "${lines[15]}")
+    image_dir_key=$(func_parser_key "${lines[16]}")
+    infer_img_dir=$(func_parser_value "${lines[16]}")
+    save_log_key=$(func_parser_key "${lines[17]}")
+    save_log_value=$(func_parser_value "${lines[17]}")
+    benchmark_key=$(func_parser_key "${lines[18]}")
+    benchmark_value=$(func_parser_value "${lines[18]}")
+    infer_key1=$(func_parser_key "${lines[19]}")
+    infer_value1=$(func_parser_value "${lines[19]}")
 fi
 
 LOG_PATH="./test_tipc/output"
@@ -157,8 +162,9 @@ function func_inference(){
                     set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                     set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                     set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
+                    set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
                     set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
-                    command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
+                    command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_params0} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                     eval $command
                     last_status=${PIPESTATUS[0]}
                     eval "cat ${_save_log_path}"
@@ -187,8 +193,9 @@ function func_inference(){
                         set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
                         set_precision=$(func_set_params "${precision_key}" "${precision}")
                         set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
+                        set_infer_params0=$(func_set_params "${save_log_key}" "${save_log_value}")
                         set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
-                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
+                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} ${set_infer_params0} > ${_save_log_path} 2>&1 "
                        eval $command
                         last_status=${PIPESTATUS[0]}
                         eval "cat ${_save_log_path}"
@@ -224,7 +231,7 @@ if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
             set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
             export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
             echo ${infer_run_exports[Count]}
-            echo $export_cmd 
+            echo $export_cmd
             eval $export_cmd
             status_export=$?
             status_check $status_export "${export_cmd}" "${status_log}"
@@ -233,7 +240,7 @@ if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
         fi
         #run inference
         is_quant=${infer_quant_flag[Count]}
-        if [ ${MODE} = "klquant_infer" ]; then
+        if [ ${MODE} = "klquant_whole_infer" ]; then
            is_quant="True"
         fi
         func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
@@ -334,7 +341,7 @@ else
 
         set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
         # save norm trained models to set pretrain for pact training and fpgm training
-        if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1]; then
+        if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
             load_norm_train_model=${set_eval_pretrain}
         fi
         # run eval
@@ -357,7 +364,7 @@ else
         #run inference
         eval $env
         save_infer_path="${save_log}"
-        if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
+        if [[ ${inference_dir} != "null" ]] && [[ ${inference_dir} != '##' ]]; then
             infer_model_dir="${save_infer_path}/${inference_dir}"
         else
             infer_model_dir=${save_infer_path}
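Note: the klquant_whole_infer branch above re-reads the first 17 lines of the config file and indexes them with func_parser_key / func_parser_value (defined in test_tipc/common_func.sh). A rough Python equivalent of that key:value parsing, with hypothetical sample lines, only to make the lines[N] indexing easier to follow:

    def parser_key(line: str) -> str:
        # Everything before the first ":" is the key.
        return line.split(":", 1)[0]

    def parser_value(line: str) -> str:
        # Everything after the first ":" is the value; empty if no ":".
        return line.split(":", 1)[1] if ":" in line else ""

    sample = [
        "===========================klquant_infer===========================",
        "model_name:PPOCRv2_ocr_det_kl",  # hypothetical config line
        "python:python3.7",               # hypothetical config line
    ]
    assert parser_key(sample[1]) == "model_name"
    assert parser_value(sample[2]) == "python3.7"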
diff --git a/tools/infer/predict_e2e.py b/tools/infer/predict_e2e.py
index 08b87f36b0670e98e54c3f38c9328fe9462a6d0f..c00d101aa601c05230da39bc19f0e5068bc80aa2 100755
--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -68,7 +68,6 @@ class TextE2E(object):
             postprocess_params["character_dict_path"] = args.e2e_char_dict_path
             postprocess_params["valid_set"] = args.e2e_pgnet_valid_set
             postprocess_params["mode"] = args.e2e_pgnet_mode
-            self.e2e_pgnet_polygon = args.e2e_pgnet_polygon
         else:
             logger.info("unknown e2e_algorithm:{}".format(self.e2e_algorithm))
             sys.exit(0)
diff --git a/tools/infer/predict_rec.py b/tools/infer/predict_rec.py
index 41982e3403b11dd4a1893f89af11a9201e0e15d7..7ec8eeadd3ac1232fc28287828a8383b258a2b3d 100755
--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -91,7 +91,7 @@ class TextRecognizer(object):
                 time_keys=[
                     'preprocess_time', 'inference_time', 'postprocess_time'
                 ],
-                warmup=2,
+                warmup=0,
                 logger=logger)
 
     def resize_norm_img(self, img, max_wh_ratio):
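Note: the warmup change above (warmup=2 -> warmup=0, with the --warmup flag default also flipped to False later in this diff) controls how many untimed runs precede measurement. A generic sketch of why benchmark loops separate warm-up from timed iterations (illustration only; auto_log's internals may differ):

    import time

    def benchmark(fn, warmup: int, iters: int) -> float:
        # Warm-up runs let caches and autotuning settle; they are not timed.
        for _ in range(warmup):
            fn()
        start = time.perf_counter()
        for _ in range(iters):
            fn()
        return (time.perf_counter() - start) / iters

    avg_s = benchmark(lambda: sum(range(100000)), warmup=2, iters=10)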
diff --git a/tools/infer/predict_system.py b/tools/infer/predict_system.py
index b5edd01589685a29a37dc20064b0d58e9d776fec..8d674809a5fe22e458fcb0c68419a7313e71d5f6 100755
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -49,11 +49,19 @@ class TextSystem(object):
         if self.use_angle_cls:
             self.text_classifier = predict_cls.TextClassifier(args)
 
-    def print_draw_crop_rec_res(self, img_crop_list, rec_res):
+        self.args = args
+        self.crop_image_res_index = 0
+
+    def draw_crop_rec_res(self, output_dir, img_crop_list, rec_res):
+        os.makedirs(output_dir, exist_ok=True)
         bbox_num = len(img_crop_list)
         for bno in range(bbox_num):
-            cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
-            logger.info(bno, rec_res[bno])
+            cv2.imwrite(
+                os.path.join(output_dir,
                             f"img_crop_{bno+self.crop_image_res_index}.jpg"),
+                img_crop_list[bno])
+            logger.debug(f"{bno}, {rec_res[bno]}")
+        self.crop_image_res_index += bbox_num
 
     def __call__(self, img, cls=True):
         ori_im = img.copy()
@@ -80,7 +88,9 @@ class TextSystem(object):
         rec_res, elapse = self.text_recognizer(img_crop_list)
         logger.debug("rec_res num : {}, elapse : {}".format(
             len(rec_res), elapse))
-        # self.print_draw_crop_rec_res(img_crop_list, rec_res)
+        if self.args.save_crop_res:
+            self.draw_crop_rec_res(self.args.crop_res_save_dir, img_crop_list,
+                                   rec_res)
         filter_boxes, filter_rec_res = [], []
         for box, rec_reuslt in zip(dt_boxes, rec_res):
             text, score = rec_reuslt
@@ -135,17 +145,17 @@ def main(args):
             if not flag:
                 img = cv2.imread(image_file)
             if img is None:
-                logger.info("error in loading image:{}".format(image_file))
+                logger.debug("error in loading image:{}".format(image_file))
                 continue
             starttime = time.time()
             dt_boxes, rec_res = text_sys(img)
             elapse = time.time() - starttime
             total_time += elapse
-            logger.info(
+            logger.debug(
                 str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
             for text, score in rec_res:
-                logger.info("{}, {:.3f}".format(text, score))
+                logger.debug("{}, {:.3f}".format(text, score))
 
             if is_visualize:
                 image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -160,19 +170,17 @@ def main(args):
                     scores,
                     drop_score=drop_score,
                     font_path=font_path)
-                draw_img_save = "./inference_results/"
-                if not os.path.exists(draw_img_save):
-                    os.makedirs(draw_img_save)
+                draw_img_save_dir = args.draw_img_save_dir
+                os.makedirs(draw_img_save_dir, exist_ok=True)
                 if flag:
                     image_file = image_file[:-3] + "png"
                 cv2.imwrite(
-                    os.path.join(draw_img_save, os.path.basename(image_file)),
+                    os.path.join(draw_img_save_dir, os.path.basename(image_file)),
                     draw_img[:, :, ::-1])
-                logger.info("The visualized image saved in {}".format(
-                    os.path.join(draw_img_save, os.path.basename(image_file))))
+                logger.debug("The visualized image saved in {}".format(
+                    os.path.join(draw_img_save_dir, os.path.basename(image_file))))
 
     logger.info("The predict total time is {}".format(time.time() - _st))
-    logger.info("\nThe predict total time is {}".format(total_time))
     if args.benchmark:
         text_sys.text_detector.autolog.report()
         text_sys.text_recognizer.autolog.report()
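Note: draw_crop_rec_res now keeps a running index across calls so crops from successive images do not overwrite each other. A standalone sketch of the same pattern (assumed helper class for illustration, not the repo's API):

    import os
    import cv2  # opencv-python

    class CropSaver:
        """Dump text-region crops with a monotonically increasing index."""

        def __init__(self, output_dir="./output"):
            self.output_dir = output_dir
            self.index = 0

        def save(self, img_crop_list):
            os.makedirs(self.output_dir, exist_ok=True)
            for bno, crop in enumerate(img_crop_list):
                cv2.imwrite(
                    os.path.join(self.output_dir,
                                 f"img_crop_{bno + self.index}.jpg"), crop)
            self.index += len(img_crop_list)  # offset for the next image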
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
old mode 100755
new mode 100644
index cab918419ab5efbc4a8a11d1669ca6b93e45e789..af50c5e6a8cb39faf416dbe7adb516c4db05aef5
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -96,7 +96,6 @@ def init_args():
     parser.add_argument(
         "--e2e_char_dict_path", type=str, default="./ppocr/utils/ic15_dict.txt")
     parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext')
-    parser.add_argument("--e2e_pgnet_polygon", type=str2bool, default=True)
     parser.add_argument("--e2e_pgnet_mode", type=str, default='fast')
 
     # params for text classifier
@@ -110,7 +109,13 @@ def init_args():
     parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
     parser.add_argument("--cpu_threads", type=int, default=10)
     parser.add_argument("--use_pdserving", type=str2bool, default=False)
-    parser.add_argument("--warmup", type=str2bool, default=True)
+    parser.add_argument("--warmup", type=str2bool, default=False)
+
+    # params for output
+    parser.add_argument(
+        "--draw_img_save_dir", type=str, default="./inference_results")
+    parser.add_argument("--save_crop_res", type=str2bool, default=False)
+    parser.add_argument("--crop_res_save_dir", type=str, default="./output")
 
     # multi-process
     parser.add_argument("--use_mp", type=str2bool, default=False)
@@ -185,6 +190,7 @@ def create_predictor(args, mode, logger):
         config.enable_use_gpu(args.gpu_mem, 0)
         if args.use_tensorrt:
             config.enable_tensorrt_engine(
+                workspace_size=1 << 30,
                 precision_mode=precision,
                 max_batch_size=args.max_batch_size,
                 min_subgraph_size=args.min_subgraph_size)
@@ -205,7 +211,7 @@ def create_predictor(args, mode, logger):
                 "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
             }
             max_input_shape = {
-                "x": [1, 3, 1280, 1280],
+                "x": [1, 3, 1536, 1536],
                 "conv2d_92.tmp_0": [1, 120, 400, 400],
                 "conv2d_91.tmp_0": [1, 24, 200, 200],
                 "conv2d_59.tmp_0": [1, 96, 400, 400],
@@ -255,7 +261,7 @@ def create_predictor(args, mode, logger):
             opt_input_shape.update(opt_pact_shape)
         elif mode == "rec":
             min_input_shape = {"x": [1, 3, 32, 10]}
-            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1024]}
+            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]}
             opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
         elif mode == "cls":
             min_input_shape = {"x": [1, 3, 48, 10]}
@@ -305,11 +311,10 @@ def create_predictor(args, mode, logger):
 
 def get_infer_gpuid():
-    cmd = "nvidia-smi"
-    res = os.popen(cmd).readlines()
-    if len(res) == 0:
-        return None
-    cmd = "env | grep CUDA_VISIBLE_DEVICES"
+    if not paddle.fluid.core.is_compiled_with_rocm():
+        cmd = "env | grep CUDA_VISIBLE_DEVICES"
+    else:
+        cmd = "env | grep HIP_VISIBLE_DEVICES"
     env_cuda = os.popen(cmd).readlines()
     if len(env_cuda) == 0:
         return 0
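Note: the shape changes above (1280 -> 1536 for det, 1024 -> 1536 for rec) widen the TensorRT dynamic-shape range, and workspace_size=1 << 30 gives TRT a 1 GiB scratch buffer. A hedged sketch of how such dicts are handed to Paddle Inference (model paths and shape values are illustrative; create_predictor does the equivalent further down in utility.py):

    from paddle.inference import Config, PrecisionType

    config = Config("det_infer/inference.pdmodel",   # hypothetical paths
                    "det_infer/inference.pdiparams")
    config.enable_use_gpu(500, 0)                    # 500 MB GPU memory, GPU 0
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,                      # 1 GiB, as set in this diff
        precision_mode=PrecisionType.Float32,
        max_batch_size=10,
        min_subgraph_size=15)
    # Dynamic shapes: TRT accepts any "x" between min and max at runtime.
    config.set_trt_dynamic_shape_info(
        {"x": [1, 3, 50, 50]},      # min
        {"x": [1, 3, 1536, 1536]},  # max, raised from 1280 in this diff
        {"x": [1, 3, 640, 640]})    # opt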
diff --git a/tools/infer_det.py b/tools/infer_det.py
index bb2cca7362e81494018aa3471664d60bef1b852c..1c679e0faf0d3ebdb6ca7ed4c317ce3eecfa910f 100755
--- a/tools/infer_det.py
+++ b/tools/infer_det.py
@@ -53,6 +53,7 @@ def draw_det_res(dt_boxes, config, img, img_name, save_path):
         logger.info("The detected Image saved in {}".format(save_path))
 
 
+@paddle.no_grad()
 def main():
     global_config = config['Global']
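Note: the @paddle.no_grad() decorator added above disables gradient tracking for the whole of main(), which is pure inference, trimming memory use and autograd overhead. A minimal sketch of the two equivalent usages (model and tensor names are illustrative):

    import paddle

    @paddle.no_grad()
    def predict(model, x):
        # No gradients are recorded for anything computed in here.
        model.eval()
        return model(x)

    # ...or as a context manager around just the forward pass:
    # with paddle.no_grad():
    #     out = model(x)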