diff --git a/benchmark/readme.md b/benchmark/readme.md index 7f7704cca5341d495dfbcdc66ddfd29fbea1e1df..d90d21468e7c9d0c9068a273ae704c0c8a086eab 100644 --- a/benchmark/readme.md +++ b/benchmark/readme.md @@ -1,5 +1,5 @@ -# PaddleOCR DB/EAST 算法训练benchmark测试 +# PaddleOCR DB/EAST/PSE 算法训练benchmark测试 PaddleOCR/benchmark目录下的文件用于获取并分析训练日志。 训练采用icdar2015数据集,包括1000张训练图像和500张测试图像。模型配置采用resnet18_vd作为backbone,分别训练batch_size=8和batch_size=16的情况。 @@ -18,7 +18,7 @@ run_det.sh 执行方式如下: ``` # cd PaddleOCR/ -bash benchmark/run_det.sh +bash benchmark/run_det.sh ``` 以DB为例,将得到四个日志文件,如下: @@ -28,7 +28,3 @@ det_res18_db_v2.0_sp_bs8_fp32_1 det_res18_db_v2.0_mp_bs16_fp32_1 det_res18_db_v2.0_mp_bs8_fp32_1 ``` - - - - diff --git a/benchmark/run_benchmark_det.sh b/benchmark/run_benchmark_det.sh index 26bcda5d20ba4e4d0498da28aafb93f29468169d..46144b43a09baf787216728eabaab5f8548fa924 100644 --- a/benchmark/run_benchmark_det.sh +++ b/benchmark/run_benchmark_det.sh @@ -6,7 +6,7 @@ function _set_params(){ run_mode=${1:-"sp"} # 单卡sp|多卡mp batch_size=${2:-"64"} fp_item=${3:-"fp32"} # fp32|fp16 - max_iter=${4:-"500"} # 可选,如果需要修改代码提前中断 + max_iter=${4:-"10"} # 可选,如果需要修改代码提前中断 model_name=${5:-"model_name"} run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 后续QA设置该参数 @@ -20,7 +20,7 @@ function _train(){ echo "Train on ${num_gpu_devices} GPUs" echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" - train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} " + train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} Global.eval_batch_step=[0,20000] Global.print_batch_step=2" case ${run_mode} in sp) train_cmd="python3.7 tools/train.py "${train_cmd}"" @@ -39,18 +39,24 @@ function _train(){ echo -e "${model_name}, SUCCESS" export job_fail_flag=0 fi - kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'` if [ $run_mode = "mp" -a -d mylog ]; then rm ${log_file} cp mylog/workerlog.0 ${log_file} fi +} - # run log analysis - analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_szie} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec" +function _analysis_log(){ + analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${run_mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_size} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec" eval $analysis_cmd } +function _kill_process(){ + kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'` +} + + _set_params $@ _train - +_analysis_log +_kill_process \ No newline at end of file diff --git a/benchmark/run_det.sh b/benchmark/run_det.sh index c507510c615a60177e07300976947b010dbae990..68109b3ab2c3b8b61a0c90b4b31fd855c1ba2d46 100644 --- a/benchmark/run_det.sh +++ b/benchmark/run_det.sh @@ -3,11 +3,11 @@ # 1 安装该模型需要的依赖 (如需开启优化策略请注明) python3.7 -m pip install -r requirements.txt # 2 拷贝该模型需要数据、预训练模型 -wget -c -p ./tain_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../ -wget -c -p ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams +wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar 
xf icdar2015.tar && cd ../ +wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams # 3 批量运行(如不方便批量,1,2需放到单个模型中) -model_mode_list=(det_res18_db_v2.0 det_r50_vd_east) +model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse) fp_item_list=(fp32) bs_list=(8 16) for model_mode in ${model_mode_list[@]}; do @@ -15,11 +15,11 @@ for model_mode in ${model_mode_list[@]}; do for bs_item in ${bs_list[@]}; do echo "index is speed, 1gpus, begin, ${model_name}" run_mode=sp - CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode} # (5min) + CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} # (5min) sleep 60 echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" run_mode=mp - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode} + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} sleep 60 done done diff --git a/configs/rec/rec_mtb_nrtr.yml b/configs/rec/rec_mtb_nrtr.yml index 392afc98d52194fdd144ccee626dbda4ddc547e5..04267500854310dc6d5df9318bb8c056c65cd5b5 100644 --- a/configs/rec/rec_mtb_nrtr.yml +++ b/configs/rec/rec_mtb_nrtr.yml @@ -17,7 +17,7 @@ Global: character_dict_path: ppocr/utils/EN_symbol_dict.txt max_text_length: 25 infer_mode: False - use_space_char: True + use_space_char: False save_res_path: ./output/rec/predicts_nrtr.txt Optimizer: diff --git a/deploy/lite/ocr_db_crnn.cc b/deploy/lite/ocr_db_crnn.cc index 26891c8566a10d26a23beeee87ec7275088c6961..9a7d6548654bdd21110f0fe343efd92a13dcb4c0 100644 --- a/deploy/lite/ocr_db_crnn.cc +++ b/deploy/lite/ocr_db_crnn.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle_api.h" // NOLINT
 #include <chrono>
+#include "paddle_api.h" // NOLINT
+#include "paddle_place.h"

 #include "cls_process.h"
 #include "crnn_process.h"
 #include "db_post_process.h"
+#include "AutoLog/auto_log/lite_autolog.h"

 using namespace paddle::lite_api; // NOLINT
 using namespace std;
@@ -27,7 +29,7 @@ void NeonMeanScale(const float *din, float *dout, int size,
                    const std::vector<float> mean,
                    const std::vector<float> scale) {
   if (mean.size() != 3 || scale.size() != 3) {
-    std::cerr << "[ERROR] mean or scale size must equal to 3\n";
+    std::cerr << "[ERROR] mean or scale size must equal to 3" << std::endl;
     exit(1);
   }
   float32x4_t vmean0 = vdupq_n_f32(mean[0]);
@@ -159,7 +161,8 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
                  std::vector<float> &rec_text_score,
                  std::vector<std::string> charactor_dict,
                  std::shared_ptr<PaddlePredictor> predictor_cls,
-                 int use_direction_classify) {
+                 int use_direction_classify,
+                 std::vector<double> *times) {
   std::vector<float> mean = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
@@ -226,14 +229,15 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,

 std::vector<std::vector<std::vector<int>>>
 RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
-            std::map<std::string, double> Config) {
+            std::map<std::string, double> Config, std::vector<double> *times) {
   // Read img
   int max_side_len = int(Config["max_side_len"]);
   int det_db_use_dilate = int(Config["det_db_use_dilate"]);

   cv::Mat srcimg;
   img.copyTo(srcimg);
-
+
+  auto preprocess_start = std::chrono::steady_clock::now();
   std::vector<float> ratio_hw;
   img = DetResizeImg(img, max_side_len, ratio_hw);
   cv::Mat img_fp;
@@ -248,8 +252,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
   std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
   const float *dimg = reinterpret_cast<const float *>(img_fp.data);
   NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
+  auto preprocess_end = std::chrono::steady_clock::now();

   // Run predictor
+  auto inference_start = std::chrono::steady_clock::now();
   predictor->Run();

   // Get output and post process
@@ -257,8 +263,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
       std::move(predictor->GetOutput(0)));
   auto *outptr = output_tensor->data<float>();
   auto shape_out = output_tensor->shape();
+  auto inference_end = std::chrono::steady_clock::now();

   // Save output
+  auto postprocess_start = std::chrono::steady_clock::now();
   float pred[shape_out[2] * shape_out[3]];
   unsigned char cbuf[shape_out[2] * shape_out[3]];

@@ -287,14 +295,35 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
   std::vector<std::vector<std::vector<int>>> filter_boxes =
       FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
+  auto postprocess_end = std::chrono::steady_clock::now();
+
+  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+  times->push_back(double(preprocess_diff.count() * 1000));
+  std::chrono::duration<float> inference_diff = inference_end - inference_start;
+  times->push_back(double(inference_diff.count() * 1000));
+  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
+  times->push_back(double(postprocess_diff.count() * 1000));

   return filter_boxes;
 }

-std::shared_ptr<PaddlePredictor> loadModel(std::string model_file) {
+std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, std::string power_mode, int num_threads) {
   MobileConfig config;
   config.set_model_from_file(model_file);
+
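+  // Map the power_mode string onto Paddle-Lite's PowerMode enum:
+  // LITE_POWER_HIGH binds worker threads to the big cores, LITE_POWER_LOW
+  // to the little cores; other Paddle-Lite modes are not exposed here.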
+  if (power_mode == "LITE_POWER_HIGH"){
+    config.set_power_mode(LITE_POWER_HIGH);
+  } else {
+    if (power_mode == "LITE_POWER_LOW") {
+      config.set_power_mode(LITE_POWER_LOW);
+    } else {
+      std::cerr << "Only support LITE_POWER_HIGH or LITE_POWER_LOW." << std::endl;
+      exit(1);
+    }
+  }
+
+  config.set_threads(num_threads);
+
   std::shared_ptr<PaddlePredictor> predictor =
       CreatePaddlePredictor<MobileConfig>(config);
   return predictor;
@@ -354,60 +383,255 @@ std::map<std::string, double> LoadConfigTxt(std::string config_path) {
   return dict;
 }

-int main(int argc, char **argv) {
-  if (argc < 5) {
-    std::cerr << "[ERROR] usage: " << argv[0]
-              << " det_model_file cls_model_file rec_model_file image_path "
-                 "charactor_dict\n";
+void check_params(int argc, char **argv) {
+  if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) {
+    std::cerr << "Please choose one mode of [det, rec, system] !" << std::endl;
     exit(1);
   }
-  std::string det_model_file = argv[1];
-  std::string rec_model_file = argv[2];
-  std::string cls_model_file = argv[3];
-  std::string img_path = argv[4];
-  std::string dict_path = argv[5];
+  if (strcmp(argv[1], "det") == 0) {
+    if (argc < 10){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " det det_model precision num_threads batchsize power_mode img_dir det_config lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (strcmp(argv[1], "rec") == 0) {
+    if (argc < 10){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " rec rec_model precision num_threads batchsize power_mode img_dir key_txt lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (strcmp(argv[1], "system") == 0) {
+    if (argc < 12){
+      std::cerr << "[ERROR] usage:" << argv[0]
+                << " system det_model rec_model clas_model precision num_threads batchsize power_mode img_dir det_config key_txt lite_benchmark_value" << std::endl;
+      exit(1);
+    }
+  }
+}
+
+void system(char **argv){
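+  // argv layout in system mode (see check_params above): argv[2..4] are
+  // the det/rec/cls model files, argv[5] precision (FP32|INT8),
+  // argv[6] num_threads, argv[7] batchsize, argv[8] power_mode,
+  // argv[9] image dir, argv[10] det config, argv[11] dict file.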
+  std::string det_model_file = argv[2];
+  std::string rec_model_file = argv[3];
+  std::string cls_model_file = argv[4];
+  std::string precision = argv[5];
+  std::string num_threads = argv[6];
+  std::string batchsize = argv[7];
+  std::string power_mode = argv[8];
+  std::string img_dir = argv[9];
+  std::string det_config_path = argv[10];
+  std::string dict_path = argv[11];
+
+  if (strcmp(argv[5], "FP32") != 0 && strcmp(argv[5], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);

   //// load config from txt file
-  auto Config = LoadConfigTxt("./config.txt");
+  auto Config = LoadConfigTxt(det_config_path);
   int use_direction_classify = int(Config["use_direction_classify"]);

-  auto start = std::chrono::system_clock::now();
+  auto charactor_dict = ReadDict(dict_path);
+  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
+  charactor_dict.push_back(" ");
+
+  auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
+  auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));
+  auto cls_predictor = loadModel(cls_model_file, power_mode, std::stoi(num_threads));

-  auto det_predictor = loadModel(det_model_file);
-  auto rec_predictor = loadModel(rec_model_file);
-  auto cls_predictor = loadModel(cls_model_file);
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }
+
+    std::vector<double> det_times;
+    auto boxes = RunDetModel(det_predictor, srcimg, Config, &det_times);
+
+    std::vector<std::string> rec_text;
+    std::vector<float> rec_text_score;
+
+    std::vector<double> rec_times;
+    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
+                charactor_dict, cls_predictor, use_direction_classify, &rec_times);
+
+    //// visualization
+    auto img_vis = Visualization(srcimg, boxes);
+
+    //// print recognized text
+    for (int i = 0; i < rec_text.size(); i++) {
+      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
+                << std::endl;
+    }
+  }
+}
+
+void det(int argc, char **argv) {
+  std::string det_model_file = argv[2];
+  std::string precision = argv[3];
+  std::string num_threads = argv[4];
+  std::string batchsize = argv[5];
+  std::string power_mode = argv[6];
+  std::string img_dir = argv[7];
+  std::string det_config_path = argv[8];
+
+  if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);
+
+  //// load config from txt file
+  auto Config = LoadConfigTxt(det_config_path);
+
+  auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
+
+  std::vector<double> time_info = {0, 0, 0};
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
+
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }
+
+    std::vector<double> times;
+    auto boxes = RunDetModel(det_predictor, srcimg, Config, &times);
+
+    //// visualization
+    auto img_vis = Visualization(srcimg, boxes);
+    std::cout << boxes.size() << " bboxes have detected:" << std::endl;
+
+    // for (int i=0; i<boxes.size(); i++){
+    //   std::cout << "The " << i << " box:" << std::endl;
+    //   for (int j=0; j<4; j++){
+    //     for (int k=0; k<2; k++){
+    //       std::cout << boxes[i][j][k] << "\t";
+    //     }
+    //   }
+    //   std::cout << std::endl;
+    // }
+    time_info[0] += times[0];
+    time_info[1] += times[1];
+    time_info[2] += times[2];
+  }
+
+  if (strcmp(argv[9], "True") == 0) {
+    AutoLogger autolog(det_model_file,
+                       0,
+                       0,
+                       0,
+                       std::stoi(num_threads),
+                       std::stoi(batchsize),
+                       "dynamic",
+                       precision,
+                       power_mode,
+                       time_info,
+                       cv_all_img_names.size());
+    autolog.report();
+  }
+}
+
+void rec(int argc, char **argv) {
+  std::string rec_model_file = argv[2];
+  std::string precision = argv[3];
+  std::string num_threads = argv[4];
+  std::string batchsize = argv[5];
+  std::string power_mode = argv[6];
+  std::string img_dir = argv[7];
+  std::string dict_path = argv[8];
+
+  if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
+    std::cerr << "Only support FP32 or INT8." << std::endl;
+    exit(1);
+  }
+
+  std::vector<cv::String> cv_all_img_names;
+  cv::glob(img_dir, cv_all_img_names);

   auto charactor_dict = ReadDict(dict_path);
   charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
   charactor_dict.push_back(" ");

-  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-  auto boxes = RunDetModel(det_predictor, srcimg, Config);
+  auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));

-  std::vector<std::string> rec_text;
-  std::vector<float> rec_text_score;
+  std::shared_ptr<PaddlePredictor> cls_predictor;

-  RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
-              charactor_dict, cls_predictor, use_direction_classify);
+  std::vector<double> time_info = {0, 0, 0};
+  for (int i = 0; i < cv_all_img_names.size(); ++i) {
+    std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
+    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);

-  auto end = std::chrono::system_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+    if (!srcimg.data) {
+      std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
+      exit(1);
+    }

-  //// visualization
-  auto img_vis = Visualization(srcimg, boxes);
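+    // rec mode has no detector in front of it, so wrap the whole image in
+    // a single pseudo-box (corners listed clockwise from the top-left) and
+    // hand it to the recognizer as-is.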
+    int width = srcimg.cols;
+    int height = srcimg.rows;
+    std::vector<int> upper_left = {0, 0};
+    std::vector<int> upper_right = {width, 0};
+    std::vector<int> lower_right = {width, height};
+    std::vector<int> lower_left = {0, height};
+    std::vector<std::vector<int>> box = {upper_left, upper_right, lower_right, lower_left};
+    std::vector<std::vector<std::vector<int>>> boxes = {box};
+
+    std::vector<std::string> rec_text;
+    std::vector<float> rec_text_score;
+    std::vector<double> times;
+    RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
+                charactor_dict, cls_predictor, 0, &times);
+
+    //// print recognized text
+    for (int i = 0; i < rec_text.size(); i++) {
+      std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
+                << std::endl;
+    }
+  }
+  // TODO: support autolog
+  if (strcmp(argv[9], "True") == 0) {
+    AutoLogger autolog(rec_model_file,
+                       0,
+                       0,
+                       0,
+                       std::stoi(num_threads),
+                       std::stoi(batchsize),
+                       "dynamic",
+                       precision,
+                       power_mode,
+                       time_info,
+                       cv_all_img_names.size());
+    autolog.report();
+  }
+}
+
+int main(int argc, char **argv) {
+  check_params(argc, argv);
+  std::cout << "mode: " << argv[1] << endl;

-  //// print recognized text
-  for (int i = 0; i < rec_text.size(); i++) {
-    std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
-              << std::endl;
+  if (strcmp(argv[1], "system") == 0) {
+    system(argv);
   }

-  std::cout << "花费了"
-            << double(duration.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den
-            << "秒" << std::endl;
+  if (strcmp(argv[1], "det") == 0) {
+    det(argc, argv);
+  }
+
+  if (strcmp(argv[1], "rec") == 0) {
+    rec(argc, argv);
+  }

   return 0;
-}
\ No newline at end of file
+}
diff --git a/doc/doc_ch/enhanced_ctc_loss.md b/doc/doc_ch/enhanced_ctc_loss.md
index 5525c7785f0a8fc642cebc82674400c2487558f9..8c0856a7a7bceedbcc0a48bb1af6658afa720886 100644
--- a/doc/doc_ch/enhanced_ctc_loss.md
+++ b/doc/doc_ch/enhanced_ctc_loss.md
@@ -64,7 +64,7 @@ C-CTC Loss是CTC Loss + Center Loss的简称。 其中Center Loss出自论文 <A Discriminative Feature Learning Approach for Deep Face Recognition>
 以配置文件`configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml`为例, center提取命令如下所示:
 ```
-python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model: "./output/rec_mobile_pp-OCRv2/best_accuracy"
+python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model="./output/rec_mobile_pp-OCRv2/best_accuracy"
 ```
 运行完后,会在PaddleOCR主目录下生成`train_center.pkl`.
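The hunk above matters because PaddleOCR's `-o` overrides are split as `key=value` pairs, so the YAML-style `Global.pretrained_model: "..."` form is never parsed into the config. A rough, illustrative sketch of this kind of dotted-override merging — hypothetical helper, not the repo's actual `tools/program.py` code:

```python
def merge_overrides(config, opts):
    """Merge '-o Key.Sub=value' style overrides into a nested config dict."""
    for opt in opts:
        key, sep, value = opt.partition("=")
        if not sep:
            # This is exactly what a "key: value" override would trip over.
            raise ValueError("override '{}' must be key=value".format(opt))
        node = config
        *parents, leaf = key.split(".")
        for p in parents:
            node = node.setdefault(p, {})
        node[leaf] = value
    return config


cfg = merge_overrides({}, ["Global.pretrained_model=./output/rec_mobile_pp-OCRv2/best_accuracy"])
print(cfg["Global"]["pretrained_model"])  # ./output/rec_mobile_pp-OCRv2/best_accuracy
```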
diff --git a/ppocr/losses/rec_nrtr_loss.py b/ppocr/losses/rec_nrtr_loss.py index 41714dd2a3ae15eeedc62521d97935f68271c598..200a6d0486dbf6f76dd674eb58f641b31a70f31c 100644 --- a/ppocr/losses/rec_nrtr_loss.py +++ b/ppocr/losses/rec_nrtr_loss.py @@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer): log_prb = F.log_softmax(pred, axis=1) non_pad_mask = paddle.not_equal( tgt, paddle.zeros( - tgt.shape, dtype='int64')) + tgt.shape, dtype=tgt.dtype)) loss = -(one_hot * log_prb).sum(axis=1) loss = loss.masked_select(non_pad_mask).mean() else: diff --git a/ppocr/postprocess/__init__.py b/ppocr/postprocess/__init__.py index 3a4ebf52a3bd91ffd509b113103dab900588b0bd..5ca4e6bb96fc6f37ef67a2fb0b8c2496e1a83d77 100644 --- a/ppocr/postprocess/__init__.py +++ b/ppocr/postprocess/__init__.py @@ -29,10 +29,7 @@ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, Di TableLabelDecode, NRTRLabelDecode, SARLabelDecode , SEEDLabelDecode from .cls_postprocess import ClsPostProcess from .pg_postprocess import PGPostProcess - -if platform.system() != "Windows": - # pse is not support in Windows - from .pse_postprocess import PSEPostProcess +from .pse_postprocess import PSEPostProcess def build_post_process(config, global_config=None): diff --git a/ppocr/postprocess/pse_postprocess/pse/__init__.py b/ppocr/postprocess/pse_postprocess/pse/__init__.py index 97b8d8aff0cf229a4e3ec1961638273bd201822a..0536a32ea5614a8f1826ac2550b1f12518ac53e5 100644 --- a/ppocr/postprocess/pse_postprocess/pse/__init__.py +++ b/ppocr/postprocess/pse_postprocess/pse/__init__.py @@ -17,7 +17,12 @@ import subprocess python_path = sys.executable -if subprocess.call('cd ppocr/postprocess/pse_postprocess/pse;{} setup.py build_ext --inplace;cd -'.format(python_path), shell=True) != 0: - raise RuntimeError('Cannot compile pse: {}'.format(os.path.dirname(os.path.realpath(__file__)))) +ori_path = os.getcwd() +os.chdir('ppocr/postprocess/pse_postprocess/pse') +if subprocess.call( + '{} setup.py build_ext --inplace'.format(python_path), shell=True) != 0: + raise RuntimeError('Cannot compile pse: {}'.format( + os.path.dirname(os.path.realpath(__file__)))) +os.chdir(ori_path) -from .pse import pse \ No newline at end of file +from .pse import pse diff --git a/PTDN/common_func.sh b/test_tipc/common_func.sh similarity index 100% rename from PTDN/common_func.sh rename to test_tipc/common_func.sh diff --git a/PTDN/compare_results.py b/test_tipc/compare_results.py similarity index 99% rename from PTDN/compare_results.py rename to test_tipc/compare_results.py index 35af38809fe7d564707d0d538f7d0159cb6edfbd..e28410ed6cb26aab7557025c06b2541a7d27c2c1 100644 --- a/PTDN/compare_results.py +++ b/test_tipc/compare_results.py @@ -32,6 +32,7 @@ def run_shell_command(cmd): else: return None + def parser_results_from_log_by_name(log_path, names_list): if not os.path.exists(log_path): raise ValueError("The log file {} does not exists!".format(log_path)) @@ -52,6 +53,7 @@ def parser_results_from_log_by_name(log_path, names_list): parser_results[name] = result return parser_results + def load_gt_from_file(gt_file): if not os.path.exists(gt_file): raise ValueError("The log file {} does not exists!".format(gt_file)) diff --git a/PTDN/configs/det_mv3_db.yml b/test_tipc/configs/det_mv3_db.yml similarity index 100% rename from PTDN/configs/det_mv3_db.yml rename to test_tipc/configs/det_mv3_db.yml diff --git a/PTDN/configs/det_r50_vd_db.yml b/test_tipc/configs/det_r50_vd_db.yml similarity index 100% rename from PTDN/configs/det_r50_vd_db.yml rename to 
test_tipc/configs/det_r50_vd_db.yml diff --git a/PTDN/configs/ppocr_det_mobile_params.txt b/test_tipc/configs/ppocr_det_mobile_params.txt similarity index 66% rename from PTDN/configs/ppocr_det_mobile_params.txt rename to test_tipc/configs/ppocr_det_mobile_params.txt index 63a78fb39f05552651fe02832e6e2622f5cba155..3442627613b20b687566ce9e84d7404c4a836e83 100644 --- a/PTDN/configs/ppocr_det_mobile_params.txt +++ b/test_tipc/configs/ppocr_det_mobile_params.txt @@ -1,21 +1,21 @@ ===========================train_params=========================== model_name:ocr_det python:python3.7 -gpu_list:0|0,1 -Global.use_gpu:True|True -Global.auto_cast:null -Global.epoch_num:lite_train_infer=1|whole_train_infer=300 +gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1 +Global.use_gpu:True|True|True +Global.auto_cast:fp32|amp +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4 +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ null:null ## trainer:norm_train|pact_train|fpgm_train -norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained -pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o -fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy +norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained +pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy distill_train:null null:null null:null @@ -27,13 +27,13 @@ null:null ===========================infer_params=========================== Global.save_inference_dir:./output/ Global.pretrained_model: -norm_export:tools/export_model.py -c tests/configs/det_mv3_db.yml -o -quant_export:deploy/slim/quantization/export_model.py -c tests/configs/det_mv3_db.yml -o -fpgm_export:deploy/slim/prune/export_prune_model.py -c tests/configs/det_mv3_db.yml -o +norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o +fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o distill_export:null export1:null export2:null -## +inference_dir:null train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o infer_quant:False @@ -98,3 +98,13 @@ null:null --benchmark:True null:null null:null +===========================lite_params=========================== +inference:./ocr_db_crnn det +infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb +--cpu_threads:1|4 +--batch_size:1 +--power_mode:LITE_POWER_HIGH|LITE_POWER_LOW +--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg +--config_dir:./config.txt +--rec_dict_dir:./ppocr_keys_v1.txt 
+--benchmark:True diff --git a/PTDN/configs/ppocr_det_server_params.txt b/test_tipc/configs/ppocr_det_server_params.txt similarity index 100% rename from PTDN/configs/ppocr_det_server_params.txt rename to test_tipc/configs/ppocr_det_server_params.txt diff --git a/PTDN/configs/ppocr_rec_mobile_params.txt b/test_tipc/configs/ppocr_rec_mobile_params.txt similarity index 100% rename from PTDN/configs/ppocr_rec_mobile_params.txt rename to test_tipc/configs/ppocr_rec_mobile_params.txt diff --git a/PTDN/configs/ppocr_rec_server_params.txt b/test_tipc/configs/ppocr_rec_server_params.txt similarity index 100% rename from PTDN/configs/ppocr_rec_server_params.txt rename to test_tipc/configs/ppocr_rec_server_params.txt diff --git a/PTDN/configs/ppocr_sys_mobile_params.txt b/test_tipc/configs/ppocr_sys_mobile_params.txt similarity index 100% rename from PTDN/configs/ppocr_sys_mobile_params.txt rename to test_tipc/configs/ppocr_sys_mobile_params.txt diff --git a/PTDN/configs/ppocr_sys_server_params.txt b/test_tipc/configs/ppocr_sys_server_params.txt similarity index 100% rename from PTDN/configs/ppocr_sys_server_params.txt rename to test_tipc/configs/ppocr_sys_server_params.txt diff --git a/test_tipc/configs/ppocrv2_det_mobile_params.txt b/test_tipc/configs/ppocrv2_det_mobile_params.txt new file mode 100644 index 0000000000000000000000000000000000000000..423cb979f0d35a62324958b36dd9d115211c19d4 --- /dev/null +++ b/test_tipc/configs/ppocrv2_det_mobile_params.txt @@ -0,0 +1,51 @@ +===========================train_params=========================== +model_name:PPOCRv2_ocr_det +python:python3.7 +gpu_list:0|0,1 +Global.use_gpu:True|True +Global.auto_cast:fp32 +Global.epoch_num:lite_train_infer=1|whole_train_infer=500 +Global.save_model_dir:./output/ +Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4 +Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ +null:null +## +trainer:norm_train|pact_train +norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +fpgm_train:null +distill_train:null +null:null +null:null +## +===========================eval_params=========================== +eval:null +null:null +## +===========================infer_params=========================== +Global.save_inference_dir:./output/ +Global.pretrained_model: +norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o +fpgm_export: +distill_export:null +export1:null +export2:null +inference_dir:Student +infer_model:./inference/ch_PP-OCRv2_det_infer/ +infer_export:null +infer_quant:False +inference:tools/infer/predict_det.py +--use_gpu:True|False +--enable_mkldnn:True|False +--cpu_threads:1|6 +--rec_batch_num:1 +--use_tensorrt:False|True +--precision:fp32|fp16|int8 +--det_model_dir: +--image_dir:./inference/ch_det_data_50/all-sum-510/ +null:null +--benchmark:True +null:null diff --git a/PTDN/configs/rec_icdar15_r34_train.yml b/test_tipc/configs/rec_icdar15_r34_train.yml similarity index 100% rename from PTDN/configs/rec_icdar15_r34_train.yml rename to test_tipc/configs/rec_icdar15_r34_train.yml diff --git a/PTDN/docs/compare_cpp_right.png b/test_tipc/docs/compare_cpp_right.png similarity index 100% rename from PTDN/docs/compare_cpp_right.png rename to 
test_tipc/docs/compare_cpp_right.png
diff --git a/PTDN/docs/compare_cpp_wrong.png b/test_tipc/docs/compare_cpp_wrong.png
similarity index 100%
rename from PTDN/docs/compare_cpp_wrong.png
rename to test_tipc/docs/compare_cpp_wrong.png
diff --git a/PTDN/docs/compare_right.png b/test_tipc/docs/compare_right.png
similarity index 100%
rename from PTDN/docs/compare_right.png
rename to test_tipc/docs/compare_right.png
diff --git a/PTDN/docs/compare_wrong.png b/test_tipc/docs/compare_wrong.png
similarity index 100%
rename from PTDN/docs/compare_wrong.png
rename to test_tipc/docs/compare_wrong.png
diff --git a/PTDN/docs/guide.png b/test_tipc/docs/guide.png
similarity index 100%
rename from PTDN/docs/guide.png
rename to test_tipc/docs/guide.png
diff --git a/test_tipc/docs/install.md b/test_tipc/docs/install.md
new file mode 100644
index 0000000000000000000000000000000000000000..f17c264f3987c8cc2a756e045ebacb8fba5c277a
--- /dev/null
+++ b/test_tipc/docs/install.md
@@ -0,0 +1,121 @@
+## 1. 环境准备
+
+本教程适用于test_tipc目录下基础功能测试的运行环境搭建。
+
+推荐环境:
+- CUDA 10.1/10.2
+- CUDNN 7.6/cudnn8.1
+- TensorRT 6.1.0.5 / 7.1 / 7.2
+
+环境配置可以选择docker镜像安装,或者在本地环境Python搭建环境。推荐使用docker镜像安装,避免不必要的环境配置。
+
+## 2. Docker 镜像安装
+
+推荐docker镜像安装,按照如下命令创建镜像,当前目录映射到镜像中的`/paddle`目录下
+```
+nvidia-docker run --name paddle -it -v $PWD:/paddle paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
+cd /paddle
+
+# 安装带TRT的paddle
+pip3.7 install https://paddle-wheel.bj.bcebos.com/with-trt/2.1.3/linux-gpu-cuda10.1-cudnn7-mkl-gcc8.2-trt6-avx/paddlepaddle_gpu-2.1.3.post101-cp37-cp37m-linux_x86_64.whl
+```
+
+## 3 Python 环境构建
+
+非docker环境下,环境配置比较灵活,推荐环境组合配置:
+- CUDA10.1 + CUDNN7.6 + TensorRT 6
+- CUDA10.2 + CUDNN8.1 + TensorRT 7
+- CUDA11.1 + CUDNN8.1 + TensorRT 7
+
+下面以 CUDA10.2 + CUDNN8.1 + TensorRT 7 配置为例,介绍环境配置的流程。
+
+### 3.1 安装CUDNN
+
+如果当前环境满足CUDNN版本的要求,可以跳过此步骤。
+
+以CUDNN8.1的安装为例,安装步骤如下,首先下载CUDNN,从[Nvidia官网](https://developer.nvidia.com/rdp/cudnn-archive)下载CUDNN8.1版本,下载符合当前系统版本的三个deb文件,分别是:
+- cuDNN Runtime Library ,如:libcudnn8_8.1.0.77-1+cuda10.2_amd64.deb
+- cuDNN Developer Library ,如:libcudnn8-dev_8.1.0.77-1+cuda10.2_amd64.deb
+- cuDNN Code Samples,如:libcudnn8-samples_8.1.0.77-1+cuda10.2_amd64.deb
+
+deb安装可以参考[官方文档](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html#installlinux-deb),安装方式如下
+```
+# x.x.x表示下载的版本号
+# $HOME为工作目录
+sudo dpkg -i libcudnn8_x.x.x-1+cudax.x_arm64.deb
+sudo dpkg -i libcudnn8-dev_8.x.x.x-1+cudax.x_arm64.deb
+sudo dpkg -i libcudnn8-samples_8.x.x.x-1+cudax.x_arm64.deb

+# 验证是否正确安装
+cp -r /usr/src/cudnn_samples_v8/ $HOME
+cd $HOME/cudnn_samples_v8/mnistCUDNN

+# 编译
+make clean && make
+./mnistCUDNN
+```
+如果运行完mnistCUDNN后提示运行成功,则表示安装成功。如果运行后出现freeimage相关的报错,需要按照提示安装freeimage库:
+```
+sudo apt-get install libfreeimage-dev
+sudo apt-get install libfreeimage
+```
+
+### 3.2 安装TensorRT
+
+首先,从[Nvidia官网TensorRT板块](https://developer.nvidia.com/tensorrt-getting-started)下载TensorRT,这里选择7.1.3.4版本的TensorRT,注意选择适合自己系统版本和CUDA版本的TensorRT,另外建议下载TAR package的安装包。
+
+以Ubuntu16.04+CUDA10.2为例,下载并解压后可以参考[官方文档](https://docs.nvidia.com/deeplearning/tensorrt/archives/tensorrt-713/install-guide/index.html#installing-tar)的安装步骤,按照如下步骤安装:
+```
+# 以下安装命令中 '${version}' 为下载的TensorRT版本,如7.1.3.4
+# 设置环境变量,<TensorRT-${version}/lib> 为解压后的TensorRT的lib目录
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<TensorRT-${version}/lib>

+# 安装TensorRT
+cd TensorRT-${version}/python
+pip3.7 install tensorrt-*-cp3x-none-linux_x86_64.whl

+# 安装graphsurgeon
+cd TensorRT-${version}/graphsurgeon
+```
+
+
+### 3.3 安装PaddlePaddle
+
+下载支持TensorRT版本的Paddle安装包,注意安装包的TensorRT版本需要与本地TensorRT一致,下载[链接](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#python)
+选择下载 linux-cuda10.2-trt7-gcc8.2 Python3.7版本的Paddle:
+```
+# 从下载链接中可以看到是paddle2.1.1-cuda10.2-cudnn8.1版本
+wget https://paddle-wheel.bj.bcebos.com/with-trt/2.1.1-gpu-cuda10.2-cudnn8.1-mkl-gcc8.2/paddlepaddle_gpu-2.1.1-cp37-cp37m-linux_x86_64.whl
+pip3.7 install -U paddlepaddle_gpu-2.1.1-cp37-cp37m-linux_x86_64.whl
+```
+
+## 4. 安装PaddleOCR依赖
+```
+# 安装AutoLog
+git clone https://github.com/LDOUBLEV/AutoLog
+cd AutoLog
+pip3.7 install -r requirements.txt
+python3.7 setup.py bdist_wheel
+pip3.7 install ./dist/auto_log-1.0.0-py3-none-any.whl
+
+# 下载OCR代码
+cd ../
+git clone https://github.com/PaddlePaddle/PaddleOCR
+
+```
+
+安装PaddleOCR依赖:
+```
+cd PaddleOCR
+pip3.7 install -r requirements.txt
+```
+
+## FAQ :
+Q. You are using Paddle compiled with TensorRT, but TensorRT dynamic library is not found. Ignore this if TensorRT is not needed.
+
+A. 问题一般是当前安装paddle版本带TRT,但是本地环境找不到TensorRT的预测库,需要下载TensorRT库,解压后设置环境变量LD_LIBRARY_PATH;
+如:
+```
+export LD_LIBRARY_PATH=/usr/local/python3.7.0/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/paddle/package/TensorRT-6.0.1.5/lib
+```
+或者问题是下载的TensorRT版本和当前paddle中编译的TRT版本不匹配,需要下载版本相符的TensorRT重新安装。
diff --git a/test_tipc/docs/lite_auto_log.png b/test_tipc/docs/lite_auto_log.png
new file mode 100644
index 0000000000000000000000000000000000000000..cd9256db40232d689ea67a1bbef2b768c5f98753
Binary files /dev/null and b/test_tipc/docs/lite_auto_log.png differ
diff --git a/test_tipc/docs/lite_log.png b/test_tipc/docs/lite_log.png
new file mode 100644
index 0000000000000000000000000000000000000000..24ae5abc7167049ac879428e5e105a6e67d3c36d
Binary files /dev/null and b/test_tipc/docs/lite_log.png differ
diff --git a/test_tipc/docs/ssh_termux_ls.png b/test_tipc/docs/ssh_termux_ls.png
new file mode 100644
index 0000000000000000000000000000000000000000..2df78026b23b2bb71ac98092d7820e5d02ad611c
Binary files /dev/null and b/test_tipc/docs/ssh_termux_ls.png differ
diff --git a/test_tipc/docs/termux.jpg b/test_tipc/docs/termux.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb87c4ccc21bab6411b87f61e76f03b5b5f6f557
Binary files /dev/null and b/test_tipc/docs/termux.jpg differ
diff --git a/test_tipc/docs/termux_for_android.md b/test_tipc/docs/termux_for_android.md
new file mode 100644
index 0000000000000000000000000000000000000000..73ecbb2e93be5a8fdef593fb492d3a27d33c6b52
--- /dev/null
+++ b/test_tipc/docs/termux_for_android.md
@@ -0,0 +1,128 @@
+# 安卓手机通过Termux连接电脑
+
+由于通过adb方式连接手机后,很多linux命令无法运行,自动化测试受阻,所以此处介绍另外一种通过Termux的连接方式,不仅可以运行大部分linux命令,方便开发者在手机上在线调试,甚至还可以实现多台机器同时连接手机。Termux不是真实的Linux环境,但是Termux可以安装真实的Linux,而且不会损失性能,与此同时,Termux不需要root。在配置Termux之前,请确保电脑已经安装adb工具,安装方式请参考[Lite端部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md) 。在运行以下命令后确保可以显示安卓设备信息。
+
+```
+adb devices
+```
+连接成功信息提示:
+```
+List of devices attached
+744be294 device
+```
+
+## 1.安卓手机安装termux app
+
+### 1.1 下载termux apk文件
+
+由于该app目前在各大商城暂无,所以可以直接下载如下apk文件。
+
+打开电脑终端,执行以下命令:
+
+```
+wget http://10.12.121.133:8911/cuicheng01/fullchain/termux-v1.0.3.apk
+```
+
+### 1.2 安装termux到手机上
+
+在手机端的开发者模式下,允许USB调试,允许USB安装。在电脑终端,执行如下命令,将termux app安装到手机上:
+
+```
+adb install termux-v1.0.3.apk
+```
+
+此处需要手机端确认安装,点击确认。
+
+### 1.3 验证是否安装成功
+
+打开手机,检验termux是否安装成功,如果没有,重新执行1.2,如果有相应的app,点击进入,会有如下显示。
+
+
+
+接下来的配置环境需要在手机上的此终端运行相关命令。
+
+## 2.手机端配置termux
+
+首先将手机联网,最好可以连接外网,部分的配置需要外网。打开Termux终端,执行以下命令安装基础件`proot`,并使用`termux-chroot`命令可以模拟 root 环境与标准的 Linux 目录结构。 + +``` +pkg i -y proot +termux-chroot +``` + +Termux 默认只能访问自身内部的数据,如果要访问手机中其它的数据,输入下面的命令后,手机弹出对请求权限的窗口,允许即可(方便对部分运行出的结果在手机端可视化)。 + +``` +termux-setup-storage +``` + +### 2.1 配置SSH + +作为 Linux 终端或者服务器,必须有SSH。不管你是 SSH 连接到 Termux还是使用Termux去连其它主机,都需要先安装openssh。如果安装失败,请重复执行命令。 + +``` +pkg i -y openssh +``` + +启动 SSH 服务端,默认端口号为8022 + +``` +sshd +``` + + +### 2.2 电脑通过SSH方式连接手机 + +1.保证手机和电脑处于同一局域网下 +手机端分别输入以下命令获得ip地址和当前用户: + +``` +# 获取ip地址 +ifconfig + +# 获取当前用户 +whoami +``` + +如获取到的ip地址和当前用户分别是`172.24.162.117`和`u0_a374`。 + +2.电脑端通过SSH连接手机 + +``` +#默认端口号为8022 +ssh u0_a374@172.24.162.117 -p 8022 +``` + +3.运行ls命令后,会有如下显示: + +``` +ls +``` + + + + +### 2.3 通过scp传输数据 + +1.在当前目录上新建test目录 + +``` +mkdir test +``` + +2.测试scp功能 + +将电脑中的某个文件拷贝到手机上: +``` +scp -P 8022 test.txt u0_a374@172.24.162.117:/home/storage/test +``` + +3.手机端查看 + +打开手机终端,在`/home/storage/test`下查看是否存在`test.txt` + + +## 3. 更多教程 + +本教程可以完成Termux基本配置,更多关于Termux的用法,请参考:[Termux高级终端安装使用配置教程](https://www.sqlsec.com/2018/05/termux.html)。 + diff --git a/PTDN/docs/test.png b/test_tipc/docs/test.png similarity index 100% rename from PTDN/docs/test.png rename to test_tipc/docs/test.png diff --git a/PTDN/docs/test_inference_cpp.md b/test_tipc/docs/test_inference_cpp.md similarity index 69% rename from PTDN/docs/test_inference_cpp.md rename to test_tipc/docs/test_inference_cpp.md index 140860cb506513cbaa0fdc621848568d90e8ef5c..24655d96ba1acaadd489019ec260999c981107de 100644 --- a/PTDN/docs/test_inference_cpp.md +++ b/test_tipc/docs/test_inference_cpp.md @@ -6,7 +6,7 @@ C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于 基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`,这两类模型对应的C++预测功能汇总如下: -| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | +| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | | ---- | ---- | ---- | :----: | :----: | :----: | | 正常模型 | GPU | 1/6 | fp32/fp16 | - | - | | 正常模型 | CPU | 1/6 | - | fp32 | 支持 | @@ -15,17 +15,17 @@ C++预测功能测试的主程序为`test_inference_cpp.sh`,可以测试基于 ## 2. 
测试流程
### 2.1 功能测试
-先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```tests/output```目录下生成`cpp_infer_*.log`后缀的日志文件。
+先运行`prepare.sh`准备数据和模型,然后运行`test_inference_cpp.sh`进行测试,最终在```test_tipc/output```目录下生成`cpp_infer_*.log`后缀的日志文件。

```shell
-bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt "cpp_infer"
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "cpp_infer"

# 用法1:
-bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt
+bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt
# 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
-bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
+bash test_tipc/test_inference_cpp.sh ./test_tipc/configs/ppocr_det_mobile_params.txt '1'
```
-
+

### 2.2 精度测试

@@ -37,12 +37,12 @@ bash tests/test_inference_cpp.sh ./tests/configs/ppocr_det_mobile_params.txt '1'
#### 使用方式
运行命令:
```shell
-python3.7 tests/compare_results.py --gt_file=./tests/results/cpp_*.txt --log_file=./tests/output/cpp_*.log --atol=1e-3 --rtol=1e-3
+python3.7 test_tipc/compare_results.py --gt_file=./test_tipc/results/cpp_*.txt --log_file=./test_tipc/output/cpp_*.log --atol=1e-3 --rtol=1e-3
```

参数介绍:
-- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在tests/result/ 文件夹下
-- log_file: 指向运行tests/test.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持infer_*.log格式传入
+- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/result/ 文件夹下
+- log_file: 指向运行test_tipc/test_inference_cpp.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持cpp_infer_*.log格式传入
- atol: 设置的绝对误差
- rtol: 设置的相对误差
diff --git a/test_tipc/docs/test_lite.md b/test_tipc/docs/test_lite.md
new file mode 100644
index 0000000000000000000000000000000000000000..021d54e5609349944fb9c0caff52fa9ed48ede8d
--- /dev/null
+++ b/test_tipc/docs/test_lite.md
@@ -0,0 +1,71 @@
+# Lite预测功能测试
+
+Lite预测功能测试的主程序为`test_lite.sh`,可以测试基于Lite预测库的模型推理功能。
+
+## 1. 测试结论汇总
+
+目前Lite端的样本间支持以下方式的组合:
+
+**字段说明:**
+- 输入设置:包括C++预测、python预测、java预测
+- 模型类型:包括正常模型(FP32)和量化模型(INT8)
+- batch-size:包括1和4
+- predictor数量:包括多predictor预测和单predictor预测
+- 功耗模式:包括高性能模式(LITE_POWER_HIGH)和省电模式(LITE_POWER_LOW)
+- 预测库来源:包括下载方式和编译方式,其中编译方式分为以下目标硬件:(1)ARM CPU;(2)Linux XPU;(3)OpenCL GPU;(4)Metal GPU
+
+| 模型类型 | batch-size | predictor数量 | 功耗模式 | 预测库来源 | 支持语言 |
+| :----: | :----: | :----: | :----: | :----: | :----: |
+| 正常模型/量化模型 | 1 | 1 | 高性能模式/省电模式 | 下载方式 | C++预测 |
+
+
+## 2. 测试流程
+
+### 2.1 功能测试
+
+先运行`prepare.sh`准备数据和模型,模型和数据会打包到test_lite.tar中,将test_lite.tar上传到手机上,解压后进入`test_lite`目录中,然后运行`test_lite.sh`进行测试,最终在`test_lite/output`目录下生成`lite_*.log`后缀的日志文件。
+
+```shell
+
+# 数据和模型准备
+bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "lite_infer"
+
+# 手机端测试:
+bash test_lite.sh ppocr_det_mobile_params.txt
+
+```
+
+**注意**:由于运行该项目需要bash等命令,传统的adb方式不能很好的安装。所以此处推荐通过在手机上开启虚拟终端的方式连接电脑,连接方式可以参考[安卓手机termux连接电脑](./termux_for_android.md)。
+
+#### 运行结果
+
+各测试的运行情况会打印在 `./output/` 中:
+运行成功时会输出:
+
+```
+Run successfully with command - ./ocr_db_crnn det ./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb INT8 4 1 LITE_POWER_LOW ./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ./config.txt True > ./output/lite_ch_ppocr_mobile_v2.0_det_slim_opt.nb_precision_INT8_batchsize_1_threads_4_powermode_LITE_POWER_LOW_singleimg_True.log 2>&1!
+Run successfully with command xxx
+...
+``` + +运行失败时会输出: + +``` +Run failed with command - ./ocr_db_crnn det ./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb INT8 4 1 LITE_POWER_LOW ./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg ./config.txt True > ./output/lite_ch_ppocr_mobile_v2.0_det_slim_opt.nb_precision_INT8_batchsize_1_threads_4_powermode_LITE_POWER_LOW_singleimg_True.log 2>&1! +Run failed with command xxx +... +``` + +在./output/文件夹下,会存在如下日志,每一个日志都是不同配置下的log结果: + + + +在每一个log中,都会调用autolog打印如下信息: + + + + + +## 3. 更多教程 + +本文档为功能测试用,更详细的Lite端预测使用教程请参考:[Lite端部署](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme.md)。 diff --git a/test_tipc/docs/test_serving.md b/test_tipc/docs/test_serving.md new file mode 100644 index 0000000000000000000000000000000000000000..fb0848bfb5e37e4b0af39fa9bb2b13b4046c9a50 --- /dev/null +++ b/test_tipc/docs/test_serving.md @@ -0,0 +1,78 @@ +# PaddleServing预测功能测试 + +PaddleServing预测功能测试的主程序为`test_serving.sh`,可以测试基于PaddleServing的部署功能。 + +## 1. 测试结论汇总 + +基于训练是否使用量化,进行本测试的模型可以分为`正常模型`和`量化模型`,这两类模型对应的C++预测功能汇总如下: + +| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | +| ---- | ---- | ---- | :----: | :----: | :----: | +| 正常模型 | GPU | 1/6 | fp32/fp16 | - | - | +| 正常模型 | CPU | 1/6 | - | fp32 | 支持 | +| 量化模型 | GPU | 1/6 | int8 | - | - | +| 量化模型 | CPU | 1/6 | - | int8 | 支持 | + +## 2. 测试流程 +### 2.1 功能测试 +先运行`prepare.sh`准备数据和模型,然后运行`test_serving.sh`进行测试,最终在```test_tipc/output```目录下生成`serving_infer_*.log`后缀的日志文件。 + +```shell +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt "serving_infer" + +# 用法: +bash test_tipc/test_serving.sh ./test_tipc/configs/ppocr_det_mobile_params.txt +``` + +#### 运行结果 + +各测试的运行情况会打印在 `test_tipc/output/results_serving.log` 中: +运行成功时会输出: + +``` +Run successfully with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 ! +Run successfully with command - xxxxx +... +``` + +运行失败时会输出: + +``` +Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_1_batchsize_1.log 2>&1 ! +Run failed with command - python3.7 pipeline_http_client.py --image_dir=../../doc/imgs > ../../tests/output/server_infer_cpu_usemkldnn_True_threads_6_batchsize_1.log 2>&1 ! +Run failed with command - xxxxx +... +``` + +详细的预测结果会存在 test_tipc/output/ 文件夹下,例如`server_infer_gpu_usetrt_True_precision_fp16_batchsize_1.log`中会返回检测框的坐标: + +``` +{'err_no': 0, 'err_msg': '', 'key': ['dt_boxes'], 'value': ['[[[ 78. 642.]\n [409. 640.]\n [409. 657.]\n +[ 78. 659.]]\n\n [[ 75. 614.]\n [211. 614.]\n [211. 635.]\n [ 75. 635.]]\n\n +[[103. 554.]\n [135. 554.]\n [135. 575.]\n [103. 575.]]\n\n [[ 75. 531.]\n +[347. 531.]\n [347. 549.]\n [ 75. 549.] ]\n\n [[ 76. 503.]\n [309. 498.]\n +[309. 521.]\n [ 76. 526.]]\n\n [[163. 462.]\n [317. 462.]\n [317. 493.]\n +[163. 493.]]\n\n [[324. 431.]\n [414. 431.]\n [414. 452.]\n [324. 452.]]\n\n +[[ 76. 412.]\n [208. 408.]\n [209. 424.]\n [ 76. 428.]]\n\n [[307. 409.]\n +[428. 409.]\n [428. 426.]\n [307 . 426.]]\n\n [[ 74. 385.]\n [217. 382.]\n +[217. 400.]\n [ 74. 403.]]\n\n [[308. 381.]\n [427. 380.]\n [427. 400.]\n +[308. 401.]]\n\n [[ 74. 363.]\n [195. 362.]\n [195. 378.]\n [ 74. 379.]]\n\n +[[303. 359.]\n [423. 357.]\n [423. 375.]\n [303. 377.]]\n\n [[ 70. 336.]\n +[239. 334.]\n [239. 354.]\ n [ 70. 356.]]\n\n [[ 70. 312.]\n [204. 310.]\n +[204. 327.]\n [ 70. 330.]]\n\n [[303. 308.]\n [419. 306.]\n [419. 326.]\n +[303. 
328.]]\n\n [[113. 2 72.]\n [246. 270.]\n [247. 299.]\n [113. 301.]]\n\n + [[361. 269.]\n [384. 269.]\n [384. 296.]\n [361. 296.]]\n\n [[ 70. 250.]\n + [243. 246.]\n [243. 265.]\n [ 70. 269.]]\n\n [[ 65. 221.]\n [187. 220.]\n +[187. 240.]\n [ 65. 241.]]\n\n [[337. 216.]\n [382. 216.]\n [382. 240.]\n +[337. 240.]]\n\n [ [ 65. 196.]\n [247. 193.]\n [247. 213.]\n [ 65. 216.]]\n\n +[[296. 197.]\n [423. 191.]\n [424. 209.]\n [296. 215.]]\n\n [[ 65. 167.]\n [244. 167.]\n +[244. 186.]\n [ 65. 186.]]\n\n [[ 67. 139.]\n [290. 139.]\n [290. 159.]\n [ 67. 159.]]\n\n +[[ 68. 113.]\n [410. 113.]\n [410. 128.]\n [ 68. 129.] ]\n\n [[277. 87.]\n [416. 87.]\n +[416. 108.]\n [277. 108.]]\n\n [[ 79. 28.]\n [132. 28.]\n [132. 62.]\n [ 79. 62.]]\n\n +[[163. 17.]\n [410. 14.]\n [410. 50.]\n [163. 53.]]]']} +``` + + +## 3. 更多教程 + +本文档为功能测试用,更详细的Serving预测使用教程请参考:[PPOCR 服务化部署](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/deploy/pdserving/README_CN.md) diff --git a/PTDN/docs/test_train_inference_python.md b/test_tipc/docs/test_train_inference_python.md similarity index 62% rename from PTDN/docs/test_train_inference_python.md rename to test_tipc/docs/test_train_inference_python.md index 8c468ffd34fcd7d949331c9097c7993ca7a1e391..1b4dfe56226974b2de7a1e08d296c4273f81c898 100644 --- a/PTDN/docs/test_train_inference_python.md +++ b/test_tipc/docs/test_train_inference_python.md @@ -19,7 +19,7 @@ - 预测相关:基于训练是否使用量化,可以将训练产出的模型可以分为`正常模型`和`量化模型`,这两类模型对应的预测功能汇总如下, -| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | +| 模型类型 |device | batchsize | tensorrt | mkldnn | cpu多线程 | | ---- | ---- | ---- | :----: | :----: | :----: | | 正常模型 | GPU | 1/6 | fp32/fp16 | - | - | | 正常模型 | CPU | 1/6 | - | fp32 | 支持 | @@ -46,42 +46,42 @@ ### 2.2 功能测试 -先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```tests/output```目录下生成`python_infer_*.log`格式的日志文件。 +先运行`prepare.sh`准备数据和模型,然后运行`test_train_inference_python.sh`进行测试,最终在```test_tipc/output```目录下生成`python_infer_*.log`格式的日志文件。 `test_train_inference_python.sh`包含5种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是: -- 模式1:lite_train_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; +- 模式1:lite_train_lite_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度; ```shell -bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer' -bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'lite_train_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_lite_infer' ``` -- 模式2:whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; +- 模式2:lite_train_whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理; ```shell -bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer' -bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'lite_train_whole_infer' ``` -- 模式3:infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; +- 模式3:whole_infer,不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度; ```shell -bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer' # 用法1: -bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer' 
+bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer' # 用法2: 指定GPU卡预测,第三个传入参数为GPU卡号 -bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'infer' '1' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_infer' '1' ``` -- 模式4:whole_train_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; +- 模式4:whole_train_whole_infer,CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度; ```shell -bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer' -bash tests/test_train_inference_python.sh ./tests/configs/ppocr_det_mobile_params.txt 'whole_train_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_train_whole_infer' +bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'whole_train_whole_infer' ``` -- 模式5:klquant_infer,测试离线量化; +- 模式5:klquant_whole_infer,测试离线量化; ```shell -bash tests/prepare.sh ./tests/configs/ppocr_det_mobile_params.txt 'klquant_infer' -bash tests/test_train_inference_python.sh tests/configs/ppocr_det_mobile_params.txt 'klquant_infer' +bash test_tipc/prepare.sh ./test_tipc/configs/ppocr_det_mobile_params.txt 'klquant_whole_infer' +bash test_tipc/test_train_inference_python.sh test_tipc/configs/ppocr_det_mobile_params.txt 'klquant_whole_infer' ``` @@ -95,12 +95,12 @@ bash tests/test_train_inference_python.sh tests/configs/ppocr_det_mobile_params. #### 使用方式 运行命令: ```shell -python3.7 tests/compare_results.py --gt_file=./tests/results/python_*.txt --log_file=./tests/output/python_*.log --atol=1e-3 --rtol=1e-3 +python3.7 test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.txt --log_file=./test_tipc/output/python_*.log --atol=1e-3 --rtol=1e-3 ``` 参数介绍: -- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在tests/result/ 文件夹下 -- log_file: 指向运行tests/test.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持infer_*.log格式传入 +- gt_file: 指向事先保存好的预测结果路径,支持*.txt 结尾,会自动索引*.txt格式的文件,文件默认保存在test_tipc/result/ 文件夹下 +- log_file: 指向运行test_tipc/test_train_inference_python.sh 脚本的infer模式保存的预测日志,预测日志中打印的有预测结果,比如:文本框,预测文本,类别等等,同样支持python_infer_*.log格式传入 - atol: 设置的绝对误差 - rtol: 设置的相对误差 diff --git a/PTDN/prepare.sh b/test_tipc/prepare.sh similarity index 67% rename from PTDN/prepare.sh rename to test_tipc/prepare.sh index d842f4f573d0b1bd697bdad9b67a765ebcf6da6c..fba898e0ffdb078f8c3eb278605c8dd2c86c70a3 100644 --- a/PTDN/prepare.sh +++ b/test_tipc/prepare.sh @@ -1,8 +1,9 @@ #!/bin/bash FILENAME=$1 -# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', -# 'cpp_infer', 'serving_infer', 'klquant_infer'] +# MODE be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', +# 'whole_infer', 'klquant_whole_infer', +# 'cpp_infer', 'serving_infer', 'lite_infer'] MODE=$2 @@ -34,10 +35,14 @@ trainer_list=$(func_parser_value "${lines[14]}") # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer'] MODE=$2 -if [ ${MODE} = "lite_train_infer" ];then +if [ ${MODE} = "lite_train_lite_infer" ];then # pretrain lite train data wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar + if [ ${model_name} == "PPOCRv2_ocr_det" ]; then + wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar + 
cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
+    fi
     cd ./pretrain_models/ && tar xf det_mv3_db_v2.0_train.tar && cd ../
     rm -rf ./train_data/icdar2015
     rm -rf ./train_data/ic15_data
@@ -50,14 +55,18 @@ if [ ${MODE} = "lite_train_infer" ];then
         ln -s ./icdar2015_lite ./icdar2015
         cd ../
     cd ./inference && tar xf rec_inference.tar && cd ../
-elif [ ${MODE} = "whole_train_infer" ];then
+elif [ ${MODE} = "whole_train_whole_infer" ];then
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
     rm -rf ./train_data/icdar2015
     rm -rf ./train_data/ic15_data
     wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar
     wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
     cd ./train_data/ && tar xf icdar2015.tar && tar xf ic15_data.tar && cd ../
-elif [ ${MODE} = "whole_infer" ];then
+    if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar
+        cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
+    fi
+elif [ ${MODE} = "lite_train_whole_infer" ];then
     wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
     rm -rf ./train_data/icdar2015
     rm -rf ./train_data/ic15_data
@@ -66,7 +75,11 @@ elif [ ${MODE} = "whole_infer" ];then
     cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
     ln -s ./icdar2015_infer ./icdar2015
     cd ../
-elif [ ${MODE} = "infer" ];then
+    if [ ${model_name} == "PPOCRv2_ocr_det" ]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar
+        cd ./pretrain_models/ && tar xf ch_PP-OCRv2_det_distill_train.tar && cd ../
+    fi
+elif [ ${MODE} = "whole_infer" ];then
     if [ ${model_name} = "ocr_det" ]; then
         eval_model_name="ch_ppocr_mobile_v2.0_det_train"
         rm -rf ./train_data/icdar2015
@@ -100,13 +113,29 @@ elif [ ${MODE} = "infer" ];then
        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
        cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../
     fi
-elif [ ${MODE} = "klquant_infer" ];then
+
+    elif [ ${model_name} = "PPOCRv2_ocr_det" ]; then
+        eval_model_name="ch_PP-OCRv2_det_infer"
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+        cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
+    fi
+
+if [ ${MODE} = "klquant_whole_infer" ]; then
     if [ ${model_name} = "ocr_det" ]; then
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
         wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
         cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
     fi
-elif [ ${MODE} = "cpp_infer" ];then
+    if [ ${model_name} = "PPOCRv2_ocr_det" ]; then
+        eval_model_name="ch_PP-OCRv2_det_infer"
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar
+        cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
+    fi
+fi
+
+if [ ${MODE} = "cpp_infer" ];then
    if [ ${model_name} = "ocr_det" ]; then
        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
@@ -136,3 +165,37 @@ if [ ${MODE} = "serving_infer" ];then
     wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
     cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../
 fi
+
+
+if [ ${MODE} = "lite_infer" ];then
+    # prepare lite nb model and test data
+    current_dir=${PWD}
+    wget -nc -P ./models https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_opt.nb
+    wget -nc -P ./models https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_ppocr_mobile_v2.0_det_slim_opt.nb
+    wget -nc -P ./test_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
+    cd ./test_data && tar -xf icdar2015_lite.tar && rm icdar2015_lite.tar && cd ../
+    # prepare lite env
+    export http_proxy=http://172.19.57.45:3128
+    export https_proxy=http://172.19.57.45:3128
+    paddlelite_url=https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.9/inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv.tar.gz
+    paddlelite_zipfile=$(echo $paddlelite_url | awk -F "/" '{print $NF}')
+    paddlelite_file=inference_lite_lib.android.armv8.gcc.c++_shared.with_extra.with_cv
+    wget ${paddlelite_url}
+    tar -xf ${paddlelite_zipfile}
+    mkdir -p ${paddlelite_file}/demo/cxx/ocr/test_lite
+    mv models test_data ${paddlelite_file}/demo/cxx/ocr/test_lite
+    cp ppocr/utils/ppocr_keys_v1.txt deploy/lite/config.txt ${paddlelite_file}/demo/cxx/ocr/test_lite
+    cp ./deploy/lite/* ${paddlelite_file}/demo/cxx/ocr/
+    cp ${paddlelite_file}/cxx/lib/libpaddle_light_api_shared.so ${paddlelite_file}/demo/cxx/ocr/test_lite
+    cp test_tipc/configs/ppocr_det_mobile_params.txt test_tipc/test_lite.sh test_tipc/common_func.sh ${paddlelite_file}/demo/cxx/ocr/test_lite
+    cd ${paddlelite_file}/demo/cxx/ocr/
+    git clone https://github.com/LDOUBLEV/AutoLog.git
+    unset http_proxy
+    unset https_proxy
+    make -j
+    sleep 1
+    make -j
+    cp ocr_db_crnn test_lite && cp test_lite/libpaddle_light_api_shared.so test_lite/libc++_shared.so
+    tar -cf test_lite.tar ./test_lite && cp test_lite.tar ${current_dir} && cd ${current_dir}
+fi
+
diff --git a/PTDN/readme.md b/test_tipc/readme.md
similarity index 70%
rename from PTDN/readme.md
rename to test_tipc/readme.md
index 69977fac00482b11e862a7ee83bf9359ac48ffb8..1d8df7da6cf6d1319cedd329e4202fa674e8538b 100644
--- a/PTDN/readme.md
+++ b/test_tipc/readme.md
@@ -1,9 +1,9 @@
-# 推理部署导航
+# 飞桨训推一体认证

 ## 1. 简介

-飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的推理部署导航PTDN(Paddle Train Deploy Navigation),方便用户查阅每种模型的推理部署打通情况,并可以进行一键测试。
+飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的飞桨训推一体认证 (Training and Inference Pipeline Certification(TIPC)) 信息和测试工具,方便用户查阅每种模型的训练推理部署打通情况,并可以进行一键测试。
@@ -15,20 +15,23 @@

 **Field description:**
 - Basic training and prediction: model training plus Paddle Inference Python prediction.
-- Others: Paddle Inference C++ prediction, Paddle Serving deployment, Paddle-Lite deployment, etc.
+- More training options: multi-machine multi-GPU training and mixed precision.
+- Model compression: pruning, offline/online quantization, and distillation.
+- Other inference deployment: Paddle Inference C++ prediction, Paddle Serving deployment, Paddle-Lite deployment, etc.
+For details of inference acceleration features such as mkldnn and TensorRT, see the [more tutorials](#more) of each test tool.

-| Algorithm | Model name | Model type | Basic training & prediction | Others |
-| :--- | :--- | :----: | :--------: | :---- |
-| DB |ch_ppocr_mobile_v2.0_det | detection | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| DB |ch_ppocr_server_v2.0_det | detection | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| Algorithm | Model name | Model type | Basic<br>training & prediction | More<br>training options | Model compression | Other deployment |
+| :--- | :--- | :----: | :--------: | :---- | :---- | :---- |
+| DB |ch_ppocr_mobile_v2.0_det | detection | supported | multi-machine multi-GPU<br>mixed precision | FPGM pruning<br>offline quantization | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| DB |ch_ppocr_server_v2.0_det | detection | supported | multi-machine multi-GPU<br>mixed precision | FPGM pruning<br>offline quantization | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
 | DB |ch_PP-OCRv2_det | detection |
-| CRNN |ch_ppocr_mobile_v2.0_rec | recognition | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| CRNN |ch_ppocr_server_v2.0_rec | recognition | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| CRNN |ch_ppocr_mobile_v2.0_rec | recognition | supported | multi-machine multi-GPU<br>mixed precision | PACT quantization<br>offline quantization | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| CRNN |ch_ppocr_server_v2.0_rec | recognition | supported | multi-machine multi-GPU<br>mixed precision | PACT quantization<br>offline quantization | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
 | CRNN |ch_PP-OCRv2_rec | recognition |
-| PP-OCR |ch_ppocr_mobile_v2.0 | detection+recognition | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-| PP-OCR |ch_ppocr_server_v2.0 | detection+recognition | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
-|PP-OCRv2|ch_PP-OCRv2 | detection+recognition | supported | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| PP-OCR |ch_ppocr_mobile_v2.0 | detection+recognition | supported | multi-machine multi-GPU<br>mixed precision | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+| PP-OCR |ch_ppocr_server_v2.0 | detection+recognition | supported | multi-machine multi-GPU<br>mixed precision | - | Paddle Inference: C++<br>Paddle Serving: Python, C++<br>Paddle-Lite:<br>(1) ARM CPU(C++) |
+|PP-OCRv2|ch_PP-OCRv2 | detection+recognition |
 | DB |det_mv3_db_v2.0 | detection |
 | DB |det_r50_vd_db_v2.0 | detection |
 | EAST |det_mv3_east_v2.0 | detection |
@@ -55,7 +58,7 @@

 ### Directory structure

 ```shell
-PTDN/
+test_tipc/
 ├── configs/                             # config files directory
     ├── det_mv3_db.yml                   # yml for testing training of the mobile ppocr detection model
     ├── det_r50_vd_db.yml                # yml for testing training of the server ppocr detection model
@@ -66,7 +69,7 @@ PTDN/
    ├── ppocr_sys_server_params.txt      # params file for testing the server ppocr detection+recognition pipeline
    ├── ppocr_det_server_params.txt      # params file for testing the server ppocr detection model
    ├── ppocr_rec_server_params.txt      # params file for testing the server ppocr recognition model
-    ├── ...
+    ├── ...
 ├── results/                            # pre-saved prediction results, used for precision comparison against actual prediction results
    ├── python_ppocr_det_mobile_results_fp32.txt   # pre-saved fp32-precision python prediction results of the mobile ppocr detection model
    ├── python_ppocr_det_mobile_results_fp16.txt   # pre-saved fp16-precision python prediction results of the mobile ppocr detection model
@@ -98,6 +101,8 @@ PTDN/
 - `test_serving.sh`: tests service deployment based on Paddle Serving.
 - `test_lite.sh`: tests on-device inference deployment based on Paddle-Lite.

+
+#### More tutorials
 The functional tests cover training options such as mixed precision, pruning and quantization, and inference options such as mkldnn and TensorRT; follow the links below for details and usage tutorials:
 [test_train_inference_python usage](docs/test_train_inference_python.md)
 [test_inference_cpp usage](docs/test_inference_cpp.md)
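The `results/` files listed above are the reference outputs for precision comparison. As a loose illustration of how such a file-level comparison might look (the line format, output path and tolerances here are assumptions, not the harness's actual logic):

```python
# Hypothetical sketch of comparing a fresh prediction dump against the
# pre-saved results files; paths, line layout and tolerances are assumptions.
import numpy as np

def compare_results(expected_path, actual_path, rtol=1e-3, atol=1e-3):
    with open(expected_path) as exp, open(actual_path) as act:
        for exp_line, act_line in zip(exp, act):
            # assume each line holds an image name followed by numeric outputs
            exp_vals = np.array(exp_line.split()[1:], dtype=float)
            act_vals = np.array(act_line.split()[1:], dtype=float)
            if not np.allclose(exp_vals, act_vals, rtol=rtol, atol=atol):
                raise AssertionError(f"mismatch:\n{exp_line}\n{act_line}")

compare_results(
    "test_tipc/results/python_ppocr_det_mobile_results_fp32.txt",
    "test_tipc/output/python_infer_results.txt")  # hypothetical output dump
```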
diff --git a/PTDN/results/cpp_ppocr_det_mobile_results_fp16.txt b/test_tipc/results/cpp_ppocr_det_mobile_results_fp16.txt
similarity index 100%
rename from PTDN/results/cpp_ppocr_det_mobile_results_fp16.txt
rename to test_tipc/results/cpp_ppocr_det_mobile_results_fp16.txt
diff --git a/PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt b/test_tipc/results/cpp_ppocr_det_mobile_results_fp32.txt
similarity index 100%
rename from PTDN/results/cpp_ppocr_det_mobile_results_fp32.txt
rename to test_tipc/results/cpp_ppocr_det_mobile_results_fp32.txt
diff --git a/PTDN/results/python_ppocr_det_mobile_results_fp16.txt b/test_tipc/results/python_ppocr_det_mobile_results_fp16.txt
similarity index 100%
rename from PTDN/results/python_ppocr_det_mobile_results_fp16.txt
rename to test_tipc/results/python_ppocr_det_mobile_results_fp16.txt
diff --git a/PTDN/results/python_ppocr_det_mobile_results_fp32.txt b/test_tipc/results/python_ppocr_det_mobile_results_fp32.txt
similarity index 100%
rename from PTDN/results/python_ppocr_det_mobile_results_fp32.txt
rename to test_tipc/results/python_ppocr_det_mobile_results_fp32.txt
diff --git a/PTDN/test_inference_cpp.sh b/test_tipc/test_inference_cpp.sh
similarity index 99%
rename from PTDN/test_inference_cpp.sh
rename to test_tipc/test_inference_cpp.sh
index 124bdacb7dad04bdea07a62ba9c86b248be5a06d..3f8b54b189349aa9c011a56f6f12752b771ce43e 100644
--- a/PTDN/test_inference_cpp.sh
+++ b/test_tipc/test_inference_cpp.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
 dataline=$(awk 'NR==52, NR==66{print}' $FILENAME)
@@ -35,7 +35,7 @@ cpp_benchmark_key=$(func_parser_key "${lines[14]}")
 cpp_benchmark_value=$(func_parser_value "${lines[14]}")


-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results_cpp.log"

diff --git a/test_tipc/test_lite.sh b/test_tipc/test_lite.sh
new file mode 100644
index 0000000000000000000000000000000000000000..832003ba302fe86995e20029cdb019e72d9ce162
--- /dev/null
+++ b/test_tipc/test_lite.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+source ./common_func.sh
+export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
+
+FILENAME=$1
+dataline=$(awk 'NR==101, NR==110{print}' $FILENAME)
+echo $dataline
+# parser params
+IFS=$'\n'
+lines=(${dataline})
+
+# parser lite inference
+lite_inference_cmd=$(func_parser_value "${lines[1]}")
+lite_model_dir_list=$(func_parser_value "${lines[2]}")
+lite_cpu_threads_list=$(func_parser_value "${lines[3]}")
+lite_batch_size_list=$(func_parser_value "${lines[4]}")
+lite_power_mode_list=$(func_parser_value "${lines[5]}")
+lite_infer_img_dir_list=$(func_parser_value "${lines[6]}")
+lite_config_dir=$(func_parser_value "${lines[7]}")
+lite_rec_dict_dir=$(func_parser_value "${lines[8]}")
+lite_benchmark_value=$(func_parser_value "${lines[9]}")
+
+LOG_PATH="./output"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results.log"
+
+
+function func_lite(){
+    IFS='|'
+    _script=$1
+    _lite_model=$2
+    _log_path=$3
+    _img_dir=$4
+    _config=$5
+    if [[ $lite_model =~ "slim" ]]; then
+        precision="INT8"
+    else
+        precision="FP32"
+    fi
+    is_single_img=$(echo $_img_dir | grep -E ".jpg|.jpeg|.png|.JPEG|.JPG")
+    if [[ "$is_single_img" != "" ]]; then
+        single_img="True"
+    else
+        single_img="False"
+    fi
+
+    # lite inference
+    for num_threads in ${lite_cpu_threads_list[*]}; do
+        for power_mode in ${lite_power_mode_list[*]}; do
+            for batchsize in ${lite_batch_size_list[*]}; do
+                model_name=$(echo $lite_model | awk -F "/" '{print $NF}')
+                _save_log_path="${_log_path}/lite_${model_name}_precision_${precision}_batchsize_${batchsize}_threads_${num_threads}_powermode_${power_mode}_singleimg_${single_img}.log"
+                command="${_script} ${lite_model} ${precision} ${num_threads} ${batchsize} ${power_mode} ${_img_dir} ${_config} ${lite_benchmark_value} > ${_save_log_path} 2>&1"
+                eval ${command}
+                status_check $? "${command}" "${status_log}"
+            done
+        done
+    done
+}
+
+
+echo "################### run test ###################"
+IFS="|"
+for lite_model in ${lite_model_dir_list[*]}; do
+    #run lite inference
+    for img_dir in ${lite_infer_img_dir_list[*]}; do
+        func_lite "${lite_inference_cmd}" "${lite_model}" "${LOG_PATH}" "${img_dir}" "${lite_config_dir}"
+    done
+done
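Like the other test scripts, test_lite.sh slices a fixed line range out of the params file with awk (`NR==101, NR==110`) and reads the text after the colon on each line via `func_parser_value` from common_func.sh. A rough Python equivalent of that parsing, assuming a `key:value` line layout (common_func.sh itself is not part of this patch, so its exact implementation is an assumption):

```python
# Rough Python equivalent of the awk + func_parser_value parsing in test_lite.sh.
# The "key:value" line layout and the params file path are assumptions.
def parse_params(filename, start, end):
    """Return lines start..end (1-indexed) of the params file."""
    with open(filename) as f:
        return [line.rstrip("\n") for line in f][start - 1:end]

def parser_value(line):
    """Mimic func_parser_value: keep the second ':'-separated field."""
    parts = line.split(":")
    return parts[1] if len(parts) > 1 else line

lines = parse_params("test_tipc/configs/ppocr_det_mobile_params.txt", 101, 110)
lite_inference_cmd = parser_value(lines[1])
lite_model_dir_list = parser_value(lines[2])
```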
diff --git a/PTDN/test_serving.sh b/test_tipc/test_serving.sh
similarity index 98%
rename from PTDN/test_serving.sh
rename to test_tipc/test_serving.sh
index ec79a46c9bf4b51c16b1c0ddfff41b772b13b0ae..be7b594c3848c423937c59336ce3bf686f8f228d 100644
--- a/PTDN/test_serving.sh
+++ b/test_tipc/test_serving.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
 dataline=$(awk 'NR==67, NR==83{print}' $FILENAME)
@@ -36,8 +36,8 @@ web_precision_key=$(func_parser_key "${lines[15]}")
 web_precision_list=$(func_parser_value "${lines[15]}")
 pipeline_py=$(func_parser_value "${lines[16]}")

-LOG_PATH="../../tests/output"
-mkdir -p ./tests/output
+LOG_PATH="../../test_tipc/output"
+mkdir -p ./test_tipc/output
 status_log="${LOG_PATH}/results_serving.log"

 function func_serving(){
diff --git a/PTDN/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
similarity index 91%
rename from PTDN/test_train_inference_python.sh
rename to test_tipc/test_train_inference_python.sh
index 28cc037801bb4c1f1bcc10a74855b8c146197f4d..a9be33ea45cc56b5478e9135451849e25888f8d1 100644
--- a/PTDN/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-source tests/common_func.sh
+source test_tipc/common_func.sh

 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'klquant_infer']
+# MODE must be one of ['lite_train_lite_infer', 'lite_train_whole_infer', 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
 MODE=$2

 dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
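For readers tracking the rename, the old → new MODE values used across prepare.sh and the test scripts map as follows; this is a reference sketch distilled from the hunks in this patch (the lite_train_infer pairing is inferred from the MODE comment above), not part of the repository's scripts:

```python
# Old -> new MODE names in this patch (reference only, distilled from the hunks).
OLD_TO_NEW_MODE = {
    "lite_train_infer": "lite_train_lite_infer",   # inferred from the MODE comment
    "whole_train_infer": "whole_train_whole_infer",
    "whole_infer": "lite_train_whole_infer",       # old branch prepared lite-infer data
    "infer": "whole_infer",
    "klquant_infer": "klquant_whole_infer",
}
```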
@@ -59,6 +59,7 @@ export_key1=$(func_parser_key "${lines[33]}")
 export_value1=$(func_parser_value "${lines[33]}")
 export_key2=$(func_parser_key "${lines[34]}")
 export_value2=$(func_parser_value "${lines[34]}")
+inference_dir=$(func_parser_value "${lines[35]}")

 # parser inference model
 infer_model_dir_list=$(func_parser_value "${lines[36]}")
@@ -88,7 +89,7 @@ infer_key1=$(func_parser_key "${lines[50]}")
 infer_value1=$(func_parser_value "${lines[50]}")

 # parser klquant_infer
-if [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "klquant_whole_infer" ]; then
     dataline=$(awk 'NR==82, NR==98{print}' $FILENAME)
     lines=(${dataline})
     # parser inference model
@@ -119,7 +120,7 @@ if [ ${MODE} = "klquant_infer" ]; then
     infer_value1=$(func_parser_value "${lines[15]}")
 fi

-LOG_PATH="./tests/output"
+LOG_PATH="./test_tipc/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results_python.log"

@@ -202,7 +203,7 @@ function func_inference(){
     done
 }

-if [ ${MODE} = "infer" ] || [ ${MODE} = "klquant_infer" ]; then
+if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
     GPUID=$3
     if [ ${#GPUID} -le 0 ];then
         env=" "
@@ -245,6 +246,7 @@ else
     for gpu in ${gpu_list[*]}; do
         use_gpu=${USE_GPU_KEY[Count]}
         Count=$(($Count + 1))
+        ips=""
         if [ ${gpu} = "-1" ];then
             env=""
         elif [ ${#gpu} -le 1 ];then
@@ -264,6 +266,11 @@ else
             env=" "
         fi
         for autocast in ${autocast_list[*]}; do
+            if [ ${autocast} = "amp" ]; then
+                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
+            else
+                set_amp_config=" "
+            fi
             for trainer in ${trainer_list[*]}; do
                 flag_quant=False
                 if [ ${trainer} = ${pact_key} ]; then
@@ -290,7 +297,6 @@ else
                 if [ ${run_train} = "null" ]; then
                     continue
                 fi
-
                 set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                 set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                 set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
@@ -306,11 +312,11 @@ else
                 set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                 if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
-                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} "
-                elif [ ${#gpu} -le 15 ];then  # train with multi-gpu
-                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
+                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
+                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                 else     # train with multi-machine
-                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
+                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${set_use_gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                 fi
                 # run train
                 eval "unset CUDA_VISIBLE_DEVICES"
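The three launch branches above differ only in the launcher prefix and in whether an `--ips` list is present; note that the multi-GPU test now keys off the length of the ips string (`${#ips} -le 26`) rather than the GPU string. A rough Python illustration of that dispatch, mirroring the shell variables (a sketch under those naming assumptions, not a real API):

```python
# Sketch of the launch dispatch in test_train_inference_python.sh; the length
# thresholds mirror the shell tests ${#gpu} -le 2 and ${#ips} -le 26.
def build_train_cmd(python, run_train, gpu, ips, flags):
    if len(gpu) <= 2:          # "0" or "-1": CPU or a single GPU
        return f"{python} {run_train} {flags}"
    if len(ips) <= 26:         # empty/short ips list: one machine, several GPUs
        return f"{python} -m paddle.distributed.launch --gpus={gpu} {run_train} {flags}"
    # long ips list: multi-machine training
    return f"{python} -m paddle.distributed.launch --ips={ips} --gpus={gpu} {run_train} {flags}"

amp = "Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
print(build_train_cmd("python3.7", "tools/train.py", "0,1", "", amp))
```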
@@ -342,7 +348,13 @@ else
                     #run inference
                     eval $env
                     save_infer_path="${save_log}"
-                    func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+                    if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
+                        infer_model_dir="${save_infer_path}/${inference_dir}"
+                    else
+                        infer_model_dir=${save_infer_path}
+                    fi
+                    func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
+
+                    eval "unset CUDA_VISIBLE_DEVICES"
                 fi
             done  # done with:    for trainer in ${trainer_list[*]}; do
diff --git a/tools/program.py b/tools/program.py
index 798e6dff297ad1149942488cca1d5540f1924867..d110f70704028948dff2bc889e07d128e0bc94ea 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -159,7 +159,8 @@ def train(config,
           eval_class,
           pre_best_model_dict,
           logger,
-          vdl_writer=None):
+          vdl_writer=None,
+          scaler=None):
     cal_metric_during_train = config['Global'].get('cal_metric_during_train',
                                                    False)
     log_smooth_window = config['Global']['log_smooth_window']
@@ -211,33 +212,49 @@ def train(config,
     for epoch in range(start_epoch, epoch_num + 1):
         train_dataloader = build_dataloader(
             config, 'Train', device, logger, seed=epoch)
-        train_batch_cost = 0.0
         train_reader_cost = 0.0
-        batch_sum = 0
-        batch_start = time.time()
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
         max_iter = len(train_dataloader) - 1 if platform.system(
         ) == "Windows" else len(train_dataloader)
         for idx, batch in enumerate(train_dataloader):
             profiler.add_profiler_step(profiler_options)
-            train_reader_cost += time.time() - batch_start
+            train_reader_cost += time.time() - reader_start
             if idx >= max_iter:
                 break
             lr = optimizer.get_lr()
             images = batch[0]
             if use_srn:
                 model_average = True
-            if model_type == 'table' or extra_input:
-                preds = model(images, data=batch[1:])
+
+            train_start = time.time()
+            # use amp
+            if scaler:
+                with paddle.amp.auto_cast():
+                    if model_type == 'table' or extra_input:
+                        preds = model(images, data=batch[1:])
+                    else:
+                        preds = model(images)
             else:
-                preds = model(images)
+                if model_type == 'table' or extra_input:
+                    preds = model(images, data=batch[1:])
+                else:
+                    preds = model(images)
             loss = loss_class(preds, batch)
             avg_loss = loss['loss']
-            avg_loss.backward()
-            optimizer.step()
+
+            if scaler:
+                scaled_avg_loss = scaler.scale(avg_loss)
+                scaled_avg_loss.backward()
+                scaler.minimize(optimizer, scaled_avg_loss)
+            else:
+                avg_loss.backward()
+                optimizer.step()
             optimizer.clear_grad()
-            train_batch_cost += time.time() - batch_start
-            batch_sum += len(images)
+            train_run_cost += time.time() - train_start
+            total_samples += len(images)

             if not isinstance(lr_scheduler, float):
                 lr_scheduler.step()
@@ -268,12 +285,13 @@ def train(config,
                 logs = train_stats.log()
                 strs = 'epoch: [{}/{}], iter: {}, {}, reader_cost: {:.5f} s, batch_cost: {:.5f} s, samples: {}, ips: {:.5f}'.format(
                     epoch, epoch_num, global_step, logs, train_reader_cost /
-                    print_batch_step, train_batch_cost / print_batch_step,
-                    batch_sum, batch_sum / train_batch_cost)
+                    print_batch_step, (train_reader_cost + train_run_cost) /
+                    print_batch_step, total_samples,
+                    total_samples / (train_reader_cost + train_run_cost))
                 logger.info(strs)
-                train_batch_cost = 0.0
                 train_reader_cost = 0.0
-                batch_sum = 0
+                train_run_cost = 0.0
+                total_samples = 0
             # eval
             if global_step > start_eval_step and \
                     (global_step - start_eval_step) % eval_batch_step == 0 and dist.get_rank() == 0:
@@ -326,7 +344,7 @@ def train(config,
                                      global_step)
             global_step += 1
             optimizer.clear_grad()
-            batch_start = time.time()
+            reader_start = time.time()
         if dist.get_rank() == 0:
             save_model(
                 model,
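In tools/program.py the old single batch-cost counter is split into dataloader (reader) time and forward/backward (run) time, and ips is now computed over both. A worked illustration of the logged fields (the numbers here are made up):

```python
# Illustration of the new throughput bookkeeping in the training log.
train_reader_cost = 0.5   # seconds waiting on the dataloader since the last log
train_run_cost = 4.5      # seconds in forward/backward since the last log
total_samples = 160       # images processed over print_batch_step iterations
print_batch_step = 10

reader_cost = train_reader_cost / print_batch_step                    # 0.05 s per iter
batch_cost = (train_reader_cost + train_run_cost) / print_batch_step  # 0.50 s per iter
ips = total_samples / (train_reader_cost + train_run_cost)            # 32.0 samples/sec
```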
@@ -367,7 +385,11 @@ def eval(model,
     with paddle.no_grad():
         total_frame = 0.0
         total_time = 0.0
-        pbar = tqdm(total=len(valid_dataloader), desc='eval model:')
+        pbar = tqdm(
+            total=len(valid_dataloader),
+            desc='eval model:',
+            position=0,
+            leave=True)
         max_iter = len(valid_dataloader) - 1 if platform.system(
         ) == "Windows" else len(valid_dataloader)
         for idx, batch in enumerate(valid_dataloader):
@@ -436,8 +458,6 @@ def get_center(model, eval_dataloader, post_process_class):
         batch = [item.numpy() for item in batch]
         # Obtain usable results from post-processing methods
-        total_time += time.time() - start
-        # Evaluate the results of the current batch
         post_result = post_process_class(preds, batch[1])

         #update char_center
@@ -480,11 +500,6 @@ def preprocess(is_train=False):
         'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE',
         'SEED'
     ]
-    windows_not_support_list = ['PSE']
-    if platform.system() == "Windows" and alg in windows_not_support_list:
-        logger.warning('{} is not support in Windows now'.format(
-            windows_not_support_list))
-        sys.exit()

     device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
     device = paddle.set_device(device)
diff --git a/tools/train.py b/tools/train.py
index 05d295aa99718c25b94a123c23d08c2904fe8c6a..d182af2988cb29511be40a079d2b3e06605ebe28 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -102,10 +102,27 @@ def main(config, device, logger, vdl_writer):
     if valid_dataloader is not None:
         logger.info('valid dataloader has {} iters'.format(
             len(valid_dataloader)))
+
+    use_amp = config["Global"].get("use_amp", False)
+    if use_amp:
+        AMP_RELATED_FLAGS_SETTING = {
+            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
+            'FLAGS_max_inplace_grad_add': 8,
+        }
+        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
+        scale_loss = config["Global"].get("scale_loss", 1.0)
+        use_dynamic_loss_scaling = config["Global"].get(
+            "use_dynamic_loss_scaling", False)
+        scaler = paddle.amp.GradScaler(
+            init_loss_scaling=scale_loss,
+            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
+    else:
+        scaler = None
+
     # start train
     program.train(config, train_dataloader, valid_dataloader, device, model,
                   loss_class, optimizer, lr_scheduler, post_process_class,
-                  eval_class, pre_best_model_dict, logger, vdl_writer)
+                  eval_class, pre_best_model_dict, logger, vdl_writer, scaler)


 def test_reader(config, device, logger):
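Taken together, tools/train.py builds the GradScaler from `Global.use_amp`/`scale_loss`/`use_dynamic_loss_scaling`, and tools/program.py wraps the forward pass in `auto_cast` and routes the backward pass through the scaler. A minimal standalone sketch of that pattern with a toy model (only the scaler/auto_cast calls mirror the patch; the model, optimizer and data are placeholders):

```python
# Minimal sketch of the AMP training path enabled by Global.use_amp.
import paddle

model = paddle.nn.Linear(10, 10)
optimizer = paddle.optimizer.SGD(learning_rate=0.01,
                                 parameters=model.parameters())
scaler = paddle.amp.GradScaler(init_loss_scaling=1024.0,
                               use_dynamic_loss_scaling=True)

for step in range(3):
    x = paddle.rand([8, 10])
    with paddle.amp.auto_cast():        # run the forward pass in fp16 where safe
        loss = model(x).mean()
    scaled = scaler.scale(loss)         # scale the loss to avoid fp16 underflow
    scaled.backward()
    scaler.minimize(optimizer, scaled)  # unscale grads, step, update the scale
    optimizer.clear_grad()
```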