diff --git a/deploy/cpp_infer/src/ocr_rec.cpp b/deploy/cpp_infer/src/ocr_rec.cpp index 3438d4074e9a4ebb60a9ee6b0d9673c99c08df38..0f90ddfab4872f97829da081e64cb7437e72493a 100644 --- a/deploy/cpp_infer/src/ocr_rec.cpp +++ b/deploy/cpp_infer/src/ocr_rec.cpp @@ -83,7 +83,7 @@ void CRNNRecognizer::Run(std::vector img_list, int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, std::multiplies()); predict_batch.resize(out_num); - + // predict_batch is the result of Last FC with softmax output_t->CopyToCpu(predict_batch.data()); auto inference_end = std::chrono::steady_clock::now(); inference_diff += inference_end - inference_start; @@ -98,9 +98,11 @@ void CRNNRecognizer::Run(std::vector img_list, float max_value = 0.0f; for (int n = 0; n < predict_shape[1]; n++) { + // get idx argmax_idx = int(Utility::argmax( &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); + // get score max_value = float(*std::max_element( &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]], &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]])); diff --git a/doc/doc_ch/FAQ.md b/doc/doc_ch/FAQ.md index 2dad829284806e199801ff3ac55d2e760c3e2583..a4437b8b783ca28fed60bb0a9648191e7ec4d0db 100644 --- a/doc/doc_ch/FAQ.md +++ b/doc/doc_ch/FAQ.md @@ -720,6 +720,13 @@ C++TensorRT预测需要使用支持TRT的预测库并在编译时打开[-DWITH_T 注:建议使用TensorRT大于等于6.1.0.5以上的版本。 +#### Q: 为什么识别模型做预测的时候,预测图片的数量数量还会影响预测的精度 +**A**: 推理时识别模型默认的batch_size=6, 如预测图片长度变化大,可能影响预测效果。如果出现上述问题可在推理的时候设置识别bs=1,命令如下: + +``` +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./ch_PP-OCRv3_rec_infer/" --rec_batch_num=1 +``` + ### 2.13 推理部署 diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 8457df69ff4c09b196b0f0f91271a92344217d75..5134422417fbd78e5635fea5dbede4f3516aeee5 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -217,6 +217,30 @@ python3 tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pre python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml -o Global.pretrained_model=./pretrain_models/en_PP-OCRv3_rec_train/best_accuracy ``` +正常启动训练后,会看到以下log输出: + +``` +[2022/02/22 07:58:05] root INFO: epoch: [1/800], iter: 10, lr: 0.000000, loss: 0.754281, acc: 0.000000, norm_edit_dis: 0.000008, reader_cost: 0.55541 s, batch_cost: 0.91654 s, samples: 1408, ips: 153.62133 +[2022/02/22 07:58:13] root INFO: epoch: [1/800], iter: 20, lr: 0.000001, loss: 0.924677, acc: 0.000000, norm_edit_dis: 0.000008, reader_cost: 0.00236 s, batch_cost: 0.28528 s, samples: 1280, ips: 448.68599 +[2022/02/22 07:58:23] root INFO: epoch: [1/800], iter: 30, lr: 0.000002, loss: 0.967231, acc: 0.000000, norm_edit_dis: 0.000008, reader_cost: 0.14527 s, batch_cost: 0.42714 s, samples: 1280, ips: 299.66507 +[2022/02/22 07:58:31] root INFO: epoch: [1/800], iter: 40, lr: 0.000003, loss: 0.895318, acc: 0.000000, norm_edit_dis: 0.000008, reader_cost: 0.00173 s, batch_cost: 0.27719 s, samples: 1280, ips: 461.77252 +``` + +log 中自动打印如下信息: + +| 字段 | 含义 | +| :----: | :------: | +| epoch | 当前迭代轮次 | +| iter | 当前迭代次数 | +| lr | 当前学习率 | +| loss | 当前损失函数 | +| acc | 当前batch的准确率 | +| norm_edit_dis | 当前 batch 的编辑距离 | +| reader_cost | 当前 batch 数据处理耗时 | +| batch_cost | 当前 batch 总耗时 | +| samples | 当前 batch 内的样本数 | +| ips | 每秒处理图片的数量 | + PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml` 中修改 `eval_batch_step` 设置评估频率,默认每500个iter评估一次。评估过程中默认将最佳acc模型,保存为 `output/en_PP-OCRv3_rec/best_accuracy` 。