+ std::vector<std::string>::iterator it;
+ for (it = this->label_list_.begin(); it != this->label_list_.end();) {
+ if (*it == "<td>") {
+ it = this->label_list_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+ // add_special_char
this->label_list_.insert(this->label_list_.begin(), this->beg);
this->label_list_.push_back(this->end);
}
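
For context, the loop added above erases every bare `<td>` entry from the label dictionary, so that with `merge_no_span_structure` enabled the decoder emits the combined `<td></td>` token for a no-span cell. A minimal standalone sketch of that rewrite (illustrative only; the appended token is assumed from the option's semantics, not shown in this hunk):

```cpp
#include <algorithm>
#include <string>
#include <vector>

// Illustrative sketch: fold the bare "<td>" token into the merged
// "<td></td>" token, as merge_no_span_structure implies.
std::vector<std::string> MergeNoSpanStructure(std::vector<std::string> labels) {
  labels.push_back("<td></td>");
  labels.erase(std::remove(labels.begin(), labels.end(), "<td>"),
               labels.end());
  return labels;
}
```
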
@@ -363,12 +376,12 @@ void TablePostProcessor::Run(
std::vector<float> &rec_scores, std::vector<int> &loc_preds_shape,
std::vector<int> &structure_probs_shape,
std::vector<std::vector<std::string>> &rec_html_tag_batch,
- std::vector<std::vector<std::vector<std::vector<int>>>> &rec_boxes_batch,
+ std::vector<std::vector<std::vector<int>>> &rec_boxes_batch,
std::vector<int> &width_list, std::vector<int> &height_list) {
for (int batch_idx = 0; batch_idx < structure_probs_shape[0]; batch_idx++) {
// image tags and boxs
std::vector<std::string> rec_html_tags;
- std::vector<std::vector<std::vector<int>>> rec_boxes;
+ std::vector<std::vector<int>> rec_boxes;
float score = 0.f;
int count = 0;
@@ -378,7 +391,7 @@ void TablePostProcessor::Run(
// step
for (int step_idx = 0; step_idx < structure_probs_shape[1]; step_idx++) {
std::string html_tag;
- std::vector<std::vector<int>> rec_box;
+ std::vector<int> rec_box;
// html tag
int step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
structure_probs_shape[2];
@@ -399,17 +412,19 @@ void TablePostProcessor::Run(
count += 1;
score += char_score;
rec_html_tags.push_back(html_tag);
+
// box
if (html_tag == "<td>" || html_tag == "<td></td>") {
- for (int point_idx = 0; point_idx < loc_preds_shape[2];
- point_idx += 2) {
- std::vector<int> point(2, 0);
+ for (int point_idx = 0; point_idx < loc_preds_shape[2]; point_idx++) {
step_start_idx = (batch_idx * structure_probs_shape[1] + step_idx) *
loc_preds_shape[2] +
point_idx;
- point[0] = int(loc_preds[step_start_idx] * width_list[batch_idx]);
- point[1] =
- int(loc_preds[step_start_idx + 1] * height_list[batch_idx]);
+ float point = loc_preds[step_start_idx];
+ if (point_idx % 2 == 0) {
+ point = int(point * width_list[batch_idx]);
+ } else {
+ point = int(point * height_list[batch_idx]);
+ }
rec_box.push_back(point);
}
rec_boxes.push_back(rec_box);
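
The reworked decoder above flattens each cell box into a single run of coordinates instead of nested (x, y) pairs, scaling even indices by image width and odd indices by image height. A self-contained sketch of that de-normalization (function and variable names are ours, not from the patch):

```cpp
#include <vector>

// Illustrative sketch: scale normalized coordinates to pixel space;
// even indices are x values (scaled by width), odd indices are y (by height).
std::vector<int> DecodeCellBox(const std::vector<float> &norm_coords,
                               int img_width, int img_height) {
  std::vector<int> box;
  box.reserve(norm_coords.size());
  for (size_t i = 0; i < norm_coords.size(); ++i) {
    int scale = (i % 2 == 0) ? img_width : img_height;
    box.push_back(static_cast<int>(norm_coords[i] * scale));
  }
  return box;
}
```
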
diff --git a/deploy/cpp_infer/src/structure_table.cpp b/deploy/cpp_infer/src/structure_table.cpp
index bbc32580e49d6ed7b29e3f0931eab0b0969b02b9..7df0ab94b5df8a62148ceb01f48b35d73b14f78c 100644
--- a/deploy/cpp_infer/src/structure_table.cpp
+++ b/deploy/cpp_infer/src/structure_table.cpp
@@ -20,7 +20,7 @@ void StructureTableRecognizer::Run(
std::vector<cv::Mat> img_list,
std::vector<std::vector<std::string>> &structure_html_tags,
std::vector<float> &structure_scores,
- std::vector<std::vector<std::vector<std::vector<int>>>> &structure_boxes,
+ std::vector<std::vector<std::vector<int>>> &structure_boxes,
std::vector<double> &times) {
std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
@@ -89,8 +89,7 @@ void StructureTableRecognizer::Run(
auto postprocess_start = std::chrono::steady_clock::now();
std::vector<std::vector<std::string>> structure_html_tag_batch;
std::vector<float> structure_score_batch;
- std::vector<std::vector<std::vector<std::vector<int>>>>
- structure_boxes_batch;
+ std::vector<std::vector<std::vector<int>>> structure_boxes_batch;
this->post_processor_.Run(loc_preds, structure_probs, structure_score_batch,
predict_shape0, predict_shape1,
structure_html_tag_batch, structure_boxes_batch,
diff --git a/deploy/cpp_infer/src/utility.cpp b/deploy/cpp_infer/src/utility.cpp
index 4bfc1d091d6124b10c79032beb702ba8727210fc..0e6ba17fc3bab5b5e005f8b5e41640899bee39d0 100644
--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
@@ -65,6 +65,37 @@ void Utility::VisualizeBboxes(const cv::Mat &srcimg,
<< std::endl;
}
+void Utility::VisualizeBboxes(const cv::Mat &srcimg,
+ const StructurePredictResult &structure_result,
+ const std::string &save_path) {
+ cv::Mat img_vis;
+ srcimg.copyTo(img_vis);
+ for (int n = 0; n < structure_result.cell_box.size(); n++) {
+ if (structure_result.cell_box[n].size() == 8) {
+ cv::Point rook_points[4];
+ for (int m = 0; m < structure_result.cell_box[n].size(); m += 2) {
+ rook_points[m / 2] =
+ cv::Point(int(structure_result.cell_box[n][m]),
+ int(structure_result.cell_box[n][m + 1]));
+ }
+ const cv::Point *ppt[1] = {rook_points};
+ int npt[] = {4};
+ cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
+ } else if (structure_result.cell_box[n].size() == 4) {
+ cv::Point rook_points[2];
+ rook_points[0] = cv::Point(int(structure_result.cell_box[n][0]),
+ int(structure_result.cell_box[n][1]));
+ rook_points[1] = cv::Point(int(structure_result.cell_box[n][2]),
+ int(structure_result.cell_box[n][3]));
+ cv::rectangle(img_vis, rook_points[0], rook_points[1], CV_RGB(0, 255, 0),
+ 2, 8, 0);
+ }
+ }
+
+ cv::imwrite(save_path, img_vis);
+ std::cout << "The table visualized image saved in " + save_path << std::endl;
+}
+
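
A call sketch for the new overload (header path, input image, and box values are hypothetical):

```cpp
#include <opencv2/opencv.hpp>
#include "include/utility.h" // assumed header for PaddleOCR::Utility

int main() {
  cv::Mat img = cv::imread("table.jpg"); // hypothetical input image
  PaddleOCR::StructurePredictResult res; // normally filled by the recognizer
  res.cell_box = {{10, 10, 200, 60}}; // one 4-value box: left, top, right, bottom
  PaddleOCR::Utility::VisualizeBboxes(img, res, "./table_vis.jpg");
  return 0;
}
```
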
// list all files under a directory
void Utility::GetAllFiles(const char *dir_name,
std::vector<std::string> &all_inputs) {
@@ -268,13 +299,46 @@ cv::Mat Utility::crop_image(cv::Mat &img, std::vector<int> &area) {
void Utility::sorted_boxes(std::vector<OCRPredictResult> &ocr_result) {
std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
-
- for (int i = 0; i < ocr_result.size() - 1; i++) {
- if (abs(ocr_result[i + 1].box[0][1] - ocr_result[i].box[0][1]) < 10 &&
- (ocr_result[i + 1].box[0][0] < ocr_result[i].box[0][0])) {
- std::swap(ocr_result[i], ocr_result[i + 1]);
+ if (ocr_result.size() > 0) {
+ for (int i = 0; i < ocr_result.size() - 1; i++) {
+ for (int j = i; j > 0; j--) {
+ if (abs(ocr_result[j + 1].box[0][1] - ocr_result[j].box[0][1]) < 10 &&
+ (ocr_result[j + 1].box[0][0] < ocr_result[j].box[0][0])) {
+ std::swap(ocr_result[i], ocr_result[i + 1]);
+ }
+ }
}
}
}
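
Note that the inner `j` loop above compares `ocr_result[j]` and `ocr_result[j + 1]` but, as committed, swaps indices `i` and `i + 1`. A conventional bubble-back pass would swap the compared pair; a sketch of that variant (our reading, not part of this patch; `OCRPredictResult` and `comparison_box` are the repo's existing types):

```cpp
#include <algorithm>
#include <cstdlib>
#include <vector>

// Illustrative variant: swap the pair that was actually compared (j, j + 1).
void SortedBoxesSketch(std::vector<OCRPredictResult> &ocr_result) {
  std::sort(ocr_result.begin(), ocr_result.end(), Utility::comparison_box);
  for (int i = 0; i < (int)ocr_result.size() - 1; i++) {
    for (int j = i; j >= 0; j--) {
      if (std::abs(ocr_result[j + 1].box[0][1] - ocr_result[j].box[0][1]) < 10 &&
          (ocr_result[j + 1].box[0][0] < ocr_result[j].box[0][0])) {
        std::swap(ocr_result[j], ocr_result[j + 1]);
      }
    }
  }
}
```
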
+std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<std::vector<int>> &box) {
+ int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
+ int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
+ int left = int(*std::min_element(x_collect, x_collect + 4));
+ int right = int(*std::max_element(x_collect, x_collect + 4));
+ int top = int(*std::min_element(y_collect, y_collect + 4));
+ int bottom = int(*std::max_element(y_collect, y_collect + 4));
+ std::vector<int> box1(4, 0);
+ box1[0] = left;
+ box1[1] = top;
+ box1[2] = right;
+ box1[3] = bottom;
+ return box1;
+}
+
+std::vector<int> Utility::xyxyxyxy2xyxy(std::vector<int> &box) {
+ int x_collect[4] = {box[0], box[2], box[4], box[6]};
+ int y_collect[4] = {box[1], box[3], box[5], box[7]};
+ int left = int(*std::min_element(x_collect, x_collect + 4));
+ int right = int(*std::max_element(x_collect, x_collect + 4));
+ int top = int(*std::min_element(y_collect, y_collect + 4));
+ int bottom = int(*std::max_element(y_collect, y_collect + 4));
+ std::vector<int> box1(4, 0);
+ box1[0] = left;
+ box1[1] = top;
+ box1[2] = right;
+ box1[3] = bottom;
+ return box1;
+}
+
} // namespace PaddleOCR
\ No newline at end of file
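
Both `xyxyxyxy2xyxy` overloads reduce a 4-point quadrilateral to its axis-aligned bounding box, returned as `{left, top, right, bottom}`. A quick usage sketch (include path and values are hypothetical):

```cpp
#include <iostream>
#include <vector>
#include "include/utility.h" // assumed header for PaddleOCR::Utility

int main() {
  // A rotated quad flattened as x1,y1,...,x4,y4 (hypothetical values).
  std::vector<int> quad = {10, 40, 90, 20, 100, 60, 20, 80};
  std::vector<int> rect = PaddleOCR::Utility::xyxyxyxy2xyxy(quad);
  // Prints "10 20 100 80": the enclosing {left, top, right, bottom}.
  std::cout << rect[0] << " " << rect[1] << " " << rect[2] << " " << rect[3]
            << std::endl;
  return 0;
}
```
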
diff --git a/deploy/lite/config.txt b/deploy/lite/config.txt
index dda0d2b0320544d3a82f59b0672c086c64d83d3d..404249323b6cb5de345438056a9a10abd64b38bc 100644
--- a/deploy/lite/config.txt
+++ b/deploy/lite/config.txt
@@ -5,4 +5,4 @@ det_db_unclip_ratio 1.6
det_db_use_dilate 0
det_use_polygon_score 1
use_direction_classify 1
-rec_image_height 32
\ No newline at end of file
+rec_image_height 48
\ No newline at end of file
diff --git a/deploy/lite/readme.md b/deploy/lite/readme.md
index a1bef8120e52dd91db0fda4ac2a4d91cc2800818..fc91cbfa7d69f6a8c1086243e4df3f820bd78339 100644
--- a/deploy/lite/readme.md
+++ b/deploy/lite/readme.md
@@ -99,6 +99,8 @@ The following table also provides a series of models that can be deployed on mob
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
+|PP-OCRv3|extra-lightweight chinese OCR optimized model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|extra-lightweight chinese OCR optimized model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|extra-lightweight chinese OCR optimized model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|extra-lightweight chinese OCR optimized model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
@@ -134,17 +136,16 @@ Introduction to paddle_lite_opt parameters:
The following takes the ultra-lightweight Chinese model of PaddleOCR as an example to introduce the use of the compiled opt file to complete the conversion of the inference model to the Paddle-Lite optimized model
```
-# [Recommendation] Download the Chinese and English inference models of PP-OCRv2
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
+# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert detection model
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert recognition model
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert angle classifier model
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
-
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
After the conversion succeeds, additional files ending with `.nb` will appear in the inference model directory; these are the successfully converted model files.
@@ -197,15 +198,15 @@ Some preparatory work is required first.
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
-Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, copy the image file to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_det_mv3_db_opt.nb, ch_rec_mv3_crnn_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
+Prepare the test image, taking PaddleOCR/doc/imgs/11.jpg as an example, and copy it to the demo/cxx/ocr/debug/ folder. Prepare the model files optimized by the lite opt tool, ch_PP-OCRv3_det_slim_opt.nb and ch_PP-OCRv3_rec_slim_opt.nb, and place them under the demo/cxx/ocr/debug/ folder.
The structure of the OCR demo is as follows after the above command is executed:
```
demo/cxx/ocr/
|-- debug/
-| |--ch_PP-OCRv2_det_slim_opt.nb Detection model
-| |--ch_PP-OCRv2_rec_slim_opt.nb Recognition model
+| |--ch_PP-OCRv3_det_slim_opt.nb Detection model
+| |--ch_PP-OCRv3_rec_slim_opt.nb Recognition model
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Text direction classification model
| |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Dictionary file
@@ -240,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction,
det_db_box_thresh 0.5 # DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box, the smaller the value, the closer the text box to the text
use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
-rec_image_height 32 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
+rec_image_height 48 # The height of the input image of the recognition model, the PP-OCRv3 model needs to be set to 48, and the PP-OCRv2 model needs to be set to 32
```
5. Run Model on phone
@@ -260,14 +261,14 @@ After the above steps are completed, you can use adb to push the file to the pho
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The use of ocr_db_crnn is:
# ./ocr_db_crnn Mode Detection model file Recognition model file Orientation classifier model file Hardware Precision Threads Batchsize Test image path Config file path Dictionary file path Benchmark flag
- ./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
+ ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# precision can be INT8 for a quantized model or FP32 for a normal model.
# Only using detection model
-./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
+./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# Only using recognition model
-./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
If you modify the code, you need to recompile and push to the phone.
diff --git a/deploy/lite/readme_ch.md b/deploy/lite/readme_ch.md
index 0793827fe647c470944fc36e2b243c8f7e704e99..78e2510917e0fd85c4a724ec74eccb0b7cfc6118 100644
--- a/deploy/lite/readme_ch.md
+++ b/deploy/lite/readme_ch.md
@@ -97,6 +97,8 @@ Paddle-Lite provides multiple strategies to automatically optimize the original model, including
|Version|Introduction|Model size|Detection model|Text Direction model|Recognition model|Paddle-Lite branch|
|---|---|---|---|---|---|---|
+|PP-OCRv3|distilled ultra-lightweight Chinese OCR mobile model|16.2M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.nb)|v2.10|
+|PP-OCRv3(slim)|distilled ultra-lightweight Chinese OCR mobile model|5.9M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb)|v2.10|
|PP-OCRv2|distilled ultra-lightweight Chinese OCR mobile model|11M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_infer_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_infer_opt.nb)|v2.10|
|PP-OCRv2(slim)|distilled ultra-lightweight Chinese OCR mobile model|4.6M|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_det_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_ppocr_mobile_v2.0_cls_slim_opt.nb)|[download link](https://paddleocr.bj.bcebos.com/PP-OCRv2/lite/ch_PP-OCRv2_rec_slim_opt.nb)|v2.10|
@@ -131,16 +133,16 @@ paddle_lite_opt 参数介绍:
The following takes PaddleOCR's ultra-lightweight Chinese model as an example to show how to use the compiled opt tool to convert an inference model into a Paddle-Lite optimized model.
```
-# [Recommendation] Download the Chinese and English inference models of PP-OCRv2
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar && tar xf ch_PP-OCRv2_det_slim_quant_infer.tar
-wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar && tar xf ch_PP-OCRv2_rec_slim_quant_infer.tar
+# [Recommendation] Download the Chinese and English inference models of PP-OCRv3
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar && tar xf ch_PP-OCRv3_det_slim_infer.tar
+wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar && tar xf ch_PP-OCRv3_rec_slim_infer.tar
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls_slim_infer.tar && tar xf ch_ppocr_mobile_v2.0_cls_slim_infer.tar
# Convert detection model
-./opt --model_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_det_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_det_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_det_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_det_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert recognition model
-./opt --model_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdmodel --param_file=./ch_PP-OCRv2_rec_slim_quant_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv2_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdmodel --param_file=./ch_PP-OCRv3_rec_slim_infer/inference.pdiparams --optimize_out=./ch_PP-OCRv3_rec_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
# Convert angle classifier model
-./opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
+paddle_lite_opt --model_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdmodel --param_file=./ch_ppocr_mobile_v2.0_cls_slim_infer/inference.pdiparams --optimize_out=./ch_ppocr_mobile_v2.0_cls_slim_opt --valid_targets=arm --optimize_out_type=naive_buffer
```
@@ -194,15 +196,15 @@ wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_cls
```
Prepare the test image, taking `PaddleOCR/doc/imgs/11.jpg` as an example, and copy it to the `demo/cxx/ocr/debug/` folder.
- Prepare the model files optimized by the lite opt tool, e.g. `ch_PP-OCRv2_det_slim_opt.ch_PP-OCRv2_rec_slim_rec.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`, and place them under the `demo/cxx/ocr/debug/` folder.
+ Prepare the model files optimized by the lite opt tool, e.g. `ch_PP-OCRv3_det_slim_opt.nb, ch_PP-OCRv3_rec_slim_opt.nb, ch_ppocr_mobile_v2.0_cls_slim_opt.nb`, and place them under the `demo/cxx/ocr/debug/` folder.
After the execution completes, the ocr folder will contain the following files:
```
demo/cxx/ocr/
|-- debug/
-| |--ch_PP-OCRv2_det_slim_opt.nb Optimized detection model file
-| |--ch_PP-OCRv2_rec_slim_opt.nb Optimized recognition model file
+| |--ch_PP-OCRv3_det_slim_opt.nb Optimized detection model file
+| |--ch_PP-OCRv3_rec_slim_opt.nb Optimized recognition model file
| |--ch_ppocr_mobile_v2.0_cls_slim_opt.nb Optimized text direction classifier model file
| |--11.jpg Image for OCR
| |--ppocr_keys_v1.txt Chinese dictionary file
@@ -239,7 +241,7 @@ det_db_thresh 0.3 # Used to filter the binarized image of DB prediction, set to 0.
det_db_box_thresh 0.5 # DB post-processing filter box threshold; if boxes are missed, it can be reduced as appropriate
det_db_unclip_ratio 1.6 # Indicates the compactness of the text box; the smaller the value, the closer the text box is to the text
use_direction_classify 0 # Whether to use the direction classifier, 0 means not to use, 1 means to use
-rec_image_height 32 # The height of the recognition model's input image; set to 48 for PP-OCRv3 and 32 for PP-OCRv2
+rec_image_height 48 # The height of the recognition model's input image; set to 48 for PP-OCRv3 and 32 for PP-OCRv2
```
5. Start debugging
@@ -259,13 +261,13 @@ rec_image_height 32 # The height of the recognition model's input image; PP-OCRv3 model
export LD_LIBRARY_PATH=${PWD}:$LD_LIBRARY_PATH
# The ocr_db_crnn executable is used as follows:
# ./ocr_db_crnn Mode Detection model file Recognition model file Orientation classifier model file Hardware Precision Threads Batchsize Test image path Config file path Dictionary file path Benchmark flag
- ./ocr_db_crnn system ch_PP-OCRv2_det_slim_opt.nb ch_PP-OCRv2_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
+ ./ocr_db_crnn system ch_PP-OCRv3_det_slim_opt.nb ch_PP-OCRv3_rec_slim_opt.nb ch_ppocr_mobile_v2.0_cls_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt ppocr_keys_v1.txt True
# Use only the text detection model, as follows:
-./ocr_db_crnn det ch_PP-OCRv2_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
+./ocr_db_crnn det ch_PP-OCRv3_det_slim_opt.nb arm8 INT8 10 1 ./11.jpg config.txt
# Use only the text recognition model, as follows:
-./ocr_db_crnn rec ch_PP-OCRv2_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
+./ocr_db_crnn rec ch_PP-OCRv3_rec_slim_opt.nb arm8 INT8 10 1 word_1.jpg ppocr_keys_v1.txt config.txt
```
If you modify the code, you need to recompile and push it to the phone.
diff --git a/deploy/slim/quantization/README.md b/deploy/slim/quantization/README.md
index 4c1d784b99aade614d78b4bd6fb20afef15f0f6f..8b29693c9803f004f123b5497c9224ae5c31041d 100644
--- a/deploy/slim/quantization/README.md
+++ b/deploy/slim/quantization/README.md
@@ -22,7 +22,7 @@
### 1. Install PaddleSlim
```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
```
### 2. Prepare the trained model
@@ -32,18 +32,7 @@ PaddleOCR provides a series of trained [models](../../../doc/doc_ch/models_list.
### 3. Quantization training
Quantization training includes offline quantization training and online quantization training. Online quantization training is more effective; it requires loading a pre-trained model, and the model can be quantized once the quantization strategy is defined.
-
-The code for quantization training is located in slim/quantization/quant.py. For example, to train a detection model, the training command is as follows:
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
-
-# For example, download the provided trained model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
-```
-
-Model distillation and model quantization can be used at the same time; taking the PPOCRv3 detection model as an example:
+The code for quantization training is located in slim/quantization/quant.py. For example, to train a detection model, taking the PPOCRv3 detection model as an example, the training command is as follows:
```
# Download the detection pretrained model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -58,7 +47,7 @@ python deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_
After obtaining the model saved by quantization training, we can export it as an inference model for deployment:
```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
### 5. Deploy the quantized model
diff --git a/deploy/slim/quantization/README_en.md b/deploy/slim/quantization/README_en.md
index c6796ae9dc256496308e432023c45ef1026c3d92..f82c3d844e292ee76b95624f7632ed40301e5a4c 100644
--- a/deploy/slim/quantization/README_en.md
+++ b/deploy/slim/quantization/README_en.md
@@ -25,7 +25,7 @@ After training, if you want to further compress the model size and accelerate th
### 1. Install PaddleSlim
```bash
-pip3 install paddleslim==2.2.2
+pip3 install paddleslim==2.3.2
```
@@ -39,18 +39,7 @@ Quantization training includes offline quantization training and online quantiza
Online quantization training is more effective. It is necessary to load the pre-trained model.
After the quantization strategy is defined, the model can be quantized.
-The code for quantization training is located in `slim/quantization/quant.py`. For example, to train a detection model, the training instructions are as follows:
-```bash
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model='your trained model' Global.save_model_dir=./output/quant_model
-
-# download provided model
-wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
-tar -xf ch_ppocr_mobile_v2.0_det_train.tar
-python deploy/slim/quantization/quant.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./ch_ppocr_mobile_v2.0_det_train/best_accuracy Global.save_model_dir=./output/quant_model
-```
-
-
-Model distillation and model quantization can be used at the same time, taking the PPOCRv3 detection model as an example:
+The code for quantization training is located in `slim/quantization/quant.py`. For example, the training command for the slim PPOCRv3 detection model is as follows:
```
# download provided model
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar
@@ -66,7 +55,7 @@ If you want to quantify the text recognition model, you can modify the configura
Once we have the model after quantization and fine-tuning, we can export it as an inference model for deployment of prediction tasks:
```bash
-python deploy/slim/quantization/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
+python deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.checkpoints=output/quant_model/best_accuracy Global.save_inference_dir=./output/quant_inference_model
```
### 5. Deploy
diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py
index fd1c3e5e109667fa74f5ade18b78f634e4d325db..bd132b625181cab853961efd2e2c38c411e9edf4 100755
--- a/deploy/slim/quantization/export_model.py
+++ b/deploy/slim/quantization/export_model.py
@@ -151,17 +151,24 @@ def main():
arch_config = config["Architecture"]
- arch_config = config["Architecture"]
+ if arch_config["algorithm"] == "SVTR" and arch_config["Head"][
+ "name"] != 'MultiHead':
+ input_shape = config["Eval"]["dataset"]["transforms"][-2][
+ 'SVTRRecResizeImg']['image_shape']
+ else:
+ input_shape = None
if arch_config["algorithm"] in ["Distillation", ]: # distillation model
archs = list(arch_config["Models"].values())
for idx, name in enumerate(model.model_name_list):
sub_model_save_path = os.path.join(save_path, name, "inference")
export_single_model(model.model_list[idx], archs[idx],
- sub_model_save_path, logger, quanter)
+ sub_model_save_path, logger, input_shape,
+ quanter)
else:
save_path = os.path.join(save_path, "inference")
- export_single_model(model, arch_config, save_path, logger, quanter)
+ export_single_model(model, arch_config, save_path, logger, input_shape,
+ quanter)
if __name__ == "__main__":
diff --git a/doc/doc_en/algorithm_sdmgr_en.md b/doc/doc_en/algorithm_sdmgr_en.md
index 7b3752223dd765e780d56d146c90bd0f892aac7b..4a10ec7dea5e942c5991991eef598d970c189d1b 100644
--- a/doc/doc_en/algorithm_sdmgr_en.md
+++ b/doc/doc_en/algorithm_sdmgr_en.md
@@ -31,7 +31,7 @@ The prediction result is saved as `./output/sdmgr_kie/predicts_kie.txt`, and the
The visualization results are shown in the figure below:
-
+
## 2. Model Training
diff --git a/doc/doc_en/config_en.md b/doc/doc_en/config_en.md
index d467a7f918ed57eb80754483715f3671fd2552c7..ea5c1a472e9cc45bdededc97475a7c423dff1047 100644
--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
@@ -28,7 +28,7 @@ Take rec_chinese_lite_train_v2.0.yml as an example
| epoch_num | Maximum training epoch number | 500 | \ |
| log_smooth_window | Log queue length, the median value in the queue each time will be printed | 20 | \ |
| print_batch_step | Set print log interval | 10 | \ |
-| save_model_dir | Set model save path | output/{算法名称} | \ |
+| save_model_dir | Set model save path | output/{algorithm_name} | \ |
| save_epoch_step | Set model save interval | 3 | \ |
| eval_batch_step | Set the model evaluation interval | 2000 or [1000, 2000] | running evaluation every 2000 iters or evaluation is run every 2000 iterations after the 1000th iteration |
| cal_metric_during_train | Set whether to evaluate the metric during the training process. At this time, the metric of the model under the current batch is evaluated | true | \ |
@@ -245,4 +245,4 @@ For more supported languages, please refer to : [Multi-language model](https://g
The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded using the following two methods.
* [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA), extraction code: frgi.
-* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view)
\ No newline at end of file
+* [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view)
diff --git a/ppocr/losses/rec_aster_loss.py b/ppocr/losses/rec_aster_loss.py
index 52605e46db35339cc22f7f1e6642456bfaf02f11..9927fbc043f2af146e51cbb9a549f1dffc980341 100644
--- a/ppocr/losses/rec_aster_loss.py
+++ b/ppocr/losses/rec_aster_loss.py
@@ -27,10 +27,9 @@ class CosineEmbeddingLoss(nn.Layer):
self.epsilon = 1e-12
def forward(self, x1, x2, target):
- similarity = paddle.sum(
- x1 * x2, dim=-1) / (paddle.norm(
- x1, axis=-1) * paddle.norm(
- x2, axis=-1) + self.epsilon)
+ similarity = paddle.sum(x1 * x2, axis=-1) / (paddle.norm(
+ x1, axis=-1) * paddle.norm(
+ x2, axis=-1) + self.epsilon)
one_list = paddle.full_like(target, fill_value=1)
out = paddle.mean(
paddle.where(
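
The corrected call computes cosine similarity along the last axis with an epsilon guard against zero norms. The same computation in scalar form for one vector pair (a sketch independent of Paddle):

```cpp
#include <cmath>
#include <vector>

// Illustrative sketch of sum(x1*x2) / (||x1|| * ||x2|| + epsilon).
float CosineSimilarity(const std::vector<float> &a,
                       const std::vector<float> &b, float epsilon = 1e-12f) {
  float dot = 0.f, norm_a = 0.f, norm_b = 0.f;
  for (size_t i = 0; i < a.size(); ++i) {
    dot += a[i] * b[i];
    norm_a += a[i] * a[i];
    norm_b += b[i] * b[i];
  }
  return dot / (std::sqrt(norm_a) * std::sqrt(norm_b) + epsilon);
}
```
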
diff --git a/ppstructure/utility.py b/ppstructure/utility.py
index bdea0af69e37e15d1f191b2a86c036ae1c2b1e45..97b6d6fec0d70fe3014b0b2105dbbef6a292e4d7 100644
--- a/ppstructure/utility.py
+++ b/ppstructure/utility.py
@@ -32,7 +32,7 @@ def init_args():
parser.add_argument(
"--table_char_dict_path",
type=str,
- default="../ppocr/utils/dict/table_structure_dict.txt")
+ default="../ppocr/utils/dict/table_structure_dict_ch.txt")
# params for layout
parser.add_argument("--layout_model_dir", type=str)
parser.add_argument(
diff --git a/tools/infer/utility.py b/tools/infer/utility.py
index 9baf66d7f469a3bf6c9a140e034aee3a635a5c8e..e6adad3dd8c2d57775ab5f7fa489dca98d22eb3d 100644
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -38,7 +38,6 @@ def init_args():
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--min_subgraph_size", type=int, default=15)
- parser.add_argument("--shape_info_filename", type=str, default=None)
parser.add_argument("--precision", type=str, default="fp32")
parser.add_argument("--gpu_mem", type=int, default=500)
@@ -226,23 +225,22 @@ def create_predictor(args, mode, logger):
use_calib_mode=False)
# collect shape
- if args.shape_info_filename is not None:
- if not os.path.exists(args.shape_info_filename):
- config.collect_shape_range_info(
- args.shape_info_filename)
- logger.info(
- f"collect dynamic shape info into : {args.shape_info_filename}"
- )
- else:
- logger.info(
- f"dynamic shape info file( {args.shape_info_filename} ) already exists, not need to generate again."
- )
- config.enable_tuned_tensorrt_dynamic_shape(
- args.shape_info_filename, True)
+ trt_shape_f = os.path.join(model_dir, f"{mode}_trt_dynamic_shape.txt")
+
+ if not os.path.exists(trt_shape_f):
+ config.collect_shape_range_info(trt_shape_f)
+ logger.info(
+ f"collect dynamic shape info into : {trt_shape_f}")
else:
logger.info(
- f"when using tensorrt, dynamic shape is a suggested option, you can use '--shape_info_filename=shape.txt' for offline dygnamic shape tuning"
+ f"dynamic shape info file( {trt_shape_f} ) already exists, not need to generate again."
)
+ try:
+ config.enable_tuned_tensorrt_dynamic_shape(trt_shape_f,
+ True)
+ except Exception as E:
+ logger.info(E)
+ logger.info("Please keep your paddlepaddle-gpu >= 2.3.0!")
elif args.use_xpu:
config.enable_xpu(10 * 1024 * 1024)
|