diff --git a/PPOCRLabel/README.md b/PPOCRLabel/README.md index 21db1867aa6b6504595096de56b17f01dbf3e4f6..0a1a7536912b6591517050bfa62260aaef6077cd 100644 --- a/PPOCRLabel/README.md +++ b/PPOCRLabel/README.md @@ -86,15 +86,8 @@ PPOCRLabel # [Normal mode] for [detection + recognition] labeling PPOCRLabel --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling ``` -#### 1.2.2 Build and Install the Whl Package Locally - -```bash -cd PaddleOCR/PPOCRLabel -python3 setup.py bdist_wheel -pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -``` - -#### 1.2.3 Run PPOCRLabel by Python Script +#### 1.2.2 Run PPOCRLabel by Python Script +If you modify the PPOCRLabel files (for example, to specify a new built-in model), running the Python script is the most convenient way to see the results. If you still want to start PPOCRLabel from the whl package, you need to uninstall the whl package from the current environment and then rebuild it as described in the next section. ```bash cd ./PPOCRLabel # Switch to the PPOCRLabel directory @@ -104,6 +97,13 @@ python PPOCRLabel.py # [Normal mode] for [detection + recognition] labeling python PPOCRLabel.py --kie True # [KIE mode] for [detection + recognition + keyword extraction] labeling ``` +#### 1.2.3 Build and Install the Whl Package Locally +Build and install a new whl package. Here 1.0.2 is the version number; you can specify a new version in `setup.py`. +```bash +cd PaddleOCR/PPOCRLabel +python3 setup.py bdist_wheel +pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl +``` ## 2. 
Usage diff --git a/PPOCRLabel/README_ch.md b/PPOCRLabel/README_ch.md index f9c736d56e0b1b2a9b0a270149404c6afd4ec2bf..99c088de83f9cba775733a0473b50596683c47ab 100644 --- a/PPOCRLabel/README_ch.md +++ b/PPOCRLabel/README_ch.md @@ -88,7 +88,7 @@ PPOCRLabel --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+ #### 1.2.2 通过Python脚本运行PPOCRLabel -如果您对PPOCRLabel文件有所更改(例如指定新的内置模型),通过Python脚本运行会更加方面的看到更改的结果。如果仍然需要通过whl包启动,则需要参考下节重新编译whl包。 +如果您对PPOCRLabel文件有所更改(例如指定新的内置模型),通过Python脚本运行会更加方便的看到更改的结果。如果仍然需要通过whl包启动,则需要先卸载当前环境中的whl包,然后参考下节重新编译whl包。 ```bash cd ./PPOCRLabel # 切换到PPOCRLabel目录 @@ -100,11 +100,9 @@ python PPOCRLabel.py --lang ch 编译与安装新的whl包,其中1.0.2为版本号,可在 `setup.py` 中指定新版本。 ```bash -cd ./PPOCRLabel # 切换到PPOCRLabel目录 - -# 选择标签模式来启动 -python PPOCRLabel.py --lang ch # 启动【普通模式】,用于打【检测+识别】场景的标签 -python PPOCRLabel.py --lang ch --kie True # 启动 【KIE 模式】,用于打【检测+识别+关键字提取】场景的标签 +cd PaddleOCR/PPOCRLabel +python3 setup.py bdist_wheel +pip3 install dist/PPOCRLabel-1.0.2-py2.py3-none-any.whl -i https://mirror.baidu.com/pypi/simple ``` diff --git a/deploy/cpp_infer/include/ocr_det.h b/deploy/cpp_infer/include/ocr_det.h index 6e4086fbaa6945b9f685e6844b7e701283de2dae..24392495348c8d85350e6b34725a33c76f0415f1 100644 --- a/deploy/cpp_infer/include/ocr_det.h +++ b/deploy/cpp_infer/include/ocr_det.h @@ -45,8 +45,9 @@ public: const double &det_db_thresh, const double &det_db_box_thresh, const double &det_db_unclip_ratio, - const bool &use_polygon_score, const bool &use_dilation, - const bool &use_tensorrt, const std::string &precision) { + const std::string &det_db_score_mode, + const bool &use_dilation, const bool &use_tensorrt, + const std::string &precision) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; @@ -58,7 +59,7 @@ public: this->det_db_thresh_ = det_db_thresh; this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_unclip_ratio_ = det_db_unclip_ratio; - this->use_polygon_score_ = use_polygon_score; + this->det_db_score_mode_ = det_db_score_mode; 
this->use_dilation_ = use_dilation; this->use_tensorrt_ = use_tensorrt; @@ -88,7 +89,7 @@ private: double det_db_thresh_ = 0.3; double det_db_box_thresh_ = 0.5; double det_db_unclip_ratio_ = 2.0; - bool use_polygon_score_ = false; + std::string det_db_score_mode_ = "slow"; bool use_dilation_ = false; bool visualize_ = true; diff --git a/deploy/cpp_infer/include/postprocess_op.h b/deploy/cpp_infer/include/postprocess_op.h index b384b79b3041bfcb96f042c6450d3c6e54f00498..4a98b151bdcc53e2ab3fbda1dca55dd9746bd86c 100644 --- a/deploy/cpp_infer/include/postprocess_op.h +++ b/deploy/cpp_infer/include/postprocess_op.h @@ -56,7 +56,7 @@ public: std::vector>> BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const float &det_db_unclip_ratio, - const bool &use_polygon_score); + const std::string &det_db_score_mode); std::vector>> FilterTagDetRes(std::vector>> boxes, diff --git a/deploy/cpp_infer/readme.md b/deploy/cpp_infer/readme.md index e7104881027b111de6821af8244ea2a6092fc14b..d269cc9e8fdfd68700e32c14816dc3b313bfad23 100644 --- a/deploy/cpp_infer/readme.md +++ b/deploy/cpp_infer/readme.md @@ -267,7 +267,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir |det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显| |det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小| |det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本| -|use_polygon_score|bool|false|是否使用多边形框计算bbox score,false表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| +|det_db_score_mode|string|slow|slow:使用多边形框计算bbox score,fast:使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| |visualize|bool|true|是否对结果进行可视化,为1时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。| - 方向分类器相关 diff --git a/deploy/cpp_infer/readme_en.md b/deploy/cpp_infer/readme_en.md index 61d65095394a9f5b7323bf8eb7324cd1e91b1346..b7687a9f6530976655a99690e9be02a04bbce6d3 100644 --- a/deploy/cpp_infer/readme_en.md +++ b/deploy/cpp_infer/readme_en.md @@ -260,7 +260,7 @@ More parameters are as follows, |det_db_thresh|float|0.3|Used to filter the binarized image of DB 
prediction, setting 0.-0.3 has no obvious effect on the result| |det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate| |det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text| -|use_polygon_score|bool|false|Whether to use polygon box to calculate bbox score, false means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.| +|det_db_score_mode|string|slow| slow: use polygon box to calculate bbox score, fast: use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.| |visualize|bool|true|Whether to visualize the results,when it is set as true, the prediction results will be saved in the folder specified by the `output` field on an image with the same name as the input image.| - Classifier related parameters diff --git a/deploy/cpp_infer/include/clipper.cpp b/deploy/cpp_infer/src/clipper.cpp similarity index 100% rename from deploy/cpp_infer/include/clipper.cpp rename to deploy/cpp_infer/src/clipper.cpp diff --git a/deploy/cpp_infer/src/main.cpp b/deploy/cpp_infer/src/main.cpp index efc1e50ce929b4f68dff3437faa05b9ac46c2aa0..046a68a9692c6bbaf582924c688249b0bc366b97 100644 --- a/deploy/cpp_infer/src/main.cpp +++ b/deploy/cpp_infer/src/main.cpp @@ -36,25 +36,26 @@ #include "auto_log/autolog.h" #include +// common args DEFINE_bool(use_gpu, false, "Infering with GPU or CPU."); +DEFINE_bool(use_tensorrt, false, "Whether use tensorrt."); DEFINE_int32(gpu_id, 0, "Device id of GPU to execute."); DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU."); DEFINE_int32(cpu_threads, 10, "Num of threads with CPU."); DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU."); -DEFINE_bool(use_tensorrt, false, "Whether use tensorrt."); DEFINE_string(precision, "fp32", 
"Precision be one of fp32/fp16/int8"); DEFINE_bool(benchmark, false, "Whether use benchmark."); DEFINE_string(output, "./output/", "Save benchmark log path."); -// detection related DEFINE_string(image_dir, "", "Dir of input image."); +DEFINE_bool(visualize, true, "Whether show the detection results."); +// detection related DEFINE_string(det_model_dir, "", "Path of det inference model."); DEFINE_int32(max_side_len, 960, "max_side_len of input image."); DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh."); DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh."); DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio."); -DEFINE_bool(use_polygon_score, false, "Whether use polygon score."); DEFINE_bool(use_dilation, false, "Whether use the dilation on output map."); -DEFINE_bool(visualize, true, "Whether show the detection results."); +DEFINE_string(det_db_score_mode, "slow", "Whether use polygon score."); // classification related DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls."); DEFINE_string(cls_model_dir, "", "Path of cls inference model."); @@ -85,7 +86,7 @@ int main_det(std::vector cv_all_img_names) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_use_dilation, + FLAGS_det_db_score_mode, FLAGS_use_dilation, FLAGS_use_tensorrt, FLAGS_precision); if (!PathExists(FLAGS_output)) { @@ -117,13 +118,21 @@ int main_det(std::vector cv_all_img_names) { time_info[2] += det_times[2]; if (FLAGS_benchmark) { - cout << cv_all_img_names[i] << '\t'; + cout << cv_all_img_names[i] << "\t["; for (int n = 0; n < boxes.size(); n++) { + cout << '['; for (int m = 0; m < boxes[n].size(); m++) { - cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' '; + cout << '[' << boxes[n][m][0] << ',' << boxes[n][m][1] << "]"; + if (m != boxes[n].size() - 1) { + cout << ','; + } + } + cout << ']'; + if 
(n != boxes.size() - 1) { + cout << ','; } } - cout << endl; + cout << ']' << endl; } } @@ -140,8 +149,6 @@ int main_rec(std::vector cv_all_img_names) { std::vector time_info = {0, 0, 0}; std::string rec_char_dict_path = FLAGS_rec_char_dict_path; - if (FLAGS_benchmark) - rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6); cout << "label file: " << rec_char_dict_path << endl; CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, @@ -194,7 +201,7 @@ int main_system(std::vector cv_all_img_names) { FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio, - FLAGS_use_polygon_score, FLAGS_use_dilation, + FLAGS_det_db_score_mode, FLAGS_use_dilation, FLAGS_use_tensorrt, FLAGS_precision); Classifier *cls = nullptr; @@ -205,8 +212,6 @@ int main_system(std::vector cv_all_img_names) { } std::string rec_char_dict_path = FLAGS_rec_char_dict_path; - if (FLAGS_benchmark) - rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6); cout << "label file: " << rec_char_dict_path << endl; CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, diff --git a/deploy/cpp_infer/src/ocr_det.cpp b/deploy/cpp_infer/src/ocr_det.cpp index d72dc40cddb0845c370f5ad4bb9b6e2f6fe0bf2f..ff7164ca24bec74e1334ed9237dd33d813470c94 100644 --- a/deploy/cpp_infer/src/ocr_det.cpp +++ b/deploy/cpp_infer/src/ocr_det.cpp @@ -161,7 +161,7 @@ void DBDetector::Run(cv::Mat &img, boxes = post_processor_.BoxesFromBitmap( pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_, - this->use_polygon_score_); + this->det_db_score_mode_); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); auto postprocess_end = std::chrono::steady_clock::now(); diff --git a/deploy/cpp_infer/src/postprocess_op.cpp b/deploy/cpp_infer/src/postprocess_op.cpp index c3985572048155cf5aca57c95f1d8a816658ef13..5374fb1a4eba68d8055a52ec91d97c290832aa9d 100644 --- 
a/deploy/cpp_infer/src/postprocess_op.cpp +++ b/deploy/cpp_infer/src/postprocess_op.cpp @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include -#include namespace PaddleOCR { @@ -187,23 +187,22 @@ float PostProcessor::PolygonScoreAcc(std::vector contour, cv::Mat mask; mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); + cv::Point *rook_point = new cv::Point[contour.size()]; - cv::Point* rook_point = new cv::Point[contour.size()]; - for (int i = 0; i < contour.size(); ++i) { rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); } const cv::Point *ppt[1] = {rook_point}; int npt[] = {int(contour.size())}; - cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); cv::Mat croppedImg; - pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg); + pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)) + .copyTo(croppedImg); float score = cv::mean(croppedImg, mask)[0]; - delete []rook_point; + delete[] rook_point; return score; } @@ -247,7 +246,7 @@ float PostProcessor::BoxScoreFast(std::vector> box_array, std::vector>> PostProcessor::BoxesFromBitmap( const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, - const float &det_db_unclip_ratio, const bool &use_polygon_score) { + const float &det_db_unclip_ratio, const std::string &det_db_score_mode) { const int min_size = 3; const int max_candidates = 1000; @@ -281,7 +280,7 @@ std::vector>> PostProcessor::BoxesFromBitmap( } float score; - if (use_polygon_score) + if (det_db_score_mode == "slow") /* compute using polygon*/ score = PolygonScoreAcc(contours[_i], pred); else diff --git a/deploy/hubserving/ocr_rec/params.py b/deploy/hubserving/ocr_rec/params.py index 70b50dd4d680f744dca5cf1cbe0ebe8f0984d93a..09bdeeb3c62abe3a1d197719b79d4f523ff5e5e1 100644 --- a/deploy/hubserving/ocr_rec/params.py +++ b/deploy/hubserving/ocr_rec/params.py @@ -29,8 +29,7 @@ def read_params(): 
cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/" cfg.rec_image_shape = "3, 32, 320" - cfg.rec_char_type = 'ch' - cfg.rec_batch_num = 30 + cfg.rec_batch_num = 6 cfg.max_text_length = 25 cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt" diff --git a/deploy/hubserving/ocr_system/params.py b/deploy/hubserving/ocr_system/params.py index 6d74294438cfbc83a8445f994585e7d82ada5f7f..9972a3ded83589e7552b308c59b9dc09a9a4399b 100755 --- a/deploy/hubserving/ocr_system/params.py +++ b/deploy/hubserving/ocr_system/params.py @@ -47,8 +47,7 @@ def read_params(): cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/" cfg.rec_image_shape = "3, 32, 320" - cfg.rec_char_type = 'ch' - cfg.rec_batch_num = 30 + cfg.rec_batch_num = 6 cfg.max_text_length = 25 cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt" diff --git a/deploy/hubserving/readme.md b/deploy/hubserving/readme.md index 22699d7122faaab2cdeacad40dff3bbc9f981b03..ab6dbeff749beb5ddb14d116f2d3580ad074d337 100755 --- a/deploy/hubserving/readme.md +++ b/deploy/hubserving/readme.md @@ -188,7 +188,7 @@ hub serving start -c deploy/hubserving/ocr_system/config.json - **output**:可视化结果保存路径,默认为`./hubserving_result` 访问示例: -```python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir./doc/imgs/ --visualize=false``` +```python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false``` ## 4. 
返回结果格式说明 返回结果为列表(list),列表中的每一项为词典(dict),词典一共可能包含3种字段,信息如下: diff --git a/deploy/hubserving/readme_en.md b/deploy/hubserving/readme_en.md index b32e6aa822c55771bbebdf49bb81b9c9202279f5..8b99796a257f45d48cf3e0386c741ec798ee23e0 100755 --- a/deploy/hubserving/readme_en.md +++ b/deploy/hubserving/readme_en.md @@ -196,7 +196,7 @@ For example, if using the configuration file to start the text angle classificat **Eg.** ```shell -python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir./doc/imgs/ --visualize=false` +python tools/test_hubserving.py --server_url=http://127.0.0.1:8868/predict/ocr_system --image_dir=./doc/imgs/ --visualize=false ``` ## 4. Returned result format diff --git a/deploy/hubserving/structure_table/params.py b/deploy/hubserving/structure_table/params.py index cc1a73687b22e73346addb35e702254ef67ee8db..9632c2f70b794854d191e9f088f3f2e301a5dbb3 100755 --- a/deploy/hubserving/structure_table/params.py +++ b/deploy/hubserving/structure_table/params.py @@ -25,7 +25,6 @@ def read_params(): # params for table structure model cfg.table_max_len = 488 cfg.table_model_dir = './inference/en_ppocr_mobile_v2.0_table_structure_infer/' - cfg.table_char_type = 'en' cfg.table_char_dict_path = './ppocr/utils/dict/table_structure_dict.txt' cfg.show_log = False return cfg diff --git a/deploy/slim/quantization/export_model.py b/deploy/slim/quantization/export_model.py index bbd291c3347929bf394d7859e277286cb4932042..822fd5da4c30a934d0e590ab1067f9f9188213c2 100755 --- a/deploy/slim/quantization/export_model.py +++ b/deploy/slim/quantization/export_model.py @@ -133,6 +133,7 @@ def main(): sub_model_save_path, logger) else: save_path = os.path.join(save_path, "inference") + model.eval() export_single_model(quanter, model, infer_shape, save_path, logger) diff --git a/doc/doc_ch/inference_ppocr.md b/doc/doc_ch/inference_ppocr.md index 3e46f17d3a781839dfe5e632f85aabcd03d0fd17..5fb3811eb40addd506dfa37d257c00a0c2a44258 100644 ---
a/doc/doc_ch/inference_ppocr.md +++ b/doc/doc_ch/inference_ppocr.md @@ -3,12 +3,13 @@ 本文介绍针对PP-OCR模型库的Python推理引擎使用方法,内容依次为文本检测、文本识别、方向分类器以及三者串联在CPU、GPU上的预测方法。 -- [1. 文本检测模型推理](#文本检测模型推理) -- [2. 文本识别模型推理](#文本识别模型推理) - - [2.1 超轻量中文识别模型推理](#超轻量中文识别模型推理) - - [2.2 多语言模型的推理](#多语言模型的推理) -- [3. 方向分类模型推理](#方向分类模型推理) -- [4. 文本检测、方向分类和文字识别串联推理](#文本检测、方向分类和文字识别串联推理) +- [基于Python引擎的PP-OCR模型库推理](#基于python引擎的pp-ocr模型库推理) + - [1. 文本检测模型推理](#1-文本检测模型推理) + - [2. 文本识别模型推理](#2-文本识别模型推理) + - [2.1 超轻量中文识别模型推理](#21-超轻量中文识别模型推理) + - [2.2 多语言模型的推理](#22-多语言模型的推理) + - [3. 方向分类模型推理](#3-方向分类模型推理) + - [4. 文本检测、方向分类和文字识别串联推理](#4-文本检测方向分类和文字识别串联推理) @@ -82,7 +83,7 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153) 如果您需要预测的是其他语言模型,可以在[此链接](./models_list.md#%E5%A4%9A%E8%AF%AD%E8%A8%80%E8%AF%86%E5%88%AB%E6%A8%A1%E5%9E%8B)中找到对应语言的inference模型,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径, 同时为了得到正确的可视化结果,需要通过 `--vis_font_path` 指定可视化的字体路径,`doc/fonts/` 路径下有默认提供的小语种字体,例如韩文识别: ``` wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_ch/quickstart.md b/doc/doc_ch/quickstart.md index d2126192764fa32c7c7a3651b463b8b23240ea6c..57931aa26143f2f442f3e4d579abc2549c11322b 100644 --- a/doc/doc_ch/quickstart.md +++ b/doc/doc_ch/quickstart.md @@ -1,18 +1,18 @@ -# PaddleOCR快速开始 - -- [1. 安装](#1) - - [1.1 安装PaddlePaddle](#11) - - [1.2 安装PaddleOCR whl包](#12) +- [PaddleOCR快速开始](#paddleocr快速开始) + - [1. 
安装](#1-安装) + - [1.1 安装PaddlePaddle](#11-安装paddlepaddle) + - [1.2 安装PaddleOCR whl包](#12-安装paddleocr-whl包) + - [2. 便捷使用](#2-便捷使用) + - [2.1 命令行使用](#21-命令行使用) + - [2.1.1 中英文模型](#211-中英文模型) + - [2.1.2 多语言模型](#212-多语言模型) + - [2.1.3 版面分析](#213-版面分析) + - [2.2 Python脚本使用](#22-python脚本使用) + - [2.2.1 中英文与多语言使用](#221-中英文与多语言使用) + - [2.2.2 版面分析](#222-版面分析) + - [3. 小结](#3-小结) -- [2. 便捷使用](#2) - - [2.1 命令行使用](#21) - - [2.1.1 中英文模型](#211) - - [2.1.2 多语言模型](#212) - - [2.1.3 版面分析](#213) - - [2.2 Python脚本使用](#22) - - [2.2.1 中英文与多语言使用](#221) - - [2.2.2 版面分析](#222) -- [3.小结](#3) +# PaddleOCR快速开始 @@ -193,8 +193,8 @@ paddleocr --image_dir=./table/1.png --type=structure /output/table/1/ └─ res.txt └─ [454, 360, 824, 658].xlsx 表格识别结果 - └─ [16, 2, 828, 305].jpg 被裁剪出的图片区域 - └─ [17, 361, 404, 711].xlsx 表格识别结果 + └─ [16, 2, 828, 305].jpg 被裁剪出的图片区域 + └─ [17, 361, 404, 711].xlsx 表格识别结果 ``` - **参数说明** @@ -204,7 +204,7 @@ paddleocr --image_dir=./table/1.png --type=structure | output | excel和识别结果保存的地址 | ./output/table | | table_max_len | 表格结构模型预测时,图像的长边resize尺度 | 488 | | table_model_dir | 表格结构模型 inference 模型地址 | None | - | table_char_type | 表格结构模型所用字典地址 | ../ppocr/utils/dict/table_structure_dict.txt | + | table_char_dict_path | 表格结构模型所用字典地址 | ../ppocr/utils/dict/table_structure_dict.txt | 大部分参数和paddleocr whl包保持一致,见 [whl包文档](./whl.md) diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index cf55af29e7b6a0c92022b35746081776451627a0..6cdd547517ebb8888374b22c1b52314da53eebab 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -2,19 +2,20 @@ 本文提供了PaddleOCR文本识别任务的全流程指南,包括数据准备、模型训练、调优、评估、预测,各个阶段的详细说明: -- [1 数据准备](#数据准备) - - [1.1 自定义数据集](#自定义数据集) - - [1.2 数据下载](#数据下载) - - [1.3 字典](#字典) - - [1.4 支持空格](#支持空格) -- [2 启动训练](#启动训练) - - [2.1 数据增强](#数据增强) - - [2.2 通用模型训练](#通用模型训练) - - [2.3 多语言模型训练](#多语言模型训练) - - [2.4 知识蒸馏训练](#知识蒸馏训练) -- [3 评估](#评估) -- [4 预测](#预测) -- [5 转Inference模型测试](#Inference) +- [文字识别](#文字识别) + - [1. 
数据准备](#1-数据准备) + - [1.1 自定义数据集](#11-自定义数据集) + - [1.2 数据下载](#12-数据下载) + - [1.3 字典](#13-字典) + - [1.4 添加空格类别](#14-添加空格类别) + - [2. 启动训练](#2-启动训练) + - [2.1 数据增强](#21-数据增强) + - [2.2 通用模型训练](#22-通用模型训练) + - [2.3 多语言模型训练](#23-多语言模型训练) + - [2.4 知识蒸馏训练](#24-知识蒸馏训练) + - [3 评估](#3-评估) + - [4 预测](#4-预测) + - [5. 转Inference模型测试](#5-转inference模型测试) @@ -477,8 +478,8 @@ python3 tools/export_model.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_trai - 自定义模型推理 - 如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径,并且设置 `rec_char_type=ch` + 如果训练时修改了文本的字典,在使用inference模型预测时,需要通过`--rec_char_dict_path`指定使用的字典路径 ``` - python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" + python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" ``` diff --git a/doc/doc_ch/serving_inference.md b/doc/doc_ch/serving_inference.md index fea5a24546ddd2141085f56eeb99cdf72577bff3..30ea7ee7c11692ba02e8314036d74a21c2f090e5 100644 --- a/doc/doc_ch/serving_inference.md +++ b/doc/doc_ch/serving_inference.md @@ -98,7 +98,6 @@ def read_params(): cfg.rec_model_dir = "./ocr_rec_server/" # 识别算法模型路径 cfg.rec_image_shape = "3, 32, 320" - cfg.rec_char_type = 'ch' cfg.rec_batch_num = 30 cfg.max_text_length = 25 diff --git a/doc/doc_ch/whl.md b/doc/doc_ch/whl.md index 2d524b83d73d4951939c7e0f108c494ab79a86c6..b2eb4ba17cf70edeaea36b5e54fe976605de850f 100644 --- a/doc/doc_ch/whl.md +++ b/doc/doc_ch/whl.md @@ -401,7 +401,6 @@ im_show.save('result.jpg') | rec_algorithm | 使用的识别算法类型 | CRNN | | rec_model_dir | 识别模型所在文件夹。传参方式有两种,1. 
None: 自动下载内置模型到 `~/.paddleocr/rec`;2.自己转换好的inference模型路径,模型路径下必须包含model和params文件 | None | | rec_image_shape | 识别算法的输入图片尺寸 | "3,32,320" | -| rec_char_type | 识别算法的字符类型,中英文(ch)、英文(en)、法语(french)、德语(german)、韩语(korean)、日语(japan) | ch | | rec_batch_num | 进行识别时,同时前向的图片数 | 30 | | max_text_length | 识别算法能识别的最大文字长度 | 25 | | rec_char_dict_path | 识别模型字典路径,当rec_model_dir使用方式2传参时需要修改为自己的字典路径 | ./ppocr/utils/ppocr_keys_v1.txt | diff --git a/doc/doc_en/inference_en.md b/doc/doc_en/inference_en.md index a8a96e30f020b98b52bb465140b3463cd88beebb..d1233780d89c175729e835d069db1bcc0bb9273f 100755 --- a/doc/doc_en/inference_en.md +++ b/doc/doc_en/inference_en.md @@ -296,7 +296,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073) - The image resolution used in training is different: the image resolution used in training the above model is [3,32,100], while during our Chinese model training, in order to ensure the recognition effect of long text, the image resolution used in training is [3, 32, 320]. The default shape parameter of the inference stage is the image resolution used in training phase, that is [3, 32, 320]. Therefore, when running inference of the above English model here, you need to set the shape of the recognition image through the parameter `rec_image_shape`. -- Character list: the experiment in the DTRB paper is only for 26 lowercase English characters and 10 numbers, a total of 36 characters. All upper and lower case characters are converted to lower case characters, and characters not in the above list are ignored and considered as spaces. Therefore, no characters dictionary file is used here, but a dictionary is generated by the below command. Therefore, the parameter `rec_char_type` needs to be set during inference, which is specified as "en" in English. +- Character list: the experiment in the DTRB paper is only for 26 lowercase English characters and 10 numbers, a total of 36 characters. 
All upper and lower case characters are converted to lower case characters, and characters not in the above list are ignored and considered as spaces. Therefore, no characters dictionary file is used here, but a dictionary is generated by the below command. ``` self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" @@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png ### 3.4 Text Recognition Model Inference Using Custom Characters Dictionary -If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch` +If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path` ``` python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" diff --git a/doc/doc_en/inference_ppocr_en.md b/doc/doc_en/inference_ppocr_en.md index 21f4c64d6dc99054a3480a66cd710b5d09461ca1..8dc30d3106048575a9ad722386daf9cb658dd455 100755 --- a/doc/doc_en/inference_ppocr_en.md +++ b/doc/doc_en/inference_ppocr_en.md @@ -4,12 +4,13 @@ This article introduces the use of the Python inference engine for the PP-OCR model library. The content is in order of text detection, text recognition, direction classifier and the prediction method of the three in series on the CPU and GPU. -- [Text Detection Model Inference](#DETECTION_MODEL_INFERENCE) -- [Text Recognition Model Inference](#RECOGNITION_MODEL_INFERENCE) - - [1. Lightweight Chinese Recognition Model Inference](#LIGHTWEIGHT_RECOGNITION) - - [2. 
Multilingual Model Inference](#MULTILINGUAL_MODEL_INFERENCE) -- [Angle Classification Model Inference](#ANGLE_CLASS_MODEL_INFERENCE) -- [Text Detection Angle Classification and Recognition Inference Concatenation](#CONCATENATION) +- [Python Inference for PP-OCR Model Zoo](#python-inference-for-pp-ocr-model-zoo) + - [Text Detection Model Inference](#text-detection-model-inference) + - [Text Recognition Model Inference](#text-recognition-model-inference) + - [1. Lightweight Chinese Recognition Model Inference](#1-lightweight-chinese-recognition-model-inference) + - [2. Multilingual Model Inference](#2-multilingual-model-inference) + - [Angle Classification Model Inference](#angle-classification-model-inference) + - [Text Detection Angle Classification and Recognition Inference Concatenation](#text-detection-angle-classification-and-recognition-inference-concatenation) @@ -82,7 +83,7 @@ You need to specify the visual font path through `--vis_font_path`. There are sm ``` wget wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar -python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" +python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf" ``` ![](../imgs_words/korean/1.jpg) diff --git a/doc/doc_en/quickstart_en.md b/doc/doc_en/quickstart_en.md index e44345a8e65f6efc94f83604590d980e052f2abd..8a9c38069f384dcef06db60f6b1266e6eb116d84 100644 --- a/doc/doc_en/quickstart_en.md +++ b/doc/doc_en/quickstart_en.md @@ -1,18 +1,19 @@ +- [PaddleOCR Quick Start](#paddleocr-quick-start) + - [1. 
Installation](#1-installation) + - [1.1 Install PaddlePaddle](#11-install-paddlepaddle) + - [1.2 Install PaddleOCR Whl Package](#12-install-paddleocr-whl-package) + - [2. Easy-to-Use](#2-easy-to-use) + - [2.1 Use by Command Line](#21-use-by-command-line) + - [2.1.1 Chinese and English Model](#211-chinese-and-english-model) + - [2.1.2 Multi-language Model](#212-multi-language-model) + - [2.1.3 Layout Analysis](#213-layout-analysis) + - [2.2 Use by Code](#22-use-by-code) + - [2.2.1 Chinese & English Model and Multilingual Model](#221-chinese--english-model-and-multilingual-model) + - [2.2.2 Layout Analysis](#222-layout-analysis) + - [3. Summary](#3-summary) # PaddleOCR Quick Start -+ [1. Installation](#1installation) - + [1.1 Install PaddlePaddle](#11-install-paddlepaddle) - + [1.2 Install PaddleOCR Whl Package](#12-install-paddleocr-whl-package) -* [2. Easy-to-Use](#2-easy-to-use) - + [2.1 Use by Command Line](#21-use-by-command-line) - - [2.1.1 English and Chinese Model](#211-english-and-chinese-model) - - [2.1.2 Multi-language Model](#212-multi-language-model) - - [2.1.3 Layout Analysis](#213-layoutAnalysis) - + [2.2 Use by Code](#22-use-by-code) - - [2.2.1 Chinese & English Model and Multilingual Model](#221-chinese---english-model-and-multilingual-model) - - [2.2.2 Layout Analysis](#222-layoutAnalysis) -* [3. 
Summary](#3) @@ -196,7 +197,7 @@ paddleocr --image_dir=../doc/table/1.png --type=structure | output | The path where excel and recognition results are saved | ./output/table | | table_max_len | The long side of the image is resized in table structure model | 488 | | table_model_dir | inference model path of table structure model | None | - | table_char_type | dict path of table structure model | ../ppocr/utils/dict/table_structure_dict.txt | + | table_char_dict_path | dict path of table structure model | ../ppocr/utils/dict/table_structure_dict.txt | diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 20f4b9457b2fd05058bd2b723048f94de92605b6..c3700070b9d01c89cf8189a7af5f13d877114fb2 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -470,8 +470,8 @@ inference/det_db/ - Text recognition model Inference using custom characters dictionary - If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path`, and set `rec_char_type=ch` + If the text dictionary is modified during training, when using the inference model to predict, you need to specify the dictionary path used by `--rec_char_dict_path` ``` - python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path" + python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_dict_path="your text dict path" ``` diff --git a/doc/doc_en/whl_en.md b/doc/doc_en/whl_en.md index 2671fbb9f0b5653cff29908a0c40d14a25b2cc58..35b2b1798ad8b566ee87e921e23be84a5ecccf24 100644 --- a/doc/doc_en/whl_en.md +++ b/doc/doc_en/whl_en.md @@ -348,7 +348,6 @@ im_show.save('result.jpg') | rec_algorithm | Type of recognition algorithm 
selected | CRNN | | rec_model_dir | the text recognition inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/rec`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None | | rec_image_shape | image shape of recognition algorithm | "3,32,320" | -| rec_char_type | Character type of recognition algorithm, Chinese (ch) or English (en) | ch | | rec_batch_num | When performing recognition, the batchsize of forward images | 30 | | max_text_length | The maximum text length that the recognition algorithm can recognize | 25 | | rec_char_dict_path | the alphabet path which needs to be modified to your own path when `rec_model_Name` use mode 2 | ./ppocr/utils/ppocr_keys_v1.txt | diff --git a/doc/joinus.PNG b/doc/joinus.PNG index 5838a96bc8317178de07a16d246966bf6cc7df63..c31a56ab74a2922ffd2d0eb3d9ba32cac59eceb8 100644 Binary files a/doc/joinus.PNG and b/doc/joinus.PNG differ diff --git a/ppstructure/table/README.md b/ppstructure/table/README.md index 6137cfaef657d70a2b3a2b7eb9c69e364e421d96..65d2cd22b6f18d06fe538ffe1fd243c0c0bfaa3c 100644 --- a/ppstructure/table/README.md +++ b/ppstructure/table/README.md @@ -117,7 +117,7 @@ teds: 93.32 ```python cd PaddleOCR/ppstructure -python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table +python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt 
--table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table ``` After running, the excel sheet of each picture will be saved in the directory specified by the output field diff --git a/ppstructure/table/README_ch.md b/ppstructure/table/README_ch.md index 39081995e6dd1e0a05fc88d067bab119ca7b6e39..4a617eeb46455b0bd13c8a848419671354eec8fd 100644 --- a/ppstructure/table/README_ch.md +++ b/ppstructure/table/README_ch.md @@ -117,7 +117,7 @@ teds: 93.32 ```python cd PaddleOCR/ppstructure -python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table +python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --det_limit_side_len=736 --det_limit_type=min --output ../output/table ``` Reference diff --git a/ppstructure/table/predict_structure.py b/ppstructure/table/predict_structure.py index fc85327b3a446573259546d84c439f5f8e5b3ac7..0179c614ae4864677576f6073f291282fb772988 100755 --- a/ppstructure/table/predict_structure.py +++ b/ppstructure/table/predict_structure.py @@ -58,7 +58,6 @@ class TableStructurer(object): }] postprocess_params = { 'name': 'TableLabelDecode', - "character_type": args.table_char_type, "character_dict_path": args.table_char_dict_path, } @@ -104,7 +103,9 @@ class TableStructurer(object): res_loc_final.append([left, top, right, bottom]) structure_str_list = structure_str_list[0][:-1] - structure_str_list = 
['<html>', '<body>', '<table>'] + structure_str_list + ['</table>', '</body>', '</html>'] + structure_str_list = [ + '<html>', '<body>', '<table>' + ] + structure_str_list + ['</table>', '</body>', '</html>'] elapse = time.time() - starttime return (structure_str_list, res_loc_final), elapse diff --git a/ppstructure/utility.py b/ppstructure/utility.py index 10d9f71a7cdfed00b555c46689b2dd3c5aad807c..081a5f6ae3cd4a01bc2d1ba4812f39086e16cfe9 100644 --- a/ppstructure/utility.py +++ b/ppstructure/utility.py @@ -26,7 +26,6 @@ def init_args(): # params for table structure parser.add_argument("--table_max_len", type=int, default=488) parser.add_argument("--table_model_dir", type=str) - parser.add_argument("--table_char_type", type=str, default='en') parser.add_argument( "--table_char_dict_path", type=str,