From ce518e552cebfd98f1ba7372ea066bbc54aa7e3c Mon Sep 17 00:00:00 2001 From: xmy0916 <863299715@qq.com> Date: Wed, 9 Dec 2020 20:45:56 +0800 Subject: [PATCH] fix doc algorithm&recognition en&ch --- doc/doc_ch/algorithm_overview.md | 5 -- doc/doc_ch/recognition.md | 7 +- doc/doc_en/algorithm_overview_en.md | 6 -- doc/doc_en/recognition_en.md | 111 +++++++++++++++++++--------- 4 files changed, 80 insertions(+), 49 deletions(-) diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md index 475db679..d047959d 100644 --- a/doc/doc_ch/algorithm_overview.md +++ b/doc/doc_ch/algorithm_overview.md @@ -54,11 +54,6 @@ PaddleOCR开源的文本识别算法列表: |CRNN|MobileNetV3||rec_mv3_none_bilstm_ctc|[敬请期待]()| |STAR-Net|Resnet34_vd||rec_r34_vd_tps_bilstm_ctc|[敬请期待]()| |STAR-Net|MobileNetV3||rec_mv3_tps_bilstm_ctc|[敬请期待]()| -|RARE|Resnet34_vd||rec_r34_vd_tps_bilstm_attn|[敬请期待]()| -|RARE|MobileNetV3||rec_mv3_tps_bilstm_attn|[敬请期待]()| -|SRN|Resnet50_vd_fpn||rec_r50fpn_vd_none_srn|[敬请期待]()| -**说明:** SRN模型使用了数据扰动方法对上述提到对两个训练集进行增广,增广后的数据可以在[百度网盘](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA)上下载,提取码: y3ry。 -原始论文使用两阶段训练平均精度为89.74%,PaddleOCR中使用one-stage训练,平均精度为88.33%。两种预训练权重均在[下载链接](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar)中。 PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。 diff --git a/doc/doc_ch/recognition.md b/doc/doc_ch/recognition.md index 6c5ea02f..6c5efc06 100644 --- a/doc/doc_ch/recognition.md +++ b/doc/doc_ch/recognition.md @@ -166,9 +166,9 @@ tar -xf rec_mv3_none_bilstm_ctc.tar && rm -rf rec_mv3_none_bilstm_ctc.tar *如果您安装的是cpu版本,请将配置文件中的 `use_gpu` 字段修改为false* ``` -# GPU训练 支持单卡,多卡训练,通过selected_gpus参数指定卡号 +# GPU训练 支持单卡,多卡训练,通过--gpus参数指定卡号 # 训练icdar15英文数据 并将训练日志保存为 tain_rec.log -python3 -m paddle.distributed.launch --selected_gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml 2>&1 | tee train_rec.log +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml ``` - 
数据增强 @@ -331,9 +331,8 @@ Eval: *注意* 评估时必须确保配置文件中 infer_img 字段为空 ``` -export CUDA_VISIBLE_DEVICES=0 # GPU 评估, Global.checkpoints 为待测权重 -python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md index 6cdf310f..60c44865 100644 --- a/doc/doc_en/algorithm_overview_en.md +++ b/doc/doc_en/algorithm_overview_en.md @@ -55,12 +55,6 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r |CRNN|MobileNetV3||rec_mv3_none_bilstm_ctc|[Coming soon]()| |STAR-Net|Resnet34_vd||rec_r34_vd_tps_bilstm_ctc|[Coming soon]()| |STAR-Net|MobileNetV3||rec_mv3_tps_bilstm_ctc|[Coming soon]()| -|RARE|Resnet34_vd||rec_r34_vd_tps_bilstm_attn|[Coming soon]()| -|RARE|MobileNetV3||rec_mv3_tps_bilstm_attn|[Coming soon]()| -|SRN|Resnet50_vd_fpn||rec_r50fpn_vd_none_srn|[Coming soon]()| -**Note:** SRN model uses data expansion method to expand the two training sets mentioned above, and the expanded data can be downloaded from [Baidu Drive](https://pan.baidu.com/s/1-HSZ-ZVdqBF2HaBZ5pRAKA) (download code: y3ry). - -The average accuracy of the two-stage training in the original paper is 89.74%, and that of one stage training in paddleocr is 88.33%. Both pre-trained weights can be downloaded [here](https://paddleocr.bj.bcebos.com/SRN/rec_r50fpn_vd_none_srn.tar). 
Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./doc/doc_en/recognition_en.md) diff --git a/doc/doc_en/recognition_en.md b/doc/doc_en/recognition_en.md index 41b00c52..daa12820 100644 --- a/doc/doc_en/recognition_en.md +++ b/doc/doc_en/recognition_en.md @@ -158,10 +158,9 @@ tar -xf rec_mv3_none_bilstm_ctc.tar && rm -rf rec_mv3_none_bilstm_ctc.tar Start training: ``` -# GPU training Support single card and multi-card training, specify the card number through CUDA_VISIBLE_DEVICES -export CUDA_VISIBLE_DEVICES=0,1,2,3 +# GPU training Support single card and multi-card training, specify the card number through --gpus # Training icdar15 English data and saving the log as train_rec.log -python3 tools/train.py -c configs/rec/rec_icdar15_train.yml 2>&1 | tee train_rec.log +python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_icdar15_train.yml ``` - Data Augmentation @@ -199,39 +198,69 @@ If the evaluation set is large, the test will be time-consuming. It is recommend | rec_r34_vd_tps_bilstm_ctc.yml | STARNet | Resnet34_vd | tps | BiLSTM | ctc | For training Chinese data, it is recommended to use -训练中文数据,推荐使用[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file: +[rec_chinese_lite_train_v1.1.yml](../../configs/rec/ch_ppocr_v1.1/rec_chinese_lite_train_v1.1.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file: co -Take `rec_mv3_none_none_ctc.yml` as an example: +Take `rec_chinese_lite_train_v1.1.yml` as an example: ``` Global: ... - # Modify image_shape to fit long text - image_shape: [3, 32, 320] - ... 
+ # Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary + character_dict_path: ppocr/utils/ppocr_keys_v1.txt # Modify character type character_type: ch - # Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary - character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt ... - # Modify reader type - reader_yml: ./configs/rec/rec_chinese_reader.yml - # Whether to use data augmentation - distort: true # Whether to recognize spaces - use_space_char: true - ... + use_space_char: False -... Optimizer: ... # Add learning rate decay strategy - decay: - function: cosine_decay - # Each epoch contains iter number - step_each_epoch: 20 - # Total epoch number - total_epoch: 1000 + lr: + name: Cosine + learning_rate: 0.001 + ... + +... + +Train: + dataset: + # Type of dataset,we support LMDBDateSet and SimpleDataSet + name: SimpleDataSet + # Path of dataset + data_dir: ./train_data/ + # Path of train list + label_file_list: ["./train_data/train_list.txt"] + transforms: + ... + - RecResizeImg: + # Modify image_shape to fit long text + image_shape: [3, 32, 320] + ... + loader: + ... + # Train batch_size for Single card + batch_size_per_card: 256 + ... + +Eval: + dataset: + # Type of dataset,we support LMDBDateSet and SimpleDataSet + name: SimpleDataSet + # Path of dataset + data_dir: ./train_data + # Path of eval list + label_file_list: ["./train_data/val_list.txt"] + transforms: + ... + - RecResizeImg: + # Modify image_shape to fit long text + image_shape: [3, 32, 320] + ... + loader: + # Eval batch_size for Single card + batch_size_per_card: 256 + ... ``` **Note that the configuration file for prediction/evaluation must be consistent with the training.** @@ -257,18 +286,33 @@ Take `rec_french_lite_train` as an example: ``` Global: ... 
- # Add a custom dictionary, if you modify the dictionary - # please point the path to the new dictionary + # Add a custom dictionary, such as modify the dictionary, please point the path to the new dictionary character_dict_path: ./ppocr/utils/dict/french_dict.txt - # Add data augmentation during training - distort: true - # Identify spaces - use_space_char: true - ... - # Modify reader type - reader_yml: ./configs/rec/multi_languages/rec_french_reader.yml ... + # Whether to recognize spaces + use_space_char: False + ... + +Train: + dataset: + # Type of dataset,we support LMDBDateSet and SimpleDataSet + name: SimpleDataSet + # Path of dataset + data_dir: ./train_data/ + # Path of train list + label_file_list: ["./train_data/french_train.txt"] + ... + +Eval: + dataset: + # Type of dataset,we support LMDBDateSet and SimpleDataSet + name: SimpleDataSet + # Path of dataset + data_dir: ./train_data + # Path of eval list + label_file_list: ["./train_data/french_val.txt"] + ... ``` @@ -277,9 +321,8 @@ Global: The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader. ``` -export CUDA_VISIBLE_DEVICES=0 # GPU evaluation, Global.checkpoints is the weight to be tested -python3 tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy +python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy ``` -- GitLab