From c269e1e42f79d33aa87d5e0ba49b488c56f97ae4 Mon Sep 17 00:00:00 2001
From: Topdu <784990967@qq.com>
Date: Mon, 30 May 2022 08:36:11 +0000
Subject: [PATCH] modify vitstr name
---
configs/rec/rec_mtb_nrtr.yml | 2 +-
...{rec_vitstr.yml => rec_vitstr_none_ce.yml} | 4 +--
doc/doc_ch/algorithm_overview.md | 2 +-
doc/doc_ch/algorithm_rec_vitstr.md | 26 +++++++++----------
doc/doc_en/algorithm_overview_en.md | 2 +-
doc/doc_en/algorithm_rec_vitstr_en.md | 14 +++++-----
ppocr/losses/__init__.py | 7 +++--
.../{rec_ce_smooth_loss.py => rec_ce_loss.py} | 4 +--
.../configs/rec_mtb_nrtr/rec_mtb_nrtr.yml | 2 +-
.../rec_mtb_nrtr/train_infer_python.txt | 4 +--
.../rec_vitstr_none_ce.yml} | 4 +--
.../train_infer_python.txt | 10 +++----
12 files changed, 40 insertions(+), 41 deletions(-)
rename configs/rec/{rec_vitstr.yml => rec_vitstr_none_ce.yml} (97%)
rename ppocr/losses/{rec_ce_smooth_loss.py => rec_ce_loss.py} (93%)
rename test_tipc/configs/{rec_vitstr/rec_vitstr.yml => rec_vitstr_none_ce/rec_vitstr_none_ce.yml} (97%)
rename test_tipc/configs/{rec_vitstr => rec_vitstr_none_ce}/train_infer_python.txt (82%)
diff --git a/configs/rec/rec_mtb_nrtr.yml b/configs/rec/rec_mtb_nrtr.yml
index dfe2cc98..40567488 100644
--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
@@ -49,7 +49,7 @@ Architecture:
Loss:
- name: CESmoothingLoss
+ name: CELoss
smoothing: True
PostProcess:
diff --git a/configs/rec/rec_vitstr.yml b/configs/rec/rec_vitstr_none_ce.yml
similarity index 97%
rename from configs/rec/rec_vitstr.yml
rename to configs/rec/rec_vitstr_none_ce.yml
index 005db018..065cc785 100644
--- a/configs/rec/rec_vitstr.yml
+++ b/configs/rec/rec_vitstr_none_ce.yml
@@ -3,7 +3,7 @@ Global:
epoch_num: 20
log_smooth_window: 20
print_batch_step: 10
- save_model_dir: ./output/rec/vitstr/
+ save_model_dir: ./output/rec/vitstr_none_ce/
save_epoch_step: 1
# evaluation is run every 2000 iterations after the 0th iteration#
eval_batch_step: [0, 50]
@@ -44,7 +44,7 @@ Architecture:
name: CTCHead
Loss:
- name: CESmoothingLoss
+ name: CELoss
smoothing: False
with_all: True
diff --git a/doc/doc_ch/algorithm_overview.md b/doc/doc_ch/algorithm_overview.md
index 934ac085..4196ec05 100755
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -85,7 +85,7 @@
|SAR|Resnet31| 87.20% | rec_r31_sar | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
|SEED|Aster_Resnet| 85.35% | rec_resnet_stn_bilstm_att | [训练模型](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
|SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [训练模型](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
-|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ctc_en | [训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) |
+|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce_en | [训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) |
diff --git a/doc/doc_ch/algorithm_rec_vitstr.md b/doc/doc_ch/algorithm_rec_vitstr.md
index bd83b8d9..ab12be72 100644
--- a/doc/doc_ch/algorithm_rec_vitstr.md
+++ b/doc/doc_ch/algorithm_rec_vitstr.md
@@ -27,7 +27,7 @@
|模型|骨干网络|配置文件|Acc|下载链接|
| --- | --- | --- | --- | --- |
-|ViTSTR|ViTSTR|[rec_vitstr.yml](../../configs/rec/rec_vitstr.yml)|79.82%|[训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar)|
+|ViTSTR|ViTSTR|[rec_vitstr_none_ce.yml](../../configs/rec/rec_vitstr_none_ce.yml)|79.82%|[训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar)|
## 2. 环境配置
@@ -40,7 +40,7 @@
### 3.1 模型训练
-请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练`ViTSTR`识别模型时需要**更换配置文件**为`ViTSTR`的[配置文件](../../configs/rec/rec_ViTSTR.yml)。
+请参考[文本识别训练教程](./recognition.md)。PaddleOCR对代码进行了模块化,训练`ViTSTR`识别模型时需要**更换配置文件**为`ViTSTR`的[配置文件](../../configs/rec/rec_vitstr_none_ce.yml)。
#### 启动训练
@@ -48,10 +48,10 @@
具体地,在完成数据准备后,便可以启动训练,训练命令如下:
```shell
#单卡训练(训练周期长,不建议)
-python3 tools/train.py -c configs/rec/rec_vitstr.yml
+python3 tools/train.py -c configs/rec/rec_vitstr_none_ce.yml
#多卡训练,通过--gpus参数指定卡号
-python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_vitstr.yml
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_vitstr_none_ce.yml
```
@@ -61,7 +61,7 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs
```shell
# 注意将pretrained_model的路径设置为本地路径。
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_vitstr.yml -o Global.pretrained_model=./rec_vitstr_train/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.pretrained_model=./rec_vitstr_none_ce_train/best_accuracy
```
@@ -70,7 +70,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
使用如下命令进行单张图片预测:
```shell
# 注意将pretrained_model的路径设置为本地路径。
-python3 tools/infer_rec.py -c configs/rec/rec_vitstr.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model=./rec_vitstr_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model=./rec_vitstr_none_ce_train/best_accuracy
# 预测文件夹下所有图像时,可修改infer_img为文件夹,如 Global.infer_img='./doc/imgs_words_en/'。
```
@@ -80,15 +80,15 @@ python3 tools/infer_rec.py -c configs/rec/rec_vitstr.yml -o Global.infer_img='./
### 4.1 Python推理
-首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) ),可以使用如下命令进行转换:
+首先将训练得到best模型,转换成inference model。这里以训练完成的模型为例([模型下载地址](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) ),可以使用如下命令进行转换:
```shell
# 注意将pretrained_model的路径设置为本地路径。
-python3 tools/export_model.py -c configs/rec/rec_vitstr.yml -o Global.pretrained_model=./rec_vitstr_train/best_accuracy Global.save_inference_dir=./inference/rec_vitstr/
+python3 tools/export_model.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.pretrained_model=./rec_vitstr_none_ce_train/best_accuracy Global.save_inference_dir=./inference/rec_vitstr/
```
**注意:**
- 如果您是在自己的数据集上训练的模型,并且调整了字典文件,请注意修改配置文件中的`character_dict_path`是否是所需要的字典文件。
-- 如果您修改了训练时的输入大小,请修改`tools/export_model.py`文件中的对应NRTR的`infer_shape`。
+- 如果您修改了训练时的输入大小,请修改`tools/export_model.py`文件中的对应ViTSTR的`infer_shape`。
转换成功后,在目录下有三个文件:
```
@@ -110,20 +110,20 @@ python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words_en/word_10.png'
执行命令后,上面图像的预测结果(识别的文本和得分)会打印到屏幕上,示例如下:
结果如下:
```shell
-Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9265879392623901)
+Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9998350143432617)
```
**注意**:
- 训练上述模型采用的图像分辨率是[1,224,224],需要通过参数`rec_image_shape`设置为您训练时的识别图像形状。
- 在推理时需要设置参数`rec_char_dict_path`指定字典,如果您修改了字典,请修改该参数为您的字典文件。
-- 如果您修改了预处理方法,需修改`tools/infer/predict_rec.py`中NRTR的预处理为您的预处理方法。
+- 如果您修改了预处理方法,需修改`tools/infer/predict_rec.py`中ViTSTR的预处理为您的预处理方法。
### 4.2 C++推理部署
-由于C++预处理后处理还未支持NRTR,所以暂未支持
+由于C++预处理后处理还未支持ViTSTR,所以暂未支持
### 4.3 Serving服务化部署
@@ -139,7 +139,7 @@ Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9265879392623901)
## 5. FAQ
1. 在`ViTSTR`论文中,使用在ImageNet1k上的预训练权重进行初始化训练,我们在训练未采用预训练权重,最终精度没有变化甚至有所提高。
-2. 我们仅仅复现了`ViTSTR`中的tiny版本,如果有需要使用small、base版本,可直接使用源开源repo中的预训练权重转为Paddle权重即可使用。
+2. 我们仅仅复现了`ViTSTR`中的tiny版本,如果需要使用small、base版本,可将[ViTSTR源repo](https://github.com/roatienza/deep-text-recognition-benchmark) 中的预训练权重转为Paddle权重使用。
## 引用
diff --git a/doc/doc_en/algorithm_overview_en.md b/doc/doc_en/algorithm_overview_en.md
index 213d9580..f2284542 100755
--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -84,7 +84,7 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
|SAR|Resnet31| 87.20% | rec_r31_sar | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_r31_sar_train.tar) |
|SEED|Aster_Resnet| 85.35% | rec_resnet_stn_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.1/rec/rec_resnet_stn_bilstm_att.tar) |
|SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
-|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) |
+|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce_en | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) |
diff --git a/doc/doc_en/algorithm_rec_vitstr_en.md b/doc/doc_en/algorithm_rec_vitstr_en.md
index 666798c3..b6f26d39 100644
--- a/doc/doc_en/algorithm_rec_vitstr_en.md
+++ b/doc/doc_en/algorithm_rec_vitstr_en.md
@@ -25,7 +25,7 @@ Using MJSynth and SynthText two text recognition datasets for training, and eval
|Model|Backbone|config|Acc|Download link|
| --- | --- | --- | --- | --- |
-|ViTSTR|ViTSTR|[rec_vitstr.yml](../../configs/rec/rec_vitstr.yml)|79.82%|[训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar)|
+|ViTSTR|ViTSTR|[rec_vitstr_none_ce.yml](../../configs/rec/rec_vitstr_none_ce.yml)|79.82%|[训练模型](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar)|
## 2. Environment
@@ -43,24 +43,24 @@ Specifically, after the data preparation is completed, the training can be start
```
#Single GPU training (long training period, not recommended)
-python3 tools/train.py -c configs/rec/rec_vitstr.yml
+python3 tools/train.py -c configs/rec/rec_vitstr_none_ce.yml
#Multi GPU training, specify the gpu number through the --gpus parameter
-python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_vitstr.yml
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_vitstr_none_ce.yml
```
Evaluation:
```
# GPU evaluation
-python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_vitstr.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
+python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
```
Prediction:
```
# The configuration file used for prediction must match the training
-python3 tools/infer_rec.py -c configs/rec/rec_vitstr.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model=./rec_vitstr_train/best_accuracy
+python3 tools/infer_rec.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model=./rec_vitstr_none_ce_train/best_accuracy
```
@@ -71,7 +71,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_vitstr.yml -o Global.infer_img='./
First, the model saved during the ViTSTR text recognition training process is converted into an inference model. ( [Model download link](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar)) ), you can use the following command to convert:
```
-python3 tools/export_model.py -c configs/rec/rec_vitstr.yml -o Global.pretrained_model=./rec_vitstr_train/best_accuracy Global.save_inference_dir=./inference/rec_vitstr
+python3 tools/export_model.py -c configs/rec/rec_vitstr_none_ce.yml -o Global.pretrained_model=./rec_vitstr_none_ce_train/best_accuracy Global.save_inference_dir=./inference/rec_vitstr
```
**Note:**
@@ -98,7 +98,7 @@ python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words_en/word_10.png'
After executing the command, the prediction result (recognized text and score) of the image above is printed to the screen, an example is as follows:
The result is as follows:
```shell
-Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9265879392623901)
+Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9998350143432617)
```
diff --git a/ppocr/losses/__init__.py b/ppocr/losses/__init__.py
index 6c4545eb..7bea87f6 100755
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -30,7 +30,7 @@ from .det_fce_loss import FCELoss
from .rec_ctc_loss import CTCLoss
from .rec_att_loss import AttentionLoss
from .rec_srn_loss import SRNLoss
-from .rec_ce_smooth_loss import CESmoothingLoss
+from .rec_ce_loss import CELoss
from .rec_sar_loss import SARLoss
from .rec_aster_loss import AsterLoss
from .rec_pren_loss import PRENLoss
@@ -60,9 +60,8 @@ def build_loss(config):
support_dict = [
'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
- 'CESmoothingLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss',
- 'SDMGRLoss', 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss',
- 'MultiLoss'
+ 'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
+ 'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss'
]
config = copy.deepcopy(config)
module_name = config.pop('name')
diff --git a/ppocr/losses/rec_ce_smooth_loss.py b/ppocr/losses/rec_ce_loss.py
similarity index 93%
rename from ppocr/losses/rec_ce_smooth_loss.py
rename to ppocr/losses/rec_ce_loss.py
index 22243ed4..b837ac27 100644
--- a/ppocr/losses/rec_ce_smooth_loss.py
+++ b/ppocr/losses/rec_ce_loss.py
@@ -3,9 +3,9 @@ from paddle import nn
import paddle.nn.functional as F
-class CESmoothingLoss(nn.Layer):
+class CELoss(nn.Layer):
def __init__(self, smoothing=True, with_all=False, **kwargs):
- super(CESmoothingLoss, self).__init__()
+ super(CELoss, self).__init__()
self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
self.smoothing = smoothing
self.with_all = with_all
diff --git a/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml
index 3936ab58..ba6728d8 100644
--- a/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml
+++ b/test_tipc/configs/rec_mtb_nrtr/rec_mtb_nrtr.yml
@@ -49,7 +49,7 @@ Architecture:
Loss:
- name: CESmoothingLoss
+ name: CELoss
smoothing: True
PostProcess:
diff --git a/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
index de6de5a0..20d984e3 100644
--- a/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
+++ b/test_tipc/configs/rec_mtb_nrtr/train_infer_python.txt
@@ -40,10 +40,10 @@ infer_quant:False
inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbol_dict.txt --rec_image_shape="1,32,100" --rec_algorithm="NRTR"
--use_gpu:True|False
--enable_mkldnn:True|False
---cpu_threads:1|6
+--cpu_threads:6
--rec_batch_num:1|6
--use_tensorrt:True|False
---precision:fp32|int8
+--precision:fp32
--rec_model_dir:
--image_dir:./inference/rec_inference
--save_log_path:./test/output/
diff --git a/test_tipc/configs/rec_vitstr/rec_vitstr.yml b/test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml
similarity index 97%
rename from test_tipc/configs/rec_vitstr/rec_vitstr.yml
rename to test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml
index 427bce4b..ccd93c9e 100644
--- a/test_tipc/configs/rec_vitstr/rec_vitstr.yml
+++ b/test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml
@@ -3,7 +3,7 @@ Global:
epoch_num: 20
log_smooth_window: 20
print_batch_step: 10
- save_model_dir: ./output/rec/vitstr/
+ save_model_dir: ./output/rec/vitstr_none_ce/
save_epoch_step: 1
# evaluation is run every 2000 iterations after the 0th iteration#
eval_batch_step: [0, 2000]
@@ -43,7 +43,7 @@ Architecture:
name: CTCHead
Loss:
- name: CESmoothingLoss
+ name: CELoss
smoothing: False
with_all: True
diff --git a/test_tipc/configs/rec_vitstr/train_infer_python.txt b/test_tipc/configs/rec_vitstr_none_ce/train_infer_python.txt
similarity index 82%
rename from test_tipc/configs/rec_vitstr/train_infer_python.txt
rename to test_tipc/configs/rec_vitstr_none_ce/train_infer_python.txt
index 8b58e220..04c5742e 100644
--- a/test_tipc/configs/rec_vitstr/train_infer_python.txt
+++ b/test_tipc/configs/rec_vitstr_none_ce/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/rec_vitstr/rec_vitstr.yml -o
+norm_train:tools/train.py -c test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml -o
pact_train:null
fpgm_train:null
distill_train:null
@@ -21,21 +21,21 @@ null:null
null:null
##
===========================eval_params===========================
-eval:tools/eval.py -c test_tipc/configs/rec_vitstr/rec_vitstr.yml -o
+eval:tools/eval.py -c test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
-norm_export:tools/export_model.py -c test_tipc/configs/rec_vitstr/rec_vitstr.yml -o
+norm_export:tools/export_model.py -c test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml -o
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
-train_model:./inference/rec_vitstr_train/best_accuracy
-infer_export:tools/export_model.py -c test_tipc/configs/rec_vitstr/rec_vitstr.yml -o
+train_model:./inference/rec_vitstr_none_ce_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/rec_vitstr_none_ce/rec_vitstr_none_ce.yml -o
infer_quant:False
inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/EN_symbol_dict.txt --rec_image_shape="1,224,224" --rec_algorithm="ViTSTR"
--use_gpu:True|False
--
GitLab