Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into fix_cpp

58ef7acb · LDOUBLEV · 38339287 · 5c664bf4 · 58ef7acb · 58ef7acb
31 changed file
--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
+Global:
+  use_gpu: True
+  epoch_num: 21
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/nrtr/
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path: 
+  character_type: EN_symbol
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: True
+  save_res_path: ./output/rec/predicts_nrtr.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.99
+  clip_norm: 5.0
+  lr:
+    name: Cosine
+    learning_rate: 0.0005
+    warmup_epoch: 2
+  regularizer:
+    name: 'L2'
+    factor: 0.
+
+Architecture:
+  model_type: rec
+  algorithm: NRTR
+  in_channels: 1
+  Transform:
+  Backbone:
+    name: MTB
+    cnn_num: 2
+  Head:
+    name: Transformer
+    d_model: 512
+    num_encoder_layers: 6
+    beam_size: 10 # A beam size greater than 0 enables beam search during evaluation.
+    
+
+Loss:
+  name: NRTRLoss
+  smoothing: True
+
+PostProcess:
+  name: NRTRLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - NRTRDecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 512
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/evaluation/
+    transforms:
+      - NRTRDecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 1
+    use_shared_memory: False
--- a/deploy/cpp_infer/CMakeLists.txt
+++ b/deploy/cpp_infer/CMakeLists.txt
@@ -206,6 +206,10 @@ endif()

 set(DEPS ${DEPS} ${OpenCV_LIBS})

+include(ExternalProject)
+include(external-cmake/auto-log.cmake)
+include_directories(${CMAKE_CURRENT_BINARY_DIR}/autolog/src/extern_Autolog/auto_log)
+
 AUX_SOURCE_DIRECTORY(./src SRCS)
 add_executable(${DEMO_NAME} ${SRCS})


--- a/deploy/cpp_infer/external-cmake/auto-log.cmake
+++ b/deploy/cpp_infer/external-cmake/auto-log.cmake
+# Declares the AutoLog sources as an ExternalProject that is fetched with Git.
+find_package(Git REQUIRED)
+# Prints the value of CMAKE_BUILD_TYPE while configuring.
+message("${CMAKE_BUILD_TYPE}")
+
+set(AUTOLOG_REPOSITORY     https://github.com/LDOUBLEV/AutoLog.git)
+# NOTE(review): AUTOLOG_INSTALL_DIR is not referenced again in this file -
+# confirm whether it is still needed.
+SET(AUTOLOG_INSTALL_DIR   ${CMAKE_CURRENT_BINARY_DIR}/install/Autolog)
+
+# PREFIX "autolog" is the directory the project is placed under; the install
+# step is replaced by an echo, so nothing is installed.
+# NOTE(review): GIT_TAG "main" is presumably a branch name, in which case the
+# fetched revision is not pinned - confirm this is intended.
+ExternalProject_Add(
+    extern_Autolog
+    PREFIX autolog
+    GIT_REPOSITORY ${AUTOLOG_REPOSITORY}
+    GIT_TAG main
+    DOWNLOAD_NO_EXTRACT True
+    INSTALL_COMMAND cmake -E echo "Skipping install step."
+)
--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -39,8 +39,8 @@
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
 DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");
-DEFINE_int32(cpu_math_library_num_threads, 10, "Num of threads with CPU.");
-DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU.");
+DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
+DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
 DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
 DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
 DEFINE_bool(benchmark, true, "Whether use benchmark.");
@@ -60,6 +60,7 @@ DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
 DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
 // recognition related
 DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
+DEFINE_int32(rec_batch_num, 1, "rec_batch_num.");
 DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");


@@ -68,34 +69,6 @@ using namespace cv;
 using namespace PaddleOCR;


-void PrintBenchmarkLog(std::string model_name, 
-                       int batch_size, 
-                       std::string input_shape,
-                       std::vector<double> time_info,
-                       int img_num){
-  LOG(INFO) << "----------------------- Config info -----------------------";
-  LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
-  LOG(INFO) << "ir_optim: " << "True";
-  LOG(INFO) << "enable_memory_optim: " << "True";
-  LOG(INFO) << "enable_tensorrt: " << FLAGS_use_tensorrt;
-  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
-  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_math_library_num_threads;
-  LOG(INFO) << "----------------------- Data info -----------------------";
-  LOG(INFO) << "batch_size: " << batch_size;
-  LOG(INFO) << "input_shape: " << input_shape;
-  LOG(INFO) << "data_num: " << img_num;
-  LOG(INFO) << "----------------------- Model info -----------------------";
-  LOG(INFO) << "model_name: " << model_name;
-  LOG(INFO) << "precision: " << FLAGS_precision;
-  LOG(INFO) << "----------------------- Perf info ------------------------";
-  LOG(INFO) << "Total time spent(ms): "
-            << std::accumulate(time_info.begin(), time_info.end(), 0);
-  LOG(INFO) << "preprocess_time(ms): " << time_info[0] / img_num
-            << ", inference_time(ms): " << time_info[1] / img_num
-            << ", postprocess_time(ms): " << time_info[2] / img_num;
-}
-
-
 static bool PathExists(const std::string& path){
 #ifdef _WIN32
  struct _stat buffer;
@@ -110,8 +83,8 @@ static bool PathExists(const std::string& path){
 int main_det(std::vector<cv::String> cv_all_img_names) {
    std::vector<double> time_info = {0, 0, 0};
    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                   FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, 
-                   FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads, 
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                   FLAGS_use_polygon_score, FLAGS_visualize,
                   FLAGS_use_tensorrt, FLAGS_precision);
@@ -135,7 +108,17 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
    }
    
    if (FLAGS_benchmark) {
-        PrintBenchmarkLog("det", 1, "dynamic", time_info, cv_all_img_names.size());
+        AutoLogger autolog("ocr_det", 
+                           FLAGS_use_gpu,
+                           FLAGS_use_tensorrt,
+                           FLAGS_enable_mkldnn,
+                           FLAGS_cpu_threads,
+                           1, 
+                           "dynamic", 
+                           FLAGS_precision, 
+                           time_info, 
+                           cv_all_img_names.size());
+        autolog.report();
    }
    return 0;
 }
@@ -144,8 +127,8 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
 int main_rec(std::vector<cv::String> cv_all_img_names) {
    std::vector<double> time_info = {0, 0, 0};
    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                       FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                       FLAGS_use_mkldnn, FLAGS_char_list_file,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, FLAGS_char_list_file,
                       FLAGS_use_tensorrt, FLAGS_precision);

    for (int i = 0; i < cv_all_img_names.size(); ++i) {
@@ -165,18 +148,14 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
      time_info[2] += rec_times[2];
    }
    
-    if (FLAGS_benchmark) {
-        PrintBenchmarkLog("rec", 1, "dynamic", time_info, cv_all_img_names.size());
-    }
-    
    return 0;
 }


 int main_system(std::vector<cv::String> cv_all_img_names) {
    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                   FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, 
-                   FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads, 
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                   FLAGS_use_polygon_score, FLAGS_visualize,
                   FLAGS_use_tensorrt, FLAGS_precision);
@@ -184,14 +163,14 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
    Classifier *cls = nullptr;
    if (FLAGS_use_angle_cls) {
      cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                           FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                           FLAGS_use_mkldnn, FLAGS_cls_thresh,
+                           FLAGS_gpu_mem, FLAGS_cpu_threads,
+                           FLAGS_enable_mkldnn, FLAGS_cls_thresh,
                           FLAGS_use_tensorrt, FLAGS_precision);
    }

    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                       FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                       FLAGS_use_mkldnn, FLAGS_char_list_file,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, FLAGS_char_list_file,
                       FLAGS_use_tensorrt, FLAGS_precision);

    auto start = std::chrono::system_clock::now();

--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
@@ -44,6 +44,7 @@ PaddleOCR基于动态图开源的文本识别算法列表：
 - [x]  STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
 - [x]  RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
 - [x]  SRN([paper](https://arxiv.org/abs/2003.12294))[5]
+- [x]  NRTR([paper](https://arxiv.org/abs/1806.00926v2))

 参考[DTRB][3](https://arxiv.org/abs/1904.01906)文字识别训练和评估流程，使用MJSynth和SynthText两个文字识别数据集训练，在IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE数据集上进行评估，算法效果如下：

@@ -58,6 +59,7 @@ PaddleOCR基于动态图开源的文本识别算法列表：
 |RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
 |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
 |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar) |
+|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [下载链接](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |


 PaddleOCR文本识别算法的训练和使用请参考文档教程中[模型训练/评估中的文本识别部分](./recognition.md)。
--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -185,11 +185,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py -c configs
 <a name="数据增强"></a>
 #### 2.1 数据增强

-PaddleOCR提供了多种数据增强方式，如果您希望在训练时加入扰动，请在配置文件中设置 `distort: true`。
+PaddleOCR提供了多种数据增强方式，默认配置文件中已经添加了数据增广。

-默认的扰动方式有：颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gasuss noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse)。
+默认的扰动方式有：颜色空间转换(cvtColor)、模糊(blur)、抖动(jitter)、噪声(Gaussian noise)、随机切割(random crop)、透视(perspective)、颜色反转(reverse)、TIA数据增广。

-训练过程中每种扰动方式以50%的概率被选择，具体代码实现请参考：[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+训练过程中每种扰动方式以40%的概率被选择，具体代码实现请参考：[rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py)

 *由于OpenCV的兼容性问题，扰动操作暂时只支持Linux*

@@ -215,6 +215,7 @@ PaddleOCR支持训练和评估交替进行, 可以在 `configs/rec/rec_icdar15_t
 | rec_mv3_tps_bilstm_att.yml |  CRNN |   Mobilenet_v3 |  TPS   |  BiLSTM |  att  |
 | rec_r34_vd_tps_bilstm_att.yml |  CRNN |   Resnet34_vd |  TPS   |  BiLSTM |  att  |
 | rec_r50fpn_vd_none_srn.yml    | SRN | Resnet50_fpn_vd    | None    | rnn | srn |
+| rec_mtb_nrtr.yml    | NRTR | nrtr_mtb    | None    | transformer encoder | transformer decoder |

 训练中文数据，推荐使用[rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml)，如您希望尝试其他算法在中文数据集上的效果，请参考下列说明修改配置文件：


--- a/doc/doc_en/algorithm_overview_en.md
+++ b/doc/doc_en/algorithm_overview_en.md
@@ -46,6 +46,7 @@ PaddleOCR open-source text recognition algorithms list:
 - [x]  STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
 - [x]  RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
 - [x]  SRN([paper](https://arxiv.org/abs/2003.12294))[5]
+- [x]  NRTR([paper](https://arxiv.org/abs/1806.00926v2))

 Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:

@@ -60,5 +61,6 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
 |RARE|MobileNetV3|82.5%|rec_mv3_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_tps_bilstm_att_v2.0_train.tar)|
 |RARE|Resnet34_vd|83.6%|rec_r34_vd_tps_bilstm_att |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_tps_bilstm_att_v2.0_train.tar)|
 |SRN|Resnet50_vd_fpn| 88.52% | rec_r50fpn_vd_none_srn |[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r50_vd_srn_train.tar)|
+|NRTR|NRTR_MTB| 84.3% | rec_mtb_nrtr | [Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mtb_nrtr_train.tar) |

 Please refer to the document for training guide and use of PaddleOCR text recognition algorithms [Text recognition model training/evaluation/prediction](./recognition_en.md)
--- a/doc/doc_en/recognition_en.md
+++ b/doc/doc_en/recognition_en.md
@@ -177,11 +177,11 @@ python3 -m paddle.distributed.launch --gpus '0,1,2,3'  tools/train.py -c configs
 <a name="Data_Augmentation"></a>
 #### 2.1 Data Augmentation

-PaddleOCR provides a variety of data augmentation methods. If you want to add disturbance during training, please set `distort: true` in the configuration file.
+PaddleOCR provides a variety of data augmentation methods. All the augmentation methods are enabled by default.

-The default perturbation methods are: cvtColor, blur, jitter, Gasuss noise, random crop, perspective, color reverse.
+The default perturbation methods are: cvtColor, blur, jitter, Gaussian noise, random crop, perspective, color reverse, TIA augmentation.

-Each disturbance method is selected with a 50% probability during the training process. For specific code implementation, please refer to: [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)
+Each disturbance method is selected with a 40% probability during the training process. For specific code implementation, please refer to: [rec_img_aug.py](../../ppocr/data/imaug/rec_img_aug.py)

 <a name="Training"></a>
 #### 2.2 Training
@@ -207,7 +207,7 @@ If the evaluation set is large, the test will be time-consuming. It is recommend
 | rec_mv3_tps_bilstm_att.yml |  CRNN |   Mobilenet_v3 |  TPS   |  BiLSTM |  att  |
 | rec_r34_vd_tps_bilstm_att.yml |  CRNN |   Resnet34_vd |  TPS   |  BiLSTM |  att  |
 | rec_r50fpn_vd_none_srn.yml    | SRN | Resnet50_fpn_vd    | None    | rnn | srn |
-
+| rec_mtb_nrtr.yml    | NRTR | nrtr_mtb    | None    | transformer encoder | transformer decoder |

 For training Chinese data, it is recommended to use
 [rec_chinese_lite_train_v2.0.yml](../../configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml). If you want to try the result of other algorithms on the Chinese data set, please refer to the following instructions to modify the configuration file:

--- a/doc/joinus.PNG
+++ b/doc/joinus.PNG
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -49,14 +49,12 @@ def term_mp(sig_num, frame):
    os.killpg(pgid, signal.SIGKILL)


-signal.signal(signal.SIGINT, term_mp)
-signal.signal(signal.SIGTERM, term_mp)
-
-
 def build_dataloader(config, mode, device, logger, seed=None):
    config = copy.deepcopy(config)

-    support_dict = ['SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet']
+    support_dict = [
+        'SimpleDataSet', 'LMDBDataSet', 'PGDataSet', 'PubTabDataSet'
+    ]
    module_name = config[mode]['dataset']['name']
    assert module_name in support_dict, Exception(
        'DataSet only support {}'.format(support_dict))
@@ -96,4 +94,8 @@ def build_dataloader(config, mode, device, logger, seed=None):
        return_list=True,
        use_shared_memory=use_shared_memory)

+    # support exit using ctrl+c
+    signal.signal(signal.SIGINT, term_mp)
+    signal.signal(signal.SIGTERM, term_mp)
+
    return data_loader
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
 from .make_shrink_map import MakeShrinkMap
 from .random_crop_data import EastRandomCropData, PSERandomCrop

-from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg
+from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg
 from .randaugment import RandAugment
 from .copy_paste import CopyPaste
 from .operators import *

--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -161,6 +161,34 @@ class BaseRecLabelEncode(object):
        return text_list


+class NRTRLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index for NRTR.
+
+    Labels are wrapped with index 2 at the front and index 3 at the end
+    (the positions of '<s>' and '</s>' in the list returned by
+    `add_special_char`) and padded with index 0 up to `max_text_len`.
+    """
+
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=False,
+                 **kwargs):
+
+        super(NRTRLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+
+    def __call__(self, data):
+        """Encode data['label'] in place.
+
+        Sets data['length'] to the number of encoded characters (without the
+        two wrapping tokens) and data['label'] to the wrapped, zero-padded
+        index array. Returns None when the label cannot be encoded or does
+        not fit into `max_text_len` together with the two wrapping tokens.
+        """
+        text = data['label']
+        text = self.encode(text)
+        if text is None:
+            return None
+        # The start and end tokens need two extra slots. Without this check a
+        # longer label would receive no padding and end up longer than
+        # max_text_len, so label arrays would no longer share one length.
+        if len(text) > self.max_text_len - 2:
+            return None
+        data['length'] = np.array(len(text))
+        text = [2] + text + [3]
+        text = text + [0] * (self.max_text_len - len(text))
+        data['label'] = np.array(text)
+        return data
+
+    def add_special_char(self, dict_character):
+        """Prepend the special characters to the character list."""
+        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
+        return dict_character
+
 class CTCLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """


--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -57,6 +57,38 @@ class DecodeImage(object):
        return data


+# Decodes the encoded image bytes in data['image'] with OpenCV and stores the
+# result of a final COLOR_BGR2GRAY conversion back into data['image'].
+class NRTRDecodeImage(object):
+    """ decode image """
+
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+
+    def __call__(self, data):
+        img = data['image']
+        # The raw image must be a non-empty str (Python 2) or bytes (Python 3).
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+
+        # NOTE(review): flag 1 presumably corresponds to cv2.IMREAD_COLOR,
+        # i.e. a 3-channel BGR result - confirm.
+        img = cv2.imdecode(img, 1)
+
+        # Decoding failed: signal the caller by returning None.
+        if img is None:
+            return None
+        # NOTE(review): if the decoded image always has 3 channels, the
+        # GRAY2BGR conversion below receives a non-gray input - confirm that
+        # img_mode 'GRAY' is usable.
+        if self.img_mode == 'GRAY':
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        elif self.img_mode == 'RGB':
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+            # Reverse the channel order.
+            # NOTE(review): COLOR_BGR2GRAY is still applied afterwards to the
+            # reversed channels - verify this is intended.
+            img = img[:, :, ::-1]
+        img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
+        # NOTE(review): after the gray conversion img is presumably 2-D, so
+        # this 3-axis transpose looks like it would fail when channel_first
+        # is True - confirm.
+        if self.channel_first:
+            img = img.transpose((2, 0, 1))
+        data['image'] = img
+        return data
+
 class NormalizeImage(object):
    """ normalize image such as substract mean, divide std
    """

--- a/ppocr/data/imaug/rec_img_aug.py
+++ b/ppocr/data/imaug/rec_img_aug.py
@@ -16,7 +16,7 @@ import math
 import cv2
 import numpy as np
 import random
-
+from PIL import Image
 from .text_image_aug import tia_perspective, tia_stretch, tia_distort


@@ -43,6 +43,25 @@ class ClsResizeImg(object):
        return data


+class NRTRRecResizeImg(object):
+    """Resize the image in data['image'] and scale it for NRTR.
+
+    Args:
+        image_shape: target size passed to the resize call, e.g. [100, 32].
+        resize_type: 'PIL' or 'OpenCV'; any other value leaves the image
+            size unchanged.
+    """
+
+    def __init__(self, image_shape, resize_type, **kwargs):
+        self.image_shape = image_shape
+        self.resize_type = resize_type
+
+    def __call__(self, data):
+        """Replace data['image'] with a float32 array of shape (1, H, W)."""
+        img = data['image']
+        # The config provides the size as a list; both resize calls are
+        # documented to take a tuple, so convert it once here.
+        size = tuple(self.image_shape)
+        if self.resize_type == 'PIL':
+            image_pil = Image.fromarray(np.uint8(img))
+            # Image.ANTIALIAS was an alias of Image.LANCZOS and was removed in
+            # Pillow 10; LANCZOS selects the same filter on old and new
+            # Pillow versions.
+            img = image_pil.resize(size, Image.LANCZOS)
+            img = np.array(img)
+        elif self.resize_type == 'OpenCV':
+            img = cv2.resize(img, size)
+        # Add a trailing channel axis, then move it to the front.
+        norm_img = np.expand_dims(img, -1)
+        norm_img = norm_img.transpose((2, 0, 1))
+        # Scale pixel values with x / 128 - 1.
+        data['image'] = norm_img.astype(np.float32) / 128. - 1.
+        return data
+
+
 class RecResizeImg(object):
    def __init__(self,
                 image_shape,

--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -25,7 +25,7 @@ from .det_sast_loss import SASTLoss
 from .rec_ctc_loss import CTCLoss
 from .rec_att_loss import AttentionLoss
 from .rec_srn_loss import SRNLoss
-
+from .rec_nrtr_loss import NRTRLoss
 # cls loss
 from .cls_loss import ClsLoss

@@ -44,8 +44,9 @@ from .table_att_loss import TableAttentionLoss
 def build_loss(config):
    support_dict = [
        'DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'AttentionLoss',
-        'SRNLoss', 'PGLoss', 'CombinedLoss', 'TableAttentionLoss'
+        'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'TableAttentionLoss'
    ]
+
    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception('loss only support {}'.format(

--- a/ppocr/losses/rec_nrtr_loss.py
+++ b/ppocr/losses/rec_nrtr_loss.py
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+
+
+class NRTRLoss(nn.Layer):
+    """Loss for the NRTR recognition model.
+
+    With `smoothing` enabled a label-smoothed cross entropy (eps = 0.1) is
+    averaged over the targets that are not 0; otherwise
+    `nn.CrossEntropyLoss` with mean reduction and `ignore_index=0` is used.
+    """
+
+    def __init__(self, smoothing=True, **kwargs):
+        super(NRTRLoss, self).__init__()
+        self.smoothing = smoothing
+        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
+
+    def forward(self, pred, batch):
+        """Return {'loss': value} for `pred` and the labels in `batch`.
+
+        batch[1] is sliced from column 1 up to the largest value in batch[2]
+        plus two, so the first label column is skipped.
+        """
+        # Flatten predictions to rows of class scores and targets to a vector.
+        logits = pred.reshape([-1, pred.shape[2]])
+        longest = batch[2].max()
+        target = batch[1][:, 1:2 + longest].reshape([-1])
+
+        if not self.smoothing:
+            return {'loss': self.loss_func(logits, target)}
+
+        eps = 0.1
+        n_class = logits.shape[1]
+        one_hot = F.one_hot(target, n_class)
+        # Smoothed distribution: 1 - eps on the target class, the rest spread
+        # evenly over the other classes.
+        one_hot = one_hot * (1 - eps) + (1 - one_hot) * eps / (n_class - 1)
+        log_prb = F.log_softmax(logits, axis=1)
+        keep = paddle.not_equal(
+            target, paddle.zeros(
+                target.shape, dtype='int64'))
+        per_target = -(one_hot * log_prb).sum(axis=1)
+        return {'loss': per_target.masked_select(keep).mean()}
--- a/ppocr/metrics/rec_metric.py
+++ b/ppocr/metrics/rec_metric.py
@@ -57,3 +57,4 @@ class RecMetric(object):
        self.correct_num = 0
        self.all_num = 0
        self.norm_edit_dis = 0
+        
--- a/ppocr/modeling/architectures/base_model.py
+++ b/ppocr/modeling/architectures/base_model.py
@@ -14,7 +14,6 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-
 from paddle import nn
 from ppocr.modeling.transforms import build_transform
 from ppocr.modeling.backbones import build_backbone

--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -26,8 +26,9 @@ def build_backbone(config, model_type):
        from .rec_resnet_vd import ResNet
        from .rec_resnet_fpn import ResNetFPN
        from .rec_mv1_enhance import MobileNetV1Enhance
+        from .rec_nrtr_mtb import MTB
        support_dict = [
-            "MobileNetV1Enhance", "MobileNetV3", "ResNet", "ResNetFPN"
+            'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB'
        ]
    elif model_type == "e2e":
        from .e2e_resnet_vd_pg import ResNet

--- a/ppocr/modeling/backbones/rec_nrtr_mtb.py
+++ b/ppocr/modeling/backbones/rec_nrtr_mtb.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle import nn
+
+
+class MTB(nn.Layer):
+    """Convolutional backbone used by the NRTR configuration.
+
+    Args:
+        cnn_num: number of convolution stages; layers are only created when
+            it equals 2.
+        in_channels: number of channels of the input images.
+    """
+
+    def __init__(self, cnn_num, in_channels):
+        super(MTB, self).__init__()
+        self.block = nn.Sequential()
+        self.out_channels = in_channels
+        self.cnn_num = cnn_num
+        if self.cnn_num != 2:
+            return
+        # Every stage is Conv2D(3x3, stride 2) -> ReLU -> BatchNorm2D; the
+        # stage widths are 32 * 2**idx.
+        stage_in = in_channels
+        for idx in range(self.cnn_num):
+            stage_out = 32 * (2**idx)
+            self.block.add_sublayer(
+                'conv_{}'.format(idx),
+                nn.Conv2D(
+                    in_channels=stage_in,
+                    out_channels=stage_out,
+                    kernel_size=3,
+                    stride=2,
+                    padding=1))
+            self.block.add_sublayer('relu_{}'.format(idx), nn.ReLU())
+            self.block.add_sublayer('bn_{}'.format(idx),
+                                    nn.BatchNorm2D(stage_out))
+            stage_in = stage_out
+
+    def forward(self, images):
+        """Run the block; for cnn_num == 2 merge the last two axes."""
+        feats = self.block(images)
+        if self.cnn_num != 2:
+            return feats
+        # Swap axes 1 and 3, giving (b, w, h, c).
+        feats = feats.transpose([0, 3, 2, 1])
+        dims = feats.shape
+        # Fold the last two axes into one.
+        return feats.reshape([dims[0], dims[1], dims[2] * dims[3]])
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -26,12 +26,14 @@ def build_head(config):
    from .rec_ctc_head import CTCHead
    from .rec_att_head import AttentionHead
    from .rec_srn_head import SRNHead
+    from .rec_nrtr_head import Transformer

    # cls head
    from .cls_head import ClsHead
    support_dict = [
        'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
-        'SRNHead', 'PGHead', 'TableAttentionHead']
+        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead'
+    ]

    #table head
    from .table_att_head import TableAttentionHead

--- a/ppocr/modeling/heads/multiheadAttention.py
+++ b/ppocr/modeling/heads/multiheadAttention.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle.nn import Linear
+from paddle.nn.initializer import XavierUniform as xavier_uniform_
+from paddle.nn.initializer import Constant as constant_
+from paddle.nn.initializer import XavierNormal as xavier_normal_
+
+zeros_ = constant_(value=0.)
+ones_ = constant_(value=1.)
+
+
+class MultiheadAttention(nn.Layer):
+    """Allows the model to jointly attend to information
+    from different representation subspaces.
+    See reference: Attention Is All You Need
+
+    .. math::
+        \text{MultiHead}(Q, K, V) = \text{Concat}(head_1,\dots,head_h)W^O
+        \text{where} head_i = \text{Attention}(QW_i^Q, KW_i^K, VW_i^V)
+
+    Args:
+        embed_dim: total dimension of the model
+        num_heads: parallel attention layers, or heads
+
+    """
+
+    # NOTE(review): `bias` is only passed to out_proj; `add_bias_kv` and
+    # `add_zero_attn` are accepted but not used anywhere in this class.
+    def __init__(self,
+                 embed_dim,
+                 num_heads,
+                 dropout=0.,
+                 bias=True,
+                 add_bias_kv=False,
+                 add_zero_attn=False):
+        super(MultiheadAttention, self).__init__()
+        self.embed_dim = embed_dim
+        self.num_heads = num_heads
+        self.dropout = dropout
+        self.head_dim = embed_dim // num_heads
+        assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
+        # Factor applied to the projected query before the dot product.
+        self.scaling = self.head_dim**-0.5
+        self.out_proj = Linear(embed_dim, embed_dim, bias_attr=bias)
+        # Called before conv1..conv3 exist; it only refers to out_proj.weight.
+        self._reset_parameters()
+        # 1x1 convolutions used as the query / key / value input projections
+        # (see _in_proj_q, _in_proj_k and _in_proj_v).
+        self.conv1 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv2 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+        self.conv3 = paddle.nn.Conv2D(
+            in_channels=embed_dim, out_channels=embed_dim, kernel_size=(1, 1))
+
+    # NOTE(review): xavier_uniform_ is the imported XavierUniform initializer
+    # class; confirm that calling it with the weight as argument actually
+    # initializes the weight rather than only constructing an initializer.
+    def _reset_parameters(self):
+        xavier_uniform_(self.out_proj.weight)
+
+    # NOTE(review): `incremental_state` and `static_kv` are not read in the
+    # body below.
+    def forward(self,
+                query,
+                key,
+                value,
+                key_padding_mask=None,
+                incremental_state=None,
+                need_weights=True,
+                static_kv=False,
+                attn_mask=None):
+        """
+        Inputs of forward function
+            query: [target length, batch size, embed dim]
+            key: [sequence length, batch size, embed dim]
+            value: [sequence length, batch size, embed dim]
+            key_padding_mask: if True, mask padding based on batch size
+            incremental_state: if provided, previous time steps are cashed
+            need_weights: output attn_output_weights
+            static_kv: key and value are static
+
+        Outputs of forward function
+            attn_output: [target length, batch size, embed dim]
+            attn_output_weights: [batch size, target length, sequence length]
+        """
+        tgt_len, bsz, embed_dim = query.shape
+        assert embed_dim == self.embed_dim
+        assert list(query.shape) == [tgt_len, bsz, embed_dim]
+        assert key.shape == value.shape
+
+        # Project the inputs and scale the query.
+        q = self._in_proj_q(query)
+        k = self._in_proj_k(key)
+        v = self._in_proj_v(value)
+        q *= self.scaling
+
+        # Split the embedding into heads and move the combined
+        # batch * heads axis to the front: [bsz * num_heads, len, head_dim].
+        q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+        v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
+            [1, 0, 2])
+
+        src_len = k.shape[1]
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.shape[0] == bsz
+            assert key_padding_mask.shape[1] == src_len
+
+        # Raw attention scores: q @ k^T per head.
+        attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
+        assert list(attn_output_weights.
+                    shape) == [bsz * self.num_heads, tgt_len, src_len]
+
+        # attn_mask is added to the scores (additive mask).
+        if attn_mask is not None:
+            attn_mask = attn_mask.unsqueeze(0)
+            attn_output_weights += attn_mask
+        if key_padding_mask is not None:
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            # Note: the local name `key` is reused here for the float mask and
+            # no longer refers to the `key` argument.
+            key = key_padding_mask.unsqueeze(1).unsqueeze(2).astype('float32')
+            # Build an additive term: 0 where the mask is 0, -inf elsewhere.
+            y = paddle.full(shape=key.shape, dtype='float32', fill_value='-inf')
+            y = paddle.where(key == 0., key, y)
+            attn_output_weights += y
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz * self.num_heads, tgt_len, src_len])
+
+        # Softmax over the source positions, computed on a float32 cast.
+        attn_output_weights = F.softmax(
+            attn_output_weights.astype('float32'),
+            axis=-1,
+            dtype=paddle.float32 if attn_output_weights.dtype == paddle.float16
+            else attn_output_weights.dtype)
+        attn_output_weights = F.dropout(
+            attn_output_weights, p=self.dropout, training=self.training)
+
+        # Weighted sum of the values, then merge the heads back into
+        # [tgt_len, bsz, embed_dim] and apply the output projection.
+        attn_output = paddle.bmm(attn_output_weights, v)
+        assert list(attn_output.
+                    shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn_output = attn_output.transpose([1, 0, 2]).reshape(
+            [tgt_len, bsz, embed_dim])
+        attn_output = self.out_proj(attn_output)
+
+        if need_weights:
+            # average attention weights over heads
+            attn_output_weights = attn_output_weights.reshape(
+                [bsz, self.num_heads, tgt_len, src_len])
+            attn_output_weights = attn_output_weights.sum(
+                axis=1) / self.num_heads
+        else:
+            attn_output_weights = None
+        return attn_output, attn_output_weights
+
+    # Query projection: move axis 0 to the end, insert a singleton axis at
+    # position 2 so conv1 (a 1x1 Conv2D) can be applied, then undo both steps.
+    def _in_proj_q(self, query):
+        query = query.transpose([1, 2, 0])
+        query = paddle.unsqueeze(query, axis=2)
+        res = self.conv1(query)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
+
+    # Key projection: same steps as _in_proj_q, using conv2.
+    def _in_proj_k(self, key):
+        key = key.transpose([1, 2, 0])
+        key = paddle.unsqueeze(key, axis=2)
+        res = self.conv2(key)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
+
+    # Value projection: same steps as _in_proj_q, using conv3.
+    def _in_proj_v(self, value):
+        value = value.transpose([1, 2, 0])  #(1, 2, 0)
+        value = paddle.unsqueeze(value, axis=2)
+        res = self.conv3(value)
+        res = paddle.squeeze(res, axis=2)
+        res = res.transpose([2, 0, 1])
+        return res
--- a/ppocr/modeling/heads/rec_nrtr_head.py
+++ b/ppocr/modeling/heads/rec_nrtr_head.py
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,18 +24,16 @@ __all__ = ['build_post_process']
 from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
-from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
+from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
    TableLabelDecode
 from .cls_postprocess import ClsPostProcess
 from .pg_postprocess import PGPostProcess

-
 def build_post_process(config, global_config=None):
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode',
-        'DistillationDBPostProcess'
+        'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -156,6 +156,69 @@ class DistillationCTCLabelDecode(CTCLabelDecode):
        return output


+class NRTRLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index for NRTR predictions.
+
+    The character table is prefixed with four special tokens (see
+    ``add_special_char``), so index 2 is ``<s>`` and index 3 is ``</s>``.
+    """
+
+    def __init__(self,
+                 character_dict_path=None,
+                 character_type='EN_symbol',
+                 use_space_char=True,
+                 **kwargs):
+        super(NRTRLabelDecode, self).__init__(character_dict_path,
+                                             character_type, use_space_char)
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        """Decode predictions (and optionally labels) into text.
+
+        Args:
+            preds: ``paddle.Tensor`` or numpy array. An int64 array is treated
+                as already-decoded token indices; any other dtype is treated
+                as per-step scores and reduced with argmax/max over axis 2.
+            label: optional index array; its first column is dropped before
+                decoding (presumably the ``<s>`` token -- confirm with the
+                label encoder).
+
+        Returns:
+            The decoded predictions when ``label`` is None, otherwise a
+            ``(text, label)`` tuple of decoded lists.
+        """
+        # Convert first so the dtype test behaves the same for Tensors and
+        # for numpy arrays (a numpy dtype never equals a paddle dtype).
+        if isinstance(preds, paddle.Tensor):
+            preds = preds.numpy()
+
+        if preds.dtype == np.int64:
+            # Only the first sample is inspected to decide whether the whole
+            # batch carries a leading <s> (index 2) that must be stripped.
+            if preds[0][0] == 2:
+                preds_idx = preds[:, 1:]
+            else:
+                preds_idx = preds
+            text = self.decode(preds_idx)
+        else:
+            preds_idx = preds.argmax(axis=2)
+            preds_prob = preds.max(axis=2)
+            text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
+
+        if label is None:
+            return text
+        label = self.decode(label[:, 1:])
+        return text, label
+
+    def add_special_char(self, dict_character):
+        """Prepend the blank/unknown/start/end tokens to the character list."""
+        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
+        return dict_character
+
+    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
+        """ convert text-index into text-label.
+
+        Args:
+            text_index: batch of index sequences.
+            text_prob: optional per-step confidences aligned with
+                ``text_index``; a confidence of 1 is used when omitted.
+            is_remove_duplicate: accepted for signature compatibility; it is
+                not used by this decoder.
+
+        Returns:
+            A list with one ``(lower-cased text, mean confidence)`` tuple per
+            sequence. Decoding stops at the first ``</s>`` (index 3).
+        """
+        result_list = []
+        for batch_idx in range(len(text_index)):
+            char_list = []
+            conf_list = []
+            for idx in range(len(text_index[batch_idx])):
+                token = text_index[batch_idx][idx]
+                if token == 3:  # </s>: end of sequence
+                    break
+                try:
+                    char_list.append(self.character[int(token)])
+                except (IndexError, KeyError):
+                    # Index outside the character table: skip this step.
+                    continue
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+            if len(conf_list) == 0:
+                # np.mean([]) would give NaN and emit a RuntimeWarning.
+                conf_list = [0]
+            text = ''.join(char_list)
+            result_list.append((text.lower(), np.mean(conf_list)))
+        return result_list
+
+
+
 class AttnLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

@@ -193,8 +256,7 @@ class AttnLabelDecode(BaseRecLabelDecode):
                    if idx > 0 and text_index[batch_idx][idx - 1] == text_index[
                            batch_idx][idx]:
                        continue
-                char_list.append(self.character[int(text_index[batch_idx][
-                    idx])])
+                char_list.append(self.character[int(text_index[batch_idx][idx])])
                if text_prob is not None:
                    conf_list.append(text_prob[batch_idx][idx])
                else:

--- a/tests/ocr_det_params.txt
+++ b/tests/ocr_det_params.txt
@@ -49,4 +49,19 @@ inference:tools/infer/predict_det.py
 --save_log_path:null
 --benchmark:True
 null:null
+===========================cpp_infer_params===========================
+use_opencv:True
+infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
+infer_quant:False
+inference:./deploy/cpp_infer/build/ppocr det
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:False|True
+--precision:fp32|fp16
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True

--- a/tests/prepare.sh
+++ b/tests/prepare.sh
 #!/bin/bash
 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
+# MODE must be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
 MODE=$2

 dataline=$(cat ${FILENAME})
@@ -59,7 +59,7 @@ elif [ ${MODE} = "whole_infer" ];then
    cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
    ln -s ./icdar2015_infer ./icdar2015
    cd ../
-else
+elif [ ${MODE} = "infer" ] || [ ${MODE} = "cpp_infer" ];then
    if [ ${model_name} = "ocr_det" ]; then
        eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
        rm -rf ./train_data/icdar2015
@@ -79,3 +79,72 @@ else
    fi 
 fi

+# Build the C++ inference demo (and optionally OpenCV 3.4.7) for cpp_infer mode.
+# All paths below are relative to deploy/cpp_infer.
+if [ ${MODE} = "cpp_infer" ];then
+    cd deploy/cpp_infer
+    use_opencv=$(func_parser_value "${lines[52]}")
+    if [ ${use_opencv} = "True" ]; then
+        echo "################### build opencv ###################"
+        rm -rf 3.4.7.tar.gz opencv-3.4.7/
+        wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
+        tar -xf 3.4.7.tar.gz
+
+        cd opencv-3.4.7/
+        # We are already inside opencv-3.4.7/, so install into ./opencv3;
+        # this is the location OPENCV_DIR points at further down.
+        install_path=$(pwd)/opencv3
+
+        rm -rf build
+        mkdir build
+        cd build
+
+        cmake .. \
+            -DCMAKE_INSTALL_PREFIX=${install_path} \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DBUILD_SHARED_LIBS=OFF \
+            -DWITH_IPP=OFF \
+            -DBUILD_IPP_IW=OFF \
+            -DWITH_LAPACK=OFF \
+            -DWITH_EIGEN=OFF \
+            -DCMAKE_INSTALL_LIBDIR=lib64 \
+            -DWITH_ZLIB=ON \
+            -DBUILD_ZLIB=ON \
+            -DWITH_JPEG=ON \
+            -DBUILD_JPEG=ON \
+            -DWITH_PNG=ON \
+            -DBUILD_PNG=ON \
+            -DWITH_TIFF=ON \
+            -DBUILD_TIFF=ON
+
+        make -j
+        make install
+        # Leave both build/ and opencv-3.4.7/ so the demo build below runs
+        # from deploy/cpp_infer instead of re-configuring the OpenCV tree.
+        cd ../../
+        echo "################### build opencv finished ###################"
+    fi
+
+
+    echo "################### build PaddleOCR demo ####################"
+    if [ ${use_opencv} = "True" ]; then
+        OPENCV_DIR=$(pwd)/opencv-3.4.7/opencv3/
+    else
+        OPENCV_DIR=''
+    fi
+    LIB_DIR=$(pwd)/Paddle/build/paddle_inference_install_dir/
+    CUDA_LIB_DIR=$(dirname `find /usr -name libcudart.so`)
+    CUDNN_LIB_DIR=$(dirname `find /usr -name libcudnn.so`)
+    
+    BUILD_DIR=build
+    rm -rf ${BUILD_DIR}
+    mkdir ${BUILD_DIR}
+    cd ${BUILD_DIR}
+    # TENSORRT_DIR is not set in this block; it expands to whatever the
+    # environment provides (empty if unset).
+    cmake .. \
+        -DPADDLE_LIB=${LIB_DIR} \
+        -DWITH_MKL=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_STATIC_LIB=OFF \
+        -DWITH_TENSORRT=OFF \
+        -DOPENCV_DIR=${OPENCV_DIR} \
+        -DCUDNN_LIB=${CUDNN_LIB_DIR} \
+        -DCUDA_LIB=${CUDA_LIB_DIR} \
+        -DTENSORRT_DIR=${TENSORRT_DIR}
+
+    make -j
+    echo "################### build PaddleOCR demo finished ###################"
+fi
\ No newline at end of file
--- a/tests/test.sh
+++ b/tests/test.sh
 #!/bin/bash
 FILENAME=$1
-# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer']
+# MODE must be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
 MODE=$2

 dataline=$(cat ${FILENAME})
@@ -145,6 +145,33 @@ benchmark_value=$(func_parser_value "${lines[49]}")
 infer_key1=$(func_parser_key "${lines[50]}")
 infer_value1=$(func_parser_value "${lines[50]}")

+# Read the C++ inference settings from the params file (cpp_infer mode only).
+# The fields are addressed by fixed position in ${lines[...]}; indices 53-65
+# presumably map onto the entries of the cpp_infer_params section -- keep
+# them in sync with the params file when it changes.
+if [ ${MODE} = "cpp_infer" ]; then
+    # parse the cpp inference model directory list and its quantization flags
+    cpp_infer_model_dir_list=$(func_parser_value "${lines[53]}")
+    cpp_infer_is_quant=$(func_parser_value "${lines[54]}")
+    # parse the cpp inference command and its option keys / value lists
+    inference_cmd=$(func_parser_value "${lines[55]}")
+    cpp_use_gpu_key=$(func_parser_key "${lines[56]}")
+    cpp_use_gpu_list=$(func_parser_value "${lines[56]}")
+    cpp_use_mkldnn_key=$(func_parser_key "${lines[57]}")
+    cpp_use_mkldnn_list=$(func_parser_value "${lines[57]}")
+    cpp_cpu_threads_key=$(func_parser_key "${lines[58]}")
+    cpp_cpu_threads_list=$(func_parser_value "${lines[58]}")
+    cpp_batch_size_key=$(func_parser_key "${lines[59]}")
+    cpp_batch_size_list=$(func_parser_value "${lines[59]}")
+    cpp_use_trt_key=$(func_parser_key "${lines[60]}")
+    cpp_use_trt_list=$(func_parser_value "${lines[60]}")
+    cpp_precision_key=$(func_parser_key "${lines[61]}")
+    cpp_precision_list=$(func_parser_value "${lines[61]}")
+    # only the key is taken here; the model directory value is supplied per run
+    cpp_infer_model_key=$(func_parser_key "${lines[62]}")
+    cpp_image_dir_key=$(func_parser_key "${lines[63]}")
+    cpp_infer_img_dir=$(func_parser_value "${lines[63]}")
+    cpp_save_log_key=$(func_parser_key "${lines[64]}")
+    cpp_benchmark_key=$(func_parser_key "${lines[65]}")
+    cpp_benchmark_value=$(func_parser_value "${lines[65]}")
+fi
+
+
 LOG_PATH="./tests/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results.log"
@@ -218,6 +245,71 @@ function func_inference(){
    done
 }

+# Run the C++ inference command once per configured CPU/GPU option combination.
+# Arguments:
+#   $1 - inference command to execute
+#   $2 - inference model directory
+#   $3 - directory that receives one log file per run
+#   $4 - image directory passed to the command
+#   $5 - "True"/"False" flag: whether the model is quantized
+# Reads the cpp_* key/list globals and reports every run via status_check.
+function func_cpp_inference(){
+    # option lists are '|'-separated; note IFS is not restored afterwards
+    IFS='|'
+    _script=$1
+    _model_dir=$2
+    _log_path=$3
+    _img_dir=$4
+    _flag_quant=$5
+    # iterate over every requested device
+    for use_gpu in ${cpp_use_gpu_list[*]}; do
+        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
+            for use_mkldnn in ${cpp_use_mkldnn_list[*]}; do
+                # quantized models are only run with mkldnn enabled
+                if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
+                    continue
+                fi
+                for threads in ${cpp_cpu_threads_list[*]}; do
+                    for batch_size in ${cpp_batch_size_list[*]}; do
+                        _save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
+                        set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
+                        set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
+                        set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
+                        set_cpu_threads=$(func_set_params "${cpp_cpu_threads_key}" "${threads}")
+                        set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
+                        command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${cpp_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
+                        eval $command
+                        # capture the exit status before any other command overwrites it
+                        last_status=${PIPESTATUS[0]}
+                        eval "cat ${_save_log_path}"
+                        status_check $last_status "${command}" "${status_log}"
+                    done
+                done
+            done
+        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
+            for use_trt in ${cpp_use_trt_list[*]}; do
+                for precision in ${cpp_precision_list[*]}; do
+                    # int8 is skipped for non-quantized models
+                    if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
+                        continue
+                    fi 
+                    # fp16 and int8 are only run together with TensorRT
+                    if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
+                        continue
+                    fi
+                    # quantized models are skipped without TensorRT or with int8 precision
+                    if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
+                        continue
+                    fi
+                    for batch_size in ${cpp_batch_size_list[*]}; do
+                        _save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
+                        set_infer_data=$(func_set_params "${cpp_image_dir_key}" "${_img_dir}")
+                        set_benchmark=$(func_set_params "${cpp_benchmark_key}" "${cpp_benchmark_value}")
+                        set_batchsize=$(func_set_params "${cpp_batch_size_key}" "${batch_size}")
+                        set_tensorrt=$(func_set_params "${cpp_use_trt_key}" "${use_trt}")
+                        set_precision=$(func_set_params "${cpp_precision_key}" "${precision}")
+                        set_model_dir=$(func_set_params "${cpp_infer_model_key}" "${_model_dir}")
+                        command="${_script} ${cpp_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} > ${_save_log_path} 2>&1 "
+                        eval $command
+                        # capture the exit status before any other command overwrites it
+                        last_status=${PIPESTATUS[0]}
+                        eval "cat ${_save_log_path}"
+                        status_check $last_status "${command}" "${status_log}"
+                        
+                    done
+                done
+            done
+        else
+            echo "Does not support hardware other than CPU and GPU Currently!"
+        fi
+    done
+}
+
 if [ ${MODE} = "infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
@@ -252,6 +344,25 @@ if [ ${MODE} = "infer" ]; then
        Count=$(($Count + 1))
    done

+elif [ ${MODE} = "cpp_infer" ]; then
+    GPUID=$3
+    if [ ${#GPUID} -le 0 ];then
+        env=" "
+    else
+        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+    fi
+    # set CUDA_VISIBLE_DEVICES
+    eval $env
+    export Count=0
+    IFS="|"
+    infer_quant_flag=(${cpp_infer_is_quant})
+    for infer_model in ${cpp_infer_model_dir_list[*]}; do
+        #run inference
+        is_quant=${infer_quant_flag[Count]}
+        func_cpp_inference "${inference_cmd}" "${infer_model}" "${LOG_PATH}" "${cpp_infer_img_dir}" ${is_quant}
+        Count=$(($Count + 1))
+    done
+
 else
    IFS="|"
    export Count=0

--- a/tools/infer/predict_e2e.py
+++ b/tools/infer/predict_e2e.py
@@ -74,7 +74,7 @@ class TextE2E(object):

        self.preprocess_op = create_operators(pre_process_list)
        self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
+        self.predictor, self.input_tensor, self.output_tensors, _ = utility.create_predictor(
            args, 'e2e', logger)  # paddle.jit.load(args.det_model_dir)
        # self.predictor.eval()


--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -88,8 +88,8 @@ class TextRecognizer(object):
    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
        assert imgC == img.shape[2]
-        if self.character_type == "ch":
-            imgW = int((32 * max_wh_ratio))
+        max_wh_ratio = max(max_wh_ratio, imgW / imgH)
+        imgW = int((32 * max_wh_ratio))
        h, w = img.shape[:2]
        ratio = w / float(h)
        if math.ceil(imgH * ratio) > imgW:
@@ -278,7 +278,7 @@ def main(args):
    if args.warmup:
        img = np.random.uniform(0, 255, [32, 320, 3]).astype(np.uint8)
        for i in range(2):
-            res = text_recognizer([img])
+            res = text_recognizer([img] * int(args.rec_batch_num))

    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)

--- a/tools/program.py
+++ b/tools/program.py
@@ -186,9 +186,11 @@ def train(config,
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
-    try: 
+    use_nrtr = config['Architecture']['algorithm'] == "NRTR"
+
+    try:
        model_type = config['Architecture']['model_type']
-    except: 
+    except:
        model_type = None

    if 'start_epoch' in best_model_dict:
@@ -213,7 +215,7 @@ def train(config,
            images = batch[0]
            if use_srn:
                model_average = True
-            if use_srn or model_type == 'table':
+            if use_srn or model_type == 'table' or use_nrtr:
                preds = model(images, data=batch[1:])
            else:
                preds = model(images)
@@ -398,7 +400,7 @@ def preprocess(is_train=False):
    alg = config['Architecture']['algorithm']
    assert alg in [
        'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
-        'CLS', 'PGNet', 'Distillation', 'TableAttn'
+        'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn'
    ]

    device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'