Commit 0959949f authored by LDOUBLEV

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph

@@ -27,7 +27,7 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
## Recent updates
- **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
  - Release [PP-StructureV2](./ppstructure/), with functions and performance fully upgraded, adapted to Chinese scenes, and new support for [Layout Recovery](./ppstructure/recovery) and **one line command to convert PDF to Word**;
  - [Layout Analysis](./ppstructure/layout) optimization: model storage reduced by 95%, while speed increased by 11 times, and the average CPU time-cost is only 41ms;
  - [Table Recognition](./ppstructure/table) optimization: 3 optimization strategies are designed, and the model accuracy is improved by 6% under comparable time consumption;
  - [Key Information Extraction](./ppstructure/kie) optimization: a visual-independent model structure is designed, the accuracy of semantic entity recognition is increased by 2.8%, and the accuracy of relation extraction is increased by 9.1%.
@@ -181,7 +181,7 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
</details>
<details open>
<summary>PP-StructureV2</summary>
- layout analysis + table recognition
<div align="center">
......
@@ -28,7 +28,7 @@ PaddleOCR aims to build a rich, leading, and practical OCR toolkit that helps
## Recent updates
- **🔥2022.8.24 Release PaddleOCR [release/2.6](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.6)**
  - Release [PP-StructureV2](./ppstructure/), with fully upgraded system functions and performance, adapted to Chinese scenes, new support for [Layout Recovery](./ppstructure/recovery), and **one line command to convert PDF to Word**;
  - [Layout Analysis](./ppstructure/layout) model optimization: model storage reduced by 95%, speed increased by 11 times, average CPU time-cost only 41ms;
  - [Table Recognition](./ppstructure/table) model optimization: 3 optimization strategies designed, model accuracy improved by 6% with unchanged prediction time;
  - [Key Information Extraction](./ppstructure/kie) model optimization: a visual-independent model structure designed, semantic entity recognition accuracy improved by 2.8%, relation extraction accuracy improved by 9.1%.
......
Global:
  use_gpu: true
  use_xpu: false
  use_mlu: false
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 10
......
Global:
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: ./output/det_r50_drrg_ctw/
  save_epoch_step: 100
  # evaluation is run every 1260 iterations
  eval_batch_step: [37800, 1260]
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained.pdparams
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./output/det_drrg/predicts_drrg.txt

Architecture:
  model_type: det
  algorithm: DRRG
  Transform:
  Backbone:
    name: ResNet_vd
    layers: 50
  Neck:
    name: FPN_UNet
    in_channels: [256, 512, 1024, 2048]
    out_channels: 32
  Head:
    name: DRRGHead
    in_channels: 32
    text_region_thr: 0.3
    center_region_thr: 0.4

Loss:
  name: DRRGLoss

Optimizer:
  name: Momentum
  momentum: 0.9
  lr:
    name: DecayLearningRate
    learning_rate: 0.028
    epochs: 1200
    factor: 0.9
    end_lr: 0.0000001
  weight_decay: 0.0001

PostProcess:
  name: DRRGPostprocess
  link_thr: 0.8

Metric:
  name: DetFCEMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/ctw1500/imgs/
    label_file_list:
      - ./train_data/ctw1500/imgs/training.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
          ignore_orientation: True
      - DetLabelEncode: # Class handling label
      - ColorJitter:
          brightness: 0.12549019607843137
          saturation: 0.5
      - RandomScaling:
      - RandomCropFlip:
          crop_ratio: 0.5
      - RandomCropPolyInstances:
          crop_ratio: 0.8
          min_side_ratio: 0.3
      - RandomRotatePolyInstances:
          rotate_ratio: 0.5
          max_angle: 60
          pad_with_fixed_color: False
      - SquareResizePad:
          target_size: 800
          pad_ratio: 0.6
      - IaaAugment:
          augmenter_args:
            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
      - DRRGTargets:
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
                      'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
                      'gt_cos_map', 'gt_comp_attribs'] # dataloader will return list in this order
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 4
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/ctw1500/imgs/
    label_file_list:
      - ./train_data/ctw1500/imgs/test.txt
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
          ignore_orientation: True
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          limit_type: 'min'
          limit_side_len: 640
      - NormalizeImage:
          scale: 1./255.
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - Pad:
      - ToCHWImage:
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 2
@@ -70,16 +70,14 @@ Loss:
      mode: "l2"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: hidden_states_5
      name: "loss_5"
  - DistillationVQADistanceLoss:
      weight: 0.5
      mode: "l2"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: hidden_states_8
      name: "loss_8"
@@ -182,4 +180,3 @@ Eval:
    drop_last: False
    batch_size_per_card: 8
    num_workers: 4
Global:
  use_gpu: True
  epoch_num: 240
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec/can/
  save_epoch_step: 1
  # evaluation is run every 1105 iterations (1 epoch)(batch_size = 8)
  eval_batch_step: [0, 1105]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/datasets/crohme_demo/hme_00.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/latex_symbol_dict.txt
  max_text_length: 36
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/rec/predicts_can.txt

Optimizer:
  name: Momentum
  momentum: 0.9
  clip_norm_global: 100.0
  lr:
    name: TwoStepCosine
    learning_rate: 0.01
    warmup_epoch: 1
  weight_decay: 0.0001

Architecture:
  model_type: rec
  algorithm: CAN
  in_channels: 1
  Transform:
  Backbone:
    name: DenseNet
    growthRate: 24
    reduction: 0.5
    bottleneck: True
    use_dropout: True
    input_channel: 1
  Head:
    name: CANHead
    in_channel: 684
    out_channel: 111
    max_text_length: 36
    ratio: 16
    attdecoder:
      is_train: True
      input_size: 256
      hidden_size: 256
      encoder_out_channel: 684
      dropout: True
      dropout_ratio: 0.5
      word_num: 111
      counting_decoder_out_channel: 111
      attention:
        attention_dim: 512
        word_conv_kernel: 1

Loss:
  name: CANLoss

PostProcess:
  name: CANLabelDecode

Metric:
  name: CANMetric
  main_indicator: exp_rate

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/CROHME/training/images/
    label_file_list: ["./train_data/CROHME/training/labels.txt"]
    transforms:
      - DecodeImage:
          channel_first: False
      - NormalizeImage:
          mean: [0,0,0]
          std: [1,1,1]
          order: 'hwc'
      - GrayImageChannelFormat:
          inverse: True
      - CANLabelEncode:
          lower: False
      - KeepKeys:
          keep_keys: ['image', 'label']
  loader:
    shuffle: True
    batch_size_per_card: 8
    drop_last: False
    num_workers: 4
    collate_fn: DyMaskCollator

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/CROHME/evaluation/images/
    label_file_list: ["./train_data/CROHME/evaluation/labels.txt"]
    transforms:
      - DecodeImage:
          channel_first: False
      - NormalizeImage:
          mean: [0,0,0]
          std: [1,1,1]
          order: 'hwc'
      - GrayImageChannelFormat:
          inverse: True
      - CANLabelEncode:
          lower: False
      - KeepKeys:
          keep_keys: ['image', 'label']
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1
    num_workers: 4
    collate_fn: DyMaskCollator
@@ -82,7 +82,7 @@ Train:
Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
......
Global:
  use_gpu: True
  epoch_num: 6
  log_smooth_window: 20
  print_batch_step: 50
  save_model_dir: ./output/rec/rec_resnet_rfl_att/
  save_epoch_step: 1
  # evaluation is run every 5000 iterations
  eval_batch_step: [0, 5000]
  cal_metric_during_train: True
  pretrained_model: ./pretrain_models/rec_resnet_rfl_visual/best_accuracy.pdparams
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/rec/rec_resnet_rfl.txt

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.0
  clip_norm_global: 5.0
  lr:
    name: Piecewise
    decay_epochs: [3, 4, 5]
    values: [0.001, 0.0003, 0.00009, 0.000027]

Architecture:
  model_type: rec
  algorithm: RFL
  in_channels: 1
  Transform:
    name: TPS
    num_fiducial: 20
    loc_lr: 1.0
    model_name: large
  Backbone:
    name: ResNetRFL
    use_cnt: True
    use_seq: True
  Neck:
    name: RFAdaptor
    use_v2s: True
    use_s2v: True
  Head:
    name: RFLHead
    in_channels: 512
    hidden_size: 256
    batch_max_legnth: 25
    out_channels: 38
    use_cnt: True
    use_seq: True

Loss:
  name: RFLLoss
  # ignore_index: 0

PostProcess:
  name: RFLLabelDecode

Metric:
  name: RecMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - RFLLabelEncode: # Class handling label
      - RFLRecResizeImg:
          image_shape: [1, 32, 100]
          padding: false
          interpolation: 2
      - KeepKeys:
          keep_keys: ['image', 'label', 'length', 'cnt_label'] # dataloader will return list in this order
  loader:
    shuffle: True
    batch_size_per_card: 64
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/validation/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - RFLLabelEncode: # Class handling label
      - RFLRecResizeImg:
          image_shape: [1, 32, 100]
          padding: false
          interpolation: 2
      - KeepKeys:
          keep_keys: ['image', 'label', 'length', 'cnt_label'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 8
Global:
  use_gpu: True
  epoch_num: 6
  log_smooth_window: 20
  print_batch_step: 50
  save_model_dir: ./output/rec/rec_resnet_rfl_visual/
  save_epoch_step: 1
  # evaluation is run every 5000 iterations
  eval_batch_step: [0, 5000]
  cal_metric_during_train: False
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/rec/rec_resnet_rfl_visual.txt

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.999
  weight_decay: 0.0
  clip_norm_global: 5.0
  lr:
    name: Piecewise
    decay_epochs: [3, 4, 5]
    values: [0.001, 0.0003, 0.00009, 0.000027]

Architecture:
  model_type: rec
  algorithm: RFL
  in_channels: 1
  Transform:
    name: TPS
    num_fiducial: 20
    loc_lr: 1.0
    model_name: large
  Backbone:
    name: ResNetRFL
    use_cnt: True
    use_seq: False
  Neck:
    name: RFAdaptor
    use_v2s: False
    use_s2v: False
  Head:
    name: RFLHead
    in_channels: 512
    hidden_size: 256
    batch_max_legnth: 25
    out_channels: 38
    use_cnt: True
    use_seq: False

Loss:
  name: RFLLoss

PostProcess:
  name: RFLLabelDecode

Metric:
  name: CNTMetric
  main_indicator: acc

Train:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - RFLLabelEncode: # Class handling label
      - RFLRecResizeImg:
          image_shape: [1, 32, 100]
          padding: false
          interpolation: 2
      - KeepKeys:
          keep_keys: ['image', 'label', 'length', 'cnt_label'] # dataloader will return list in this order
  loader:
    shuffle: True
    batch_size_per_card: 64
    drop_last: True
    num_workers: 8

Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - RFLLabelEncode: # Class handling label
      - RFLRecResizeImg:
          image_shape: [1, 32, 100]
          padding: false
          interpolation: 2
      - KeepKeys:
          keep_keys: ['image', 'label', 'length', 'cnt_label'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 256
    num_workers: 8
Global:
  use_gpu: true
  epoch_num: 100
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/sr/sr_telescope/
  save_epoch_step: 3
  # evaluation is run every 1000 iterations
  eval_batch_step: [0, 1000]
  cal_metric_during_train: False
  pretrained_model:
  checkpoints:
  save_inference_dir: ./output/sr/sr_telescope/infer
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_52.png
  # for data or label process
  character_dict_path:
  max_text_length: 100
  infer_mode: False
  use_space_char: False
  save_res_path: ./output/sr/predicts_telescope.txt

Optimizer:
  name: Adam
  beta1: 0.5
  beta2: 0.999
  clip_norm: 0.25
  lr:
    learning_rate: 0.0001

Architecture:
  model_type: sr
  algorithm: Telescope
  Transform:
    name: TBSRN
    STN: True
    infer_mode: False

Loss:
  name: TelescopeLoss
  confuse_dict_path: ./ppocr/utils/dict/confuse.pkl

PostProcess:
  name: None

Metric:
  name: SRMetric
  main_indicator: all

Train:
  dataset:
    name: LMDBDataSetSR
    data_dir: ./train_data/TextZoom/train
    transforms:
      - SRResize:
          imgH: 32
          imgW: 128
          down_sample_scale: 2
      - KeepKeys:
          keep_keys: ['img_lr', 'img_hr', 'label'] # dataloader will return list in this order
  loader:
    shuffle: False
    batch_size_per_card: 16
    drop_last: True
    num_workers: 4

Eval:
  dataset:
    name: LMDBDataSetSR
    data_dir: ./train_data/TextZoom/test
    transforms:
      - SRResize:
          imgH: 32
          imgW: 128
          down_sample_scale: 2
      - KeepKeys:
          keep_keys: ['img_lr', 'img_hr', 'label'] # dataloader will return list in this order
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 16
    num_workers: 4
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{
    "modules_info": {
        "kie_ser": {
            "init_args": {
                "version": "1.0.0",
                "use_gpu": true
            },
            "predict_args": {}
        }
    },
    "port": 8871,
    "use_multiprocess": false,
    "workers": 2
}
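
This configuration is consumed when starting the module with PaddleHub serving, e.g. (a minimal sketch, assuming the module has already been installed via `hub install`):

```
hub serving start -c deploy/hubserving/kie_ser/config.json
```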
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
sys.path.insert(0, ".")
import copy
import time

from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
import numpy as np
import paddlehub as hub

from tools.infer.utility import base64_to_cv2
from ppstructure.kie.predict_kie_token_ser import SerPredictor
from ppstructure.utility import parse_args
from deploy.hubserving.kie_ser.params import read_params


@moduleinfo(
    name="kie_ser",
    version="1.0.0",
    summary="kie ser service",
    author="paddle-dev",
    author_email="paddle-dev@baidu.com",
    type="cv/KIE_SER")
class KIESer(hub.Module):
    def _initialize(self, use_gpu=False, enable_mkldnn=False):
        """
        initialize with the necessary elements
        """
        cfg = self.merge_configs()
        cfg.use_gpu = use_gpu
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
                print("use gpu: ", use_gpu)
                print("CUDA_VISIBLE_DEVICES: ", _places)
                cfg.gpu_mem = 8000
            except Exception:
                raise RuntimeError(
                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. "
                    "If you want to use the GPU, please set CUDA_VISIBLE_DEVICES via "
                    "export CUDA_VISIBLE_DEVICES=cuda_device_id.")
        cfg.ir_optim = True
        cfg.enable_mkldnn = enable_mkldnn

        self.ser_predictor = SerPredictor(cfg)

    def merge_configs(self):
        # Default cfg: parse an empty command line to get the defaults,
        # then overwrite them with the values defined in params.py.
        backup_argv = copy.deepcopy(sys.argv)
        sys.argv = sys.argv[:1]
        cfg = parse_args()

        update_cfg_map = vars(read_params())
        for key in update_cfg_map:
            cfg.__setattr__(key, update_cfg_map[key])

        sys.argv = copy.deepcopy(backup_argv)
        return cfg

    def read_images(self, paths=[]):
        images = []
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                logger.info("error in loading image:{}".format(img_path))
                continue
            images.append(img)
        return images

    def predict(self, images=[], paths=[]):
        """
        Get the SER prediction results of the input images.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C].
                Use either `images` or `paths`, not both.
            paths (list[str]): the paths of the input images.
        Returns:
            res (list): the SER results of the input images.
        """
        if images != [] and isinstance(images, list) and paths == []:
            predicted_data = images
        elif images == [] and isinstance(paths, list) and paths != []:
            predicted_data = self.read_images(paths)
        else:
            raise TypeError("The input data is inconsistent with expectations.")

        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."

        all_results = []
        for img in predicted_data:
            if img is None:
                logger.info("error in loading image")
                all_results.append([])
                continue
            starttime = time.time()
            ser_res, _, _ = self.ser_predictor(img)
            elapse = time.time() - starttime
            logger.info("Predict time: {}".format(elapse))
            all_results.append(ser_res)
        return all_results

    @serving
    def serving_method(self, images, **kwargs):
        """
        Run as a service.
        """
        images_decode = [base64_to_cv2(image) for image in images]
        results = self.predict(images_decode, **kwargs)
        return results


if __name__ == '__main__':
    ser = KIESer()
    ser._initialize()
    image_path = [
        './doc/imgs/11.jpg',
        './doc/imgs/12.jpg',
    ]
    res = ser.predict(paths=image_path)
    print(res)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from deploy.hubserving.ocr_system.params import read_params as pp_ocr_read_params


class Config(object):
    pass


def read_params():
    cfg = pp_ocr_read_params()

    # SER params
    cfg.kie_algorithm = "LayoutXLM"
    cfg.use_visual_backbone = False

    cfg.ser_model_dir = "./inference/ser_vi_layoutxlm_xfund_infer"
    cfg.ser_dict_path = "train_data/XFUND/class_list_xfun.txt"
    cfg.vis_font_path = "./doc/fonts/simfang.ttf"
    cfg.ocr_order_method = "tb-yx"

    return cfg
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
{
    "modules_info": {
        "kie_ser_re": {
            "init_args": {
                "version": "1.0.0",
                "use_gpu": true
            },
            "predict_args": {}
        }
    },
    "port": 8872,
    "use_multiprocess": false,
    "workers": 2
}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
sys.path.insert(0, ".")
import copy
import time

from paddlehub.common.logger import logger
from paddlehub.module.module import moduleinfo, runnable, serving
import cv2
import numpy as np
import paddlehub as hub

from tools.infer.utility import base64_to_cv2
from ppstructure.kie.predict_kie_token_ser_re import SerRePredictor
from ppstructure.utility import parse_args
from deploy.hubserving.kie_ser_re.params import read_params


@moduleinfo(
    name="kie_ser_re",
    version="1.0.0",
    summary="kie ser re service",
    author="paddle-dev",
    author_email="paddle-dev@baidu.com",
    type="cv/KIE_SER_RE")
class KIESerRE(hub.Module):
    def _initialize(self, use_gpu=False, enable_mkldnn=False):
        """
        initialize with the necessary elements
        """
        cfg = self.merge_configs()
        cfg.use_gpu = use_gpu
        if use_gpu:
            try:
                _places = os.environ["CUDA_VISIBLE_DEVICES"]
                int(_places[0])
                print("use gpu: ", use_gpu)
                print("CUDA_VISIBLE_DEVICES: ", _places)
                cfg.gpu_mem = 8000
            except Exception:
                raise RuntimeError(
                    "Environment variable CUDA_VISIBLE_DEVICES is not set correctly. "
                    "If you want to use the GPU, please set CUDA_VISIBLE_DEVICES via "
                    "export CUDA_VISIBLE_DEVICES=cuda_device_id.")
        cfg.ir_optim = True
        cfg.enable_mkldnn = enable_mkldnn

        self.ser_re_predictor = SerRePredictor(cfg)

    def merge_configs(self):
        # Default cfg: parse an empty command line to get the defaults,
        # then overwrite them with the values defined in params.py.
        backup_argv = copy.deepcopy(sys.argv)
        sys.argv = sys.argv[:1]
        cfg = parse_args()

        update_cfg_map = vars(read_params())
        for key in update_cfg_map:
            cfg.__setattr__(key, update_cfg_map[key])

        sys.argv = copy.deepcopy(backup_argv)
        return cfg

    def read_images(self, paths=[]):
        images = []
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                logger.info("error in loading image:{}".format(img_path))
                continue
            images.append(img)
        return images

    def predict(self, images=[], paths=[]):
        """
        Get the SER+RE prediction results of the input images.

        Args:
            images (list(numpy.ndarray)): images data, shape of each is [H, W, C].
                Use either `images` or `paths`, not both.
            paths (list[str]): the paths of the input images.
        Returns:
            res (list): the RE results of the input images.
        """
        if images != [] and isinstance(images, list) and paths == []:
            predicted_data = images
        elif images == [] and isinstance(paths, list) and paths != []:
            predicted_data = self.read_images(paths)
        else:
            raise TypeError("The input data is inconsistent with expectations.")

        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."

        all_results = []
        for img in predicted_data:
            if img is None:
                logger.info("error in loading image")
                all_results.append([])
                continue
            starttime = time.time()
            re_res, _ = self.ser_re_predictor(img)
            elapse = time.time() - starttime
            logger.info("Predict time: {}".format(elapse))
            all_results.append(re_res)
        return all_results

    @serving
    def serving_method(self, images, **kwargs):
        """
        Run as a service.
        """
        images_decode = [base64_to_cv2(image) for image in images]
        results = self.predict(images_decode, **kwargs)
        return results


if __name__ == '__main__':
    ser_re = KIESerRE()
    ser_re._initialize()
    image_path = [
        './doc/imgs/11.jpg',
        './doc/imgs/12.jpg',
    ]
    res = ser_re.predict(paths=image_path)
    print(res)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from deploy.hubserving.ocr_system.params import read_params as pp_ocr_read_params


class Config(object):
    pass


def read_params():
    cfg = pp_ocr_read_params()

    # SER params
    cfg.kie_algorithm = "LayoutXLM"
    cfg.use_visual_backbone = False

    cfg.ser_model_dir = "./inference/ser_vi_layoutxlm_xfund_infer"
    cfg.re_model_dir = "./inference/re_vi_layoutxlm_xfund_infer"
    cfg.ser_dict_path = "train_data/XFUND/class_list_xfun.txt"
    cfg.vis_font_path = "./doc/fonts/simfang.ttf"
    cfg.ocr_order_method = "tb-yx"

    return cfg
...@@ -30,6 +30,8 @@ deploy/hubserving/ ...@@ -30,6 +30,8 @@ deploy/hubserving/
└─ structure_layout 版面分析服务包 └─ structure_layout 版面分析服务包
└─ structure_table 表格识别服务包 └─ structure_table 表格识别服务包
└─ structure_system PP-Structure服务包 └─ structure_system PP-Structure服务包
└─ kie_ser 关键信息抽取-SER服务包
└─ kie_ser_re 关键信息抽取-SER+RE服务包
``` ```
每个服务包下包含3个文件。以2阶段串联服务包为例,目录如下: 每个服务包下包含3个文件。以2阶段串联服务包为例,目录如下:
...@@ -42,6 +44,7 @@ deploy/hubserving/ocr_system/ ...@@ -42,6 +44,7 @@ deploy/hubserving/ocr_system/
``` ```
## 1. 近期更新 ## 1. 近期更新
* 2022.10.09 新增关键信息抽取服务。
* 2022.08.23 新增版面分析服务。 * 2022.08.23 新增版面分析服务。
* 2022.05.05 新增PP-OCRv3检测和识别模型。 * 2022.05.05 新增PP-OCRv3检测和识别模型。
* 2022.03.30 新增PP-Structure和表格识别两种服务。 * 2022.03.30 新增PP-Structure和表格识别两种服务。
...@@ -57,12 +60,15 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple ...@@ -57,12 +60,15 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://mirror.baidu.com/pypi/simple
### 2.2 下载推理模型 ### 2.2 下载推理模型
安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是PP-OCRv3模型,默认模型路径为: 安装服务模块前,需要准备推理模型并放到正确路径。默认使用的是PP-OCRv3模型,默认模型路径为:
``` ```
检测模型:./inference/ch_PP-OCRv3_det_infer/ 检测模型:./inference/ch_PP-OCRv3_det_infer/
识别模型:./inference/ch_PP-OCRv3_rec_infer/ 识别模型:./inference/ch_PP-OCRv3_rec_infer/
方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/ 方向分类器:./inference/ch_ppocr_mobile_v2.0_cls_infer/
版面分析模型:./inference/picodet_lcnet_x1_0_fgd_layout_infer/ 版面分析模型:./inference/picodet_lcnet_x1_0_fgd_layout_infer/
表格结构识别模型:./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/ 表格结构识别模型:./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
关键信息抽取SER模型:./inference/ser_vi_layoutxlm_xfund_infer/
关键信息抽取RE模型:./inference/re_vi_layoutxlm_xfund_infer/
``` ```
**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的模型库[PP-OCR](../../doc/doc_ch/models_list.md)[PP-Structure](../../ppstructure/docs/models_list.md)下载,也可以替换成自己训练转换好的模型。 **模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的模型库[PP-OCR](../../doc/doc_ch/models_list.md)[PP-Structure](../../ppstructure/docs/models_list.md)下载,也可以替换成自己训练转换好的模型。
...@@ -92,6 +98,12 @@ hub install deploy/hubserving/structure_system/ ...@@ -92,6 +98,12 @@ hub install deploy/hubserving/structure_system/
# 或,安装版面分析服务模块: # 或,安装版面分析服务模块:
hub install deploy/hubserving/structure_layout/ hub install deploy/hubserving/structure_layout/
# 或,安装关键信息抽取SER服务模块:
hub install deploy/hubserving/kie_ser/
# 或,安装关键信息抽取SER+RE服务模块:
hub install deploy/hubserving/kie_ser_re/
``` ```
* 在Windows环境下(文件夹的分隔符为`\`),安装示例如下: * 在Windows环境下(文件夹的分隔符为`\`),安装示例如下:
...@@ -116,6 +128,12 @@ hub install deploy\hubserving\structure_system\ ...@@ -116,6 +128,12 @@ hub install deploy\hubserving\structure_system\
# 或,安装版面分析服务模块: # 或,安装版面分析服务模块:
hub install deploy\hubserving\structure_layout\ hub install deploy\hubserving\structure_layout\
# 或,安装关键信息抽取SER服务模块:
hub install deploy\hubserving\kie_ser\
# 或,安装关键信息抽取SER+RE服务模块:
hub install deploy\hubserving\kie_ser_re\
``` ```
### 2.4 启动服务 ### 2.4 启动服务
...@@ -194,6 +212,8 @@ hub serving start -c deploy/hubserving/ocr_system/config.json ...@@ -194,6 +212,8 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
`http://127.0.0.1:8869/predict/structure_table` `http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system` `http://127.0.0.1:8870/predict/structure_system`
`http://127.0.0.1:8870/predict/structure_layout` `http://127.0.0.1:8870/predict/structure_layout`
`http://127.0.0.1:8871/predict/kie_ser`
`http://127.0.0.1:8872/predict/kie_ser_re`
- **image_dir**:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径 - **image_dir**:测试图像路径,可以是单张图片路径,也可以是图像集合目录路径
- **visualize**:是否可视化结果,默认为False - **visualize**:是否可视化结果,默认为False
- **output**:可视化结果保存路径,默认为`./hubserving_result` - **output**:可视化结果保存路径,默认为`./hubserving_result`
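
In addition to the test script, a request can be sent to the service directly. A minimal client sketch, assuming the standard PaddleHub serving protocol (a JSON body `{"images": [<base64-encoded image>, ...]}` posted to `/predict/<module name>`; the port and module name must match your `config.json`):

```
import base64
import json

import requests


def predict_kie_ser(image_path, url="http://127.0.0.1:8871/predict/kie_ser"):
    # Encode the image file as base64, as expected by serving_method above.
    with open(image_path, "rb") as f:
        img_b64 = base64.b64encode(f.read()).decode("utf-8")
    headers = {"Content-Type": "application/json"}
    resp = requests.post(url, headers=headers, data=json.dumps({"images": [img_b64]}))
    resp.raise_for_status()
    # PaddleHub serving is expected to wrap the module output in a "results" field.
    return resp.json()["results"]


if __name__ == "__main__":
    print(predict_kie_ser("./doc/imgs/11.jpg"))
```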
@@ -216,15 +236,18 @@ hub serving start -c deploy/hubserving/ocr_system/config.json
Different modules return different fields. For example, the results returned by the text recognition service module do not contain the `text_region` field. The details are as follows:

| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | structure_layout | kie_ser | kie_re |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| angle | | ✔ | | ✔ | | | | | |
| text | | | ✔ | ✔ | | ✔ | | ✔ | ✔ |
| confidence | | ✔ | ✔ | | | ✔ | | ✔ | ✔ |
| text_region | ✔ | | | ✔ | | ✔ | | ✔ | ✔ |
| html | | | | | ✔ | ✔ | | | |
| regions | | | | | ✔ | ✔ | | | |
| layout | | | | | | | ✔ | | |
| ser_res | | | | | | | | ✔ | |
| re_res | | | | | | | | | ✔ |

**Note:** If you need to add, delete, or modify the returned fields, you can do so in the `module.py` file of the corresponding module. For the complete process, refer to the next section on customizing the service module.
......
@@ -30,6 +30,8 @@ deploy/hubserving/
  └─  structure_layout   layout analysis service package
  └─  structure_table    table recognition service package
  └─  structure_system   PP-Structure service package
  └─  kie_ser            KIE(SER) service package
  └─  kie_ser_re         KIE(SER+RE) service package
```
Each service pack contains 3 files. Take the 2-stage cascade service package as an example; the directory is as follows:
@@ -42,9 +44,10 @@ deploy/hubserving/ocr_system/
```
## 1. Update
* 2022.10.09 add KIE services.
* 2022.08.23 add layout analysis services.
* 2022.03.30 add PP-Structure and table recognition services.
* 2022.05.05 add PP-OCRv3 text detection and recognition services.
## 2. Quick start service
@@ -65,6 +68,8 @@ text recognition model: ./inference/ch_PP-OCRv3_rec_infer/
text angle classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
layout parse model: ./inference/picodet_lcnet_x1_0_fgd_layout_infer/
table recognition model: ./inference/ch_ppstructure_mobile_v2.0_SLANet_infer/
KIE(SER): ./inference/ser_vi_layoutxlm_xfund_infer/
KIE(SER+RE): ./inference/re_vi_layoutxlm_xfund_infer/
```
**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
@@ -92,8 +97,11 @@ hub install deploy/hubserving/structure_table/
# Or install the PP-Structure service module
hub install deploy/hubserving/structure_system/

# Or install the KIE(SER) service module
hub install deploy/hubserving/kie_ser/

# Or install the KIE(SER+RE) service module
hub install deploy/hubserving/kie_ser_re/
```
* On the Windows platform, the examples are as follows.
@@ -118,6 +126,12 @@ hub install deploy\hubserving\structure_system\
# Or install the layout analysis service module
hub install deploy\hubserving\structure_layout\

# Or install the KIE(SER) service module
hub install deploy\hubserving\kie_ser\

# Or install the KIE(SER+RE) service module
hub install deploy\hubserving\kie_ser_re\
```
### 2.4 Start service
@@ -201,6 +215,8 @@ For example, if using the configuration file to start the text angle classificat
`http://127.0.0.1:8869/predict/structure_table`
`http://127.0.0.1:8870/predict/structure_system`
`http://127.0.0.1:8870/predict/structure_layout`
`http://127.0.0.1:8871/predict/kie_ser`
`http://127.0.0.1:8872/predict/kie_ser_re`
- **image_dir**: test image path, which can be a single image path or an image directory path
- **visualize**: whether to visualize the results; the default value is False
- **output**: the folder to save the visualization results; the default value is `./hubserving_result`
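
The service can also be called without the helper script. A one-line sketch with `curl` (assuming the standard PaddleHub serving JSON protocol and GNU coreutils `base64`):

```
curl -H "Content-Type: application/json" \
     -X POST \
     -d '{"images": ["'$(base64 -w 0 doc/imgs/11.jpg)'"]}' \
     http://127.0.0.1:8871/predict/kie_ser
```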
@@ -225,15 +241,17 @@ The returned result is a list. Each item in the list is a dict. The dict may contain
The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows:

| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system | structure_table | structure_system | structure_layout | kie_ser | kie_re |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| angle | | ✔ | | ✔ | | | | | |
| text | | | ✔ | ✔ | | ✔ | | ✔ | ✔ |
| confidence | | ✔ | ✔ | | | ✔ | | ✔ | ✔ |
| text_region | ✔ | | | ✔ | | ✔ | | ✔ | ✔ |
| html | | | | | ✔ | ✔ | | | |
| regions | | | | | ✔ | ✔ | | | |
| layout | | | | | | | ✔ | | |
| ser_res | | | | | | | | ✔ | |
| re_res | | | | | | | | | ✔ |

**Note:** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section.
......
# Paddle2ONNX model conversion and prediction

This chapter describes how to convert a PaddleOCR model to an ONNX model and run prediction based on the ONNXRuntime engine.

## 1. Environment preparation

You need to prepare the environments for PaddleOCR, Paddle2ONNX model conversion, and ONNXRuntime prediction.

### PaddleOCR

Clone the PaddleOCR repository, use the release/2.6 branch, and install it.
```
git clone -b release/2.6 https://github.com/PaddlePaddle/PaddleOCR.git
cd PaddleOCR && python3.7 setup.py install
```

### Paddle2ONNX

Paddle2ONNX supports converting models in the PaddlePaddle format to the ONNX format. Export is currently stable for ONNX Opset 9~11, and some Paddle operators support conversion to lower ONNX Opsets.
For more details, please refer to [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/README_en.md)

- install Paddle2ONNX
```
python3.7 -m pip install paddle2onnx
```

- install ONNXRuntime
```
# It is recommended to install version 1.9.0; the version number can be changed according to the environment
python3.7 -m pip install onnxruntime==1.9.0
```

## 2. Model conversion

- Paddle model download

There are two ways to obtain a Paddle model: download the prediction models provided by PaddleOCR in the [model_list](../../doc/doc_en/models_list_en.md), or refer to the [model export instructions](../../doc/doc_en/inference_en.md#1-convert-training-model-to-inference-model) to convert trained weights to an inference model.

Take the PP-OCRv3 detection, recognition, and classification models as an example:
```
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tar
cd ./inference && tar xf en_PP-OCRv3_det_infer.tar && cd ..

wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar
cd ./inference && tar xf en_PP-OCRv3_rec_infer.tar && cd ..

wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_cls_infer.tar && cd ..
```

- Convert the models

Convert the Paddle inference models to the ONNX format using Paddle2ONNX:

```
paddle2onnx --model_dir ./inference/en_PP-OCRv3_det_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/det_onnx/model.onnx \
@@ -65,7 +66,7 @@ paddle2onnx --model_dir ./inference/en_PP-OCRv3_det_infer \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True

paddle2onnx --model_dir ./inference/en_PP-OCRv3_rec_infer \
--model_filename inference.pdmodel \
--params_filename inference.pdiparams \
--save_file ./inference/rec_onnx/model.onnx \
@@ -81,136 +82,89 @@ paddle2onnx --model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer \
--input_shape_dict="{'x':[-1,3,-1,-1]}" \
--enable_onnx_checker True
```
After execution, the ONNX models will be saved in `./inference/det_onnx/`, `./inference/rec_onnx/`, and `./inference/cls_onnx/` respectively.

* Note: For the OCR models, the conversion process must use dynamic shapes, i.e. add the option --input_shape_dict="{'x': [-1, 3, -1, -1]}"; otherwise the prediction results may differ slightly from predicting directly with Paddle.

In addition, the following models do not currently support conversion to ONNX models: NRTR, SAR, RARE, SRN.
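
Before running the full pipeline, an exported model can be sanity-checked directly with ONNXRuntime. A minimal sketch, assuming the dynamic-shape detection export above (the input name `x` matches the `--input_shape_dict` used during conversion):

```
import numpy as np
import onnxruntime as ort

# Load the exported detection model and run a dummy NCHW input through it.
sess = ort.InferenceSession("./inference/det_onnx/model.onnx",
                            providers=["CPUExecutionProvider"])
dummy = np.random.rand(1, 3, 640, 640).astype("float32")
outputs = sess.run(None, {"x": dummy})
print([o.shape for o in outputs])  # e.g. a probability map with the input's spatial size
```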
## 3. Prediction

Taking the English OCR model as an example, run prediction with **ONNXRuntime** by executing the following command:

```
python3.7 tools/infer/predict_system.py --use_gpu=False --use_onnx=True \
--det_model_dir=./inference/det_onnx/model.onnx \
--rec_model_dir=./inference/rec_onnx/model.onnx \
--cls_model_dir=./inference/cls_onnx/model.onnx \
--image_dir=doc/imgs_en/img_12.jpg \
--rec_char_dict_path=ppocr/utils/en_dict.txt
```

Taking the English OCR model as an example, run prediction with **Paddle Inference** by executing the following command:

```
python3.7 tools/infer/predict_system.py --use_gpu=False \
--cls_model_dir=./inference/ch_ppocr_mobile_v2.0_cls_infer \
--rec_model_dir=./inference/en_PP-OCRv3_rec_infer \
--det_model_dir=./inference/en_PP-OCRv3_det_infer \
--image_dir=doc/imgs_en/img_12.jpg \
--rec_char_dict_path=ppocr/utils/en_dict.txt
```

After executing the command, the predicted recognition results are printed in the terminal, and the visualization results are saved under `./inference_results/`.

ONNXRuntime result
<div align="center">
<img src="../../doc/imgs_results/multi_lang/img_12.jpg" width="800">
</div>

Paddle Inference result
<div align="center">
<img src="../../doc/imgs_results/multi_lang/img_12.jpg" width="800">
</div>
Using ONNXRuntime to predict, terminal output:

```
[2022/10/10 12:06:28] ppocr DEBUG: dt_boxes num : 11, elapse : 0.3568880558013916
[2022/10/10 12:06:31] ppocr DEBUG: rec_res num  : 11, elapse : 2.6445000171661377
[2022/10/10 12:06:31] ppocr DEBUG: 0  Predict time of doc/imgs_en/img_12.jpg: 3.021s
[2022/10/10 12:06:31] ppocr DEBUG: ACKNOWLEDGEMENTS, 0.997
[2022/10/10 12:06:31] ppocr DEBUG: We would like to thank all the designers and, 0.976
[2022/10/10 12:06:31] ppocr DEBUG: contributors who have been involved in the, 0.979
[2022/10/10 12:06:31] ppocr DEBUG: production of this book; their contributions, 0.989
[2022/10/10 12:06:31] ppocr DEBUG: have been indispensable to its creation. We, 0.956
[2022/10/10 12:06:31] ppocr DEBUG: would also like to express our gratitude to all, 0.991
[2022/10/10 12:06:31] ppocr DEBUG: the producers for their invaluable opinions, 0.978
[2022/10/10 12:06:31] ppocr DEBUG: and assistance throughout this project. And to, 0.988
[2022/10/10 12:06:31] ppocr DEBUG: the many others whose names are not credited, 0.958
[2022/10/10 12:06:31] ppocr DEBUG: but have made specific input in this book, we, 0.970
[2022/10/10 12:06:31] ppocr DEBUG: thank you for your continuous support., 0.998
[2022/10/10 12:06:31] ppocr DEBUG: The visualized image saved in ./inference_results/img_12.jpg
[2022/10/10 12:06:31] ppocr INFO: The predict total time is 3.2482550144195557
```

Using Paddle Inference to predict, terminal output:

```
[2022/10/10 12:06:28] ppocr DEBUG: dt_boxes num : 11, elapse : 0.3568880558013916
[2022/10/10 12:06:31] ppocr DEBUG: rec_res num  : 11, elapse : 2.6445000171661377
[2022/10/10 12:06:31] ppocr DEBUG: 0  Predict time of doc/imgs_en/img_12.jpg: 3.021s
[2022/10/10 12:06:31] ppocr DEBUG: ACKNOWLEDGEMENTS, 0.997
[2022/10/10 12:06:31] ppocr DEBUG: We would like to thank all the designers and, 0.976
[2022/10/10 12:06:31] ppocr DEBUG: contributors who have been involved in the, 0.979
[2022/10/10 12:06:31] ppocr DEBUG: production of this book; their contributions, 0.989
[2022/10/10 12:06:31] ppocr DEBUG: have been indispensable to its creation. We, 0.956
[2022/10/10 12:06:31] ppocr DEBUG: would also like to express our gratitude to all, 0.991
[2022/10/10 12:06:31] ppocr DEBUG: the producers for their invaluable opinions, 0.978
[2022/10/10 12:06:31] ppocr DEBUG: and assistance throughout this project. And to, 0.988
[2022/10/10 12:06:31] ppocr DEBUG: the many others whose names are not credited, 0.958
[2022/10/10 12:06:31] ppocr DEBUG: but have made specific input in this book, we, 0.970
[2022/10/10 12:06:31] ppocr DEBUG: thank you for your continuous support., 0.998
[2022/10/10 12:06:31] ppocr DEBUG: The visualized image saved in ./inference_results/img_12.jpg
[2022/10/10 12:06:31] ppocr INFO: The predict total time is 3.2482550144195557
```
# DRRG

- [1. Introduction](#1-introduction)
- [2. Environment](#2-environment)
- [3. Model Training / Evaluation / Prediction](#3-model-training--evaluation--prediction)
- [4. Inference and Deployment](#4-inference-and-deployment)
  - [4.1 Python Inference](#41-python-inference)
  - [4.2 C++ Inference](#42-c-inference)
  - [4.3 Serving Deployment](#43-serving-deployment)
  - [4.4 More Inference Deployment Options](#44-more-inference-deployment-options)
- [5. FAQ](#5-faq)
- [Citation](#citation)

<a name="1"></a>
## 1. Introduction

Paper:
> [Deep Relational Reasoning Graph Network for Arbitrary Shape Text Detection](https://arxiv.org/abs/2003.07493)
> Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng
> CVPR, 2020

On the CTW1500 public text detection dataset, the reproduced results are as follows:

| Model | Backbone | Config | Precision | Recall | Hmean | Download |
| --- | --- | --- | --- | --- | --- | --- |
| DRRG | ResNet50_vd | [configs/det/det_r50_drrg_ctw.yml](../../configs/det/det_r50_drrg_ctw.yml) | 89.92% | 80.91% | 85.18% | [trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_drrg_ctw_train.tar) |

<a name="2"></a>
## 2. Environment

Please refer to ["Environment Preparation"](./environment.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone.md) to clone the project code.

<a name="3"></a>
## 3. Model Training / Evaluation / Prediction

The above DRRG model is trained on the CTW1500 public text detection dataset; see [ocr_datasets](./dataset/ocr_datasets.md) for the dataset download.

After the data is downloaded, please refer to the [text detection training tutorial](./detection.md) for training. PaddleOCR has modularized the code, so training a different detection model only requires **switching the configuration file**, as in the launch sketch below.
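
A minimal single-GPU launch with the standard PaddleOCR training entry point (adjust `Global.pretrained_model` and the dataset paths in the config to your environment):

```
python3 tools/train.py -c configs/det/det_r50_drrg_ctw.yml
```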
<a name="4"></a>
## 4. 推理部署
<a name="4-1"></a>
### 4.1 Python推理
由于模型前向运行时需要多次转换为Numpy数据进行运算,因此DRRG的动态图转静态图暂未支持。
<a name="4-2"></a>
### 4.2 C++推理
暂未支持
<a name="4-3"></a>
### 4.3 Serving服务化部署
暂未支持
<a name="4-4"></a>
### 4.4 更多推理部署
暂未支持
<a name="5"></a>
## 5. FAQ
## 引用
```bibtex
@inproceedings{zhang2020deep,
title={Deep relational reasoning graph network for arbitrary shape text detection},
author={Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={9699--9708},
year={2020}
}
```
@@ -29,6 +29,7 @@ PaddleOCR will **continue to add** support for cutting-edge OCR algorithms and models; contributions are welcome
- [x] [SAST](./algorithm_det_sast.md)
- [x] [PSENet](./algorithm_det_psenet.md)
- [x] [FCENet](./algorithm_det_fcenet.md)
- [x] [DRRG](./algorithm_det_drrg.md)

On the ICDAR2015 public text detection dataset, the results are as follows:
@@ -54,6 +55,7 @@ PaddleOCR will **continue to add** support for cutting-edge OCR algorithms and models; contributions are welcome
| Model | Backbone | Precision | Recall | Hmean | Download |
| --- | --- | --- | --- | --- | --- |
| FCE | ResNet50_dcn | 88.39% | 82.18% | 85.27% | [trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar) |
| DRRG | ResNet50_vd | 89.92% | 80.91% | 85.18% | [trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_drrg_ctw_train.tar) |

**Note:** The SAST model is additionally fine-tuned with public datasets such as ICDAR2013, ICDAR2017, COCO-Text, and ArT. Download links for the formatted English public datasets used by PaddleOCR:
* [Baidu Cloud](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (extraction code: 2bpi)
@@ -79,6 +81,7 @@ PaddleOCR will **continue to add** support for cutting-edge OCR algorithms and models; contributions are welcome
- [x] [VisionLAN](./algorithm_rec_visionlan.md)
- [x] [SPIN](./algorithm_rec_spin.md)
- [x] [RobustScanner](./algorithm_rec_robustscanner.md)
- [x] [RFL](./algorithm_rec_rfl.md)

Following the [DTRB](https://arxiv.org/abs/1904.01906)[3] text recognition training and evaluation procedure, the models are trained on the MJSynth and SynthText datasets and evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, and CUTE. The results are as follows:
@@ -99,10 +102,10 @@ PaddleOCR will **continue to add** support for cutting-edge OCR algorithms and models; contributions are welcome
| SVTR | SVTR-Tiny | 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
| ViTSTR | ViTSTR | 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_ce_train.tar) |
| ABINet | Resnet45 | 90.75% | rec_r45_abinet | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) |
| VisionLAN | Resnet45 | 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) |
| SPIN | ResNet32 | 90.00% | rec_r32_gaspin_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) |
| RobustScanner | ResNet31 | 87.77% | rec_r31_robustscanner | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar) |
| RFL | ResNetRFL | 88.63% | rec_resnet_rfl_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) |

<a name="2"></a>
......
# Handwritten Mathematical Expression Recognition - CAN
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
  - [3.1 Training](#3-1)
  - [3.2 Evaluation](#3-2)
  - [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
  - [4.1 Python Inference](#4-1)
  - [4.2 C++ Inference](#4-2)
  - [4.3 Serving](#4-3)
  - [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition](https://arxiv.org/abs/2207.11463)
> Bohan Li, Ye Yuan, Dingkang Liang, Xiao Liu, Zhilong Ji, Jinfeng Bai, Wenyu Liu, Xiang Bai
> ECCV, 2022
<a name="model"></a>
`CAN` is trained on the CROHME handwritten formula dataset; its accuracy on the corresponding test set is as follows:
|Model|Backbone|Config|ExpRate|Download|
| ----- | ----- | ----- | ----- | ----- |
|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_d28_can_train.tar)|
<a name="2"></a>
## 2. Environment
Please prepare your environment referring to [Environment Preparation](./environment.md) and clone the project code referring to [Project Clone](./clone.md).
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
<a name="3-1"></a>
### 3.1 Training
Please refer to the [text recognition training tutorial](./recognition.md). PaddleOCR is modularized; to train the `CAN` recognition model you only need to **switch the configuration file** to the `CAN` [config file](../../configs/rec/rec_d28_can.yml).
#### Start training
After the data preparation is complete, start training with the following commands:
```shell
# Single-GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_d28_can.yml
# Multi-GPU training; specify the GPU ids with the --gpus option
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_d28_can.yml
```
**Note:**
- The dataset we provide, the [`CROHME dataset`](https://paddleocr.bj.bcebos.com/dataset/CROHME.tar), stores handwritten formulas as white strokes on a black background. If your own dataset is stored the other way round (black strokes on a white background), add the following option when training:
```
python3 tools/train.py -c configs/rec/rec_d28_can.yml
-o Train.dataset.transforms.GrayImageChannelFormat.inverse=False
```
- By default, evaluation runs once per training epoch (every 1105 iterations). If you change the training batch_size or switch datasets, adjust the evaluation interval accordingly:
```
python3 tools/train.py -c configs/rec/rec_d28_can.yml
-o Global.eval_batch_step=[0, {length_of_dataset//batch_size}]
```
<a name="3-2"></a>
### 3.2 Evaluation
Download the trained [model file](https://paddleocr.bj.bcebos.com/contribution/can_train.tar) and evaluate with the following command:
```shell
# Set pretrained_model to a local path. If you use a model you trained and saved yourself, change the path and file name to {path/to/weights}/{model_name}.
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
```
<a name="3-3"></a>
### 3.3 Prediction
Predict a single image with the following command:
```shell
# Set pretrained_model to a local path.
python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/datasets/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
# To predict all images in a folder, set infer_img to the folder, e.g. Global.infer_img='./doc/datasets/crohme_demo/'.
```
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First convert the best training checkpoint into an inference model. Taking the released model as an example ([download link](https://paddleocr.bj.bcebos.com/contribution/can_train.tar)), run:
```shell
# Set pretrained_model to a local path.
python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
# The exported static-graph model outputs at most 36 tokens by default. To predict longer sequences, specify a suitable output length when exporting, e.g. Architecture.Head.max_text_length=72
```
**Note:**
- If you trained the model on your own dataset with an adjusted dictionary, check that `character_dict_path` in the configuration file points to the dictionary you need.
After a successful conversion there are three files in the directory:
```
/inference/rec_d28_can/
    ├── inference.pdiparams         # parameter file of the recognition inference model
    ├── inference.pdiparams.info    # parameter info of the recognition inference model, can be ignored
    └── inference.pdmodel           # program file of the recognition inference model
```
Run model inference with the following command:
```shell
python3 tools/infer/predict_rec.py --image_dir="./doc/datasets/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
# To predict all images in a folder, set image_dir to the folder, e.g. --image_dir='./doc/datasets/crohme_demo/'.
# To predict images with black text on a white background, set --rec_image_inverse=False
```
![Sample test image](../datasets/crohme_demo/hme_00.jpg)
After running the command, the prediction (the recognized text) for the image above is printed to the screen, for example:
```shell
Predicts of ./doc/imgs_hme/hme_00.jpg:['x _ { k } x x _ { k } + y _ { k } y x _ { k }', []]
```
**Note**
- The input image must be **white on black**, i.e. the handwritten formula is white and the background is black; for black-on-white images, invert them first (a sketch follows below) or set `--rec_image_inverse=False`.
- Set `rec_char_dict_path` at inference time to specify the dictionary; if you changed the dictionary, point this parameter to your dictionary file.
- If you changed the preprocessing, replace the CAN preprocessing in `tools/infer/predict_rec.py` with your own.
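For the first note, a minimal inversion helper is enough (assuming OpenCV is installed; the file names are placeholders):
```python
# Minimal sketch: invert a black-on-white formula image so it matches the
# expected white-on-black input. File names are placeholders.
import cv2

img = cv2.imread('formula.png', cv2.IMREAD_GRAYSCALE)
cv2.imwrite('formula_inverted.png', 255 - img)
```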
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported yet, since the C++ pre/post-processing for CAN is not implemented.
<a name="4-3"></a>
### 4.3 Serving
Not supported yet
<a name="4-4"></a>
### 4.4 More
Not supported yet
<a name="5"></a>
## 5. FAQ
1. The CROHME dataset comes from the [official CAN repo](https://github.com/LBH1024/CAN).
## Citation
```bibtex
@misc{https://doi.org/10.48550/arxiv.2207.11463,
doi = {10.48550/ARXIV.2207.11463},
url = {https://arxiv.org/abs/2207.11463},
author = {Li, Bohan and Yuan, Ye and Liang, Dingkang and Liu, Xiao and Ji, Zhilong and Bai, Jinfeng and Liu, Wenyu and Bai, Xiang},
keywords = {Computer Vision and Pattern Recognition (cs.CV), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition},
publisher = {arXiv},
year = {2022},
copyright = {arXiv.org perpetual, non-exclusive license}
}
```
# Scene Text Recognition Algorithm - RFL
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
  - [3.1 Training](#3-1)
  - [3.2 Evaluation](#3-2)
  - [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
  - [4.1 Python Inference](#4-1)
  - [4.2 C++ Inference](#4-2)
  - [4.3 Serving](#4-3)
  - [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [Reciprocal Feature Learning via Explicit and Implicit Tasks in Scene Text Recognition](https://arxiv.org/abs/2105.06229.pdf)
> Hui Jiang, Yunlu Xu, Zhanzhan Cheng, Shiliang Pu, Yi Niu, Wenqi Ren, Fei Wu, and Wenming Tan
> ICDAR, 2021
<a name="model"></a>
`RFL` is trained on the MJSynth and SynthText text recognition datasets and evaluated on the IIIT, SVT, IC03, IC13, IC15, SVTP and CUTE datasets; the reproduced results are as follows:
|Model|Backbone|Config|Acc|Download|
| --- | --- | --- | --- | --- |
|RFL-CNT|ResNetRFL|[rec_resnet_rfl_visual.yml](../../configs/rec/rec_resnet_rfl_visual.yml)|93.40%|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_visual_train.tar)|
|RFL-Att|ResNetRFL|[rec_resnet_rfl_att.yml](../../configs/rec/rec_resnet_rfl_att.yml)|88.63%|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar)|
<a name="2"></a>
## 2. Environment
Please prepare your environment referring to [Environment Preparation](./environment.md) and clone the project code referring to [Project Clone](./clone.md).
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
<a name="3-1"></a>
### 3.1 Training
PaddleOCR is modularized; to train the `RFL` recognition model you only need to **switch the configuration file** to the `RFL` [config file](../../configs/rec/rec_resnet_rfl_att.yml).
#### Start training
After the data preparation is complete, start training with the following commands:
```shell
# Step 1: train the CNT branch
# Single-GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_resnet_rfl_visual.yml
# Multi-GPU training; specify the GPU ids with the --gpus option
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_resnet_rfl_visual.yml
# Step 2: jointly train the CNT and Att branches; set pretrained_model to a local path
# Single-GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model=./output/rec/rec_resnet_rfl_visual/best_accuracy
# Multi-GPU training; specify the GPU ids with the --gpus option
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model=./output/rec/rec_resnet_rfl_visual/best_accuracy
```
<a name="3-2"></a>
### 3.2 Evaluation
Download the trained [model file](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl.tar) and evaluate with the following command:
```shell
# Set pretrained_model to a local path.
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model=./output/rec/rec_resnet_rfl_att/best_accuracy
```
<a name="3-3"></a>
### 3.3 Prediction
Predict a single image with the following command:
```shell
# Set pretrained_model to a local path.
python3 tools/infer_rec.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model=./output/rec/rec_resnet_rfl_att/best_accuracy
# To predict all images in a folder, set infer_img to the folder, e.g. Global.infer_img='./doc/imgs_words_en/'.
```
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First convert the best training checkpoint into an inference model. Taking the released model as an example ([download link](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl.tar)), run:
```shell
# Set pretrained_model to a local path.
python3 tools/export_model.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model=./output/rec/rec_resnet_rfl_att/best_accuracy Global.save_inference_dir=./inference/rec_resnet_rfl_att/
```
**Note:**
- If you trained the model on your own dataset with an adjusted dictionary, check that `character_dict_path` in the configuration file points to the dictionary you need.
- If you changed the input size during training, update the `infer_shape` for RFL in `tools/export_model.py` accordingly.
After a successful conversion there are three files in the directory:
```
/inference/rec_resnet_rfl_att/
    ├── inference.pdiparams         # parameter file of the recognition inference model
    ├── inference.pdiparams.info    # parameter info of the recognition inference model, can be ignored
    └── inference.pdmodel           # program file of the recognition inference model
```
Run model inference with the following command:
```shell
python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words_en/word_10.png' --rec_model_dir='./inference/rec_resnet_rfl_att/' --rec_algorithm='RFL' --rec_image_shape='1,32,100'
# To predict all images in a folder, set image_dir to the folder, e.g. --image_dir='./doc/imgs_words_en/'.
```
![](../imgs_words_en/word_10.png)
After running the command, the prediction (recognized text and score) for the image above is printed to the screen, for example:
```shell
Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9999927282333374)
```
**Note**
- The model above was trained with input resolution [1,32,100]; set `rec_image_shape` to the recognition image shape used in your training.
- Set `rec_char_dict_path` at inference time to specify the dictionary; if you changed the dictionary, point this parameter to your dictionary file.
- If you changed the preprocessing, replace the RFL preprocessing in `tools/infer/predict_rec.py` with your own.
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported yet, since the C++ pre/post-processing for RFL is not implemented.
<a name="4-3"></a>
### 4.3 Serving
Not supported yet
<a name="4-4"></a>
### 4.4 More
Not supported yet
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@article{2021Reciprocal,
title = {Reciprocal Feature Learning via Explicit and Implicit Tasks in Scene Text Recognition},
author = {Jiang, H. and Xu, Y. and Cheng, Z. and Pu, S. and Niu, Y. and Ren, W. and Wu, F. and Tan, W. },
booktitle = {ICDAR},
year = {2021},
url = {https://arxiv.org/abs/2105.06229}
}
```
...@@ -27,7 +27,7 @@
|Model|Backbone|Config|Acc|Download|
| --- | --- | --- | --- | --- |
|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pre-trained & trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)|
<a name="2"></a>
## 2. Environment
...@@ -80,7 +80,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_
<a name="4-1"></a>
### 4.1 Python Inference
First convert the best training checkpoint into an inference model. Taking the released model as an example ([download link](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)), run:
```shell
# Set pretrained_model to a local path.
...@@ -139,7 +139,7 @@ Predicts of ./doc/imgs_words/en/word_2.png:('yourself', 0.9999493)
## 5. FAQ
1. The MJSynth and SynthText datasets come from the [official VisionLAN repo](https://github.com/wangyuxin87/VisionLAN).
2. We finetune from the pre-trained model provided by the VisionLAN authors; the dictionary matching the pre-trained model is 'ppocr/utils/ic15_dict.txt'.
## Citation
......
# Text Telescope
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
  - [3.1 Training](#3-1)
  - [3.2 Evaluation](#3-2)
  - [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
  - [4.1 Python Inference](#4-1)
  - [4.2 C++ Inference](#4-2)
  - [4.3 Serving](#4-3)
  - [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [Scene Text Telescope: Text-Focused Scene Image Super-Resolution](https://openaccess.thecvf.com/content/CVPR2021/papers/Chen_Scene_Text_Telescope_Text-Focused_Scene_Image_Super-Resolution_CVPR_2021_paper.pdf)
> Chen, Jingye, Bin Li, and Xiangyang Xue
> CVPR, 2021
Following the [FudanOCR](https://github.com/FudanVI/FudanOCR/tree/main/scene-text-telescope) data download instructions, the super-resolution results on the TextZoom test set are as follows:
|Model|Backbone|PSNR_Avg|SSIM_Avg|Config|Download|
|---|---|---|---|---|---|
|Text Telescope|tbsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[trained model](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)|
The [TextZoom dataset](https://paddleocr.bj.bcebos.com/dataset/TextZoom.tar) comes from two super-resolution datasets, RealSR and SR-RAW; both contain LR-HR pairs. TextZoom has 17367 pairs of training data and 4373 pairs of test data.
<a name="2"></a>
## 2. Environment
Please prepare your environment referring to [Environment Preparation](./environment.md) and clone the project code referring to [Project Clone](./clone.md).
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
Please refer to the [text recognition training tutorial](./recognition.md). PaddleOCR is modularized; training a different model only requires **replacing the configuration file**.
- Training
After the data preparation is complete, start training with the following commands:
```
# Single-GPU training (long training period, not recommended)
python3 tools/train.py -c configs/sr/sr_telescope.yml
# Multi-GPU training; specify the GPU ids with the --gpus option
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/sr/sr_telescope.yml
```
- Evaluation
```
# GPU evaluation; Global.pretrained_model is the weights to evaluate
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
```
- Prediction:
```
# The configuration file used for prediction must match the training
python3 tools/infer_sr.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_52.png
```
![](../imgs_words_en/word_52.png)
After running the command, the super-resolution result of the above image is as follows:
![](../imgs_results/sr_word_52.png)
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First convert the model saved during text super-resolution training into an inference model. Taking the [model](https://paddleocr.bj.bcebos.com/contribution/Telescope_train.tar.gz) trained with Text-Telescope as an example, run:
```shell
python3 tools/export_model.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.save_inference_dir=./inference/sr_out
```
For Text-Telescope super-resolution model inference, run:
```
python3 tools/infer/predict_sr.py --sr_model_dir=./inference/sr_out --image_dir=doc/imgs_words_en/word_52.png --sr_image_shape=3,32,128
```
After running the command, the super-resolution result of the image is as follows:
![](../imgs_results/sr_word_52.png)
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported yet
<a name="4-3"></a>
### 4.3 Serving
Not supported yet
<a name="4-4"></a>
### 4.4 More
Not supported yet
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@INPROCEEDINGS{9578891,
author={Chen, Jingye and Li, Bin and Xue, Xiangyang},
booktitle={2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
title={Scene Text Telescope: Text-Focused Scene Image Super-Resolution},
year={2021},
volume={},
number={},
pages={12021-12030},
doi={10.1109/CVPR46437.2021.01185}}
```
...@@ -5,6 +5,7 @@
- [Chinese Street View Text Recognition](#中文街景文字识别)
- [Chinese Document Text Recognition](#中文文档文字识别)
- [ICDAR2019-ArT](#ICDAR2019-ArT)
- [Electronic Seal Dataset](#电子印章数据集)
Besides the open datasets, you can also synthesize data yourself with synthesis tools; see [Data Synthesis Tools](../data_synthesis.md).
...@@ -59,6 +60,12 @@ https://aistudio.baidu.com/aistudio/datasetdetail/8429
![](../../datasets/ArT.jpg)
- **Download**: https://ai.baidu.com/broad/download?dataset=art
<a name="电子印章数据集"></a>
#### 6. Electronic Seal Dataset
- **Source**: https://aistudio.baidu.com/aistudio/datasetdetail/154271/0
- **Introduction**: 10000 images in total, 8000 for training and 2000 for testing. The dataset is synthesized programmatically, involves no privacy concerns, and is mainly intended for training and detecting curved seal text. Contributed by [jingsongliujing](https://github.com/jingsongliujing).
- **Download**: https://aistudio.baidu.com/aistudio/datasetdetail/154271/0
## References
**ICDAR 2019-LSVT Challenge**
```
......
...@@ -100,6 +100,10 @@ The config files provided by PaddleOCR assume 8-GPU training (i.e. a total batch size of `8*
* Data distribution: keep the distribution of the training data as close to the target scenario as possible. If the target scenario contains a lot of short text, the training data should also contain plenty of short text; if the target scenario demands good recognition of spaces, the training data should contain plenty of text with spaces.
* Data synthesis: if certain characters are frequently misrecognized, collect a batch of data for those characters, add it to the original data, and finetune with a small learning rate. A ratio of original to new data of roughly 10:1 to 5:1 is worth trying, so that too much single-scenario data does not make the model overfit; also try to balance corpus word frequencies so common characters do not become too rare.
  Character-specific data can be generated with the TextRenderer tool; see [digital tube data synthesis](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.6/applications/%E5%85%89%E5%8A%9F%E7%8E%87%E8%AE%A1%E6%95%B0%E7%A0%81%E7%AE%A1%E5%AD%97%E7%AC%A6%E8%AF%86%E5%88%AB/%E5%85%89%E5%8A%9F%E7%8E%87%E8%AE%A1%E6%95%B0%E7%A0%81%E7%AE%A1%E5%AD%97%E7%AC%A6%E8%AF%86%E5%88%AB.md#31-%E6%95%B0%E6%8D%AE%E5%87%86%E5%A4%87) for an example. The synthesized corpus should come from real usage scenarios as far as possible; keeping fonts and backgrounds diverse while staying close to the real scenario helps improve the model.
* General Chinese and English data: during training you can add general real data to the training set (for finetuning scenarios that keep the dictionary unchanged, adding real data such as LSVT, RCTW and MTWI is recommended) to further improve generalization.
...@@ -168,3 +172,8 @@ Train:
- general.txt
ratio_list: [1.0, 0.1]
```
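How `ratio_list` mixes the label files can be illustrated with a small standalone sketch. This is only a reading of the sampling idea, not PaddleOCR's actual dataloader code, and the file names are placeholders: each label file contributes roughly `ratio × lines` samples per epoch.
```python
# Illustrative sketch of ratio_list semantics; not PaddleOCR's actual code.
import random

def sample_epoch(label_files, ratio_list, seed=0):
    rng = random.Random(seed)
    mixed = []
    for path, ratio in zip(label_files, ratio_list):
        with open(path, encoding='utf-8') as f:
            lines = f.readlines()
        # Keep round(ratio * len) lines of this label file for the epoch.
        mixed.extend(rng.sample(lines, round(len(lines) * ratio)))
    rng.shuffle(mixed)
    return mixed

# e.g. sample_epoch(['real.txt', 'general.txt'], [1.0, 0.1])
```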
### 3.4 Training Tuning
Training is not a one-shot process. After finishing a round of training and evaluation, collect and analyze the current model's bad cases in real scenarios, adjust the training data ratio accordingly, or add more synthesized data.
Iterate over several training rounds to keep improving the model.
...@@ -11,6 +11,7 @@
- [2.3 Multilingual Model Inference](#23-多语言模型的推理)
- [3. Angle Classification Model Inference](#3-方向分类模型推理)
- [4. Concatenated Inference of Text Detection, Angle Classification and Text Recognition](#4-文本检测方向分类和文字识别串联推理)
- [5. TensorRT Inference](#5-tensorrt-inference)
<a name="文本检测模型推理"></a>
...@@ -40,18 +41,17 @@ python3 tools/infer/predict_det.py --image_dir="./doc/imgs/00018069.jpg" --det_m
If the input image has a high resolution and you want to predict at a larger resolution, set det_limit_side_len to the desired value, e.g. 1216:
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --det_limit_type=max --det_limit_side_len=1216
```
To predict on CPU, run:
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_gpu=False
```
<a name="文本识别模型推理"></a>
## 2. Text Recognition Model Inference
...@@ -87,9 +87,9 @@ Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.9956803321838379)
```
# Download the English/digit recognition model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar
tar xf en_PP-OCRv3_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/en/word_1.png" --rec_model_dir="./en_PP-OCRv3_rec_infer/" --rec_char_dict_path="ppocr/utils/en_dict.txt"
```
![](../imgs_words/en/word_1.png)
...@@ -163,3 +163,32 @@ python3 tools/infer/predict_system.py --image_dir="./xxx.pdf" --det_model_dir=".
![](../imgs_results/system_res_00018069_v3.jpg)
For more configuration and explanation of inference hyperparameters, please refer to: [Model Inference Hyperparameters](./inference_args.md)
## 5. TensorRT Inference
Paddle Inference integrates TensorRT in subgraph mode. For GPU inference scenarios, TensorRT can optimize some subgraphs: it fuses OPs horizontally and vertically, filters redundant OPs, and automatically selects the optimal kernel for each OP, which speeds up inference.
Running TRT inference with Paddle Inference generally takes two steps:
* (1) Collect the model's dynamic shape information on a specific dataset and store it in a file.
* (2) Load the dynamic shape information file and run TRT inference.
Taking the text detection model as an example, first use the command below to generate the dynamic shape file; it is written as `det_trt_dynamic_shape.txt` under the `ch_PP-OCRv3_det_infer` directory.
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_tensorrt=True
```
This first run only collects dynamic shape information; TRT is not yet used for inference.
After it finishes, run the same command again to perform TRT inference:
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_tensorrt=True
```
**Note:**
* If the dynamic shape information file from step 1 already exists, it is not collected again and TRT inference runs directly; to regenerate it, delete the existing file under the model directory first.
* The dynamic shape information file usually only needs to be generated once. For actual deployment, it is recommended to generate it offline on a validation or test set and then load it directly for online TRT inference; the Paddle Inference API behind these two steps is sketched below.
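If you drive Paddle Inference from your own Python code instead of `tools/infer/predict_det.py`, the two steps above map onto the API roughly as follows. This is a minimal illustration under stated assumptions (Paddle 2.x installed; placeholder paths; an arbitrary 500 MB GPU memory pool), not the actual PaddleOCR implementation:
```python
# Minimal sketch of the two-step TRT flow with the Paddle Inference API.
from paddle.inference import Config, create_predictor

model_dir = "./ch_PP-OCRv3_det_infer"            # placeholder path
shape_file = model_dir + "/det_trt_dynamic_shape.txt"

# Step 1: run once in plain GPU mode and record min/max/opt tensor shapes.
config = Config(model_dir + "/inference.pdmodel",
                model_dir + "/inference.pdiparams")
config.enable_use_gpu(500, 0)                    # 500 MB memory pool, GPU 0
config.collect_shape_range_info(shape_file)
predictor = create_predictor(config)
# ... feed the validation images through `predictor` here ...

# Step 2: rebuild the predictor with TRT enabled and the recorded shapes.
config = Config(model_dir + "/inference.pdmodel",
                model_dir + "/inference.pdiparams")
config.enable_use_gpu(500, 0)
config.enable_tensorrt_engine(workspace_size=1 << 30,
                              max_batch_size=1,
                              min_subgraph_size=15)
config.enable_tuned_tensorrt_dynamic_shape(shape_file, True)
predictor = create_predictor(config)
```
The same predictor-creation pattern should carry over to the recognition and classification models; only the model directory and shape file change.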
...@@ -41,7 +41,7 @@ img_label
'imgid': 0, # index of the image
'html': {
'structure': {'tokens': ['<thead>', '<tr>', '<td>', ...]}, # HTML string of the table
'cells': [
{
'tokens': ['P', 'a', 'd', 'd', 'l', 'e', 'P', 'a', 'd', 'd', 'l', 'e'], # text of a single cell in the table
'bbox': [x0, y0, x1, y1] # coordinates of a single cell in the table
......
# DRRG
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving](#4-3)
- [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [Deep Relational Reasoning Graph Network for Arbitrary Shape Text Detection](https://arxiv.org/abs/2003.07493)
> Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng
> CVPR, 2020
On the CTW1500 dataset, the text detection result is as follows:
|Model|Backbone|Configuration|Precision|Recall|Hmean|Download|
| --- | --- | --- | --- | --- | --- | --- |
| DRRG | ResNet50_vd | [configs/det/det_r50_drrg_ctw.yml](../../configs/det/det_r50_drrg_ctw.yml)| 89.92%|80.91%|85.18%|[trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_drrg_ctw_train.tar)|
<a name="2"></a>
## 2. Environment
Please prepare your environment referring to [prepare the environment](./environment_en.md) and [clone the repo](./clone_en.md).
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
The above DRRG model is trained using the CTW1500 text detection public dataset. For the download of the dataset, please refer to [ocr_datasets](./dataset/ocr_datasets_en.md).
After the data download is complete, please refer to [Text Detection Training Tutorial](./detection_en.md) for training. PaddleOCR has modularized the code structure, so that you only need to **replace the configuration file** to train different detection models.
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
Since the forward pass repeatedly converts tensors to NumPy arrays for intermediate computation, exporting DRRG from the dynamic graph to a static graph is not yet supported.
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported
<a name="4-3"></a>
### 4.3 Serving
Not supported
<a name="4-4"></a>
### 4.4 More
Not supported
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@inproceedings{zhang2020deep,
title={Deep relational reasoning graph network for arbitrary shape text detection},
author={Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={9699--9708},
year={2020}
}
```
...@@ -27,6 +27,7 @@ Supported text detection algorithms (Click the link to get the tutorial):
- [x] [SAST](./algorithm_det_sast_en.md)
- [x] [PSENet](./algorithm_det_psenet_en.md)
- [x] [FCENet](./algorithm_det_fcenet_en.md)
- [x] [DRRG](./algorithm_det_drrg_en.md)
On the ICDAR2015 dataset, the text detection result is as follows:
...@@ -52,6 +53,7 @@ On CTW1500 dataset, the text detection result is as follows:
|Model|Backbone|Precision|Recall|Hmean| Download link|
| --- | --- | --- | --- | --- |---|
|FCE|ResNet50_dcn|88.39%|82.18%|85.27%| [trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar) |
|DRRG|ResNet50_vd|89.92%|80.91%|85.18%|[trained model](https://paddleocr.bj.bcebos.com/contribution/det_r50_drrg_ctw_train.tar)|
**Note:** Additional data, like icdar2013, icdar2017, COCO-Text, ArT, was added to the model training of SAST. Download English public dataset in organized format used by PaddleOCR from:
* [Baidu Drive](https://pan.baidu.com/s/12cPnZcVuV1zn5DOd4mqjVw) (download code: 2bpi).
...@@ -76,6 +78,7 @@ Supported text recognition algorithms (Click the link to get the tutorial):
- [x] [VisionLAN](./algorithm_rec_visionlan_en.md)
- [x] [SPIN](./algorithm_rec_spin_en.md)
- [x] [RobustScanner](./algorithm_rec_robustscanner_en.md)
- [x] [RFL](./algorithm_rec_rfl_en.md)
Refer to [DTRB](https://arxiv.org/abs/1904.01906); the training and evaluation results of the text recognition algorithms above (using MJSynth and SynthText for training, evaluating on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) are as follows:
...@@ -96,10 +99,10 @@ Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation r
|SVTR|SVTR-Tiny| 89.25% | rec_svtr_tiny_none_ctc_en | [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/rec_svtr_tiny_none_ctc_en_train.tar) |
|ViTSTR|ViTSTR| 79.82% | rec_vitstr_none_ce | [trained model](https://paddleocr.bj.bcebos.com/rec_vitstr_none_none_train.tar) |
|ABINet|Resnet45| 90.75% | rec_r45_abinet | [trained model](https://paddleocr.bj.bcebos.com/rec_r45_abinet_train.tar) |
|VisionLAN|Resnet45| 90.30% | rec_r45_visionlan | [trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar) |
|SPIN|ResNet32| 90.00% | rec_r32_gaspin_bilstm_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r32_gaspin_bilstm_att.tar) |
|RobustScanner|ResNet31| 87.77% | rec_r31_robustscanner | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_r31_robustscanner.tar)|
|RFL|ResNetRFL| 88.63% | rec_resnet_rfl_att | [trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar) |
<a name="2"></a>
......
# CAN
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving](#4-3)
- [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition](https://arxiv.org/abs/2207.11463)
> Bohan Li, Ye Yuan, Dingkang Liang, Xiao Liu, Zhilong Ji, Jinfeng Bai, Wenyu Liu, Xiang Bai
> ECCV, 2022
Trained on the CROHME handwritten mathematical expression recognition dataset and evaluated on its test set, the reproduced results are as follows:
|Model|Backbone|config|exprate|Download link|
| --- | --- | --- | --- | --- |
|CAN|DenseNet|[rec_d28_can.yml](../../configs/rec/rec_d28_can.yml)|51.72|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_d28_can_train.tar)|
<a name="2"></a>
## 2. Environment
Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
Please refer to [Text Recognition Tutorial](./recognition_en.md). PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**.
Training:
Specifically, after the data preparation is completed, the training can be started. The training command is as follows:
```
#Single GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_d28_can.yml
#Multi GPU training, specify the gpu number through the --gpus parameter
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_d28_can.yml
```
Evaluation:
```
# GPU evaluation
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_d28_can.yml -o Global.pretrained_model=./rec_d28_can_train/CAN
```
Prediction:
```
# The configuration file used for prediction must match the training
python3 tools/infer_rec.py -c configs/rec/rec_d28_can.yml -o Architecture.Head.attdecoder.is_train=False Global.infer_img='./doc/crohme_demo/hme_00.jpg' Global.pretrained_model=./rec_d28_can_train/CAN
```
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First, the model saved during the CAN handwritten mathematical expression recognition training process is converted into an inference model. You can use the following command to convert:
```
python3 tools/export_model.py -c configs/rec/rec_d28_can.yml -o Global.save_inference_dir=./inference/rec_d28_can/ Architecture.Head.attdecoder.is_train=False
# The default maximum output length of the exported model is 36. If you need to predict a longer sequence, specify an appropriate output length when exporting, e.g. Architecture.Head.max_text_length=72
```
For CAN handwritten mathematical expression recognition model inference, the following commands can be executed:
```
python3 tools/infer/predict_rec.py --image_dir="./doc/crohme_demo/hme_00.jpg" --rec_algorithm="CAN" --rec_batch_num=1 --rec_model_dir="./inference/rec_d28_can/" --rec_char_dict_path="./ppocr/utils/dict/latex_symbol_dict.txt"
# If you need to predict on an image with black characters on a white background, set: --rec_image_inverse=False
```
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported
<a name="4-3"></a>
### 4.3 Serving
Not supported
<a name="4-4"></a>
### 4.4 More
Not supported
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@misc{https://doi.org/10.48550/arxiv.2207.11463,
doi = {10.48550/ARXIV.2207.11463},
url = {https://arxiv.org/abs/2207.11463},
author = {Li, Bohan and Yuan, Ye and Liang, Dingkang and Liu, Xiao and Ji, Zhilong and Bai, Jinfeng and Liu, Wenyu and Bai, Xiang},
keywords = {Computer Vision and Pattern Recognition (cs.CV), Artificial Intelligence (cs.AI), FOS: Computer and information sciences, FOS: Computer and information sciences},
title = {When Counting Meets HMER: Counting-Aware Network for Handwritten Mathematical Expression Recognition},
publisher = {arXiv},
year = {2022},
copyright = {arXiv.org perpetual, non-exclusive license}
}
```
# RFL
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving](#4-3)
- [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [Reciprocal Feature Learning via Explicit and Implicit Tasks in Scene Text Recognition](https://arxiv.org/abs/2105.06229.pdf)
> Hui Jiang, Yunlu Xu, Zhanzhan Cheng, Shiliang Pu, Yi Niu, Wenqi Ren, Fei Wu, and Wenming Tan
> ICDAR, 2021
Trained on the MJSynth and SynthText text recognition datasets and evaluated on the IIIT, SVT, IC03, IC13, IC15, SVTP and CUTE datasets, the reproduced results are as follows:
|Model|Backbone|config|Acc|Download link|
| --- | --- | --- | --- | --- |
|RFL-CNT|ResNetRFL|[rec_resnet_rfl_visual.yml](../../configs/rec/rec_resnet_rfl_visual.yml)|93.40%|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_visual_train.tar)|
|RFL-Att|ResNetRFL|[rec_resnet_rfl_att.yml](../../configs/rec/rec_resnet_rfl_att.yml)|88.63%|[trained model](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl_att_train.tar)|
<a name="2"></a>
## 2. Environment
Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
PaddleOCR modularizes the code, and training different recognition models only requires **changing the configuration file**.
Training:
Specifically, after the data preparation is completed, the training can be started. The training command is as follows:
```
#step1:train the CNT branch
#Single GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_resnet_rfl_visual.yml
#Multi GPU training, specify the gpu number through the --gpus parameter
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_resnet_rfl_visual.yml
#step2:joint training of CNT and Att branches
#Single GPU training (long training period, not recommended)
python3 tools/train.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
#Multi GPU training, specify the gpu number through the --gpus parameter
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
```
Evaluation:
```
# GPU evaluation
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
```
Prediction:
```
# The configuration file used for prediction must match the training
python3 tools/infer_rec.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.infer_img='./doc/imgs_words_en/word_10.png' Global.pretrained_model={path/to/weights}/best_accuracy
```
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First, the model saved during the RFL text recognition training process is converted into an inference model ([model download link](https://paddleocr.bj.bcebos.com/contribution/rec_resnet_rfl.tar)). You can use the following command to convert:
```
python3 tools/export_model.py -c configs/rec/rec_resnet_rfl_att.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.save_inference_dir=./inference/rec_resnet_rfl_att
```
**Note:**
- If you are training the model on your own dataset and have modified the dictionary file, please pay attention to modify the `character_dict_path` in the configuration file to the modified dictionary file.
- If you modified the input size during training, please modify the `infer_shape` corresponding to RFL in the `tools/export_model.py` file.
After the conversion is successful, there are three files in the directory:
```
/inference/rec_resnet_rfl_att/
├── inference.pdiparams
├── inference.pdiparams.info
└── inference.pdmodel
```
For RFL text recognition model inference, the following commands can be executed:
```
python3 tools/infer/predict_rec.py --image_dir='./doc/imgs_words_en/word_10.png' --rec_model_dir='./inference/rec_resnet_rfl_att/' --rec_algorithm='RFL' --rec_image_shape='1,32,100'
```
![](../imgs_words_en/word_10.png)
After executing the command, the prediction result (recognized text and score) of the image above is printed to the screen, for example:
```shell
Predicts of ./doc/imgs_words_en/word_10.png:('pain', 0.9999927282333374)
```
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported
<a name="4-3"></a>
### 4.3 Serving
Not supported
<a name="4-4"></a>
### 4.4 More
Not supported
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@article{2021Reciprocal,
title = {Reciprocal Feature Learning via Explicit and Implicit Tasks in Scene Text Recognition},
author = {Jiang, H. and Xu, Y. and Cheng, Z. and Pu, S. and Niu, Y. and Ren, W. and Wu, F. and Tan, W. },
booktitle = {ICDAR},
year = {2021},
url = {https://arxiv.org/abs/2105.06229}
}
```
...@@ -25,7 +25,7 @@ Using MJSynth and SynthText two text recognition datasets for training, and eval
|Model|Backbone|config|Acc|Download link|
| --- | --- | --- | --- | --- |
|VisionLAN|ResNet45|[rec_r45_visionlan.yml](../../configs/rec/rec_r45_visionlan.yml)|90.3%|[pre-trained & trained model](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)|
<a name="2"></a>
## 2. Environment
...@@ -68,7 +68,7 @@ python3 tools/infer_rec.py -c configs/rec/rec_r45_visionlan.yml -o Global.infer_
<a name="4-1"></a>
### 4.1 Python Inference
First, the model saved during the VisionLAN text recognition training process is converted into an inference model ([model download link](https://paddleocr.bj.bcebos.com/VisionLAN/rec_r45_visionlan_train.tar)). You can use the following command to convert:
```
python3 tools/export_model.py -c configs/rec/rec_r45_visionlan.yml -o Global.pretrained_model=./rec_r45_visionlan_train/best_accuracy Global.save_inference_dir=./inference/rec_r45_visionlan/
...@@ -120,7 +120,7 @@ Not supported
## 5. FAQ
1. Note that the MJSynth and SynthText datasets come from [VisionLAN repo](https://github.com/wangyuxin87/VisionLAN).
2. We use the pre-trained model provided by the VisionLAN authors for finetune training. The dictionary for the pre-trained model is 'ppocr/utils/ic15_dict.txt'.
## Citation
......
# Text Telescope
- [1. Introduction](#1)
- [2. Environment](#2)
- [3. Model Training / Evaluation / Prediction](#3)
- [3.1 Training](#3-1)
- [3.2 Evaluation](#3-2)
- [3.3 Prediction](#3-3)
- [4. Inference and Deployment](#4)
- [4.1 Python Inference](#4-1)
- [4.2 C++ Inference](#4-2)
- [4.3 Serving](#4-3)
- [4.4 More](#4-4)
- [5. FAQ](#5)
<a name="1"></a>
## 1. Introduction
Paper:
> [Scene Text Telescope: Text-Focused Scene Image Super-Resolution](https://openaccess.thecvf.com/content/CVPR2021/papers/Chen_Scene_Text_Telescope_Text-Focused_Scene_Image_Super-Resolution_CVPR_2021_paper.pdf)
> Chen, Jingye, Bin Li, and Xiangyang Xue
> CVPR, 2021
Referring to the [FudanOCR](https://github.com/FudanVI/FudanOCR/tree/main/scene-text-telescope) data download instructions, the results of the super-resolution algorithm on the TextZoom test set are as follows:
|Model|Backbone|PSNR_Avg|SSIM_Avg|config|Download link|
|---|---|---|---|---|---|
|Text Telescope|tbsrn|21.56|0.7411| [configs/sr/sr_telescope.yml](../../configs/sr/sr_telescope.yml)|[trained model](https://paddleocr.bj.bcebos.com/contribution/sr_telescope_train.tar)|
The [TextZoom dataset](https://paddleocr.bj.bcebos.com/dataset/TextZoom.tar) comes from two super-resolution datasets, RealSR and SR-RAW, both of which contain LR-HR pairs. TextZoom has 17367 pairs of training data and 4373 pairs of test data.
<a name="2"></a>
## 2. Environment
Please refer to ["Environment Preparation"](./environment_en.md) to configure the PaddleOCR environment, and refer to ["Project Clone"](./clone_en.md) to clone the project code.
<a name="3"></a>
## 3. Model Training / Evaluation / Prediction
Please refer to [Text Recognition Tutorial](./recognition_en.md). PaddleOCR modularizes the code, and training different models only requires **changing the configuration file**.
Training:
Specifically, after the data preparation is completed, the training can be started. The training command is as follows:
```
#Single GPU training (long training period, not recommended)
python3 tools/train.py -c configs/sr/sr_telescope.yml
#Multi GPU training, specify the gpu number through the --gpus parameter
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/sr/sr_telescope.yml
```
Evaluation:
```
# GPU evaluation
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy
```
Prediction:
```
# The configuration file used for prediction must match the training
python3 tools/infer_sr.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_52.png
```
![](../imgs_words_en/word_52.png)
After executing the command, the super-resolution result of the above image is as follows:
![](../imgs_results/sr_word_52.png)
<a name="4"></a>
## 4. Inference and Deployment
<a name="4-1"></a>
### 4.1 Python Inference
First, the model saved during the training process is converted into an inference model ([model download link](https://paddleocr.bj.bcebos.com/contribution/Telescope_train.tar.gz)). You can use the following command to convert:
```shell
python3 tools/export_model.py -c configs/sr/sr_telescope.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.save_inference_dir=./inference/sr_out
```
For Text-Telescope super-resolution model inference, the following commands can be executed:
```
python3 tools/infer/predict_sr.py --sr_model_dir=./inference/sr_out --image_dir=doc/imgs_words_en/word_52.png --sr_image_shape=3,32,128
```
After executing the command, the super-resolution result of the above image is as follows:
![](../imgs_results/sr_word_52.png)
<a name="4-2"></a>
### 4.2 C++ Inference
Not supported
<a name="4-3"></a>
### 4.3 Serving
Not supported
<a name="4-4"></a>
### 4.4 More
Not supported
<a name="5"></a>
## 5. FAQ
## Citation
```bibtex
@INPROCEEDINGS{9578891,
author={Chen, Jingye and Li, Bin and Xue, Xiangyang},
booktitle={2021 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
title={Scene Text Telescope: Text-Focused Scene Image Super-Resolution},
year={2021},
volume={},
number={},
pages={12021-12030},
doi={10.1109/CVPR46437.2021.01185}}
```
...@@ -12,6 +12,7 @@ This article introduces the use of the Python inference engine for the PP-OCR mo
- [3. Multilingual Model Inference](#3-multilingual-model-inference)
- [Angle Classification Model Inference](#angle-classification-model-inference)
- [Text Detection Angle Classification and Recognition Inference Concatenation](#text-detection-angle-classification-and-recognition-inference-concatenation)
- [TensorRT Inference](#tensorrt-inference)
<a name="DETECTION_MODEL_INFERENCE"></a>
...@@ -84,9 +85,9 @@ For English recognition model inference, you can execute the following commands,
```
# download en model:
wget https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar
tar xf en_PP-OCRv3_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/en/word_1.png" --rec_model_dir="./en_PP-OCRv3_rec_infer/" --rec_char_dict_path="ppocr/utils/en_dict.txt"
```
![](../imgs_words/en/word_1.png)
...@@ -163,3 +164,34 @@ After executing the command, the recognition result image is as follows:
![](../imgs_results/system_res_00018069_v3.jpg)
For more configuration and explanation of inference parameters, please refer to: [Model Inference Parameters Explained Tutorial](./inference_args_en.md)
## TensorRT Inference
Paddle Inference integrates TensorRT in subgraph mode. For GPU deployment scenarios, TensorRT can optimize some subgraphs, including fusing OPs horizontally and vertically, filtering redundant OPs, and automatically selecting the optimal kernel for each OP, to speed up inference.
Inference with TRT generally takes the following two steps.
* (1) Collect the model's dynamic shape information on a specific dataset and store it in a file.
* (2) Load the dynamic shape information file for TRT inference.
Taking the text detection model as an example: first, use the following command to generate a dynamic shape file, which will be written as `det_trt_dynamic_shape.txt` in the `ch_PP-OCRv3_det_infer` folder.
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_tensorrt=True
```
The above run only collects dynamic shape information; TRT is not used for inference yet.
Then, you can use the following command to perform TRT inference.
```bash
python3 tools/infer/predict_det.py --image_dir="./doc/imgs/1.jpg" --det_model_dir="./ch_PP-OCRv3_det_infer/" --use_tensorrt=True
```
**Note:**
* In the first step, if the dynamic shape information file already exists, it is not collected again and TRT inference runs directly; if you want to regenerate it, first delete the existing file in the model folder and then run the collection step again.
* In general, the dynamic shape information file only needs to be generated once. For actual deployment, it is recommended to generate the file offline on a validation or test set, and then load it directly for online TRT inference.
...@@ -41,7 +41,7 @@ The json format of each line is:
'imgid': 0, # index of image
'html': {
'structure': {'tokens': ['<thead>', '<tr>', '<td>', ...]}, # HTML string of the table
'cells': [
{
'tokens': ['P', 'a', 'd', 'd', 'l', 'e', 'P', 'a', 'd', 'd', 'l', 'e'], # text in cell
'bbox': [x0, y0, x1, y1] # bbox of cell
......
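For a quick sanity check of files in this format, each line can be parsed with the standard library alone. A minimal sketch, where `table_label.jsonl` is a placeholder file name and only the keys shown above are assumed:
```python
# Minimal sketch: walk the table annotations described above,
# one JSON object per line.
import json

with open("table_label.jsonl", encoding="utf-8") as f:
    for line in f:
        label = json.loads(line)
        structure = label["html"]["structure"]["tokens"]  # HTML tags of the table
        for cell in label["html"]["cells"]:
            text = "".join(cell["tokens"])  # text of one cell
            x0, y0, x1, y1 = cell["bbox"]   # cell box
            print(text, (x0, y0, x1, y1))
```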
...@@ -47,7 +47,7 @@ __all__ = [
]
SUPPORT_DET_MODEL = ['DB']
VERSION = '2.6.0.3'
SUPPORT_REC_MODEL = ['CRNN', 'SVTR_LCNet']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
...@@ -567,6 +567,7 @@ class PPStructure(StructureSystem):
        assert params.structure_version in SUPPORT_STRUCTURE_MODEL_VERSION, "structure_version must in {}, but get {}".format(
            SUPPORT_STRUCTURE_MODEL_VERSION, params.structure_version)
        params.use_gpu = check_gpu(params.use_gpu)
        params.mode = 'structure'
        if not params.show_log:
            logger.setLevel(logging.INFO)
...@@ -662,6 +663,16 @@ def main():
        if not flag_gif and not flag_pdf:
            img = cv2.imread(img_path)
        if args.recovery and args.use_pdf2docx_api and flag_pdf:
            # Convert the PDF straight to docx with pdf2docx and skip the
            # OCR-based recovery pipeline for this input.
            from pdf2docx.converter import Converter
            docx_file = os.path.join(args.output,
                                     '{}.docx'.format(img_name))
            cv = Converter(img_path)
            cv.convert(docx_file)
            cv.close()
            logger.info('docx save to {}'.format(docx_file))
            continue
        if not flag_pdf:
            if img is None:
                logger.error("error in loading image:{}".format(img_path))
...@@ -697,8 +708,7 @@ def main():
        if args.recovery and all_res != []:
            try:
                from ppstructure.recovery.recovery_to_doc import convert_info_docx
                convert_info_docx(img, all_res, args.output, img_name)
            except Exception as ex:
                logger.error(
                    "error in layout recovery image:{}, err msg: {}".format(
......
...@@ -70,3 +70,49 @@ class SSLRotateCollate(object):
    def __call__(self, batch):
        output = [np.concatenate(d, axis=0) for d in zip(*batch)]
        return output
class DyMaskCollator(object):
    """
    batch: [
        image [batch_size, channel, maxHinbatch, maxWinbatch]
        image_mask [batch_size, channel, maxHinbatch, maxWinbatch]
        label [batch_size, maxLabelLen]
        label_mask [batch_size, maxLabelLen]
        ...
    ]
    """

    def __call__(self, batch):
        max_width, max_height, max_length = 0, 0, 0
        bs, channel = len(batch), batch[0][0].shape[0]
        proper_items = []
        for item in batch:
            # Skip samples whose height/width, combined with the running
            # batch maxima, would push the padded area past the 1600*320
            # pixel budget; then update the running maxima.
            if item[0].shape[1] * max_width > 1600 * 320 or \
                    item[0].shape[2] * max_height > 1600 * 320:
                continue
            max_height = item[0].shape[1] if item[0].shape[1] > max_height else max_height
            max_width = item[0].shape[2] if item[0].shape[2] > max_width else max_width
            max_length = len(item[1]) if len(item[1]) > max_length else max_length
            proper_items.append(item)

        # Zero-padded image batch plus a pixel-validity mask.
        images = np.zeros(
            (len(proper_items), channel, max_height, max_width), dtype='float32')
        image_masks = np.zeros(
            (len(proper_items), 1, max_height, max_width), dtype='float32')
        # Zero-padded label batch plus a token-validity mask.
        labels = np.zeros((len(proper_items), max_length), dtype='int64')
        label_masks = np.zeros((len(proper_items), max_length), dtype='int64')
        for i in range(len(proper_items)):
            _, h, w = proper_items[i][0].shape
            images[i][:, :h, :w] = proper_items[i][0]
            image_masks[i][:, :h, :w] = 1
            l = len(proper_items[i][1])
            labels[i][:l] = proper_items[i][1]
            label_masks[i][:l] = 1
        return images, image_masks, labels, label_masks
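An illustrative call with a dummy variable-sized batch (shapes and label ids made up, not from the repo) shows the padding behavior:
```python
import numpy as np

# Dummy batch: (C, H, W) float images paired with integer label sequences.
collator = DyMaskCollator()
batch = [
    (np.random.rand(1, 32, 100).astype('float32'), [3, 7, 2]),
    (np.random.rand(1, 48, 80).astype('float32'), [5, 1]),
]
images, image_masks, labels, label_masks = collator(batch)
# images has shape (2, 1, 48, 100): zero-padded to the batch-max H and W;
# image_masks is 1 over valid pixels; labels is (2, 3) with label_masks
# marking the real (non-padding) tokens.
```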
@@ -26,7 +26,8 @@ from .make_pse_gt import MakePseGt
from .rec_img_aug import BaseDataAugmentation, RecAug, RecConAug, RecResizeImg, ClsResizeImg, \
    SRNRecResizeImg, GrayRecResizeImg, SARRecResizeImg, PRENResizeImg, \
    ABINetRecResizeImg, SVTRRecResizeImg, ABINetRecAug, VLRecResizeImg, SPINRecResizeImg, RobustScannerRecResizeImg, \
    RFLRecResizeImg
from .ssl_img_aug import SSLRotateResize
from .randaugment import RandAugment
from .copy_paste import CopyPaste
@@ -44,6 +45,7 @@ from .vqa import *
from .fce_aug import *
from .fce_targets import FCENetTargets
from .ct_process import *
from .drrg_targets import DRRGTargets
def transform(data, ops=None):
......
(This diff is collapsed.)
@@ -488,6 +488,62 @@ class AttnLabelEncode(BaseRecLabelEncode):
        return idx
class RFLLabelEncode(BaseRecLabelEncode):
""" Convert between text-label and text-index """
def __init__(self,
max_text_length,
character_dict_path=None,
use_space_char=False,
**kwargs):
super(RFLLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char)
def add_special_char(self, dict_character):
self.beg_str = "sos"
self.end_str = "eos"
dict_character = [self.beg_str] + dict_character + [self.end_str]
return dict_character
def encode_cnt(self, text):
cnt_label = [0.0] * len(self.character)
for char_ in text:
cnt_label[char_] += 1
return np.array(cnt_label)
def __call__(self, data):
text = data['label']
text = self.encode(text)
if text is None:
return None
if len(text) >= self.max_text_len:
return None
cnt_label = self.encode_cnt(text)
data['length'] = np.array(len(text))
text = [0] + text + [len(self.character) - 1] + [0] * (self.max_text_len
- len(text) - 2)
if len(text) != self.max_text_len:
return None
data['label'] = np.array(text)
data['cnt_label'] = cnt_label
return data
def get_ignored_tokens(self):
beg_idx = self.get_beg_end_flag_idx("beg")
end_idx = self.get_beg_end_flag_idx("end")
return [beg_idx, end_idx]
def get_beg_end_flag_idx(self, beg_or_end):
if beg_or_end == "beg":
idx = np.array(self.dict[self.beg_str])
elif beg_or_end == "end":
idx = np.array(self.dict[self.end_str])
else:
            assert False, "Unsupported type %s in get_beg_end_flag_idx" \
                % beg_or_end
return idx
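The counting branch of RFL is supervised by encode_cnt, which turns an encoded label into a per-symbol occurrence vector. A toy illustration with a hypothetical 5-symbol alphabet:

# Hypothetical alphabet ['sos', 'a', 'b', 'c', 'eos']; the text "aba"
# encodes to indices [1, 2, 1] and counts to one slot per alphabet entry,
# mirroring what encode_cnt computes.
encoded = [1, 2, 1]
cnt = [0.0] * 5
for idx in encoded:
    cnt[idx] += 1
print(cnt)  # [0.0, 2.0, 1.0, 0.0, 0.0]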
class SEEDLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """
@@ -1089,7 +1145,7 @@ class VQATokenLabelEncode(object):
    def _load_ocr_info(self, data):
        if self.infer_mode:
            ocr_result = self.ocr_engine.ocr(data['image'], cls=False)[0]
            ocr_info = []
            for res in ocr_result:
                ocr_info.append({
@@ -1344,8 +1400,6 @@ class VLLabelEncode(BaseRecLabelEncode):
                 **kwargs):
        super(VLLabelEncode, self).__init__(
            max_text_length, character_dict_path, use_space_char, lower)
        self.dict = {}
        for i, char in enumerate(self.character):
            self.dict[char] = i
@@ -1421,3 +1475,32 @@ class CTLabelEncode(object):
        data['polys'] = boxes
        data['texts'] = txts
        return data
class CANLabelEncode(BaseRecLabelEncode):
def __init__(self,
character_dict_path,
max_text_length=100,
use_space_char=False,
lower=True,
**kwargs):
super(CANLabelEncode, self).__init__(
max_text_length, character_dict_path, use_space_char, lower)
def encode(self, text_seq):
text_seq_encoded = []
for text in text_seq:
if text not in self.character:
continue
text_seq_encoded.append(self.dict.get(text))
if len(text_seq_encoded) == 0:
return None
return text_seq_encoded
def __call__(self, data):
label = data['label']
if isinstance(label, str):
label = label.strip().split()
label.append(self.end_str)
data['label'] = self.encode(label)
return data
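Unlike the character-level encoders above, CANLabelEncode treats a handwritten-formula label as a space-separated symbol sequence. A hedged toy example (the symbol table below is hypothetical):

# A formula label is split on whitespace into symbols, then mapped through
# a symbol table; the table here is illustrative only.
label = "x ^ { 2 } + 1"
tokens = label.strip().split()  # ['x', '^', '{', '2', '}', '+', '1']
symbol_table = {s: i for i, s in
                enumerate(['x', '^', '{', '2', '}', '+', '1', 'eos'])}
encoded = [symbol_table[t] for t in tokens + ['eos']]
print(encoded)  # [0, 1, 2, 3, 4, 5, 6, 7]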
@@ -498,3 +498,27 @@ class ResizeNormalize(object):
        img_numpy = np.array(img).astype("float32")
        img_numpy = img_numpy.transpose((2, 0, 1)) / 255
        return img_numpy
class GrayImageChannelFormat(object):
    """
    format a gray-scale image's channel: (3, h, w) -> (1, h, w)
    Args:
        inverse: whether to invert the gray image
    """

    def __init__(self, inverse=False, **kwargs):
        self.inverse = inverse
def __call__(self, data):
img = data['image']
img_single_channel = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_expanded = np.expand_dims(img_single_channel, 0)
if self.inverse:
data['image'] = np.abs(img_expanded - 1)
else:
data['image'] = img_expanded
data['src_image'] = img
return data
\ No newline at end of file
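A quick sanity check of GrayImageChannelFormat (the input shape is illustrative):

import numpy as np

data = {'image': np.random.randint(0, 255, (48, 160, 3), dtype=np.uint8)}
out = GrayImageChannelFormat(inverse=False)(data)
print(out['image'].shape)      # (1, 48, 160): single-channel gray image
print(out['src_image'].shape)  # (48, 160, 3): original kept alongside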
@@ -237,6 +237,33 @@ class VLRecResizeImg(object):
        return data
class RFLRecResizeImg(object):
def __init__(self, image_shape, padding=True, interpolation=1, **kwargs):
self.image_shape = image_shape
self.padding = padding
self.interpolation = interpolation
if self.interpolation == 0:
self.interpolation = cv2.INTER_NEAREST
elif self.interpolation == 1:
self.interpolation = cv2.INTER_LINEAR
elif self.interpolation == 2:
self.interpolation = cv2.INTER_CUBIC
elif self.interpolation == 3:
self.interpolation = cv2.INTER_AREA
else:
raise Exception("Unsupported interpolation type !!!")
def __call__(self, data):
img = data['image']
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
norm_img, valid_ratio = resize_norm_img(
img, self.image_shape, self.padding, self.interpolation)
data['image'] = norm_img
data['valid_ratio'] = valid_ratio
return data
class SRNRecResizeImg(object):
    def __init__(self, image_shape, num_heads, max_text_length, **kwargs):
        self.image_shape = image_shape
@@ -414,8 +441,13 @@ class SVTRRecResizeImg(object):
        data['valid_ratio'] = valid_ratio
        return data
class RobustScannerRecResizeImg(object):
    def __init__(self,
                 image_shape,
                 max_text_length,
                 width_downsample_ratio=0.25,
                 **kwargs):
        self.image_shape = image_shape
        self.width_downsample_ratio = width_downsample_ratio
        self.max_text_length = max_text_length
@@ -432,6 +464,7 @@ class RobustScannerRecResizeImg(object):
        data['word_positons'] = word_positons
        return data


def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
    imgC, imgH, imgW_min, imgW_max = image_shape
    h = img.shape[0]
@@ -467,13 +500,16 @@ def resize_norm_img_sar(img, image_shape, width_downsample_ratio=0.25):
    return padding_im, resize_shape, pad_shape, valid_ratio
def resize_norm_img(img,
                    image_shape,
                    padding=True,
                    interpolation=cv2.INTER_LINEAR):
    imgC, imgH, imgW = image_shape
    h = img.shape[0]
    w = img.shape[1]
    if not padding:
        resized_image = cv2.resize(
            img, (imgW, imgH), interpolation=interpolation)
        resized_w = imgW
    else:
        ratio = w / float(h)
......
@@ -40,6 +40,8 @@ class LMDBDataSet(Dataset):
        if self.do_shuffle:
            np.random.shuffle(self.data_idx_order_list)
        self.ops = create_operators(dataset_config['transforms'], global_config)
        self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx", 1)
        ratio_list = dataset_config.get("ratio_list", [1.0])
        self.need_reset = True in [x < 1 for x in ratio_list]
@@ -92,6 +94,32 @@ class LMDBDataSet(Dataset):
            return None
        return imgori
def get_ext_data(self):
ext_data_num = 0
for op in self.ops:
if hasattr(op, 'ext_data_num'):
ext_data_num = getattr(op, 'ext_data_num')
break
load_data_ops = self.ops[:self.ext_op_transform_idx]
ext_data = []
while len(ext_data) < ext_data_num:
lmdb_idx, file_idx = self.data_idx_order_list[np.random.randint(
len(self))]
lmdb_idx = int(lmdb_idx)
file_idx = int(file_idx)
sample_info = self.get_lmdb_sample_info(
self.lmdb_sets[lmdb_idx]['txn'], file_idx)
if sample_info is None:
continue
img, label = sample_info
data = {'image': img, 'label': label}
data = transform(data, load_data_ops)
if data is None:
continue
ext_data.append(data)
return ext_data
    def get_lmdb_sample_info(self, txn, index):
        label_key = 'label-%09d'.encode() % index
        label = txn.get(label_key)
@@ -112,6 +140,7 @@ class LMDBDataSet(Dataset):
            return self.__getitem__(np.random.randint(self.__len__()))
        img, label = sample_info
        data = {'image': img, 'label': label}
data['ext_data'] = self.get_ext_data()
        outs = transform(data, self.ops)
        if outs is None:
            return self.__getitem__(np.random.randint(self.__len__()))
......
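The new get_ext_data hook lets transforms that mix several samples request extra, independently drawn samples: an op advertises how many it needs via an ext_data_num attribute, and the dataset attaches them under data['ext_data'] before running the full pipeline. A hedged sketch of such a consumer (the class below is illustrative, not the repo's):

import numpy as np

class ConcatAugSketch(object):
    """Illustrative consumer of data['ext_data']; not PaddleOCR's own op."""
    ext_data_num = 1  # ask the dataset to load one extra random sample

    def __call__(self, data):
        ext = data['ext_data'][0]
        # e.g. stitch the two images side by side (assuming equal heights)
        # and join the raw label strings
        data['image'] = np.concatenate([data['image'], ext['image']], axis=1)
        data['label'] = data['label'] + ext['label']
        return data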
from .roi_align_rotated.roi_align_rotated import RoIAlignRotated
// This code is adapted from:
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
#include <cassert>
#include <cmath>
#include <vector>
#include "paddle/extension.h"
#define PADDLE_WITH_CUDA
#define CHECK_INPUT_SAME(x1, x2)                                               \
  PD_CHECK(x1.place() == x2.place(), "inputs must be on the same place.")
#define CHECK_INPUT_CPU(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
template <typename T> struct PreCalc {
int pos1;
int pos2;
int pos3;
int pos4;
T w1;
T w2;
T w3;
T w4;
};
template <typename T>
void pre_calc_for_bilinear_interpolate(
const int height, const int width, const int pooled_height,
const int pooled_width, const int iy_upper, const int ix_upper,
T roi_start_h, T roi_start_w, T bin_size_h, T bin_size_w,
int roi_bin_grid_h, int roi_bin_grid_w, T roi_center_h, T roi_center_w,
T cos_theta, T sin_theta, std::vector<PreCalc<T>> &pre_calc) {
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
for (int iy = 0; iy < iy_upper; iy++) {
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < ix_upper; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
// Rotate by theta around the center and translate
// In image space, (y, x) is the order for Right Handed System,
// and this is essentially multiplying the point by a rotation matrix
// to rotate it counterclockwise through angle theta.
T y = yy * cos_theta - xx * sin_theta + roi_center_h;
T x = yy * sin_theta + xx * cos_theta + roi_center_w;
// deal with: inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
PreCalc<T> pc;
pc.pos1 = 0;
pc.pos2 = 0;
pc.pos3 = 0;
pc.pos4 = 0;
pc.w1 = 0;
pc.w2 = 0;
pc.w3 = 0;
pc.w4 = 0;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
continue;
}
if (y < 0) {
y = 0;
}
if (x < 0) {
x = 0;
}
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
// save weights and indices
PreCalc<T> pc;
pc.pos1 = y_low * width + x_low;
pc.pos2 = y_low * width + x_high;
pc.pos3 = y_high * width + x_low;
pc.pos4 = y_high * width + x_high;
pc.w1 = w1;
pc.w2 = w2;
pc.w3 = w3;
pc.w4 = w4;
pre_calc[pre_calc_index] = pc;
pre_calc_index += 1;
}
}
}
}
}
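The loop above caches, for every sampling point, the four neighbouring pixel offsets and their bilinear weights, so the per-channel pooling loop later reduces to four multiply-adds per point. For a single in-bounds point the computation is equivalent to this small Python transcription (written for clarity; not repo code):

# Positions and weights for one in-bounds sampling point (y, x) in an
# H x W feature map; mirrors the C++ PreCalc computation above.
def bilinear_precalc(y, x, height, width):
    y, x = max(y, 0.0), max(x, 0.0)
    y_low, x_low = int(y), int(x)
    if y_low >= height - 1:      # clamp to the last row
        y_high = y_low = height - 1
        y = float(y_low)
    else:
        y_high = y_low + 1
    if x_low >= width - 1:       # clamp to the last column
        x_high = x_low = width - 1
        x = float(x_low)
    else:
        x_high = x_low + 1
    ly, lx = y - y_low, x - x_low
    hy, hx = 1.0 - ly, 1.0 - lx
    positions = (y_low * width + x_low, y_low * width + x_high,
                 y_high * width + x_low, y_high * width + x_high)
    weights = (hy * hx, hy * lx, ly * hx, ly * lx)  # w1..w4
    return positions, weights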
template <typename T>
void roi_align_rotated_cpu_forward(const int nthreads, const T *input,
const T &spatial_scale, const bool aligned,
const bool clockwise, const int channels,
const int height, const int width,
const int pooled_height,
const int pooled_width,
const int sampling_ratio, const T *rois,
T *output) {
int n_rois = nthreads / channels / pooled_width / pooled_height;
// (n, c, ph, pw) is an element in the pooled output
// can be parallelized using omp
// #pragma omp parallel for num_threads(32)
for (int n = 0; n < n_rois; n++) {
int index_n = n * channels * pooled_width * pooled_height;
const T *current_roi = rois + n * 6;
int roi_batch_ind = current_roi[0];
// Do not use rounding; this implementation detail is critical
T offset = aligned ? (T)0.5 : (T)0.0;
T roi_center_w = current_roi[1] * spatial_scale - offset;
T roi_center_h = current_roi[2] * spatial_scale - offset;
T roi_width = current_roi[3] * spatial_scale;
T roi_height = current_roi[4] * spatial_scale;
T theta = current_roi[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
T cos_theta = cos(theta);
T sin_theta = sin(theta);
if (aligned) {
assert(roi_width >= 0 && roi_height >= 0);
} else { // for backward-compatibility only
roi_width = std::max(roi_width, (T)1.);
roi_height = std::max(roi_height, (T)1.);
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);
// We do average (integral) pooling inside a bin
const T count = std::max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
// we want to precalculate indices and weights shared by all channels,
// this is the key point of optimization
std::vector<PreCalc<T>> pre_calc(roi_bin_grid_h * roi_bin_grid_w *
pooled_width * pooled_height);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T roi_start_h = -roi_height / 2.0;
T roi_start_w = -roi_width / 2.0;
pre_calc_for_bilinear_interpolate(
height, width, pooled_height, pooled_width, roi_bin_grid_h,
roi_bin_grid_w, roi_start_h, roi_start_w, bin_size_h, bin_size_w,
roi_bin_grid_h, roi_bin_grid_w, roi_center_h, roi_center_w, cos_theta,
sin_theta, pre_calc);
for (int c = 0; c < channels; c++) {
int index_n_c = index_n + c * pooled_width * pooled_height;
const T *offset_input =
input + (roi_batch_ind * channels + c) * height * width;
int pre_calc_index = 0;
for (int ph = 0; ph < pooled_height; ph++) {
for (int pw = 0; pw < pooled_width; pw++) {
int index = index_n_c + ph * pooled_width + pw;
T output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
PreCalc<T> pc = pre_calc[pre_calc_index];
output_val += pc.w1 * offset_input[pc.pos1] +
pc.w2 * offset_input[pc.pos2] +
pc.w3 * offset_input[pc.pos3] +
pc.w4 * offset_input[pc.pos4];
pre_calc_index += 1;
}
}
output_val /= count;
output[index] = output_val;
} // for pw
} // for ph
} // for c
} // for n
}
template <typename T>
void bilinear_interpolate_gradient(const int height, const int width, T y, T x,
T &w1, T &w2, T &w3, T &w4, int &x_low,
int &x_high, int &y_low, int &y_high) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}
if (y < 0) {
y = 0;
}
if (x < 0) {
x = 0;
}
y_low = (int)y;
x_low = (int)x;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
return;
}
template <class T> inline void add(T *address, const T &val) {
*address += val;
}
template <typename T>
void roi_align_rotated_cpu_backward(
const int nthreads,
// may not be contiguous. should index using n_stride, etc
const T *grad_output, const T &spatial_scale, const bool aligned,
const bool clockwise, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, const int sampling_ratio,
T *grad_input, const T *rois, const int n_stride, const int c_stride,
const int h_stride, const int w_stride) {
for (int index = 0; index < nthreads; index++) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const T *current_roi = rois + n * 6;
int roi_batch_ind = current_roi[0];
// Do not use rounding; this implementation detail is critical
T offset = aligned ? (T)0.5 : (T)0.0;
T roi_center_w = current_roi[1] * spatial_scale - offset;
T roi_center_h = current_roi[2] * spatial_scale - offset;
T roi_width = current_roi[3] * spatial_scale;
T roi_height = current_roi[4] * spatial_scale;
T theta = current_roi[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
T cos_theta = cos(theta);
T sin_theta = sin(theta);
if (aligned) {
assert(roi_width >= 0 && roi_height >= 0);
} else { // for backward-compatibility only
roi_width = std::max(roi_width, (T)1.);
roi_height = std::max(roi_height, (T)1.);
}
T bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
T bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
T *offset_grad_input =
grad_input + ((roi_batch_ind * channels + c) * height * width);
int output_offset = n * n_stride + c * c_stride;
const T *offset_grad_output = grad_output + output_offset;
const T grad_output_this_bin =
offset_grad_output[ph * h_stride + pw * w_stride];
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
T roi_start_h = -roi_height / 2.0;
T roi_start_w = -roi_width / 2.0;
// We do average (integral) pooling inside a bin
const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
for (int iy = 0; iy < roi_bin_grid_h; iy++) {
const T yy = roi_start_h + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const T xx = roi_start_w + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
// Rotate by theta around the center and translate
T y = yy * cos_theta - xx * sin_theta + roi_center_h;
T x = yy * sin_theta + xx * cos_theta + roi_center_w;
T w1, w2, w3, w4;
int x_low, x_high, y_low, y_high;
bilinear_interpolate_gradient(height, width, y, x, w1, w2, w3, w4,
x_low, x_high, y_low, y_high);
T g1 = grad_output_this_bin * w1 / count;
T g2 = grad_output_this_bin * w2 / count;
T g3 = grad_output_this_bin * w3 / count;
T g4 = grad_output_this_bin * w4 / count;
if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
// atomic add is not needed for now since it is single threaded
add(offset_grad_input + y_low * width + x_low, static_cast<T>(g1));
add(offset_grad_input + y_low * width + x_high, static_cast<T>(g2));
add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
} // if
} // ix
} // iy
} // for
} // ROIAlignRotatedBackward
std::vector<paddle::Tensor>
RoIAlignRotatedCPUForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
CHECK_INPUT_CPU(input);
CHECK_INPUT_CPU(rois);
auto num_rois = rois.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto output =
paddle::empty({num_rois, channels, aligned_height, aligned_width},
input.type(), paddle::CPUPlace());
auto output_size = output.numel();
PD_DISPATCH_FLOATING_TYPES(
input.type(), "roi_align_rotated_cpu_forward", ([&] {
roi_align_rotated_cpu_forward<data_t>(
output_size, input.data<data_t>(),
static_cast<data_t>(spatial_scale), aligned, clockwise, channels,
height, width, aligned_height, aligned_width, sampling_ratio,
rois.data<data_t>(), output.data<data_t>());
}));
return {output};
}
std::vector<paddle::Tensor> RoIAlignRotatedCPUBackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) {
auto batch_size = input.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto grad_input = paddle::full({batch_size, channels, height, width}, 0.0,
input.type(), paddle::CPUPlace());
// get stride values to ensure indexing into gradients is correct.
int n_stride = grad_output.shape()[0];
int c_stride = grad_output.shape()[1];
int h_stride = grad_output.shape()[2];
int w_stride = grad_output.shape()[3];
PD_DISPATCH_FLOATING_TYPES(
grad_output.type(), "roi_align_rotated_cpu_backward", [&] {
roi_align_rotated_cpu_backward<data_t>(
grad_output.numel(), grad_output.data<data_t>(),
static_cast<data_t>(spatial_scale), aligned, clockwise, channels,
height, width, aligned_height, aligned_width, sampling_ratio,
grad_input.data<data_t>(), rois.data<data_t>(), n_stride, c_stride,
h_stride, w_stride);
});
return {grad_input};
}
#ifdef PADDLE_WITH_CUDA
std::vector<paddle::Tensor>
RoIAlignRotatedCUDAForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise);
#endif
#ifdef PADDLE_WITH_CUDA
std::vector<paddle::Tensor> RoIAlignRotatedCUDABackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise);
#endif
std::vector<paddle::Tensor>
RoIAlignRotatedForward(const paddle::Tensor &input, const paddle::Tensor &rois,
int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned,
bool clockwise) {
CHECK_INPUT_SAME(input, rois);
if (input.is_cpu()) {
return RoIAlignRotatedCPUForward(input, rois, aligned_height, aligned_width,
spatial_scale, sampling_ratio, aligned,
clockwise);
#ifdef PADDLE_WITH_CUDA
} else if (input.is_gpu()) {
return RoIAlignRotatedCUDAForward(input, rois, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#endif
} else {
PD_THROW("Unsupported device type for forward function of roi align "
"rotated operator.");
}
}
std::vector<paddle::Tensor>
RoIAlignRotatedBackward(const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
CHECK_INPUT_SAME(input, rois);
if (input.is_cpu()) {
return RoIAlignRotatedCPUBackward(input, rois, grad_output, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#ifdef PADDLE_WITH_CUDA
} else if (input.is_gpu()) {
return RoIAlignRotatedCUDABackward(input, rois, grad_output, aligned_height,
aligned_width, spatial_scale,
sampling_ratio, aligned, clockwise);
#endif
} else {
PD_THROW("Unsupported device type for forward function of roi align "
"rotated operator.");
}
}
std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> input_shape,
std::vector<int64_t> rois_shape) {
return {{rois_shape[0], input_shape[1], input_shape[2], input_shape[3]}};
}
std::vector<std::vector<int64_t>>
InferBackShape(std::vector<int64_t> input_shape,
std::vector<int64_t> rois_shape) {
return {input_shape};
}
std::vector<paddle::DataType> InferDtype(paddle::DataType input_dtype,
paddle::DataType rois_dtype) {
return {input_dtype};
}
PD_BUILD_OP(roi_align_rotated)
.Inputs({"Input", "Rois"})
.Outputs({"Output"})
.Attrs({"aligned_height: int", "aligned_width: int", "spatial_scale: float",
"sampling_ratio: int", "aligned: bool", "clockwise: bool"})
.SetKernelFn(PD_KERNEL(RoIAlignRotatedForward))
.SetInferShapeFn(PD_INFER_SHAPE(InferShape))
.SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
PD_BUILD_GRAD_OP(roi_align_rotated)
.Inputs({"Input", "Rois", paddle::Grad("Output")})
.Attrs({"aligned_height: int", "aligned_width: int", "spatial_scale: float",
"sampling_ratio: int", "aligned: bool", "clockwise: bool"})
.Outputs({paddle::Grad("Input")})
.SetKernelFn(PD_KERNEL(RoIAlignRotatedBackward))
.SetInferShapeFn(PD_INFER_SHAPE(InferBackShape));
// This code is adapted from:
// https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
#include <cassert>
#include <cmath>
#include <vector>
#include "paddle/extension.h"
#include <cuda.h>
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
#define THREADS_PER_BLOCK 512
inline int GET_BLOCKS(const int N) {
int optimal_block_num = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
int max_block_num = 4096;
return min(optimal_block_num, max_block_num);
}
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
static __inline__ __device__ double atomicAdd(double *address, double val) {
unsigned long long int *address_as_ull = (unsigned long long int *)address;
unsigned long long int old = *address_as_ull, assumed;
if (val == 0.0)
return __longlong_as_double(old);
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
}
#endif
template <typename T>
__device__ T bilinear_interpolate(const T *input, const int height,
const int width, T y, T x,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width)
return 0;
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
int y_low = (int)y;
int x_low = (int)x;
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// do bilinear interpolation
T v1 = input[y_low * width + x_low];
T v2 = input[y_low * width + x_high];
T v3 = input[y_high * width + x_low];
T v4 = input[y_high * width + x_high];
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
template <typename T>
__device__ void
bilinear_interpolate_gradient(const int height, const int width, T y, T x,
T &w1, T &w2, T &w3, T &w4, int &x_low,
int &x_high, int &y_low, int &y_high,
const int index /* index for debug only*/) {
// deal with cases that inverse elements are out of feature map boundary
if (y < -1.0 || y > height || x < -1.0 || x > width) {
// empty
w1 = w2 = w3 = w4 = 0.;
x_low = x_high = y_low = y_high = -1;
return;
}
if (y <= 0)
y = 0;
if (x <= 0)
x = 0;
y_low = (int)y;
x_low = (int)x;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = (T)y_low;
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = (T)x_low;
} else {
x_high = x_low + 1;
}
T ly = y - y_low;
T lx = x - x_low;
T hy = 1. - ly, hx = 1. - lx;
// reference in forward
// T v1 = input[y_low * width + x_low];
// T v2 = input[y_low * width + x_high];
// T v3 = input[y_high * width + x_low];
// T v4 = input[y_high * width + x_high];
// T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
return;
}
/*** Forward ***/
template <typename scalar_t>
__global__ void roi_align_rotated_cuda_forward_kernel(
const int nthreads, const scalar_t *bottom_data,
const scalar_t *bottom_rois, const scalar_t spatial_scale,
const int sample_num, const bool aligned, const bool clockwise,
const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, scalar_t *top_data) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
int roi_batch_ind = offset_bottom_rois[0];
    // Do not use rounding; this implementation detail is critical
scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset;
scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset;
scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
// scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
scalar_t theta = offset_bottom_rois[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
if (!aligned) { // for backward-compatibility only
// Force malformed ROIs to be 1x1
roi_width = max(roi_width, (scalar_t)1.);
roi_height = max(roi_height, (scalar_t)1.);
}
scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
static_cast<scalar_t>(pooled_height);
scalar_t bin_size_w =
static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
const scalar_t *offset_bottom_data =
bottom_data + (roi_batch_ind * channels + c) * height * width;
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sample_num > 0)
? sample_num
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
scalar_t roi_start_h = -roi_height / 2.0;
scalar_t roi_start_w = -roi_width / 2.0;
scalar_t cosscalar_theta = cos(theta);
scalar_t sinscalar_theta = sin(theta);
// We do average (integral) pooling inside a bin
const scalar_t count = max(roi_bin_grid_h * roi_bin_grid_w, 1); // e.g. = 4
scalar_t output_val = 0.;
for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
const scalar_t yy =
roi_start_h + ph * bin_size_h +
static_cast<scalar_t>(iy + .5f) * bin_size_h /
static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const scalar_t xx = roi_start_w + pw * bin_size_w +
static_cast<scalar_t>(ix + .5f) * bin_size_w /
static_cast<scalar_t>(roi_bin_grid_w);
// Rotate by theta (counterclockwise) around the center and translate
scalar_t y = yy * cosscalar_theta - xx * sinscalar_theta + roi_center_h;
scalar_t x = yy * sinscalar_theta + xx * cosscalar_theta + roi_center_w;
scalar_t val = bilinear_interpolate<scalar_t>(
offset_bottom_data, height, width, y, x, index);
output_val += val;
}
}
output_val /= count;
top_data[index] = output_val;
}
}
/*** Backward ***/
template <typename scalar_t>
__global__ void roi_align_rotated_backward_cuda_kernel(
const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
const scalar_t spatial_scale, const int sample_num, const bool aligned,
const bool clockwise, const int channels, const int height, const int width,
const int pooled_height, const int pooled_width, scalar_t *bottom_diff) {
CUDA_1D_KERNEL_LOOP(index, nthreads) {
// (n, c, ph, pw) is an element in the pooled output
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
const scalar_t *offset_bottom_rois = bottom_rois + n * 6;
int roi_batch_ind = offset_bottom_rois[0];
// Do not round
scalar_t offset = aligned ? (scalar_t)0.5 : (scalar_t)0.0;
scalar_t roi_center_w = offset_bottom_rois[1] * spatial_scale - offset;
scalar_t roi_center_h = offset_bottom_rois[2] * spatial_scale - offset;
scalar_t roi_width = offset_bottom_rois[3] * spatial_scale;
scalar_t roi_height = offset_bottom_rois[4] * spatial_scale;
// scalar_t theta = offset_bottom_rois[5] * M_PI / 180.0;
scalar_t theta = offset_bottom_rois[5];
if (clockwise) {
theta = -theta; // If clockwise, the angle needs to be reversed.
}
if (!aligned) { // for backward-compatibility only
// Force malformed ROIs to be 1x1
roi_width = max(roi_width, (scalar_t)1.);
roi_height = max(roi_height, (scalar_t)1.);
}
scalar_t bin_size_h = static_cast<scalar_t>(roi_height) /
static_cast<scalar_t>(pooled_height);
scalar_t bin_size_w =
static_cast<scalar_t>(roi_width) / static_cast<scalar_t>(pooled_width);
scalar_t *offset_bottom_diff =
bottom_diff + (roi_batch_ind * channels + c) * height * width;
int top_offset = (n * channels + c) * pooled_height * pooled_width;
const scalar_t *offset_top_diff = top_diff + top_offset;
const scalar_t top_diff_this_bin = offset_top_diff[ph * pooled_width + pw];
// We use roi_bin_grid to sample the grid and mimic integral
int roi_bin_grid_h = (sample_num > 0)
? sample_num
: ceilf(roi_height / pooled_height); // e.g., = 2
int roi_bin_grid_w =
(sample_num > 0) ? sample_num : ceilf(roi_width / pooled_width);
// roi_start_h and roi_start_w are computed wrt the center of RoI (x, y).
// Appropriate translation needs to be applied after.
scalar_t roi_start_h = -roi_height / 2.0;
scalar_t roi_start_w = -roi_width / 2.0;
scalar_t cosTheta = cos(theta);
scalar_t sinTheta = sin(theta);
// We do average (integral) pooling inside a bin
const scalar_t count = roi_bin_grid_h * roi_bin_grid_w; // e.g. = 4
for (int iy = 0; iy < roi_bin_grid_h; iy++) { // e.g., iy = 0, 1
const scalar_t yy =
roi_start_h + ph * bin_size_h +
static_cast<scalar_t>(iy + .5f) * bin_size_h /
static_cast<scalar_t>(roi_bin_grid_h); // e.g., 0.5, 1.5
for (int ix = 0; ix < roi_bin_grid_w; ix++) {
const scalar_t xx = roi_start_w + pw * bin_size_w +
static_cast<scalar_t>(ix + .5f) * bin_size_w /
static_cast<scalar_t>(roi_bin_grid_w);
// Rotate by theta around the center and translate
scalar_t y = yy * cosTheta - xx * sinTheta + roi_center_h;
scalar_t x = yy * sinTheta + xx * cosTheta + roi_center_w;
scalar_t w1, w2, w3, w4;
int x_low, x_high, y_low, y_high;
bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2, w3,
w4, x_low, x_high, y_low,
y_high, index);
scalar_t g1 = top_diff_this_bin * w1 / count;
scalar_t g2 = top_diff_this_bin * w2 / count;
scalar_t g3 = top_diff_this_bin * w3 / count;
scalar_t g4 = top_diff_this_bin * w4 / count;
if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
} // if
} // ix
} // iy
} // CUDA_1D_KERNEL_LOOP
} // RoIAlignBackward
std::vector<paddle::Tensor>
RoIAlignRotatedCUDAForward(const paddle::Tensor &input,
const paddle::Tensor &rois, int aligned_height,
int aligned_width, float spatial_scale,
int sampling_ratio, bool aligned, bool clockwise) {
auto num_rois = rois.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto output =
paddle::empty({num_rois, channels, aligned_height, aligned_width},
input.type(), paddle::GPUPlace());
auto output_size = output.numel();
PD_DISPATCH_FLOATING_TYPES(
input.type(), "roi_align_rotated_cuda_forward_kernel", ([&] {
roi_align_rotated_cuda_forward_kernel<
data_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, input.data<data_t>(), rois.data<data_t>(),
static_cast<data_t>(spatial_scale), sampling_ratio, aligned,
clockwise, channels, height, width, aligned_height, aligned_width,
output.data<data_t>());
}));
return {output};
}
std::vector<paddle::Tensor> RoIAlignRotatedCUDABackward(
const paddle::Tensor &input, const paddle::Tensor &rois,
const paddle::Tensor &grad_output, int aligned_height, int aligned_width,
float spatial_scale, int sampling_ratio, bool aligned, bool clockwise) {
auto num_rois = rois.shape()[0];
auto batch_size = input.shape()[0];
auto channels = input.shape()[1];
auto height = input.shape()[2];
auto width = input.shape()[3];
auto grad_input = paddle::full({batch_size, channels, height, width}, 0.0,
input.type(), paddle::GPUPlace());
const int output_size = num_rois * aligned_height * aligned_width * channels;
PD_DISPATCH_FLOATING_TYPES(
grad_output.type(), "roi_align_rotated_backward_cuda_kernel", ([&] {
roi_align_rotated_backward_cuda_kernel<
data_t><<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
output_size, grad_output.data<data_t>(), rois.data<data_t>(),
spatial_scale, sampling_ratio, aligned, clockwise, channels, height,
width, aligned_height, aligned_width, grad_input.data<data_t>());
}));
return {grad_input};
}
\ No newline at end of file
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmcv/blob/master/mmcv/ops/roi_align_rotated.py
"""
import paddle
import paddle.nn as nn
from paddle.utils.cpp_extension import load
custom_ops = load(
name="custom_jit_ops",
sources=[
"ppocr/ext_op/roi_align_rotated/roi_align_rotated.cc",
"ppocr/ext_op/roi_align_rotated/roi_align_rotated.cu"
])
roi_align_rotated = custom_ops.roi_align_rotated
class RoIAlignRotated(nn.Layer):
"""RoI align pooling layer for rotated proposals.
"""
def __init__(self,
out_size,
spatial_scale,
sample_num=0,
aligned=True,
clockwise=False):
super(RoIAlignRotated, self).__init__()
if isinstance(out_size, int):
self.out_h = out_size
self.out_w = out_size
elif isinstance(out_size, tuple):
assert len(out_size) == 2
assert isinstance(out_size[0], int)
assert isinstance(out_size[1], int)
self.out_h, self.out_w = out_size
else:
raise TypeError(
'"out_size" must be an integer or tuple of integers')
self.spatial_scale = float(spatial_scale)
self.sample_num = int(sample_num)
self.aligned = aligned
self.clockwise = clockwise
def forward(self, feats, rois):
output = roi_align_rotated(feats, rois, self.out_h, self.out_w,
self.spatial_scale, self.sample_num,
self.aligned, self.clockwise)
return output
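Assuming the JIT build of the .cc/.cu sources above succeeds, the layer can be exercised as follows (shapes and values are illustrative; each RoI row is [batch_index, center_x, center_y, width, height, angle_in_radians], matching the kernels above):

import paddle

feats = paddle.randn([2, 256, 32, 32])                  # (N, C, H, W) features
rois = paddle.to_tensor([[0., 16., 16., 8., 4., 0.5]])  # one rotated RoI
pool = RoIAlignRotated(out_size=7, spatial_scale=1.0, sample_num=2)
out = pool(feats, rois)
print(out.shape)  # [1, 256, 7, 7]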
@@ -26,6 +26,7 @@ from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss
from .det_fce_loss import FCELoss
from .det_ct_loss import CTLoss
from .det_drrg_loss import DRRGLoss

# rec loss
from .rec_ctc_loss import CTCLoss
@@ -38,6 +39,8 @@ from .rec_pren_loss import PRENLoss
from .rec_multi_loss import MultiLoss
from .rec_vl_loss import VLLoss
from .rec_spin_att_loss import SPINAttentionLoss
from .rec_rfl_loss import RFLLoss
from .rec_can_loss import CANLoss
# cls loss
from .cls_loss import ClsLoss
@@ -60,6 +63,7 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss

# sr loss
from .stroke_focus_loss import StrokeFocusLoss
from .text_focus_loss import TelescopeLoss
def build_loss(config):
@@ -69,7 +73,7 @@ def build_loss(config):
        'CELoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
        'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss',
        'TableMasterLoss', 'SPINAttentionLoss', 'VLLoss', 'StrokeFocusLoss',
        'SLALoss', 'CTLoss', 'RFLLoss', 'DRRGLoss', 'CANLoss', 'TelescopeLoss'
    ]
    config = copy.deepcopy(config)
    module_name = config.pop('name')
......
(This diff is collapsed.)
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/LBH1024/CAN/models/can.py
"""
import paddle
import paddle.nn as nn
import numpy as np
class CANLoss(nn.Layer):
    '''
    CANLoss consists of two parts:
        word_average_loss: average per-symbol classification (word) loss
        counting_loss: counting loss for every symbol
    '''
def __init__(self):
super(CANLoss, self).__init__()
self.use_label_mask = False
self.out_channel = 111
self.cross = nn.CrossEntropyLoss(
reduction='none') if self.use_label_mask else nn.CrossEntropyLoss()
self.counting_loss = nn.SmoothL1Loss(reduction='mean')
self.ratio = 16
def forward(self, preds, batch):
word_probs = preds[0]
counting_preds = preds[1]
counting_preds1 = preds[2]
counting_preds2 = preds[3]
labels = batch[2]
labels_mask = batch[3]
counting_labels = gen_counting_label(labels, self.out_channel, True)
counting_loss = self.counting_loss(counting_preds1, counting_labels) + self.counting_loss(counting_preds2, counting_labels) \
+ self.counting_loss(counting_preds, counting_labels)
word_loss = self.cross(
paddle.reshape(word_probs, [-1, word_probs.shape[-1]]),
paddle.reshape(labels, [-1]))
word_average_loss = paddle.sum(
paddle.reshape(word_loss * labels_mask, [-1])) / (
paddle.sum(labels_mask) + 1e-10
) if self.use_label_mask else word_loss
loss = word_average_loss + counting_loss
return {'loss': loss}
def gen_counting_label(labels, channel, tag):
b, t = labels.shape
counting_labels = np.zeros([b, channel])
if tag:
ignore = [0, 1, 107, 108, 109, 110]
else:
ignore = []
for i in range(b):
for j in range(t):
k = labels[i][j]
if k in ignore:
continue
else:
counting_labels[i][k] += 1
counting_labels = paddle.to_tensor(counting_labels, dtype='float32')
return counting_labels
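gen_counting_label simply histograms symbol occurrences per sample, skipping the structural tokens (indices 0, 1 and 107-110) when tag is True. A toy check (passing a plain numpy batch for simplicity; in training it receives the label tensor from the batch):

import numpy as np

labels = np.array([[5, 5, 9, 0, 0]], dtype='int64')  # one padded sequence
cnt = gen_counting_label(labels, channel=111, tag=True)
print(cnt.shape)         # [1, 111]
print(float(cnt[0][5]))  # 2.0: symbol 5 occurs twice; padding 0s are ignored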
(2 collapsed diffs are not shown in this view.)
@@ -22,7 +22,7 @@ import copy

__all__ = ["build_metric"]

from .det_metric import DetMetric, DetFCEMetric
from .rec_metric import RecMetric, CNTMetric, CANMetric
from .cls_metric import ClsMetric
from .e2e_metric import E2EMetric
from .distillation_metric import DistillationMetric
@@ -38,7 +38,7 @@ def build_metric(config):
    support_dict = [
        "DetMetric", "DetFCEMetric", "RecMetric", "ClsMetric", "E2EMetric",
        "DistillationMetric", "TableMetric", 'KIEMetric', 'VQASerTokenMetric',
        'VQAReTokenMetric', 'SRMetric', 'CTMetric', 'CNTMetric', 'CANMetric'
    ]
    config = copy.deepcopy(config)
......
(14 collapsed diffs are not shown in this view.)
@@ -53,6 +53,9 @@ def build_optimizer(config, epochs, step_each_epoch, model):
    if 'clip_norm' in config:
        clip_norm = config.pop('clip_norm')
        grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
elif 'clip_norm_global' in config:
clip_norm = config.pop('clip_norm_global')
grad_clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=clip_norm)
    else:
        grad_clip = None
    optim = getattr(optimizer, optim_name)(learning_rate=lr,
......
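With the new clip_norm_global key, gradients are rescaled jointly so that the global L2 norm over all parameters stays below the threshold, instead of clipping each tensor independently as clip_norm does. A minimal sketch of what the built optimizer ends up using, assuming a config with clip_norm_global: 5.0:

import paddle

model = paddle.nn.Linear(10, 2)  # stand-in model for illustration
optim = paddle.optimizer.Adam(
    learning_rate=1e-3,
    parameters=model.parameters(),
    grad_clip=paddle.nn.ClipGradByGlobalNorm(clip_norm=5.0))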
(43 collapsed diffs are not shown in this view.)