Commit e40fd431 authored by qq_25193841

Merge remote-tracking branch 'origin/dygraph' into dygraph

@@ -81,7 +81,7 @@ PaddleOCR aims to create a rich, leading, and practical OCR toolkit that helps
| Model description | Model name | Recommended scenario | Detection model | Direction classifier | Recognition model |
| ------------ | --------------- | ----------------|---- | ---------- | -------- |
| Chinese & English ultra-lightweight PP-OCRv2 model (13.0M) | ch_PP-OCRv2_xx | Mobile & server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/chinese/ch_PP-OCRv2_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
| Chinese & English ultra-lightweight PP-OCRv2 model (13.0M) | ch_PP-OCRv2_xx | Mobile & server | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [training model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
| Chinese & English ultra-lightweight PP-OCR mobile model (9.4M) | ch_ppocr_mobile_v2.0_xx | Mobile & server | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| Chinese & English general PP-OCR server model (143.4M) | ch_ppocr_server_v2.0_xx | Server | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pre-trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
......
Global:
  use_gpu: true
  epoch_num: 600
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_mv3_pse/
  save_epoch_step: 600
  # evaluation is run every 63 iterations
  eval_batch_step: [ 0,63 ]
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
  checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./output/det_pse/predicts_pse.txt

Architecture:
  model_type: det
  algorithm: PSE
  Transform: null
  Backbone:
    name: MobileNetV3
    scale: 0.5
    model_name: large
  Neck:
    name: FPN
    out_channels: 96
  Head:
    name: PSEHead
    hidden_dim: 96
    out_channels: 7

Loss:
  name: PSELoss
  alpha: 0.7
  ohem_ratio: 3
  kernel_sample_mask: pred
  reduction: none

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Step
    learning_rate: 0.001
    step_size: 200
    gamma: 0.1
  regularizer:
    name: 'L2'
    factor: 0.0005

PostProcess:
  name: PSEPostProcess
  thresh: 0
  box_thresh: 0.85
  min_area: 16
  box_type: box # 'box' or 'poly'
  scale: 1

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - ColorJitter:
          brightness: 0.12549019607843137
          saturation: 0.5
      - IaaAugment:
          augmenter_args:
            - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } }
            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
            - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } }
      - MakePseGt:
          kernel_num: 7
          min_shrink_ratio: 0.4
          size: 640
      - RandomCropImgMask:
          size: [ 640,640 ]
          main_key: gt_text
          crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ]
      - NormalizeImage:
          scale: 1./255.
          mean: [ 0.485, 0.456, 0.406 ]
          std: [ 0.229, 0.224, 0.225 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 16
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          limit_side_len: 736
          limit_type: min
      - NormalizeImage:
          scale: 1./255.
          mean: [ 0.485, 0.456, 0.406 ]
          std: [ 0.229, 0.224, 0.225 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ]
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 8
\ No newline at end of file
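A quick sanity check on the config above: with `kernel_num: 7` and `min_shrink_ratio: 0.4`, the `MakePseGt` transform (its rate formula appears later in this diff) builds seven progressively shrunk kernel maps. A minimal sketch of the implied shrink ratios, not part of the repo:

```python
# Sketch only: reproduces the rate formula used by MakePseGt below
# with the values from det_mv3_pse.yml.
kernel_num = 7
min_shrink_ratio = 0.4
rates = [
    1.0 - (1.0 - min_shrink_ratio) / (kernel_num - 1) * i
    for i in range(1, kernel_num + 1)
]
print([round(r, 2) for r in rates])  # [0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
```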
Global:
  use_gpu: true
  epoch_num: 600
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_r50_vd_pse/
  save_epoch_step: 600
  # evaluation is run every 125 iterations
  eval_batch_step: [ 0,125 ]
  cal_metric_during_train: False
  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
  checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
  save_inference_dir:
  use_visualdl: False
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./output/det_pse/predicts_pse.txt

Architecture:
  model_type: det
  algorithm: PSE
  Transform:
  Backbone:
    name: ResNet
    layers: 50
  Neck:
    name: FPN
    out_channels: 256
  Head:
    name: PSEHead
    hidden_dim: 256
    out_channels: 7

Loss:
  name: PSELoss
  alpha: 0.7
  ohem_ratio: 3
  kernel_sample_mask: pred
  reduction: none

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Step
    learning_rate: 0.0001
    step_size: 200
    gamma: 0.1
  regularizer:
    name: 'L2'
    factor: 0.0005

PostProcess:
  name: PSEPostProcess
  thresh: 0
  box_thresh: 0.85
  min_area: 16
  box_type: box # 'box' or 'poly'
  scale: 1

Metric:
  name: DetMetric
  main_indicator: hmean

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - ColorJitter:
          brightness: 0.12549019607843137
          saturation: 0.5
      - IaaAugment:
          augmenter_args:
            - { 'type': Resize, 'args': { 'size': [ 0.5, 3 ] } }
            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
            - { 'type': Affine, 'args': { 'rotate': [ -10, 10 ] } }
      - MakePseGt:
          kernel_num: 7
          min_shrink_ratio: 0.4
          size: 640
      - RandomCropImgMask:
          size: [ 640,640 ]
          main_key: gt_text
          crop_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ]
      - NormalizeImage:
          scale: 1./255.
          mean: [ 0.485, 0.456, 0.406 ]
          std: [ 0.229, 0.224, 0.225 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'gt_text', 'gt_kernels', 'mask' ] # the order of the dataloader list
  loader:
    shuffle: True
    drop_last: False
    batch_size_per_card: 8
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    ratio_list: [ 1.0 ]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - DetLabelEncode: # Class handling label
      - DetResizeForTest:
          limit_side_len: 736
          limit_type: min
      - NormalizeImage:
          scale: 1./255.
          mean: [ 0.485, 0.456, 0.406 ]
          std: [ 0.229, 0.224, 0.225 ]
          order: 'hwc'
      - ToCHWImage:
      - KeepKeys:
          keep_keys: [ 'image', 'shape', 'polys', 'ignore_tags' ]
  loader:
    shuffle: False
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 8
\ No newline at end of file
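Both PSE configs decay the learning rate with a `Step` schedule (`step_size: 200`, `gamma: 0.1` over 600 epochs). Assuming the usual step-decay rule `lr * gamma ** (epoch // step_size)`, the schedule for the ResNet50_vd config works out as follows:

```python
# Hedged sketch: assumes the standard step-decay definition; values are
# from det_r50_vd_pse.yml above (base lr 0.0001, step_size 200, gamma 0.1).
base_lr, step_size, gamma = 0.0001, 200, 0.1

def step_lr(epoch):
    return base_lr * gamma ** (epoch // step_size)

for epoch in (0, 199, 200, 399, 400, 599):
    print(epoch, step_lr(epoch))
# 1e-4 for epochs 0-199, 1e-5 for 200-399, 1e-6 for 400-599
```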
@@ -4,7 +4,7 @@ Global:
  epoch_num: 800
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_chinese_lite_distillation_v2.1
  save_model_dir: ./output/rec_mobile_pp-OCRv2
  save_epoch_step: 3
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
@@ -19,7 +19,7 @@ Global:
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_chinese_lite_distillation_v2.1.txt
  save_res_path: ./output/rec/predicts_mobile_pp-OCRv2.txt
Optimizer:
@@ -35,79 +35,32 @@ Optimizer:
    name: L2
    factor: 2.0e-05

Architecture:
  model_type: &model_type "rec"
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: CRNN
      Transform:
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
      Neck:
        name: SequenceEncoder
        encoder_type: rnn
        hidden_size: 64
      Head:
        name: CTCHead
        mid_channels: 96
        fc_decay: 0.00002
    Student:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: CRNN
      Transform:
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
      Neck:
        name: SequenceEncoder
        encoder_type: rnn
        hidden_size: 64
      Head:
        name: CTCHead
        mid_channels: 96
        fc_decay: 0.00002
  model_type: rec
  algorithm: CRNN
  Transform:
  Backbone:
    name: MobileNetV1Enhance
    scale: 0.5
  Neck:
    name: SequenceEncoder
    encoder_type: rnn
    hidden_size: 64
  Head:
    name: CTCHead
    mid_channels: 96
    fc_decay: 0.00002

Loss:
  name: CombinedLoss
  loss_config_list:
  - DistillationCTCLoss:
      weight: 1.0
      model_name_list: ["Student", "Teacher"]
      key: head_out
  - DistillationDMLLoss:
      weight: 1.0
      act: "softmax"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: head_out
  - DistillationDistanceLoss:
      weight: 1.0
      mode: "l2"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: backbone_out
  name: CTCLoss

PostProcess:
  name: DistillationCTCLabelDecode
  model_name: ["Student", "Teacher"]
  key: head_out
  name: CTCLabelDecode

Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  name: RecMetric
  main_indicator: acc
  key: "Student"

Train:
  dataset:
@@ -132,7 +85,6 @@ Train:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    num_sections: 1
    num_workers: 8
Eval:
  dataset:
......
Global:
  debug: false
  use_gpu: true
  epoch_num: 800
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/rec_pp-OCRv2_distillation
  save_epoch_step: 3
  eval_batch_step: [0, 2000]
  cal_metric_during_train: true
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: false
  infer_img: doc/imgs_words/ch/word_1.jpg
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
  character_type: ch
  max_text_length: 25
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: ./output/rec/predicts_pp-OCRv2_distillation.txt

Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Piecewise
    decay_epochs: [700, 800]
    values: [0.001, 0.0001]
    warmup_epoch: 5
  regularizer:
    name: L2
    factor: 2.0e-05

Architecture:
  model_type: &model_type "rec"
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: CRNN
      Transform:
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
      Neck:
        name: SequenceEncoder
        encoder_type: rnn
        hidden_size: 64
      Head:
        name: CTCHead
        mid_channels: 96
        fc_decay: 0.00002
    Student:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: CRNN
      Transform:
      Backbone:
        name: MobileNetV1Enhance
        scale: 0.5
      Neck:
        name: SequenceEncoder
        encoder_type: rnn
        hidden_size: 64
      Head:
        name: CTCHead
        mid_channels: 96
        fc_decay: 0.00002

Loss:
  name: CombinedLoss
  loss_config_list:
  - DistillationCTCLoss:
      weight: 1.0
      model_name_list: ["Student", "Teacher"]
      key: head_out
  - DistillationDMLLoss:
      weight: 1.0
      act: "softmax"
      use_log: true
      model_name_pairs:
      - ["Student", "Teacher"]
      key: head_out
  - DistillationDistanceLoss:
      weight: 1.0
      mode: "l2"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: backbone_out

PostProcess:
  name: DistillationCTCLabelDecode
  model_name: ["Student", "Teacher"]
  key: head_out

Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  main_indicator: acc
  key: "Student"

Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/
    label_file_list:
    - ./train_data/train_list.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - RecAug:
    - CTCLabelEncode:
    - RecResizeImg:
        image_shape: [3, 32, 320]
    - KeepKeys:
        keep_keys:
        - image
        - label
        - length
  loader:
    shuffle: true
    batch_size_per_card: 128
    drop_last: true
    num_sections: 1
    num_workers: 8

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data
    label_file_list:
    - ./train_data/val_list.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - CTCLabelEncode:
    - RecResizeImg:
        image_shape: [3, 32, 320]
    - KeepKeys:
        keep_keys:
        - image
        - label
        - length
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 128
    num_workers: 8
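The `Piecewise` schedule above holds 0.001 until epoch 700 and 0.0001 afterwards, with a 5-epoch warmup. A sketch of the resulting learning rate; the linear ramp from zero during warmup is an assumption about PaddleOCR's warmup convention, not something this diff states:

```python
# Sketch under stated assumptions (linear warmup from 0); the
# decay_epochs/values/warmup_epoch come from the config above.
decay_epochs, values, warmup_epoch = [700, 800], [0.001, 0.0001], 5

def piecewise_lr(epoch):
    if epoch < warmup_epoch:          # assumed linear warmup
        return values[0] * (epoch + 1) / warmup_epoch
    for boundary, value in zip(decay_epochs, values):
        if epoch < boundary:
            return value
    return values[-1]

print([piecewise_lr(e) for e in (0, 4, 5, 699, 700, 799)])
# [0.0002, 0.001, 0.001, 0.001, 0.0001, 0.0001]
```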
@@ -46,7 +46,7 @@ Architecture:
    name: Transformer
    d_model: 512
    num_encoder_layers: 6
    beam_size: 10 # when beam_size > 0, beam search is used during evaluation
    beam_size: -1 # when beam_size > 0, beam search is used during evaluation

Loss:
@@ -65,7 +65,7 @@ Train:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/training/
    transforms:
      - NRTRDecodeImage: # load image
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - NRTRLabelEncode: # Class handling label
@@ -85,7 +85,7 @@ Eval:
    name: LMDBDataSet
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      - NRTRDecodeImage: # load image
      - DecodeImage: # load image
          img_mode: BGR
          channel_first: False
      - NRTRLabelEncode: # Class handling label
......
@@ -79,7 +79,7 @@ Train:
Eval:
  dataset:
    name: LMDBDataSet
    data_dir: ./eval_data/evaluation/
    data_dir: ./train_data/data_lmdb_release/evaluation/
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
......
@@ -91,7 +91,7 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
                 FLAGS_use_tensorrt, FLAGS_precision);

  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    LOG(INFO) << "The predict img: " << cv_all_img_names[i];
    // LOG(INFO) << "The predict img: " << cv_all_img_names[i];

    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
    if (!srcimg.data) {
@@ -106,6 +106,16 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
    time_info[0] += det_times[0];
    time_info[1] += det_times[1];
    time_info[2] += det_times[2];

    if (FLAGS_benchmark) {
      cout << cv_all_img_names[i] << '\t';
      for (int n = 0; n < boxes.size(); n++) {
        for (int m = 0; m < boxes[n].size(); m++) {
          cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' ';
        }
      }
      cout << endl;
    }
  }
  if (FLAGS_benchmark) {
......
@@ -13,7 +13,7 @@ def read_params():
    #params for text detector
    cfg.det_algorithm = "DB"
    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
    cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/"
    cfg.det_limit_side_len = 960
    cfg.det_limit_type = 'max'
......
@@ -13,7 +13,7 @@ def read_params():
    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
    cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"
    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'
......
@@ -13,7 +13,7 @@ def read_params():
    #params for text detector
    cfg.det_algorithm = "DB"
    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
    cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/"
    cfg.det_limit_side_len = 960
    cfg.det_limit_type = 'max'
@@ -31,7 +31,7 @@ def read_params():
    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
    cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"
    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'
......
@@ -34,10 +34,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim
```

### 2. Download the inference model
Before installing the service module, prepare the inference model and place it at the correct path. The v2.0 ultra-lightweight models are used by default; the default model paths are:
Before installing the service module, prepare the inference model and place it at the correct path. The PP-OCRv2 models are used by default; the default model paths are:
```
detection model: ./inference/ch_ppocr_mobile_v2.0_det_infer/
recognition model: ./inference/ch_ppocr_mobile_v2.0_rec_infer/
detection model: ./inference/ch_PP-OCRv2_det_infer/
recognition model: ./inference/ch_PP-OCRv2_rec_infer/
direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
```
......
@@ -35,10 +35,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim
```
### 2. Download inference model
Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v2.0 is used, and the default model path is:
Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv2 models are used, and the default model path is:
```
detection model: ./inference/ch_ppocr_mobile_v2.0_det_infer/
recognition model: ./inference/ch_ppocr_mobile_v2.0_rec_infer/
detection model: ./inference/ch_PP-OCRv2_det_infer/
recognition model: ./inference/ch_PP-OCRv2_rec_infer/
text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
```
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
sys.path.append(
    os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))

import yaml
import paddle
import paddle.distributed as dist

paddle.seed(2)

from ppocr.data import build_dataloader
from ppocr.modeling.architectures import build_model
from ppocr.losses import build_loss
from ppocr.optimizer import build_optimizer
from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric
from ppocr.utils.save_load import init_model
import tools.program as program
import paddleslim
from paddleslim.dygraph.quant import QAT
import numpy as np

dist.get_world_size()


class PACT(paddle.nn.Layer):
    def __init__(self):
        super(PACT, self).__init__()
        alpha_attr = paddle.ParamAttr(
            name=self.full_name() + ".pact",
            initializer=paddle.nn.initializer.Constant(value=20),
            learning_rate=1.0,
            regularizer=paddle.regularizer.L2Decay(2e-5))
        self.alpha = self.create_parameter(
            shape=[1], attr=alpha_attr, dtype='float32')

    def forward(self, x):
        # PACT clips activations to [-alpha, alpha] with a learnable alpha
        out_left = paddle.nn.functional.relu(x - self.alpha)
        out_right = paddle.nn.functional.relu(-self.alpha - x)
        x = x - out_left + out_right
        return x


quant_config = {
    # weight preprocess type, default is None and no preprocessing is performed.
    'weight_preprocess_type': None,
    # activation preprocess type, default is None and no preprocessing is performed.
    'activation_preprocess_type': None,
    # weight quantize type, default is 'channel_wise_abs_max'
    'weight_quantize_type': 'channel_wise_abs_max',
    # activation quantize type, default is 'moving_average_abs_max'
    'activation_quantize_type': 'moving_average_abs_max',
    # weight quantize bit num, default is 8
    'weight_bits': 8,
    # activation quantize bit num, default is 8
    'activation_bits': 8,
    # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
    'dtype': 'int8',
    # window size for 'range_abs_max' quantization. default is 10000
    'window_size': 10000,
    # The decay coefficient of moving average, default is 0.9
    'moving_rate': 0.9,
    # for dygraph quantization, layers of type in quantizable_layer_type will be quantized
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}


def sample_generator(loader):
    def __reader__():
        for indx, data in enumerate(loader):
            images = np.array(data[0])
            yield images

    return __reader__


def main(config, device, logger, vdl_writer):
    # init dist environment
    if config['Global']['distributed']:
        dist.init_parallel_env()

    global_config = config['Global']

    # build dataloader
    config['Train']['loader']['num_workers'] = 0
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        config['Eval']['loader']['num_workers'] = 0
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
    else:
        valid_dataloader = None

    paddle.enable_static()
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    if 'inference_model' in global_config.keys():  # , 'inference_model'):
        inference_model_dir = global_config['inference_model']
    else:
        inference_model_dir = os.path.dirname(global_config['pretrained_model'])
    if not (os.path.exists(os.path.join(inference_model_dir, "inference.pdmodel")) and
            os.path.exists(os.path.join(inference_model_dir, "inference.pdiparams"))):
        raise ValueError(
            "Please set inference model dir in Global.inference_model or Global.pretrained_model for post-quantization"
        )

    paddleslim.quant.quant_post_static(
        executor=exe,
        model_dir=inference_model_dir,
        model_filename='inference.pdmodel',
        params_filename='inference.pdiparams',
        quantize_model_path=global_config['save_inference_dir'],
        sample_generator=sample_generator(train_dataloader),
        save_model_filename='inference.pdmodel',
        save_params_filename='inference.pdiparams',
        batch_size=1,
        batch_nums=None)


if __name__ == '__main__':
    config, device, logger, vdl_writer = program.preprocess(is_train=True)
    main(config, device, logger, vdl_writer)
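The `sample_generator` helper above is what feeds calibration data to `quant_post_static`. A self-contained sketch of its behavior; the fake loader here is illustrative, not part of the script:

```python
import numpy as np

def sample_generator(loader):
    # same closure as in the script: yields the image batch as a numpy array
    def __reader__():
        for indx, data in enumerate(loader):
            yield np.array(data[0])
    return __reader__

fake_loader = [(np.zeros((1, 3, 32, 320), dtype='float32'), None)]
for images in sample_generator(fake_loader)():
    print(images.shape)  # (1, 3, 32, 320)
```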
@@ -9,11 +9,13 @@
### 1. Text detection algorithms
Text detection algorithms open-sourced by PaddleOCR:
- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) [2] (recommended by ppocr)
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[1]
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
- [x] DB([paper]( https://arxiv.org/abs/1911.08947)) (recommended by ppocr)
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
- [x] PSENet([paper](https://arxiv.org/abs/1903.12473v2))

On the ICDAR2015 public text detection dataset, the results are as follows:

|Model|Backbone|Precision|Recall|Hmean|Download link|
| --- | --- | --- | --- | --- | --- |
|EAST|ResNet50_vd|85.80%|86.71%|86.25%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar)|
@@ -21,6 +23,8 @@ Text detection algorithms open-sourced by PaddleOCR:
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)|
|PSE|MobileNetV3|82.20%|70.48%|75.89%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)|

On the Total-Text public text detection dataset, the results are as follows:

@@ -39,15 +43,15 @@ For the training and usage of PaddleOCR text detection algorithms, see the [text detection
### 2. Text recognition algorithms
Text recognition algorithms open-sourced by PaddleOCR (dynamic graph):
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7] (recommended by ppocr)
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5]
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717)) (recommended by ppocr)
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))
- [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2))
- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2))

Following the [DTRB][3](https://arxiv.org/abs/1904.01906) text recognition training and evaluation protocol, models are trained on the MJSynth and SynthText datasets and evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, and CUTE; the results are as follows:
Following the [DTRB](https://arxiv.org/abs/1904.01906) text recognition training and evaluation protocol, models are trained on the MJSynth and SynthText datasets and evaluated on IIIT, SVT, IC03, IC13, IC15, SVTP, and CUTE; the results are as follows:

|Model|Backbone|Avg Accuracy|Saved model name|Download link|
|---|---|---|---|---|
......
@@ -12,40 +12,27 @@
## Evaluation metrics

Notes:
- v1.0 is the DB+CRNN model without the optimization strategies; v1.1 is the PP-OCR model with multiple optimization strategies and the direction classifier; slim_v1.1 is the pruned or quantized model.
- The long side of the detection input image is 960.
- The timed stage is the complete pipeline from image input to result output, including image pre-processing and post-processing.
- The timed stage is the model prediction only, excluding image pre-processing and post-processing.
- `Intel Xeon 6148` is the server-side CPU model; Intel MKL-DNN acceleration is used in the tests.
- `Snapdragon 855` is the mobile-side processing platform.

Comparison of prediction model sizes and overall recognition accuracy across models
Comparison of prediction model size and overall recognition accuracy

| Model name | Whole-system<br>model size \(M\) | Detection model<br>size \(M\) | Direction classifier<br>model size \(M\) | Recognition model<br>size \(M\) | Overall<br>F\-score |
|:-:|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |

Comparison of prediction speed on a T4 GPU across models, in ms

| Model name | Overall | Detection | Direction classifier | Recognition |
|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
| PP-OCRv2 | 11\.6 | 3\.0 | 0\.9 | 8\.6 | 0\.5224 |
| PP-OCR mobile | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.503 |
| PP-OCR server | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.570 |

Comparison of prediction speed on CPU across models, in ms

| Model name | Overall | Detection | Direction classifier | Recognition |
|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |

Speed comparison of the prediction models on CPU and GPU, in ms

Comparison of the pruned/quantized models and the original models: model size, overall recognition accuracy, and prediction speed on SD 855

| Model name | CPU | T4 GPU |
|:-:|:-:|:-:|
| PP-OCRv2 | 330 | 111 |
| PP-OCR mobile | 356 | 116 |
| PP-OCR server | 1056 | 200 |

| Model name | Whole-system<br>model size \(M\) | Detection model<br>size \(M\) | Direction classifier<br>model size \(M\) | Recognition model<br>size \(M\) | Overall<br>F\-score | SD 855<br>\(ms\) |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |

More prediction metrics for the PP-OCR series models can be found in the [PP-OCR Benchmark](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/doc/doc_ch/benchmark.md)
@@ -39,7 +39,7 @@ PaddleOCR integrates knowledge distillation algorithms; specifically, the main
### 2.1 Walkthrough of the recognition config file

The config file is [ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml).
The config file is [ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml).

#### 2.1.1 Model structure
@@ -246,6 +246,39 @@ Metric:

For the concrete implementation of `DistillationMetric`, see [distillation_metric.py](../../ppocr/metrics/distillation_metric.py#L24).

#### 2.1.5 Fine-tuning the distilled model

There are two ways to fine-tune the recognition model obtained by distillation.

(1) Fine-tuning with knowledge distillation: this case is straightforward. Download the pre-trained model, then set its path and your own data path in [ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml) to fine-tune the model.

(2) Fine-tuning without knowledge distillation: in this case, the student model parameters first need to be extracted from the pre-trained model, as follows.

* First download and extract the pre-trained model.
```shell
# download the pre-trained model and extract it
wget https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar
tar -xf ch_PP-OCRv2_rec_train.tar
```

* Then extract the student model parameters with Python:
```python
import paddle
# load the pre-trained model
all_params = paddle.load("ch_PP-OCRv2_rec_train/best_accuracy.pdparams")
# inspect the keys of the weights
print(all_params.keys())
# extract the student model weights
s_params = {key[len("Student."):]: all_params[key] for key in all_params if "Student." in key}
# inspect the keys of the student weights
print(s_params.keys())
# save
paddle.save(s_params, "ch_PP-OCRv2_rec_train/student.pdparams")
```

After the conversion, use [ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml), set the pre-trained model path to the exported `student.pdparams` and your own data path, and fine-tune the model.

### 2.2 Walkthrough of the detection config file

* coming soon!
@@ -11,9 +11,10 @@ This tutorial lists the text detection algorithms and text recognition algorithm
### 1. Text Detection Algorithm
PaddleOCR open source text detection algorithms list:
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))[2]
- [x] DB([paper](https://arxiv.org/abs/1911.08947))[1]
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
- [x] EAST([paper](https://arxiv.org/abs/1704.03155))
- [x] DB([paper](https://arxiv.org/abs/1911.08947))
- [x] SAST([paper](https://arxiv.org/abs/1908.05498))
- [x] PSE([paper](https://arxiv.org/abs/1903.12473v2))
On the ICDAR2015 dataset, the text detection result is as follows:
@@ -24,6 +25,8 @@ On the ICDAR2015 dataset, the text detection result is as follows:
|DB|ResNet50_vd|86.41%|78.72%|82.38%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_db_v2.0_train.tar)|
|DB|MobileNetV3|77.29%|73.08%|75.12%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar)|
|SAST|ResNet50_vd|91.39%|83.77%|87.42%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_sast_icdar15_v2.0_train.tar)|
|PSE|ResNet50_vd|85.81%|79.53%|82.55%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_r50_vd_pse_v2.0_train.tar)|
|PSE|MobileNetV3|82.20%|70.48%|75.89%|[Download link](https://paddleocr.bj.bcebos.com/dygraph_v2.1/en_det/det_mv3_pse_v2.0_train.tar)|
On Total-Text dataset, the text detection result is as follows:
@@ -41,11 +44,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
### 2. Text Recognition Algorithm
PaddleOCR open-source text recognition algorithms list:
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11]
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12]
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))[5]
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717))
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))
- [x] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))
- [x] RARE([paper](https://arxiv.org/abs/1603.03915v1))
- [x] SRN([paper](https://arxiv.org/abs/2003.12294))
- [x] NRTR([paper](https://arxiv.org/abs/1806.00926v2))
- [x] SAR([paper](https://arxiv.org/abs/1811.00751v2))
......
@@ -13,7 +13,6 @@ We collected 300 images for different real application scenarios to evaluate the
## MEASUREMENT
Explanation:
- v1.0 indicates DB+CRNN models without the strategies. v1.1 indicates the PP-OCR models with the strategies and the direction classify. slim_v1.1 indicates the PP-OCR models with prunner or quantization.
- The long size of the input for the text detector is 960.
@@ -27,30 +26,16 @@ Compares the model size and F-score:
| Model Name | Model Size <br> of the <br> Whole System\(M\) | Model Size <br>of the Text <br> Detector\(M\) | Model Size <br> of the Direction <br> Classifier\(M\) | Model Size<br>of the Text <br> Recognizer \(M\) | F\-score |
|:-:|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 |
| ch\_ppocr\_server\_v1\.1 | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.5414 |
| ch\_ppocr\_mobile\_v1\.0 | 8\.6 | 4\.1 | \- | 4\.5 | 0\.393 |
| ch\_ppocr\_server\_v1\.0 | 203\.8 | 98\.5 | \- | 105\.3 | 0\.4436 |
| PP-OCRv2 | 11\.6 | 3\.0 | 0\.9 | 8\.6 | 0\.5224 |
| PP-OCR mobile | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.503 |
| PP-OCR server | 155\.1 | 47\.2 | 0\.9 | 107 | 0\.570 |
Comparison of inference time on a T4 GPU (ms):
Comparison of inference time on CPU and T4 GPU (ms):
| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 137 | 35 | 24 | 78 |
| ch\_ppocr\_server\_v1\.1 | 204 | 39 | 25 | 140 |
| ch\_ppocr\_mobile\_v1\.0 | 117 | 41 | \- | 76 |
| ch\_ppocr\_server\_v1\.0 | 199 | 52 | \- | 147 |
| Model Name | CPU | T4 GPU |
|:-:|:-:|:-:|
| PP-OCRv2 | 330 | 111 |
| PP-OCR mobile | 356 | 116|
| PP-OCR server | 1056 | 200 |
Comparison of inference time on CPU (ms):
| Model Name | Overall | Text Detector | Direction Classifier | Text Recognizer |
|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 421 | 164 | 51 | 206 |
| ch\_ppocr\_mobile\_v1\.0 | 398 | 219 | \- | 179 |
Comparison of model size, F-score, and inference time on SD 855 between the slim models and the original models:
| Model Name | Model Size <br> of the <br> Whole System\(M\) | Model Size <br>of the Text <br> Detector\(M\) | Model Size <br> of the Direction <br> Classifier\(M\) | Model Size<br>of the Text <br> Recognizer \(M\) | F\-score | SD 855<br>\(ms\) |
|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
| ch\_ppocr\_mobile\_v1\.1 | 8\.1 | 2\.6 | 0\.9 | 4\.6 | 0\.5193 | 306 |
| ch\_ppocr\_mobile\_slim\_v1\.1 | 3\.5 | 1\.4 | 0\.5 | 1\.6 | 0\.521 | 268 |
More metrics for the PP-OCR series models can be found in the [PP-OCR Benchmark](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/doc/doc_en/benchmark_en.md)
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.vision.transforms import ColorJitter as pp_ColorJitter

__all__ = ['ColorJitter']


class ColorJitter(object):
    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, **kwargs):
        self.aug = pp_ColorJitter(brightness, contrast, saturation, hue)

    def __call__(self, data):
        image = data['image']
        image = self.aug(image)
        data['image'] = image
        return data
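Usage sketch for the wrapper above, assuming it is importable as `ppocr.data.imaug.ColorJitter` (see the `__init__.py` diff that follows) and that `paddle.vision.transforms.ColorJitter` accepts an HWC uint8 numpy image:

```python
import numpy as np
from ppocr.data.imaug import ColorJitter

# brightness 0.12549... in the PSE configs above is 32/255
op = ColorJitter(brightness=32.0 / 255, saturation=0.5)
sample = {'image': np.random.randint(0, 255, (640, 640, 3), dtype=np.uint8)}
print(op(sample)['image'].shape)  # (640, 640, 3)
```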
@@ -19,11 +19,13 @@ from __future__ import unicode_literals
from .iaa_augment import IaaAugment
from .make_border_map import MakeBorderMap
from .make_shrink_map import MakeShrinkMap
from .random_crop_data import EastRandomCropData, PSERandomCrop
from .random_crop_data import EastRandomCropData, RandomCropImgMask
from .make_pse_gt import MakePseGt
from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg, SRNRecResizeImg, NRTRRecResizeImg, SARRecResizeImg
from .randaugment import RandAugment
from .copy_paste import CopyPaste
from .ColorJitter import ColorJitter
from .operators import *
from .label_ops import *
......
@@ -174,21 +174,26 @@ class NRTRLabelEncode(BaseRecLabelEncode):
        super(NRTRLabelEncode,
              self).__init__(max_text_length, character_dict_path,
                             character_type, use_space_char)

    def __call__(self, data):
        text = data['label']
        text = self.encode(text)
        if text is None:
            return None
        if len(text) >= self.max_text_len - 1:
            return None
        data['length'] = np.array(len(text))
        text.insert(0, 2)
        text.append(3)
        text = text + [0] * (self.max_text_len - len(text))
        data['label'] = np.array(text)
        return data

    def add_special_char(self, dict_character):
        dict_character = ['blank','<unk>','<s>','</s>'] + dict_character
        dict_character = ['blank', '<unk>', '<s>', '</s>'] + dict_character
        return dict_character


class CTCLabelEncode(BaseRecLabelEncode):
    """ Convert between text-label and text-index """
@@ -588,7 +593,7 @@ class SARLabelEncode(BaseRecLabelEncode):
        data['length'] = np.array(len(text))
        target = [self.start_idx] + text + [self.end_idx]
        padded_text = [self.padding_idx for _ in range(self.max_text_len)]

        padded_text[:len(target)] = target
        data['label'] = np.array(padded_text)
        return data
......
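The `NRTRLabelEncode.__call__` above builds labels as `<s> … </s>` plus blank padding: after `add_special_char`, index 2 is `<s>`, 3 is `</s>`, and 0 is `blank`. A toy sketch with a hypothetical three-character dictionary (`'a' -> 4`, and so on):

```python
import numpy as np

max_text_len = 25
text = [4, 5, 6]      # hypothetical encoding of "abc"
text.insert(0, 2)     # prepend <s>
text.append(3)        # append </s>
text += [0] * (max_text_len - len(text))  # pad with blank
print(np.array(text))  # [2 4 5 6 3 0 0 ... 0], always length 25
```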
# -*- coding:utf-8 -*-

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import cv2
import numpy as np
import pyclipper
from shapely.geometry import Polygon

__all__ = ['MakePseGt']


class MakePseGt(object):
    r'''
    Making binary mask from detection data with ICDAR format.
    Typically following the process of class `MakeICDARData`.
    '''

    def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs):
        self.kernel_num = kernel_num
        self.min_shrink_ratio = min_shrink_ratio
        self.size = size

    def __call__(self, data):
        image = data['image']
        text_polys = data['polys']
        ignore_tags = data['ignore_tags']

        h, w, _ = image.shape
        short_edge = min(h, w)
        if short_edge < self.size:
            # keep short_size >= self.size
            scale = self.size / short_edge
            image = cv2.resize(image, dsize=None, fx=scale, fy=scale)
            text_polys *= scale

        gt_kernels = []
        for i in range(1, self.kernel_num + 1):
            # s1->sn, from big to small
            rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i
            text_kernel, ignore_tags = self.generate_kernel(
                image.shape[0:2], rate, text_polys, ignore_tags)
            gt_kernels.append(text_kernel)

        training_mask = np.ones(image.shape[0:2], dtype='uint8')
        for i in range(text_polys.shape[0]):
            if ignore_tags[i]:
                cv2.fillPoly(training_mask,
                             text_polys[i].astype(np.int32)[np.newaxis, :, :], 0)

        gt_kernels = np.array(gt_kernels)
        gt_kernels[gt_kernels > 0] = 1

        data['image'] = image
        data['polys'] = text_polys
        data['gt_kernels'] = gt_kernels[0:]
        data['gt_text'] = gt_kernels[0]
        data['mask'] = training_mask.astype('float32')
        return data

    def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None):
        h, w = img_size
        text_kernel = np.zeros((h, w), dtype=np.float32)
        for i, poly in enumerate(text_polys):
            polygon = Polygon(poly)
            distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (
                polygon.length + 1e-6)
            subject = [tuple(l) for l in poly]
            pco = pyclipper.PyclipperOffset()
            pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
            shrinked = np.array(pco.Execute(-distance))

            if len(shrinked) == 0 or shrinked.size == 0:
                if ignore_tags is not None:
                    ignore_tags[i] = True
                continue
            try:
                shrinked = np.array(shrinked[0]).reshape(-1, 2)
            except:
                if ignore_tags is not None:
                    ignore_tags[i] = True
                continue
            cv2.fillPoly(text_kernel, [shrinked.astype(np.int32)], i + 1)
        return text_kernel, ignore_tags
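A usage sketch for `MakePseGt`, assuming the module is importable as wired up in the `imaug/__init__.py` diff below; a single axis-aligned box stands in for a text polygon:

```python
import numpy as np
from ppocr.data.imaug.make_pse_gt import MakePseGt

op = MakePseGt(kernel_num=7, size=640, min_shrink_ratio=0.4)
data = {
    'image': np.zeros((640, 640, 3), dtype=np.uint8),
    'polys': np.array([[[100., 100.], [300., 100.],
                        [300., 200.], [100., 200.]]]),
    'ignore_tags': [False],
}
out = op(data)
print(out['gt_kernels'].shape, out['gt_text'].shape, out['mask'].shape)
# (7, 640, 640) (640, 640) (640, 640)
```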
@@ -164,47 +164,55 @@ class EastRandomCropData(object):
        return data


class PSERandomCrop(object):
    def __init__(self, size, **kwargs):
class RandomCropImgMask(object):
    def __init__(self, size, main_key, crop_keys, p=3 / 8, **kwargs):
        self.size = size
        self.main_key = main_key
        self.crop_keys = crop_keys
        self.p = p

    def __call__(self, data):
        imgs = data['imgs']
        image = data['image']

        h, w = imgs[0].shape[0:2]
        h, w = image.shape[0:2]
        th, tw = self.size
        if w == tw and h == th:
            return imgs
            return data

        # the label contains text instances; crop probabilistically,
        # controlled via threshold_label_map
        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
            # top-left corner of the text instances
            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
        mask = data[self.main_key]
        if np.max(mask) > 0 and random.random() > self.p:
            # make sure to crop the text region
            tl = np.min(np.where(mask > 0), axis=1) - (th, tw)
            tl[tl < 0] = 0
            # bottom-right corner of the text instances
            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
            br = np.max(np.where(mask > 0), axis=1) - (th, tw)
            br[br < 0] = 0
            # make sure there is enough room to crop when the bottom-right corner is picked
            br[0] = min(br[0], h - th)
            br[1] = min(br[1], w - tw)
            for _ in range(50000):
                i = random.randint(tl[0], br[0])
                j = random.randint(tl[1], br[1])
                # make sure shrink_label_map contains text
                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
                    continue
                else:
                    break
            i = random.randint(tl[0], br[0]) if tl[0] < br[0] else 0
            j = random.randint(tl[1], br[1]) if tl[1] < br[1] else 0
        else:
            i = random.randint(0, h - th)
            j = random.randint(0, w - tw)
            i = random.randint(0, h - th) if h - th > 0 else 0
            j = random.randint(0, w - tw) if w - tw > 0 else 0

        # return i, j, th, tw
        for idx in range(len(imgs)):
            if len(imgs[idx].shape) == 3:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
            else:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
        data['imgs'] = imgs
        for k in data:
            if k in self.crop_keys:
                if len(data[k].shape) == 3:
                    if np.argmin(data[k].shape) == 0:
                        img = data[k][:, i:i + th, j:j + tw]
                        if img.shape[1] != img.shape[2]:
                            a = 1
                    elif np.argmin(data[k].shape) == 2:
                        img = data[k][i:i + th, j:j + tw, :]
                        if img.shape[1] != img.shape[0]:
                            a = 1
                    else:
                        img = data[k]
                else:
                    img = data[k][i:i + th, j:j + tw]
                    if img.shape[0] != img.shape[1]:
                        a = 1
                data[k] = img
        return data
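A sketch of the rewritten crop: `main_key` decides which map must contain foreground, and every entry in `crop_keys` is cut with the same window. The shapes here are illustrative:

```python
import numpy as np
from ppocr.data.imaug.random_crop_data import RandomCropImgMask

op = RandomCropImgMask(size=[640, 640], main_key='gt_text',
                       crop_keys=['image', 'gt_text', 'gt_kernels', 'mask'])
data = {
    'image': np.zeros((800, 800, 3), dtype=np.float32),
    'gt_text': np.zeros((800, 800), dtype=np.float32),
    'gt_kernels': np.zeros((7, 800, 800), dtype=np.float32),
    'mask': np.ones((800, 800), dtype=np.float32),
}
data['gt_text'][350:450, 350:450] = 1.0  # a fake text region
out = op(data)
print(out['image'].shape, out['gt_kernels'].shape)  # (640, 640, 3) (7, 640, 640)
```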
@@ -44,12 +44,33 @@ class ClsResizeImg(object):

class NRTRRecResizeImg(object):
    def __init__(self, image_shape, resize_type, **kwargs):
    def __init__(self, image_shape, resize_type, padding=False, **kwargs):
        self.image_shape = image_shape
        self.resize_type = resize_type
        self.padding = padding

    def __call__(self, data):
        img = data['image']
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        image_shape = self.image_shape
        if self.padding:
            imgC, imgH, imgW = image_shape
            # todo: change to 0 and modified image shape
            h = img.shape[0]
            w = img.shape[1]
            ratio = w / float(h)
            if math.ceil(imgH * ratio) > imgW:
                resized_w = imgW
            else:
                resized_w = int(math.ceil(imgH * ratio))
            resized_image = cv2.resize(img, (resized_w, imgH))
            norm_img = np.expand_dims(resized_image, -1)
            norm_img = norm_img.transpose((2, 0, 1))
            resized_image = norm_img.astype(np.float32) / 128. - 1.
            padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
            padding_im[:, :, 0:resized_w] = resized_image
            data['image'] = padding_im
            return data
        if self.resize_type == 'PIL':
            image_pil = Image.fromarray(np.uint8(img))
            img = image_pil.resize(self.image_shape, Image.ANTIALIAS)
@@ -109,7 +130,8 @@ class SARRecResizeImg(object):
    def __call__(self, data):
        img = data['image']
        norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(img, self.image_shape, self.width_downsample_ratio)
        norm_img, resize_shape, pad_shape, valid_ratio = resize_norm_img_sar(
            img, self.image_shape, self.width_downsample_ratio)
        data['image'] = norm_img
        data['resized_shape'] = resize_shape
        data['pad_shape'] = pad_shape
......
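The padding branch added to `NRTRRecResizeImg` keeps the aspect ratio, caps the width at `imgW`, and zero-pads the remainder. A sketch of just the width rule (the 32x100 target shape here is illustrative):

```python
import math

def resized_width(h, w, imgH=32, imgW=100):
    ratio = w / float(h)
    if math.ceil(imgH * ratio) > imgW:
        return imgW            # too wide: clip, the rest is padding
    return int(math.ceil(imgH * ratio))

print(resized_width(48, 120))  # 80 -> fits, scaled proportionally
print(resized_width(32, 400))  # 100 -> clipped to imgW
```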
@@ -15,7 +15,6 @@ import numpy as np
import os
import random
from paddle.io import Dataset
from .imaug import transform, create_operators
......
@@ -20,6 +20,7 @@ import paddle.nn as nn
from .det_db_loss import DBLoss
from .det_east_loss import EASTLoss
from .det_sast_loss import SASTLoss
from .det_pse_loss import PSELoss

# rec loss
from .rec_ctc_loss import CTCLoss
@@ -42,10 +43,12 @@ from .combined_loss import CombinedLoss
# table loss
from .table_att_loss import TableAttentionLoss


def build_loss(config):
    support_dict = [
        'DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss', 'AttentionLoss',
        'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss', 'TableAttentionLoss', 'SARLoss'
        'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss',
        'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss', 'NRTRLoss',
        'TableAttentionLoss', 'SARLoss'
    ]

    config = copy.deepcopy(config)
......
@@ -56,31 +56,34 @@ class CELoss(nn.Layer):

class KLJSLoss(object):
    def __init__(self, mode='kl'):
        assert mode in ['kl', 'js', 'KL', 'JS'], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
        assert mode in ['kl', 'js', 'KL', 'JS'
                        ], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
        self.mode = mode

    def __call__(self, p1, p2, reduction="mean"):
        loss = paddle.multiply(p2, paddle.log( (p2+1e-5)/(p1+1e-5) + 1e-5))
        loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))

        if self.mode.lower() == "js":
            loss += paddle.multiply(p1, paddle.log((p1+1e-5)/(p2+1e-5) + 1e-5))
            loss += paddle.multiply(
                p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
            loss *= 0.5
        if reduction == "mean":
            loss = paddle.mean(loss, axis=[1,2])
        elif reduction=="none" or reduction is None:
            return loss
            loss = paddle.mean(loss, axis=[1, 2])
        elif reduction == "none" or reduction is None:
            return loss
        else:
            loss = paddle.sum(loss, axis=[1,2])
            loss = paddle.sum(loss, axis=[1, 2])

        return loss
        return loss


class DMLLoss(nn.Layer):
    """
    DMLLoss
    """

    def __init__(self, act=None):
    def __init__(self, act=None, use_log=False):
        super().__init__()
        if act is not None:
            assert act in ["softmax", "sigmoid"]
@@ -90,20 +93,24 @@ class DMLLoss(nn.Layer):
            self.act = nn.Sigmoid()
        else:
            self.act = None

        self.use_log = use_log
        self.jskl_loss = KLJSLoss(mode="js")

    def forward(self, out1, out2):
        if self.act is not None:
            out1 = self.act(out1)
            out2 = self.act(out2)
        if len(out1.shape) < 2:
        if self.use_log:
            # for recognition distillation, log is needed for feature map
            log_out1 = paddle.log(out1)
            log_out2 = paddle.log(out2)
            loss = (F.kl_div(
                log_out1, out2, reduction='batchmean') + F.kl_div(
                    log_out2, out1, reduction='batchmean')) / 2.0
        else:
            # for detection distillation log is not needed
            loss = self.jskl_loss(out1, out2)
        return loss
......
@@ -49,11 +49,15 @@ class CombinedLoss(nn.Layer):
            loss = loss_func(input, batch, **kargs)
            if isinstance(loss, paddle.Tensor):
                loss = {"loss_{}_{}".format(str(loss), idx): loss}

            weight = self.loss_weight[idx]

            for key in loss.keys():
                if key == "loss":
                    loss_all += loss[key] * weight
                else:
                    loss_dict["{}_{}".format(key, idx)] = loss[key]
            loss = {key: loss[key] * weight for key in loss}
            if "loss" in loss:
                loss_all += loss["loss"]
            else:
                loss_all += paddle.add_n(list(loss.values()))
            loss_dict.update(loss)
        loss_dict["loss"] = loss_all
        return loss_dict
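A toy sketch of the reworked weighting in `CombinedLoss`: every entry of a sub-loss dict is scaled by its configured weight, and either the reported `"loss"` entry or the sum of all entries feeds the total. Plain floats stand in for paddle tensors:

```python
def combine(loss_dicts, weights):
    loss_all, merged = 0.0, {}
    for loss, weight in zip(loss_dicts, weights):
        loss = {key: value * weight for key, value in loss.items()}
        loss_all += loss["loss"] if "loss" in loss else sum(loss.values())
        merged.update(loss)
    merged["loss"] = loss_all
    return merged

print(combine([{"ctc": 2.0}, {"dml": 0.5}], weights=[1.0, 1.0]))
# {'ctc': 2.0, 'dml': 0.5, 'loss': 2.5}
```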
@@ -75,12 +75,6 @@ class BalanceLoss(nn.Layer):
        mask (variable): masked maps.
        return: (variable) balanced loss
        """
        # if self.main_loss_type in ['DiceLoss']:
        #     # For the loss that returns to scalar value, perform ohem on the mask
        #     mask = ohem_batch(pred, gt, mask, self.negative_ratio)
        #     loss = self.loss(pred, gt, mask)
        #     return loss
        positive = gt * mask
        negative = (1 - gt) * mask
@@ -153,53 +147,4 @@ class BCELoss(nn.Layer):
    def forward(self, input, label, mask=None, weight=None, name=None):
        loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
        return loss


def ohem_single(score, gt_text, training_mask, ohem_ratio):
    pos_num = (int)(np.sum(gt_text > 0.5)) - (
        int)(np.sum((gt_text > 0.5) & (training_mask <= 0.5)))

    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0 # may be not good
        selected_mask = training_mask
        selected_mask = selected_mask.reshape(
            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
        return selected_mask

    neg_num = (int)(np.sum(gt_text <= 0.5))
    neg_num = (int)(min(pos_num * ohem_ratio, neg_num))

    if neg_num == 0:
        selected_mask = training_mask
        selected_mask = selected_mask.reshape(
            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
        return selected_mask

    neg_score = score[gt_text <= 0.5]
    # sort negative-sample scores from high to low
    neg_score_sorted = np.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]
    # keep the masks of high-scoring negatives plus all positives
    selected_mask = ((score >= threshold) |
                     (gt_text > 0.5)) & (training_mask > 0.5)
    selected_mask = selected_mask.reshape(
        1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
    return selected_mask


def ohem_batch(scores, gt_texts, training_masks, ohem_ratio):
    scores = scores.numpy()
    gt_texts = gt_texts.numpy()
    training_masks = training_masks.numpy()

    selected_masks = []
    for i in range(scores.shape[0]):
        selected_masks.append(
            ohem_single(scores[i, :, :], gt_texts[i, :, :], training_masks[
                i, :, :], ohem_ratio))

    selected_masks = np.concatenate(selected_masks, 0)
    selected_masks = paddle.to_tensor(selected_masks)

    return selected_masks
        return loss
\ No newline at end of file
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
from paddle import nn
from paddle.nn import functional as F
import numpy as np
from ppocr.utils.iou import iou


class PSELoss(nn.Layer):
    def __init__(self,
                 alpha,
                 ohem_ratio=3,
                 kernel_sample_mask='pred',
                 reduction='sum',
                 eps=1e-6,
                 **kwargs):
        """Implement PSE Loss.
        """
        super(PSELoss, self).__init__()
        assert reduction in ['sum', 'mean', 'none']
        self.alpha = alpha
        self.ohem_ratio = ohem_ratio
        self.kernel_sample_mask = kernel_sample_mask
        self.reduction = reduction
        self.eps = eps

    def forward(self, outputs, labels):
        predicts = outputs['maps']
        predicts = F.interpolate(predicts, scale_factor=4)

        texts = predicts[:, 0, :, :]
        kernels = predicts[:, 1:, :, :]
        gt_texts, gt_kernels, training_masks = labels[1:]

        # text loss
        selected_masks = self.ohem_batch(texts, gt_texts, training_masks)

        loss_text = self.dice_loss(texts, gt_texts, selected_masks)
        iou_text = iou((texts > 0).astype('int64'),
                       gt_texts,
                       training_masks,
                       reduce=False)
        losses = dict(loss_text=loss_text, iou_text=iou_text)

        # kernel loss
        loss_kernels = []
        if self.kernel_sample_mask == 'gt':
            selected_masks = gt_texts * training_masks
        elif self.kernel_sample_mask == 'pred':
            selected_masks = (
                F.sigmoid(texts) > 0.5).astype('float32') * training_masks

        for i in range(kernels.shape[1]):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i,
                                           selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
        iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'),
                         gt_kernels[:, -1, :, :],
                         training_masks * gt_texts,
                         reduce=False)
        losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))

        loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
        losses['loss'] = loss
        if self.reduction == 'sum':
            losses = {x: paddle.sum(v) for x, v in losses.items()}
        elif self.reduction == 'mean':
            losses = {x: paddle.mean(v) for x, v in losses.items()}
        return losses

    def dice_loss(self, input, target, mask):
        input = F.sigmoid(input)

        input = input.reshape([input.shape[0], -1])
        target = target.reshape([target.shape[0], -1])
        mask = mask.reshape([mask.shape[0], -1])

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, 1)
        b = paddle.sum(input * input, 1) + self.eps
        c = paddle.sum(target * target, 1) + self.eps
        d = (2 * a) / (b + c)
        return 1 - d

    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
            paddle.sum(
                paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
                .astype('float32')))

        if pos_num == 0:
            selected_mask = training_mask
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                    'float32')
            return selected_mask

        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))

        if neg_num == 0:
            selected_mask = training_mask
            # reshape, matching the pos_num == 0 branch above
            selected_mask = selected_mask.reshape(
                [1, selected_mask.shape[0],
                 selected_mask.shape[1]]).astype('float32')
            return selected_mask

        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), (gt_text > 0.5)),
            (training_mask > 0.5))
        selected_mask = selected_mask.reshape(
            [1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
                'float32')
        return selected_mask

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        selected_masks = []
        for i in range(scores.shape[0]):
            selected_masks.append(
                self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
                                 training_masks[i, :, :], ohem_ratio))

        selected_masks = paddle.concat(selected_masks, 0).astype('float32')
        return selected_masks
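A usage sketch for `PSELoss`: head maps are upsampled x4 inside the loss, so the labels must match four times the map resolution. The synthetic tensors here are illustrative only:

```python
import paddle
from ppocr.losses.det_pse_loss import PSELoss

loss_fn = PSELoss(alpha=0.7, ohem_ratio=3, kernel_sample_mask='pred',
                  reduction='none')
n, h, w = 1, 40, 40
outputs = {'maps': paddle.randn([n, 7, h, w])}
gt_texts = paddle.zeros([n, 4 * h, 4 * w])
gt_texts[:, :64, :64] = 1.0                      # one fake text region
gt_kernels = paddle.tile(gt_texts.unsqueeze(1), [1, 6, 1, 1])
training_masks = paddle.ones([n, 4 * h, 4 * w])
losses = loss_fn(outputs, [None, gt_texts, gt_kernels, training_masks])
print(losses['loss'].shape)                      # [1] with reduction='none'
```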
@@ -44,20 +44,22 @@ class DistillationDMLLoss(DMLLoss):
    def __init__(self,
                 model_name_pairs=[],
                 act=None,
                 use_log=False,
                 key=None,
                 maps_name=None,
                 name="dml"):
        super().__init__(act=act)
        super().__init__(act=act, use_log=use_log)
        assert isinstance(model_name_pairs, list)
        self.key = key
        self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
        self.name = name
        self.maps_name = self._check_maps_name(maps_name)

    def _check_model_name_pairs(self, model_name_pairs):
        if not isinstance(model_name_pairs, list):
            return []
        elif isinstance(model_name_pairs[0], list) and isinstance(model_name_pairs[0][0], str):
        elif isinstance(model_name_pairs[0], list) and isinstance(
                model_name_pairs[0][0], str):
            return model_name_pairs
        else:
            return [model_name_pairs]
@@ -112,9 +114,9 @@ class DistillationDMLLoss(DMLLoss):
                        loss_dict["{}_{}_{}_{}_{}".format(key, pair[
                            0], pair[1], map_name, idx)] = loss[key]
                else:
                    loss_dict["{}_{}_{}".format(self.name, self.maps_name[_c],
                                                idx)] = loss
                    loss_dict["{}_{}_{}".format(self.name, self.maps_name[
                        _c], idx)] = loss

        loss_dict = _sum_loss(loss_dict)
        return loss_dict
......
@@ -169,21 +169,10 @@ class DetectionIoUEvaluator(object):
        numGlobalCareDet += numDetCare

        perSampleMetrics = {
            'precision': precision,
            'recall': recall,
            'hmean': hmean,
            'pairs': pairs,
            'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
            'gtPolPoints': gtPolPoints,
            'detPolPoints': detPolPoints,
            'gtCare': numGtCare,
            'detCare': numDetCare,
            'gtDontCare': gtDontCarePolsNum,
            'detDontCare': detDontCarePolsNum,
            'detMatched': detMatched,
            'evaluationLog': evaluationLog
        }

        return perSampleMetrics

    def combine_results(self, results):
......
@@ -13,6 +13,7 @@
# limitations under the License.

from paddle import nn
import paddle


class MTB(nn.Layer):
@@ -40,7 +41,8 @@ class MTB(nn.Layer):
        x = self.block(images)
        if self.cnn_num == 2:
            # (b, w, h, c)
            x = x.transpose([0, 3, 2, 1])
            x_shape = x.shape
            x = x.reshape([x_shape[0], x_shape[1], x_shape[2] * x_shape[3]])
            x = paddle.transpose(x, [0, 3, 2, 1])
            x_shape = paddle.shape(x)
            x = paddle.reshape(
                x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]])
        return x
@@ -20,6 +20,7 @@ def build_head(config):
    from .det_db_head import DBHead
    from .det_east_head import EASTHead
    from .det_sast_head import SASTHead
    from .det_pse_head import PSEHead
    from .e2e_pg_head import PGHead

    # rec head
@@ -32,8 +33,9 @@ def build_head(config):
    # cls head
    from .cls_head import ClsHead
    support_dict = [
        'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead', 'SARHead'
        'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead',
        'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
        'TableAttentionHead', 'SARHead'
    ]

    #table head
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import nn


class PSEHead(nn.Layer):
    def __init__(self, in_channels, hidden_dim=256, out_channels=7, **kwargs):
        super(PSEHead, self).__init__()
        self.conv1 = nn.Conv2D(
            in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2D(hidden_dim)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2D(
            hidden_dim, out_channels, kernel_size=1, stride=1, padding=0)

    def forward(self, x, **kwargs):
        out = self.conv1(x)
        out = self.relu1(self.bn1(out))
        out = self.conv2(out)
        return {'maps': out}
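A shape-check sketch for the head above, using the MobileNetV3 config values from earlier in this diff (FPN `out_channels: 96`, so `in_channels=96`; a 640x640 input is assumed to give stride-4 features of 160x160):

```python
import paddle
from ppocr.modeling.heads.det_pse_head import PSEHead

head = PSEHead(in_channels=96, hidden_dim=96, out_channels=7)
feat = paddle.randn([1, 96, 160, 160])  # assumed FPN output for a 640x640 image
print(head(feat)['maps'].shape)         # [1, 7, 160, 160]
```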
......@@ -71,8 +71,6 @@ class MultiheadAttention(nn.Layer):
value,
key_padding_mask=None,
incremental_state=None,
need_weights=True,
static_kv=False,
attn_mask=None):
"""
Inputs of forward function
......@@ -88,46 +86,42 @@ class MultiheadAttention(nn.Layer):
attn_output: [target length, batch size, embed dim]
attn_output_weights: [batch size, target length, sequence length]
"""
tgt_len, bsz, embed_dim = query.shape
assert embed_dim == self.embed_dim
assert list(query.shape) == [tgt_len, bsz, embed_dim]
assert key.shape == value.shape
q_shape = paddle.shape(query)
src_shape = paddle.shape(key)
q = self._in_proj_q(query)
k = self._in_proj_k(key)
v = self._in_proj_v(value)
q *= self.scaling
q = q.reshape([tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
k = k.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
v = v.reshape([-1, bsz * self.num_heads, self.head_dim]).transpose(
[1, 0, 2])
src_len = k.shape[1]
q = paddle.transpose(
paddle.reshape(
q, [q_shape[0], q_shape[1], self.num_heads, self.head_dim]),
[1, 2, 0, 3])
k = paddle.transpose(
paddle.reshape(
k, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
[1, 2, 0, 3])
v = paddle.transpose(
paddle.reshape(
v, [src_shape[0], q_shape[1], self.num_heads, self.head_dim]),
[1, 2, 0, 3])
if key_padding_mask is not None:
assert key_padding_mask.shape[0] == bsz
assert key_padding_mask.shape[1] == src_len
attn_output_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
assert list(attn_output_weights.
shape) == [bsz * self.num_heads, tgt_len, src_len]
assert key_padding_mask.shape[0] == q_shape[1]
assert key_padding_mask.shape[1] == src_shape[0]
attn_output_weights = paddle.matmul(q,
paddle.transpose(k, [0, 1, 3, 2]))
if attn_mask is not None:
attn_mask = attn_mask.unsqueeze(0)
attn_mask = paddle.unsqueeze(paddle.unsqueeze(attn_mask, 0), 0)
attn_output_weights += attn_mask
if key_padding_mask is not None:
attn_output_weights = attn_output_weights.reshape(
[bsz, self.num_heads, tgt_len, src_len])
key = key_padding_mask.unsqueeze(1).unsqueeze(2).astype('float32')
y = paddle.full(shape=key.shape, dtype='float32', fill_value='-inf')
attn_output_weights = paddle.reshape(
attn_output_weights,
[q_shape[1], self.num_heads, q_shape[0], src_shape[0]])
key = paddle.unsqueeze(paddle.unsqueeze(key_padding_mask, 1), 2)
key = paddle.cast(key, 'float32')
y = paddle.full(
shape=paddle.shape(key), dtype='float32', fill_value='-inf')
y = paddle.where(key == 0., key, y)
attn_output_weights += y
attn_output_weights = attn_output_weights.reshape(
[bsz * self.num_heads, tgt_len, src_len])
attn_output_weights = F.softmax(
attn_output_weights.astype('float32'),
axis=-1,
......@@ -136,43 +130,34 @@ class MultiheadAttention(nn.Layer):
attn_output_weights = F.dropout(
attn_output_weights, p=self.dropout, training=self.training)
attn_output = paddle.bmm(attn_output_weights, v)
assert list(attn_output.
shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
attn_output = attn_output.transpose([1, 0, 2]).reshape(
[tgt_len, bsz, embed_dim])
attn_output = paddle.matmul(attn_output_weights, v)
attn_output = paddle.reshape(
paddle.transpose(attn_output, [2, 0, 1, 3]),
[q_shape[0], q_shape[1], self.embed_dim])
attn_output = self.out_proj(attn_output)
if need_weights:
# average attention weights over heads
attn_output_weights = attn_output_weights.reshape(
[bsz, self.num_heads, tgt_len, src_len])
attn_output_weights = attn_output_weights.sum(
axis=1) / self.num_heads
else:
attn_output_weights = None
return attn_output, attn_output_weights
return attn_output
def _in_proj_q(self, query):
query = query.transpose([1, 2, 0])
query = paddle.transpose(query, [1, 2, 0])
query = paddle.unsqueeze(query, axis=2)
res = self.conv1(query)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
res = paddle.transpose(res, [2, 0, 1])
return res
def _in_proj_k(self, key):
key = key.transpose([1, 2, 0])
key = paddle.transpose(key, [1, 2, 0])
key = paddle.unsqueeze(key, axis=2)
res = self.conv2(key)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
res = paddle.transpose(res, [2, 0, 1])
return res
def _in_proj_v(self, value):
value = value.transpose([1, 2, 0])
value = paddle.transpose(value, [1, 2, 0])
value = paddle.unsqueeze(value, axis=2)
res = self.conv3(value)
res = paddle.squeeze(res, axis=2)
res = res.transpose([2, 0, 1])
res = paddle.transpose(res, [2, 0, 1])
return res
......@@ -61,12 +61,12 @@ class Transformer(nn.Layer):
custom_decoder=None,
in_channels=0,
out_channels=0,
dst_vocab_size=99,
scale_embedding=True):
super(Transformer, self).__init__()
self.out_channels = out_channels + 1
self.embedding = Embeddings(
d_model=d_model,
vocab=dst_vocab_size,
vocab=self.out_channels,
padding_idx=0,
scale_embedding=scale_embedding)
self.positional_encoding = PositionalEncoding(
......@@ -96,9 +96,10 @@ class Transformer(nn.Layer):
self.beam_size = beam_size
self.d_model = d_model
self.nhead = nhead
self.tgt_word_prj = nn.Linear(d_model, dst_vocab_size, bias_attr=False)
self.tgt_word_prj = nn.Linear(
d_model, self.out_channels, bias_attr=False)
w0 = np.random.normal(0.0, d_model**-0.5,
(d_model, dst_vocab_size)).astype(np.float32)
(d_model, self.out_channels)).astype(np.float32)
self.tgt_word_prj.weight.set_value(w0)
self.apply(self._init_weights)
......@@ -156,46 +157,41 @@ class Transformer(nn.Layer):
return self.forward_test(src)
def forward_test(self, src):
bs = src.shape[0]
bs = paddle.shape(src)[0]
if self.encoder is not None:
src = self.positional_encoding(src.transpose([1, 0, 2]))
src = self.positional_encoding(paddle.transpose(src, [1, 0, 2]))
memory = self.encoder(src)
else:
memory = src.squeeze(2).transpose([2, 0, 1])
memory = paddle.transpose(paddle.squeeze(src, 2), [2, 0, 1])
dec_seq = paddle.full((bs, 1), 2, dtype=paddle.int64)
dec_prob = paddle.full((bs, 1), 1., dtype=paddle.float32)
for len_dec_seq in range(1, 25):
src_enc = memory.clone()
tgt_key_padding_mask = self.generate_padding_mask(dec_seq)
dec_seq_embed = self.embedding(dec_seq).transpose([1, 0, 2])
dec_seq_embed = paddle.transpose(self.embedding(dec_seq), [1, 0, 2])
dec_seq_embed = self.positional_encoding(dec_seq_embed)
tgt_mask = self.generate_square_subsequent_mask(dec_seq_embed.shape[
0])
tgt_mask = self.generate_square_subsequent_mask(
paddle.shape(dec_seq_embed)[0])
output = self.decoder(
dec_seq_embed,
src_enc,
memory,
tgt_mask=tgt_mask,
memory_mask=None,
tgt_key_padding_mask=tgt_key_padding_mask,
tgt_key_padding_mask=None,
memory_key_padding_mask=None)
dec_output = output.transpose([1, 0, 2])
dec_output = dec_output[:,
-1, :] # Pick the last step: (bh * bm) * d_h
word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1)
word_prob = word_prob.reshape([1, bs, -1])
preds_idx = word_prob.argmax(axis=2)
dec_output = paddle.transpose(output, [1, 0, 2])
dec_output = dec_output[:, -1, :]
word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1)
preds_idx = paddle.argmax(word_prob, axis=1)
if paddle.equal_all(
preds_idx[-1],
preds_idx,
paddle.full(
preds_idx[-1].shape, 3, dtype='int64')):
paddle.shape(preds_idx), 3, dtype='int64')):
break
preds_prob = word_prob.max(axis=2)
preds_prob = paddle.max(word_prob, axis=1)
dec_seq = paddle.concat(
[dec_seq, preds_idx.reshape([-1, 1])], axis=1)
return dec_seq
[dec_seq, paddle.reshape(preds_idx, [-1, 1])], axis=1)
dec_prob = paddle.concat(
[dec_prob, paddle.reshape(preds_prob, [-1, 1])], axis=1)
return [dec_seq, dec_prob]
def forward_beam(self, images):
''' Translation work in one batch '''
......@@ -211,14 +207,15 @@ class Transformer(nn.Layer):
n_prev_active_inst, n_bm):
''' Collect tensor parts associated to active instances. '''
_, *d_hs = beamed_tensor.shape
beamed_tensor_shape = paddle.shape(beamed_tensor)
n_curr_active_inst = len(curr_active_inst_idx)
new_shape = (n_curr_active_inst * n_bm, *d_hs)
new_shape = (n_curr_active_inst * n_bm, beamed_tensor_shape[1],
beamed_tensor_shape[2])
beamed_tensor = beamed_tensor.reshape([n_prev_active_inst, -1])
beamed_tensor = beamed_tensor.index_select(
paddle.to_tensor(curr_active_inst_idx), axis=0)
beamed_tensor = beamed_tensor.reshape([*new_shape])
curr_active_inst_idx, axis=0)
beamed_tensor = beamed_tensor.reshape(new_shape)
return beamed_tensor
......@@ -249,44 +246,26 @@ class Transformer(nn.Layer):
b.get_current_state() for b in inst_dec_beams if not b.done
]
dec_partial_seq = paddle.stack(dec_partial_seq)
dec_partial_seq = dec_partial_seq.reshape([-1, len_dec_seq])
return dec_partial_seq
def prepare_beam_memory_key_padding_mask(
inst_dec_beams, memory_key_padding_mask, n_bm):
keep = []
for idx in (memory_key_padding_mask):
if not inst_dec_beams[idx].done:
keep.append(idx)
memory_key_padding_mask = memory_key_padding_mask[
paddle.to_tensor(keep)]
len_s = memory_key_padding_mask.shape[-1]
n_inst = memory_key_padding_mask.shape[0]
memory_key_padding_mask = paddle.concat(
[memory_key_padding_mask for i in range(n_bm)], axis=1)
memory_key_padding_mask = memory_key_padding_mask.reshape(
[n_inst * n_bm, len_s]) #repeat(1, n_bm)
return memory_key_padding_mask
def predict_word(dec_seq, enc_output, n_active_inst, n_bm,
memory_key_padding_mask):
tgt_key_padding_mask = self.generate_padding_mask(dec_seq)
dec_seq = self.embedding(dec_seq).transpose([1, 0, 2])
dec_seq = paddle.transpose(self.embedding(dec_seq), [1, 0, 2])
dec_seq = self.positional_encoding(dec_seq)
tgt_mask = self.generate_square_subsequent_mask(dec_seq.shape[
0])
tgt_mask = self.generate_square_subsequent_mask(
paddle.shape(dec_seq)[0])
dec_output = self.decoder(
dec_seq,
enc_output,
tgt_mask=tgt_mask,
tgt_key_padding_mask=tgt_key_padding_mask,
memory_key_padding_mask=memory_key_padding_mask,
).transpose([1, 0, 2])
tgt_key_padding_mask=None,
memory_key_padding_mask=memory_key_padding_mask, )
dec_output = paddle.transpose(dec_output, [1, 0, 2])
dec_output = dec_output[:,
-1, :] # Pick the last step: (bh * bm) * d_h
word_prob = F.log_softmax(self.tgt_word_prj(dec_output), axis=1)
word_prob = word_prob.reshape([n_active_inst, n_bm, -1])
word_prob = F.softmax(self.tgt_word_prj(dec_output), axis=1)
word_prob = paddle.reshape(word_prob, [n_active_inst, n_bm, -1])
return word_prob
def collect_active_inst_idx_list(inst_beams, word_prob,
......@@ -302,9 +281,8 @@ class Transformer(nn.Layer):
n_active_inst = len(inst_idx_to_position_map)
dec_seq = prepare_beam_dec_seq(inst_dec_beams, len_dec_seq)
memory_key_padding_mask = None
word_prob = predict_word(dec_seq, enc_output, n_active_inst, n_bm,
memory_key_padding_mask)
None)
# Update the beam with predicted word prob information and collect incomplete instances
active_inst_idx_list = collect_active_inst_idx_list(
inst_dec_beams, word_prob, inst_idx_to_position_map)
......@@ -324,27 +302,21 @@ class Transformer(nn.Layer):
with paddle.no_grad():
#-- Encode
if self.encoder is not None:
src = self.positional_encoding(images.transpose([1, 0, 2]))
src_enc = self.encoder(src).transpose([1, 0, 2])
src_enc = self.encoder(src)
else:
src_enc = images.squeeze(2).transpose([0, 2, 1])
#-- Repeat data for beam search
n_bm = self.beam_size
n_inst, len_s, d_h = src_enc.shape
src_enc = paddle.concat([src_enc for i in range(n_bm)], axis=1)
src_enc = src_enc.reshape([n_inst * n_bm, len_s, d_h]).transpose(
[1, 0, 2])
#-- Prepare beams
inst_dec_beams = [Beam(n_bm) for _ in range(n_inst)]
#-- Bookkeeping for active or not
active_inst_idx_list = list(range(n_inst))
src_shape = paddle.shape(src_enc)
inst_dec_beams = [Beam(n_bm) for _ in range(1)]
active_inst_idx_list = list(range(1))
# Repeat data for beam search
src_enc = paddle.tile(src_enc, [1, n_bm, 1])
inst_idx_to_position_map = get_inst_idx_to_tensor_position_map(
active_inst_idx_list)
#-- Decode
# Decode
for len_dec_seq in range(1, 25):
src_enc_copy = src_enc.clone()
active_inst_idx_list = beam_decode_step(
......@@ -358,10 +330,19 @@ class Transformer(nn.Layer):
batch_hyp, batch_scores = collect_hypothesis_and_scores(inst_dec_beams,
1)
result_hyp = []
for bs_hyp in batch_hyp:
bs_hyp_pad = bs_hyp[0] + [3] * (25 - len(bs_hyp[0]))
hyp_scores = []
for bs_hyp, score in zip(batch_hyp, batch_scores):
l = len(bs_hyp[0])
bs_hyp_pad = bs_hyp[0] + [3] * (25 - l)
result_hyp.append(bs_hyp_pad)
return paddle.to_tensor(np.array(result_hyp), dtype=paddle.int64)
score = float(score) / l
hyp_score = [score for _ in range(25)]
hyp_scores.append(hyp_score)
return [
paddle.to_tensor(
np.array(result_hyp), dtype=paddle.int64),
paddle.to_tensor(hyp_scores)
]
def generate_square_subsequent_mask(self, sz):
"""Generate a square mask for the sequence. The masked positions are filled with float('-inf').
......@@ -376,7 +357,7 @@ class Transformer(nn.Layer):
return mask
def generate_padding_mask(self, x):
padding_mask = x.equal(paddle.to_tensor(0, dtype=x.dtype))
padding_mask = paddle.equal(x, paddle.to_tensor(0, dtype=x.dtype))
return padding_mask
def _reset_parameters(self):
......@@ -514,17 +495,17 @@ class TransformerEncoderLayer(nn.Layer):
src,
src,
attn_mask=src_mask,
key_padding_mask=src_key_padding_mask)[0]
key_padding_mask=src_key_padding_mask)
src = src + self.dropout1(src2)
src = self.norm1(src)
src = src.transpose([1, 2, 0])
src = paddle.transpose(src, [1, 2, 0])
src = paddle.unsqueeze(src, 2)
src2 = self.conv2(F.relu(self.conv1(src)))
src2 = paddle.squeeze(src2, 2)
src2 = src2.transpose([2, 0, 1])
src2 = paddle.transpose(src2, [2, 0, 1])
src = paddle.squeeze(src, 2)
src = src.transpose([2, 0, 1])
src = paddle.transpose(src, [2, 0, 1])
src = src + self.dropout2(src2)
src = self.norm2(src)
......@@ -598,7 +579,7 @@ class TransformerDecoderLayer(nn.Layer):
tgt,
tgt,
attn_mask=tgt_mask,
key_padding_mask=tgt_key_padding_mask)[0]
key_padding_mask=tgt_key_padding_mask)
tgt = tgt + self.dropout1(tgt2)
tgt = self.norm1(tgt)
tgt2 = self.multihead_attn(
......@@ -606,18 +587,18 @@ class TransformerDecoderLayer(nn.Layer):
memory,
memory,
attn_mask=memory_mask,
key_padding_mask=memory_key_padding_mask)[0]
key_padding_mask=memory_key_padding_mask)
tgt = tgt + self.dropout2(tgt2)
tgt = self.norm2(tgt)
# default
tgt = tgt.transpose([1, 2, 0])
tgt = paddle.transpose(tgt, [1, 2, 0])
tgt = paddle.unsqueeze(tgt, 2)
tgt2 = self.conv2(F.relu(self.conv1(tgt)))
tgt2 = paddle.squeeze(tgt2, 2)
tgt2 = tgt2.transpose([2, 0, 1])
tgt2 = paddle.transpose(tgt2, [2, 0, 1])
tgt = paddle.squeeze(tgt, 2)
tgt = tgt.transpose([2, 0, 1])
tgt = paddle.transpose(tgt, [2, 0, 1])
tgt = tgt + self.dropout3(tgt2)
tgt = self.norm3(tgt)
......@@ -656,8 +637,8 @@ class PositionalEncoding(nn.Layer):
(-math.log(10000.0) / dim))
pe[:, 0::2] = paddle.sin(position * div_term)
pe[:, 1::2] = paddle.cos(position * div_term)
pe = pe.unsqueeze(0)
pe = pe.transpose([1, 0, 2])
pe = paddle.unsqueeze(pe, 0)
pe = paddle.transpose(pe, [1, 0, 2])
self.register_buffer('pe', pe)
def forward(self, x):
......@@ -670,7 +651,7 @@ class PositionalEncoding(nn.Layer):
Examples:
>>> output = pos_encoder(x)
"""
x = x + self.pe[:x.shape[0], :]
x = x + self.pe[:paddle.shape(x)[0], :]
return self.dropout(x)
......@@ -702,7 +683,7 @@ class PositionalEncoding_2d(nn.Layer):
(-math.log(10000.0) / dim))
pe[:, 0::2] = paddle.sin(position * div_term)
pe[:, 1::2] = paddle.cos(position * div_term)
pe = pe.unsqueeze(0).transpose([1, 0, 2])
pe = paddle.transpose(paddle.unsqueeze(pe, 0), [1, 0, 2])
self.register_buffer('pe', pe)
self.avg_pool_1 = nn.AdaptiveAvgPool2D((1, 1))
......@@ -722,22 +703,23 @@ class PositionalEncoding_2d(nn.Layer):
Examples:
>>> output = pos_encoder(x)
"""
w_pe = self.pe[:x.shape[-1], :]
w_pe = self.pe[:paddle.shape(x)[-1], :]
w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0)
w_pe = w_pe * w1
w_pe = w_pe.transpose([1, 2, 0])
w_pe = w_pe.unsqueeze(2)
w_pe = paddle.transpose(w_pe, [1, 2, 0])
w_pe = paddle.unsqueeze(w_pe, 2)
h_pe = self.pe[:x.shape[-2], :]
h_pe = self.pe[:paddle.shape(x)[-2], :]
w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0)
h_pe = h_pe * w2
h_pe = h_pe.transpose([1, 2, 0])
h_pe = h_pe.unsqueeze(3)
h_pe = paddle.transpose(h_pe, [1, 2, 0])
h_pe = paddle.unsqueeze(h_pe, 3)
x = x + w_pe + h_pe
x = x.reshape(
[x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]).transpose(
[2, 0, 1])
x = paddle.transpose(
paddle.reshape(x,
[x.shape[0], x.shape[1], x.shape[2] * x.shape[3]]),
[2, 0, 1])
return self.dropout(x)
......@@ -817,7 +799,7 @@ class Beam():
def sort_scores(self):
"Sort the scores."
return self.scores, paddle.to_tensor(
[i for i in range(self.scores.shape[0])], dtype='int32')
[i for i in range(int(self.scores.shape[0]))], dtype='int32')
def get_the_best_score_and_idx(self):
"Get the score of the best in the beam."
......
......@@ -235,7 +235,8 @@ class ParallelSARDecoder(BaseDecoder):
# cal mask of attention weight
for i, valid_ratio in enumerate(valid_ratios):
valid_width = min(w, math.ceil(w * valid_ratio))
attn_weight[i, :, :, valid_width:, :] = float('-inf')
if valid_width < w:
attn_weight[i, :, :, valid_width:, :] = float('-inf')
attn_weight = paddle.reshape(attn_weight, [bsz, T, -1])
attn_weight = F.softmax(attn_weight, axis=-1)
......
......@@ -22,7 +22,8 @@ def build_neck(config):
from .rnn import SequenceEncoder
from .pg_fpn import PGFPN
from .table_fpn import TableFPN
support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN']
from .fpn import FPN
support_dict = ['FPN', 'DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN']
module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format(
......
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn
import paddle
import math
import paddle.nn.functional as F
class Conv_BN_ReLU(nn.Layer):
def __init__(self, in_planes, out_planes, kernel_size=1, stride=1, padding=0):
super(Conv_BN_ReLU, self).__init__()
self.conv = nn.Conv2D(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
bias_attr=False)
self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
self.relu = nn.ReLU()
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Normal(0, math.sqrt(2. / n)))
elif isinstance(m, nn.BatchNorm2D):
m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(1.0))
m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32', default_initializer=paddle.nn.initializer.Constant(0.0))
def forward(self, x):
return self.relu(self.bn(self.conv(x)))
class FPN(nn.Layer):
def __init__(self, in_channels, out_channels):
super(FPN, self).__init__()
# Top layer
self.toplayer_ = Conv_BN_ReLU(in_channels[3], out_channels, kernel_size=1, stride=1, padding=0)
# Lateral layers
self.latlayer1_ = Conv_BN_ReLU(in_channels[2], out_channels, kernel_size=1, stride=1, padding=0)
self.latlayer2_ = Conv_BN_ReLU(in_channels[1], out_channels, kernel_size=1, stride=1, padding=0)
self.latlayer3_ = Conv_BN_ReLU(in_channels[0], out_channels, kernel_size=1, stride=1, padding=0)
# Smooth layers
self.smooth1_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.smooth2_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.smooth3_ = Conv_BN_ReLU(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.out_channels = out_channels * 4
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32',
default_initializer=paddle.nn.initializer.Normal(0,
math.sqrt(2. / n)))
elif isinstance(m, nn.BatchNorm2D):
m.weight = paddle.create_parameter(shape=m.weight.shape, dtype='float32',
default_initializer=paddle.nn.initializer.Constant(1.0))
m.bias = paddle.create_parameter(shape=m.bias.shape, dtype='float32',
default_initializer=paddle.nn.initializer.Constant(0.0))
def _upsample(self, x, scale=1):
return F.upsample(x, scale_factor=scale, mode='bilinear')
def _upsample_add(self, x, y, scale=1):
return F.upsample(x, scale_factor=scale, mode='bilinear') + y
def forward(self, x):
f2, f3, f4, f5 = x
p5 = self.toplayer_(f5)
f4 = self.latlayer1_(f4)
p4 = self._upsample_add(p5, f4, 2)
p4 = self.smooth1_(p4)
f3 = self.latlayer2_(f3)
p3 = self._upsample_add(p4, f3, 2)
p3 = self.smooth2_(p3)
f2 = self.latlayer3_(f2)
p2 = self._upsample_add(p3, f2, 2)
p2 = self.smooth3_(p2)
p3 = self._upsample(p3, 2)
p4 = self._upsample(p4, 4)
p5 = self._upsample(p5, 8)
fuse = paddle.concat([p2, p3, p4, p5], axis=1)
return fuse
\ No newline at end of file
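As a rough shape check for this neck, a sketch under assumed backbone channels (the four dummy inputs mimic feature maps at strides 4/8/16/32):
```python
import paddle

# Sketch only: the channel counts below are illustrative assumptions.
fpn = FPN(in_channels=[16, 24, 56, 480], out_channels=96)
feats = [
    paddle.rand([1, 16, 160, 160]),  # stride 4
    paddle.rand([1, 24, 80, 80]),    # stride 8
    paddle.rand([1, 56, 40, 40]),    # stride 16
    paddle.rand([1, 480, 20, 20]),   # stride 32
]
fuse = fpn(feats)
print(fuse.shape)  # [1, 384, 160, 160]: 96 * 4 channels at stride 4
```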
......@@ -28,12 +28,14 @@ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, Di
TableLabelDecode, SARLabelDecode
from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess
from .pse_postprocess import PSEPostProcess
def build_post_process(config, global_config=None):
support_dict = [
'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
'DistillationCTCLabelDecode', 'TableLabelDecode',
'DBPostProcess', 'PSEPostProcess', 'EASTPostProcess', 'SASTPostProcess',
'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode',
'PGPostProcess', 'DistillationCTCLabelDecode', 'TableLabelDecode',
'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode'
]
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .pse_postprocess import PSEPostProcess
\ No newline at end of file
## Compilation
Code from https://github.com/whai362/pan_pp.pytorch
```shell
python3 setup.py build_ext --inplace
```
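Once built, the extension can be smoke-tested directly; a sketch with assumed inputs, run from the directory containing the compiled module (uint8 kernel maps ordered largest to smallest, as PSEPostProcess produces them):
```python
import numpy as np
from pse import pse  # the freshly built extension

# Sketch only: three nested square kernels, largest first.
kernels = np.zeros((3, 64, 64), dtype=np.uint8)
kernels[0, 8:56, 8:56] = 1    # full text region
kernels[1, 12:52, 12:52] = 1
kernels[2, 16:48, 16:48] = 1  # smallest kernel seeds the labels
label = pse(kernels, min_area=5)
print(label.shape, label.max())  # (64, 64) 1: one expanded instance
```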
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import subprocess
python_path = sys.executable
if subprocess.call('cd ppocr/postprocess/pse_postprocess/pse;{} setup.py build_ext --inplace;cd -'.format(python_path), shell=True) != 0:
raise RuntimeError('Cannot compile pse: {}'.format(os.path.dirname(os.path.realpath(__file__))))
from .pse import pse
\ No newline at end of file
import numpy as np
import cv2
cimport numpy as np
cimport cython
cimport libcpp
cimport libcpp.pair
cimport libcpp.queue
from libcpp.pair cimport *
from libcpp.queue cimport *
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.int32_t, ndim=2] _pse(np.ndarray[np.uint8_t, ndim=3] kernels,
np.ndarray[np.int32_t, ndim=2] label,
int kernel_num,
int label_num,
float min_area=0):
cdef np.ndarray[np.int32_t, ndim=2] pred
pred = np.zeros((label.shape[0], label.shape[1]), dtype=np.int32)
for label_idx in range(1, label_num):
if np.sum(label == label_idx) < min_area:
label[label == label_idx] = 0
cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] que = \
queue[libcpp.pair.pair[np.int16_t,np.int16_t]]()
cdef libcpp.queue.queue[libcpp.pair.pair[np.int16_t,np.int16_t]] nxt_que = \
queue[libcpp.pair.pair[np.int16_t,np.int16_t]]()
cdef np.int16_t* dx = [-1, 1, 0, 0]
cdef np.int16_t* dy = [0, 0, -1, 1]
cdef np.int16_t tmpx, tmpy
points = np.array(np.where(label > 0)).transpose((1, 0))
for point_idx in range(points.shape[0]):
tmpx, tmpy = points[point_idx, 0], points[point_idx, 1]
que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy))
pred[tmpx, tmpy] = label[tmpx, tmpy]
cdef libcpp.pair.pair[np.int16_t,np.int16_t] cur
cdef int cur_label
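# Progressive scale expansion: labels seeded from the smallest kernel are
# grown breadth-first through each larger kernel map; frontier pixels that
# cannot expand yet are deferred to the next (larger) kernel via nxt_que.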
for kernel_idx in range(kernel_num - 2, -1, -1):
while not que.empty():
cur = que.front()
que.pop()
cur_label = pred[cur.first, cur.second]
is_edge = True
for j in range(4):
tmpx = cur.first + dx[j]
tmpy = cur.second + dy[j]
if tmpx < 0 or tmpx >= label.shape[0] or tmpy < 0 or tmpy >= label.shape[1]:
continue
if kernels[kernel_idx, tmpx, tmpy] == 0 or pred[tmpx, tmpy] > 0:
continue
que.push(pair[np.int16_t,np.int16_t](tmpx, tmpy))
pred[tmpx, tmpy] = cur_label
is_edge = False
if is_edge:
nxt_que.push(cur)
que, nxt_que = nxt_que, que
return pred
def pse(kernels, min_area):
kernel_num = kernels.shape[0]
label_num, label = cv2.connectedComponents(kernels[-1], connectivity=4)
return _pse(kernels[:-1], label, kernel_num, label_num, min_area)
\ No newline at end of file
from distutils.core import setup, Extension
from Cython.Build import cythonize
import numpy
setup(ext_modules=cythonize(Extension(
'pse',
sources=['pse.pyx'],
language='c++',
include_dirs=[numpy.get_include()],
library_dirs=[],
libraries=[],
extra_compile_args=['-O3'],
extra_link_args=[]
)))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
import paddle
from paddle.nn import functional as F
from ppocr.postprocess.pse_postprocess.pse import pse
class PSEPostProcess(object):
"""
The post process for PSE.
"""
def __init__(self,
thresh=0.5,
box_thresh=0.85,
min_area=16,
box_type='box',
scale=4,
**kwargs):
assert box_type in ['box', 'poly'], 'Only box and poly are supported'
self.thresh = thresh
self.box_thresh = box_thresh
self.min_area = min_area
self.box_type = box_type
self.scale = scale
def __call__(self, outs_dict, shape_list):
pred = outs_dict['maps']
if not isinstance(pred, paddle.Tensor):
pred = paddle.to_tensor(pred)
pred = F.interpolate(pred, scale_factor=4 // self.scale, mode='bilinear')
score = F.sigmoid(pred[:, 0, :, :])
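# channel 0 holds the full text-region map; masking the remaining kernel
# maps with it keeps every shrunk kernel inside the detected text area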
kernels = (pred > self.thresh).astype('float32')
text_mask = kernels[:, 0, :, :]
kernels[:, 0:, :, :] = kernels[:, 0:, :, :] * text_mask
score = score.numpy()
kernels = kernels.numpy().astype(np.uint8)
boxes_batch = []
for batch_index in range(pred.shape[0]):
boxes, scores = self.boxes_from_bitmap(score[batch_index], kernels[batch_index], shape_list[batch_index])
boxes_batch.append({'points': boxes, 'scores': scores})
return boxes_batch
def boxes_from_bitmap(self, score, kernels, shape):
label = pse(kernels, self.min_area)
return self.generate_box(score, label, shape)
def generate_box(self, score, label, shape):
src_h, src_w, ratio_h, ratio_w = shape
label_num = np.max(label) + 1
boxes = []
scores = []
for i in range(1, label_num):
ind = label == i
points = np.array(np.where(ind)).transpose((1, 0))[:, ::-1].astype('int32')
if points.shape[0] < self.min_area:
label[ind] = 0
continue
score_i = np.mean(score[ind])
if score_i < self.box_thresh:
label[ind] = 0
continue
if self.box_type == 'box':
rect = cv2.minAreaRect(points)
bbox = cv2.boxPoints(rect)
elif self.box_type == 'poly':
box_height = np.max(points[:, 1]) + 10
box_width = np.max(points[:, 0]) + 10
mask = np.zeros((box_height, box_width), np.uint8)
mask[points[:, 1], points[:, 0]] = 255
contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
bbox = np.squeeze(contours[0], 1)
else:
raise NotImplementedError
bbox[:, 0] = np.clip(
np.round(bbox[:, 0] / ratio_w), 0, src_w)
bbox[:, 1] = np.clip(
np.round(bbox[:, 1] / ratio_h), 0, src_h)
boxes.append(bbox)
scores.append(score_i)
return boxes, scores
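A sketch of driving this post process directly (shapes and values are assumptions; each shape_list row follows the [src_h, src_w, ratio_h, ratio_w] convention unpacked in generate_box):
```python
import numpy as np
import paddle

# Sketch only: decode one synthetic text instance into a box.
post = PSEPostProcess(thresh=0.5, box_thresh=0.5, min_area=16,
                      box_type='box', scale=1)
maps = paddle.full([1, 7, 184, 184], -10., dtype='float32')
maps[:, :, 40:80, 30:120] = 10.  # all 7 kernel maps fire on one region
shape_list = np.array([[736., 736., 1., 1.]])  # src_h, src_w, ratio_h, ratio_w
result = post({'maps': maps}, shape_list)
print(result[0]['points'][0])  # four corner points of the detected box
```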
......@@ -169,15 +169,20 @@ class NRTRLabelDecode(BaseRecLabelDecode):
character_type, use_space_char)
def __call__(self, preds, label=None, *args, **kwargs):
if preds.dtype == paddle.int64:
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
if preds[0][0] == 2:
preds_idx = preds[:, 1:]
else:
preds_idx = preds
text = self.decode(preds_idx)
if len(preds) == 2:
preds_id = preds[0]
preds_prob = preds[1]
if isinstance(preds_id, paddle.Tensor):
preds_id = preds_id.numpy()
if isinstance(preds_prob, paddle.Tensor):
preds_prob = preds_prob.numpy()
if preds_id[0][0] == 2:
preds_idx = preds_id[:, 1:]
preds_prob = preds_prob[:, 1:]
else:
preds_idx = preds_id
text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False)
if label is None:
return text
label = self.decode(label[:, 1:])
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
EPS = 1e-6
def iou_single(a, b, mask, n_class):
valid = mask == 1
a = a.masked_select(valid)
b = b.masked_select(valid)
miou = []
for i in range(n_class):
if a.shape == [0] and a.shape == b.shape:
inter = paddle.to_tensor(0.0)
union = paddle.to_tensor(0.0)
else:
inter = ((a == i).logical_and(b == i)).astype('float32')
union = ((a == i).logical_or(b == i)).astype('float32')
miou.append(paddle.sum(inter) / (paddle.sum(union) + EPS))
miou = sum(miou) / len(miou)
return miou
def iou(a, b, mask, n_class=2, reduce=True):
batch_size = a.shape[0]
a = a.reshape([batch_size, -1])
b = b.reshape([batch_size, -1])
mask = mask.reshape([batch_size, -1])
iou = paddle.zeros((batch_size,), dtype='float32')
for i in range(batch_size):
iou[i] = iou_single(a[i], b[i], mask[i], n_class)
if reduce:
iou = paddle.mean(iou)
return iou
\ No newline at end of file
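A small numeric sketch of these helpers on toy tensors (values assumed):
```python
import paddle

# Sketch only: two 2x2 predictions vs. labels, fully valid mask.
a = paddle.to_tensor([1, 0, 1, 1], dtype='int64').reshape([1, -1])
b = paddle.to_tensor([1, 1, 1, 0], dtype='int64').reshape([1, -1])
mask = paddle.ones([1, 4], dtype='int64')
print(iou(a, b, mask))  # class 0 IoU = 0, class 1 IoU = 0.5 -> mean 0.25
```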
......@@ -108,14 +108,15 @@ def load_dygraph_params(config, model, logger, optimizer):
for k1, k2 in zip(state_dict.keys(), params.keys()):
if list(state_dict[k1].shape) == list(params[k2].shape):
new_state_dict[k1] = params[k2]
else:
logger.info(
f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
)
else:
logger.info(
f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
)
model.set_state_dict(new_state_dict)
logger.info(f"loaded pretrained_model successful from {pm}")
return {}
def load_pretrained_params(model, path):
if path is None:
return False
......@@ -138,6 +139,7 @@ def load_pretrained_params(model, path):
print(f"load pretrain successful from {path}")
return model
def save_model(model,
optimizer,
model_path,
......
......@@ -32,7 +32,6 @@ def run_shell_command(cmd):
else:
return None
def parser_results_from_log_by_name(log_path, names_list):
if not os.path.exists(log_path):
raise ValueError("The log file {} does not exists!".format(log_path))
......@@ -46,11 +45,13 @@ def parser_results_from_log_by_name(log_path, names_list):
outs = run_shell_command(cmd)
outs = outs.split("\n")[0]
result = outs.split("{}".format(name))[-1]
result = json.loads(result)
try:
result = json.loads(result)
except:
result = np.array([int(r) for r in result.split()]).reshape(-1, 4)
parser_results[name] = result
return parser_results
def load_gt_from_file(gt_file):
if not os.path.exists(gt_file):
raise ValueError("The log file {} does not exists!".format(gt_file))
......@@ -60,7 +61,11 @@ def load_gt_from_file(gt_file):
parser_gt = {}
for line in data:
image_name, result = line.strip("\n").split("\t")
result = json.loads(result)
image_name = image_name.split('/')[-1]
try:
result = json.loads(result)
except:
result = np.array([int(r) for r in result.split()]).reshape(-1, 4)
parser_gt[image_name] = result
return parser_gt
......
......@@ -23,10 +23,10 @@ Architecture:
name: MobileNetV3
scale: 0.5
model_name: large
disable_se: True
disable_se: False
Neck:
name: DBFPN
out_channels: 96
out_channels: 256
Head:
name: DBHead
k: 50
......@@ -74,7 +74,7 @@ Train:
channel_first: False
- DetLabelEncode: # Class handling label
- Resize:
# size: [640, 640]
size: [640, 640]
- MakeBorderMap:
shrink_ratio: 0.4
thresh_min: 0.3
......
Global:
use_gpu: true
epoch_num: 72
log_smooth_window: 20
print_batch_step: 10
save_model_dir: ./output/rec/ic15/
save_epoch_step: 3
# evaluation is run every 2000 iterations
eval_batch_step: [0, 2000]
cal_metric_during_train: True
pretrained_model:
checkpoints:
save_inference_dir: ./
use_visualdl: False
infer_img: doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path: ppocr/utils/en_dict.txt
character_type: EN
max_text_length: 25
infer_mode: False
use_space_char: False
save_res_path: ./output/rec/predicts_ic15.txt
Optimizer:
name: Adam
beta1: 0.9
beta2: 0.999
lr:
learning_rate: 0.0005
regularizer:
name: 'L2'
factor: 0
Architecture:
model_type: rec
algorithm: CRNN
Transform:
Backbone:
name: ResNet
layers: 34
Neck:
name: SequenceEncoder
encoder_type: rnn
hidden_size: 256
Head:
name: CTCHead
fc_decay: 0
Loss:
name: CTCLoss
PostProcess:
name: CTCLabelDecode
Metric:
name: RecMetric
main_indicator: acc
Train:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data/
label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: True
batch_size_per_card: 256
drop_last: True
num_workers: 8
use_shared_memory: False
Eval:
dataset:
name: SimpleDataSet
data_dir: ./train_data/ic15_data
label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
transforms:
- DecodeImage: # load image
img_mode: BGR
channel_first: False
- CTCLabelEncode: # Class handling label
- RecResizeImg:
image_shape: [3, 32, 100]
- KeepKeys:
keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
loader:
shuffle: False
drop_last: False
batch_size_per_card: 256
num_workers: 4
use_shared_memory: False
......@@ -12,7 +12,7 @@ train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
trainer:norm_train|pact_train|fpgm_train
norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
......@@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c tests/configs/det_mv3_db.yml -o
eval:null
null:null
##
===========================infer_params===========================
......@@ -35,7 +35,7 @@ export1:null
export2:null
##
train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy
infer_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o
infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
......
===========================train_params===========================
model_name:ocr_system
python:python3.7
gpu_list:null
Global.use_gpu:null
Global.auto_cast:null
Global.epoch_num:null
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:null
Global.pretrained_model:null
train_model_name:null
train_infer_img_dir:null
null:null
##
trainer:
norm_train:null
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
kl_quant:deploy/slim/quantization/quant_kl.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
infer_quant:True
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
null:null
===========================train_params===========================
model_name:ocr_system
model_name:ocr_system_mobile
python:python3.7
gpu_list:null
Global.use_gpu:null
......
===========================train_params===========================
model_name:ocr_system_server
python:python3.7
gpu_list:null
Global.use_gpu:null
Global.auto_cast:null
Global.epoch_num:null
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:null
Global.pretrained_model:null
train_model_name:null
train_infer_img_dir:null
null:null
##
trainer:
norm_train:null
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_system.py
--use_gpu:True
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr system
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
--benchmark:True
\ No newline at end of file
......@@ -63,4 +63,19 @@ inference:./deploy/cpp_infer/build/ppocr rec
--rec_model_dir:
--image_dir:./inference/rec_inference/
null:null
--benchmark:True
\ No newline at end of file
--benchmark:True
===========================serving_params===========================
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_mobile_v2.0_rec_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_rec_mobile_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_rec_mobile_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1
op.rec.local_service_conf.devices:null|0
op.rec.local_service_conf.use_mkldnn:True|False
op.rec.local_service_conf.thread_num:1|6
op.rec.local_service_conf.use_trt:False|True
op.rec.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en
\ No newline at end of file
===========================train_params===========================
model_name:ocr_server_rec
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_infer=2|whole_train_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=128|whole_train_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c tests/configs/rec_icdar15_r34_train.yml -o
pact_train:deploy/slim/quantization/quant.py -c tests/configs/rec_icdar15_r34_train.yml -o
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c tests/configs/rec_icdar15_r34_train.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o
quant_export:deploy/slim/quantization/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_rec.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1|6
--use_tensorrt:True|False
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:./inference/rec_inference
--save_log_path:./test/output/
--benchmark:True
null:null
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr rec
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--rec_model_dir:
--image_dir:./inference/rec_inference/
null:null
--benchmark:True
===========================serving_params===========================
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_server_v2.0_rec_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_rec_server_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_rec_server_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1
op.rec.local_service_conf.devices:null|0
op.rec.local_service_conf.use_mkldnn:True|False
op.rec.local_service_conf.thread_num:1|6
op.rec.local_service_conf.use_trt:False|True
op.rec.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en
\ No newline at end of file
......@@ -75,17 +75,28 @@ elif [ ${MODE} = "infer" ];then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_det_data_50.tar && cd ../
elif [ ${model_name} = "ocr_system" ]; then
elif [ ${model_name} = "ocr_system_mobile" ]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../
else
elif [ ${model_name} = "ocr_system_server" ]; then
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_server_v2.0_det_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_det_data_50.tar && cd ../
elif [ ${model_name} = "ocr_rec" ]; then
rm -rf ./train_data/ic15_data
eval_model_name="ch_ppocr_mobile_v2.0_rec_infer"
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../
elif [ ${model_name} = "ocr_server_rec" ]; then
rm -rf ./train_data/ic15_data
eval_model_name="ch_ppocr_server_v2.0_rec_infer"
wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd ./inference && tar xf ${eval_model_name}.tar && tar xf rec_inference.tar && cd ../
fi
elif [ ${MODE} = "cpp_infer" ];then
if [ ${model_name} = "ocr_det" ]; then
......@@ -107,12 +118,15 @@ fi
if [ ${MODE} = "serving_infer" ];then
# prepare serving env
python_name=$(func_parser_value "${lines[2]}")
${python_name} -m pip install paddle-serving-server-gpu==0.6.1.post101
wget https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
${python_name} -m pip install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
${python_name} -m pip install paddle_serving_client==0.6.1
${python_name} -m pip install paddle-serving-app==0.6.1
${python_name} -m pip install paddle-serving-app==0.6.3
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && cd ../
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../
fi
if [ ${MODE} = "cpp_infer" ];then
......
# Introduction
# Introduction to the train-to-inference deployment toolchain tests
test.sh is used together with params.txt to test the full train-to-prediction pipeline of the lightweight OCR detection and recognition models.
......@@ -36,7 +36,7 @@ test.sh supports four run modes, each running on different data, used respectively
- Mode 1: lite_train_infer, trains with a small amount of data to quickly verify that the train-to-prediction pipeline runs end to end, without checking accuracy or speed;
```shell
bash test/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer'
bash tests/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer'
bash tests/test.sh ./tests/ocr_det_params.txt 'lite_train_infer'
```
......@@ -66,3 +66,7 @@ bash tests/test.sh ./tests/ocr_det_params.txt 'whole_train_infer'
bash tests/prepare.sh ./tests/ocr_det_params.txt 'cpp_infer'
bash tests/test.sh ./tests/ocr_det_params.txt 'cpp_infer'
```
# Log output
Log files with the .log suffix are finally generated under the ```tests/output``` directory
......@@ -321,7 +321,7 @@ function func_serving(){
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "Falg_quantse" || ${precision} =~ "int8" ]]; then
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [[ ${_flag_quant} = "True" ]]; then
continue
fi
_save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_1.log"
......@@ -433,7 +433,9 @@ if [ ${MODE} = "infer" ]; then
save_infer_dir=$(dirname $infer_model)
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
export_cmd="${python} ${norm_export} ${set_export_weight} ${set_save_infer_key}"
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key}"
echo ${infer_run_exports[Count]}
echo $export_cmd
eval $export_cmd
status_export=$?
status_check $status_export "${export_cmd}" "${status_log}"
......
......@@ -60,6 +60,8 @@ def export_single_model(model, arch_config, save_path, logger):
"When there is tps in the network, variable length input is not supported, and the input size needs to be the same as during training"
)
infer_shape[-1] = 100
if arch_config["algorithm"] == "NRTR":
infer_shape = [1, 32, 100]
elif arch_config["model_type"] == "table":
infer_shape = [3, 488, 488]
model = to_static(
......
......@@ -89,6 +89,14 @@ class TextDetector(object):
postprocess_params["sample_pts_num"] = 2
postprocess_params["expand_scale"] = 1.0
postprocess_params["shrink_ratio_of_width"] = 0.3
elif self.det_algorithm == "PSE":
postprocess_params['name'] = 'PSEPostProcess'
postprocess_params["thresh"] = args.det_pse_thresh
postprocess_params["box_thresh"] = args.det_pse_box_thresh
postprocess_params["min_area"] = args.det_pse_min_area
postprocess_params["box_type"] = args.det_pse_box_type
postprocess_params["scale"] = args.det_pse_scale
self.det_pse_box_type = args.det_pse_box_type
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
......@@ -209,7 +217,7 @@ class TextDetector(object):
preds['f_score'] = outputs[1]
preds['f_tco'] = outputs[2]
preds['f_tvo'] = outputs[3]
elif self.det_algorithm == 'DB':
elif self.det_algorithm in ['DB', 'PSE']:
preds['maps'] = outputs[0]
else:
raise NotImplementedError
......@@ -217,7 +225,9 @@ class TextDetector(object):
#self.predictor.try_shrink_memory()
post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points']
if self.det_algorithm == "SAST" and self.det_sast_polygon:
if (self.det_algorithm == "SAST" and
self.det_sast_polygon) or (self.det_algorithm == "PSE" and
self.det_pse_box_type == 'poly'):
dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape)
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import os
import sys
from PIL import Image
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
......@@ -61,6 +61,13 @@ class TextRecognizer(object):
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
elif self.rec_algorithm == 'NRTR':
postprocess_params = {
'name': 'NRTRLabelDecode',
"character_type": args.rec_char_type,
"character_dict_path": args.rec_char_dict_path,
"use_space_char": args.use_space_char
}
self.postprocess_op = build_post_process(postprocess_params)
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger)
......@@ -87,6 +94,16 @@ class TextRecognizer(object):
def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape
if self.rec_algorithm == 'NRTR':
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
image_pil = Image.fromarray(np.uint8(img))
img = image_pil.resize([100, 32], Image.ANTIALIAS)
img = np.array(img)
norm_img = np.expand_dims(img, -1)
norm_img = norm_img.transpose((2, 0, 1))
return norm_img.astype(np.float32) / 128. - 1.
assert imgC == img.shape[2]
max_wh_ratio = max(max_wh_ratio, imgW / imgH)
imgW = int((32 * max_wh_ratio))
......@@ -252,14 +269,16 @@ class TextRecognizer(object):
else:
self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
if self.benchmark:
self.autolog.times.stamp()
preds = outputs[0]
if len(outputs) != 1:
preds = outputs
else:
preds = outputs[0]
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno]
......
......@@ -63,6 +63,13 @@ def init_args():
parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2)
parser.add_argument("--det_sast_polygon", type=str2bool, default=False)
# PSE params
parser.add_argument("--det_pse_thresh", type=float, default=0)
parser.add_argument("--det_pse_box_thresh", type=float, default=0.85)
parser.add_argument("--det_pse_min_area", type=float, default=16)
parser.add_argument("--det_pse_box_type", type=str, default='box')
parser.add_argument("--det_pse_scale", type=int, default=1)
# params for text recognizer
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str)
......
......@@ -402,7 +402,7 @@ def preprocess(is_train=False):
alg = config['Architecture']['algorithm']
assert alg in [
'EAST', 'DB', 'SAST', 'Rosetta', 'CRNN', 'STARNet', 'RARE', 'SRN',
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR'
'CLS', 'PGNet', 'Distillation', 'NRTR', 'TableAttn', 'SAR', 'PSE'
]
device = 'gpu:{}'.format(dist.ParallelEnv().dev_id) if use_gpu else 'cpu'
......