dygraph first commit

aad3093a · WenmuZhou · 10f7e519 · 10f7e519 · 10f7e519 · aad3093a
147 changed files
--- a/configs/det/det_db_icdar15_reader.yml
+++ b/configs/det/det_db_icdar15_reader.yml
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.db_process,DBProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.db_process,DBProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  test_image_shape: [736, 1280]
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.db_process,DBProcessTest
-  infer_img:
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  test_image_shape: [736, 1280]
-  do_eval: True
--- a/configs/det/det_east_icdar15_reader.yml
+++ b/configs/det/det_east_icdar15_reader.yml
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.east_process,EASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
-  background_ratio: 0.125
-  min_crop_side_ratio: 0.1
-  min_text_size: 10
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.east_process,EASTProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.east_process,EASTProcessTest
-  infer_img:
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  do_eval: True
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
 Global:
-  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
-  save_model_dir: ./output/det_db/
+  save_model_dir: ./output/20201010/
-  save_epoch_step: 200
+  save_epoch_step: 1200
  # run evaluation every `eval_batch_step` training iterations
-  eval_batch_step: [4000, 5000]
+  eval_batch_step: 8
-  train_batch_size_per_card: 16
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
-  test_batch_size_per_card: 16
+  load_static_weights: True
-  image_shape: [3, 640, 640]
+  cal_metric_during_train: False
-  reader_yml: ./configs/det/det_db_icdar15_reader.yml
+  pretrained_model: /home/zhoujun20/pretrain_models/MobileNetV3_large_x0_5_pretrained
-  pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
-  checkpoints:
-  save_res_path: ./output/det_db/predicts_db.txt
  save_inference_dir:
+  use_visualdl: True
-Architecture:
+  infer_img: doc/imgs_en/img_10.jpg
-  function: ppocr.modeling.architectures.det_model,DetModel
+  save_res_path: ./output/det_db/predicts_db.txt
-Backbone:
+Optimizer:
-  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
+  name: Adam
-  scale: 0.5
+  beta1: 0.9
-  model_name: large
+  beta2: 0.999
+  learning_rate:
+#    name: Cosine
+    lr: 0.001
+#    warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
-Head:
+Architecture:
-  function: ppocr.modeling.heads.det_db_head,DBHead
+  type: det
-  model_name: large
+  algorithm: DB
-  k: 50
+  Transform:
-  inner_channels: 96
+  Backbone:
-  out_channels: 2
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: FPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
 Loss:
-  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  name: DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
 PostProcess:
-  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  name: DBPostProcess
  thresh: 0.3
-  box_thresh: 0.7
+  box_thresh: 0.6
  max_candidates: 1000
-  unclip_ratio: 2.0
+  unclip_ratio: 1.5
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+            - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+      - EastRandomCropData:
+          size: [ 640,640 ]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns a list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size: 16
+    num_workers: 6
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736,1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','shape','polys','ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 1 # must be 1
+    num_workers: 6
\ No newline at end of file
--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
-Global:
-  algorithm: EAST
-  use_gpu: true
-  epoch_num: 100000
-  log_smooth_window: 20
-  print_batch_step: 5
-  save_model_dir: ./output/det_east/
-  save_epoch_step: 200
-  eval_batch_step: [5000, 5000]
-  train_batch_size_per_card: 16
-  test_batch_size_per_card: 16
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_east_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/MobileNetV3_large_x0_5_pretrained/
-  checkpoints:
-  save_res_path: ./output/det_east/predicts_east.txt
-  save_inference_dir:
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-Backbone:
-  function: ppocr.modeling.backbones.det_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-Head:
-  function: ppocr.modeling.heads.det_east_head,EASTHead
-  model_name: small
-Loss:
-  function: ppocr.modeling.losses.det_east_loss,EASTLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-PostProcess:
-  function: ppocr.postprocess.east_postprocess,EASTPostPocess
-  score_thresh: 0.8
-  cover_thresh: 0.1
-  nms_thresh: 0.2
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
 Global:
-  algorithm: DB
  use_gpu: true
  epoch_num: 1200
  log_smooth_window: 20
  print_batch_step: 2
-  save_model_dir: ./output/det_db/
+  save_model_dir: ./output/20201010/
-  save_epoch_step: 200
+  save_epoch_step: 1200
-  eval_batch_step: [5000, 5000]
+  # run evaluation every `eval_batch_step` training iterations
-  train_batch_size_per_card: 8
+  eval_batch_step: 8
-  test_batch_size_per_card: 16
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
-  image_shape: [3, 640, 640]
+  load_static_weights: True
-  reader_yml: ./configs/det/det_db_icdar15_reader.yml
+  cal_metric_during_train: False
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  pretrained_model: /home/zhoujun20/pretrain_models/MobileNetV3_large_x0_5_pretrained
-  save_res_path: ./output/det_db/predicts_db.txt
+  checkpoints: #./output/det_db_0.001_DiceLoss_256_pp_config_2.0b_4gpu/best_accuracy
-  checkpoints:
  save_inference_dir:
+  use_visualdl: True
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
-Architecture:
+Optimizer:
-  function: ppocr.modeling.architectures.det_model,DetModel
+  name: Adam
+  beta1: 0.9
-Backbone:
+  beta2: 0.999
-  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
+  learning_rate:
-  layers: 50
+#    name: Cosine
+    lr: 0.001
+#    warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
-Head:
+Architecture:
-  function: ppocr.modeling.heads.det_db_head,DBHead
+  type: det
-  model_name: large
+  algorithm: DB
-  k: 50
+  Transform:
-  inner_channels: 256
+  Backbone:
-  out_channels: 2
+    name: ResNet
+    layers: 50
+  Neck:
+    name: FPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
 Loss:
-  function: ppocr.modeling.losses.det_db_loss,DBLoss
+  name: DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
 PostProcess:
-  function: ppocr.postprocess.db_postprocess,DBPostProcess
+  name: DBPostProcess
  thresh: 0.3
-  box_thresh: 0.7
+  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/train_icdar2015_label.txt # dataset1
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [ -10,10 ] } }
+            - { 'type': Resize,'args': { 'size': [ 0.5,3 ] } }
+      - EastRandomCropData:
+          size: [ 640,640 ]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','threshold_map','threshold_mask','shrink_map','shrink_mask'] # the dataloader returns a list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size: 16
+    num_workers: 6
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/detection/
+    file_list:
+      - /home/zhoujun20/detection/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736,1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [ 0.485, 0.456, 0.406 ]
+          std: [ 0.229, 0.224, 0.225 ]
+          order: 'hwc'
+      - ToCHWImage:
+      - keepKeys:
+          keep_keys: ['image','shape','polys','ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 1 # must be 1
+    num_workers: 6
\ No newline at end of file
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
-Global:
-  algorithm: EAST
-  use_gpu: true
-  epoch_num: 100000
-  log_smooth_window: 20
-  print_batch_step: 5
-  save_model_dir: ./output/det_east/
-  save_epoch_step: 200
-  eval_batch_step: [5000, 5000]
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 16
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_east_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_east/predicts_east.txt
-  checkpoints:
-  save_inference_dir:
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd,ResNet
-  layers: 50
-Head:
-  function: ppocr.modeling.heads.det_east_head,EASTHead
-  model_name: large
-Loss:
-  function: ppocr.modeling.losses.det_east_loss,EASTLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
-PostProcess:
-  function: ppocr.postprocess.east_postprocess,EASTPostPocess
-  score_thresh: 0.8
-  cover_thresh: 0.1
-  nms_thresh: 0.2
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
-Global:
-  algorithm: SAST
-  use_gpu: true
-  epoch_num: 2000
-  log_smooth_window: 20
-  print_batch_step: 2
-  save_model_dir: ./output/det_sast/
-  save_epoch_step: 20
-  eval_batch_step: 5000
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 8
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_sast_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_sast/predicts_sast.txt
-  checkpoints: 
-  save_inference_dir:
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
-  layers: 50
-Head:
-  function: ppocr.modeling.heads.det_sast_head,SASTHead
-  model_name: large
-  only_fpn_up: False
-#   with_cab: False
-  with_cab: True
-Loss:
-  function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-Optimizer:
-  function: ppocr.optimizer,RMSProp
-  base_lr: 0.001
-  decay:
-    function: piecewise_decay
-    boundaries: [30000, 50000, 80000, 100000, 150000]
-    decay_rate: 0.3
-PostProcess:
-  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
-  score_thresh: 0.5
-  sample_pts_num: 2
-  nms_thresh: 0.2
-  expand_scale: 1.0
-  shrink_ratio_of_width: 0.3
\ No newline at end of file
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
-Global:
-  algorithm: SAST
-  use_gpu: true
-  epoch_num: 2000
-  log_smooth_window: 20
-  print_batch_step: 2
-  save_model_dir: ./output/det_sast/
-  save_epoch_step: 20
-  eval_batch_step: 5000
-  train_batch_size_per_card: 8
-  test_batch_size_per_card: 1
-  image_shape: [3, 512, 512]
-  reader_yml: ./configs/det/det_sast_totaltext_reader.yml
-  pretrain_weights: ./pretrain_models/ResNet50_vd_ssld_pretrained/
-  save_res_path: ./output/det_sast/predicts_sast.txt
-  checkpoints:
-  save_inference_dir:
-Architecture:
-  function: ppocr.modeling.architectures.det_model,DetModel
-Backbone:
-  function: ppocr.modeling.backbones.det_resnet_vd_sast,ResNet
-  layers: 50
-Head:
-  function: ppocr.modeling.heads.det_sast_head,SASTHead
-  model_name: large
-  only_fpn_up: False
-  # with_cab: False
-  with_cab: True
-Loss:
-  function: ppocr.modeling.losses.det_sast_loss,SASTLoss
-Optimizer:
-  function: ppocr.optimizer,RMSProp
-  base_lr: 0.001
-  decay:
-    function: piecewise_decay
-    boundaries: [30000, 50000, 80000, 100000, 150000]
-    decay_rate: 0.3
-PostProcess:
-  function: ppocr.postprocess.sast_postprocess,SASTPostProcess
-  score_thresh: 0.5
-  sample_pts_num: 6
-  nms_thresh: 0.2
-  expand_scale: 1.2
-  shrink_ratio_of_width: 0.2
\ No newline at end of file
--- a/configs/det/det_sast_icdar15_reader.yml
+++ b/configs/det/det_sast_icdar15_reader.yml
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/
-  label_file_path: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
-  data_ratio_list: [0.1, 0.45, 0.3, 0.15]
-  min_crop_side_ratio: 0.3
-  min_crop_size: 24
-  min_text_size: 4
-  max_text_size: 512
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  img_set_dir: ./train_data/icdar2015/text_localization/
-  label_file_path: ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
-  max_side_len: 1536
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  infer_img: ./train_data/icdar2015/text_localization/ch4_test_images/img_11.jpg
-  max_side_len: 1536
--- a/configs/det/det_sast_totaltext_reader.yml
+++ b/configs/det/det_sast_totaltext_reader.yml
-TrainReader:
-  reader_function: ppocr.data.det.dataset_traversal,TrainReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTrain
-  num_workers: 8
-  img_set_dir: ./train_data/
-  label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
-  data_ratio_list: [0.5, 0.5]
-  min_crop_side_ratio: 0.3
-  min_crop_size: 24
-  min_text_size: 4
-  max_text_size: 512
-EvalReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  img_set_dir: ./train_data/
-  label_file_path: ./train_data/total_text_icdar_14pt/test_label_json.txt
-  max_side_len: 768
-TestReader:
-  reader_function: ppocr.data.det.dataset_traversal,EvalTestReader
-  process_function: ppocr.data.det.sast_process,SASTProcessTest
-  infer_img: ./train_data/afs/total_text/Images/Test/img623.jpg
-  max_side_len: 768
--- a/configs/rec/rec_benchmark_reader.yml
+++ b/configs/rec/rec_benchmark_reader.yml
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  num_workers: 8
-  lmdb_sets_dir: ./train_data/data_lmdb_release/training/
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  lmdb_sets_dir: ./train_data/data_lmdb_release/validation/
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
-  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
--- a/configs/rec/rec_chinese_common_train.yml
+++ b/configs/rec/rec_chinese_common_train.yml
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 3000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 128
-  test_batch_size_per_card: 128
-  image_shape: [3, 32, 320]
-  max_text_length: 25
-  character_type: ch
-  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
-  loss_type: ctc
-  distort: false
-  use_space_char: false
-  reader_yml: ./configs/rec/rec_chinese_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 3000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 320]
-  max_text_length: 25
-  character_type: ch
-  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
-  loss_type: ctc
-  distort: false
-  use_space_char: false
-  reader_yml: ./configs/rec/rec_chinese_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: small
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 48
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_chinese_reader.yml
+++ b/configs/rec/rec_chinese_reader.yml
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  num_workers: 8
-  img_set_dir: ./train_data
-  label_file_path: ./train_data/rec_gt_train.txt
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  img_set_dir: ./train_data
-  label_file_path: ./train_data/rec_gt_test.txt
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
--- a/configs/rec/rec_icdar15_reader.yml
+++ b/configs/rec/rec_icdar15_reader.yml
-TrainReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  num_workers: 8
-  img_set_dir: ./train_data/ic15_data
-  label_file_path: ./train_data/ic15_data/rec_gt_train.txt
-EvalReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  img_set_dir: ./train_data/ic15_data
-  label_file_path: ./train_data/ic15_data/rec_gt_test.txt
-TestReader:
-  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
-Global:
-  algorithm: CRNN
-  use_gpu: true
-  epoch_num: 1000
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: ./output/rec_CRNN
-  save_epoch_step: 300
-  eval_batch_step: 500
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  distort: true
-  debug: false
-  reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0005
-  beta1: 0.9
-  beta2: 0.999
-  decay:
-    function: cosine_decay
-    step_each_epoch: 20
-    total_epoch: 1000
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
 Global:
-  algorithm: CRNN
+  use_gpu: false
-  use_gpu: true
+  epoch_num: 500
-  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: output/rec_CRNN
+  save_model_dir: ./output/rec/test/
-  save_epoch_step: 3
+  save_epoch_step: 500
-  eval_batch_step: 2000
+  # run evaluation every `eval_batch_step` training iterations
-  train_batch_size_per_card: 256
+  eval_batch_step: 127
-  test_batch_size_per_card: 256
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
-  image_shape: [3, 32, 100]
+  load_static_weights: True
-  max_text_length: 25
+  cal_metric_during_train: True
-  character_type: en
+  pretrained_model:
-  loss_type: ctc
+  checkpoints: #output/rec/rec_crnn/best_accuracy
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
  save_inference_dir:
-  infer_img:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
-Architecture:
+  # for data or label process
-  function: ppocr.modeling.architectures.rec_model,RecModel
+  max_text_length: 80
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
-Backbone:
+  character_type: 'ch'
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
+  use_space_char: False
-  scale: 0.5
+  infer_mode: False
-  model_name: large
+  use_tps: False
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
 Optimizer:
-  function: ppocr.optimizer,AdamDecay
+  name: Adam
-  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.001
+    warmup_epoch: 4
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [ 1, 2, 2, 2 ]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: fc
+    hidden_size: 96
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/real_data.txt # dataset1
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/label_val_all.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc_lmdb.yml
+Global:
+  use_gpu: true
+  epoch_num: 500
+  log_smooth_window: 20
+  print_batch_step: 1
+  save_model_dir: ./output/rec/test/
+  save_epoch_step: 500
+  # run evaluation every `eval_batch_step` training iterations
+  eval_batch_step: 1016
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: #output/rec/rec_crnn/best_accuracy
+  save_inference_dir:
+  use_visualdl: True
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: /home/zhoujun20/rec/lmdb/dict.txt
+  character_type: 'ch'
+  use_space_char: True
+  infer_mode: False
+  use_tps: False
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.0005
+    warmup_epoch: 1
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+Architecture:
+  type: rec
+  algorithm: CRNN
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: small
+    small_stride: [ 1, 2, 2, 2 ]
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 48
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+TRAIN:
+  dataset:
+    name: LMDBDateSet
+    file_list:
+      - /home/zhoujun20/rec/lmdb/train # dataset1
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+EVAL:
+  dataset:
+    name: LMDBDateSet
+    file_list:
+      - /home/zhoujun20/rec/lmdb/val
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
-Global:
-  algorithm: Rosetta
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_Rosetta
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights: 
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: reshape
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml
-Global:
-  algorithm: RARE
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_RARE
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: attention
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: small
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-Head:
-  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-  Attention:
-    decoder_size: 96
-    word_vector_dim: 96
-Loss:
-  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: small
-Backbone:
-  function: ppocr.modeling.backbones.rec_mobilenet_v3,MobileNetV3
-  scale: 0.5
-  model_name: large
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 96
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
 Global:
-  algorithm: CRNN
+  use_gpu: false
-  use_gpu: true
+  epoch_num: 500
-  epoch_num: 72
  log_smooth_window: 20
  print_batch_step: 10
-  save_model_dir: output/rec_CRNN
+  save_model_dir: ./output/rec/test/
-  save_epoch_step: 3
+  save_epoch_step: 500
-  eval_batch_step: 2000
+  # evaluation interval, in iterations (TODO confirm: the scalar below does not express a "start at 4000, then every 5000" schedule)
-  train_batch_size_per_card: 256
+  eval_batch_step: 127
-  test_batch_size_per_card: 256
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
-  image_shape: [3, 32, 100]
+  load_static_weights: True
-  max_text_length: 25
+  cal_metric_during_train: True
-  character_type: en
+  pretrained_model:
-  loss_type: ctc
+  checkpoints: #output/rec/rec_crnn/best_accuracy
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
  save_inference_dir:
-  infer_img:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  max_text_length: 80
+  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
+  character_type: 'ch'
+  use_space_char: False
+  infer_mode: False
+  use_tps: False
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
 Optimizer:
-  function: ppocr.optimizer,AdamDecay
+  name: Adam
-  base_lr: 0.001
  beta1: 0.9
  beta2: 0.999
+  learning_rate:
+    name: Cosine
+    lr: 0.001
+    warmup_epoch: 4
+  regularizer:
+    name: 'L2'
+    factor: 0.00001
+# Model definition: a recognition ("rec") model using the CRNN algorithm,
+# described as Transform -> Backbone -> Neck -> Head.
+Architecture:
+  type: rec
+  algorithm: CRNN
+  # left empty (null): no transform stage is configured
+  Transform:
+  Backbone:
+    name: ResNet
+    # NOTE(review): the file name says r34 and the previous config used
+    # layers: 34 - confirm that 200 is intended
+    layers: 200
+  Neck:
+    name: SequenceEncoder
+    # NOTE(review): the file name says bilstm and the previous config used
+    # encoder_type: rnn with hidden_size: 256 - confirm that fc/96 is intended
+    encoder_type: fc
+    hidden_size: 96
+  Head:
+    name: CTC
+    fc_decay: 0.00001
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+# Training data pipeline: dataset definition plus data loader settings.
+TRAIN:
+  dataset:
+    name: SimpleDataSet
+    # NOTE(review): machine-specific absolute paths - adjust to the local
+    # dataset location; image paths from the label files are joined onto data_dir
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/real_data.txt # dataset1
+    # NOTE(review): two ratios but only one file_list entry; build_dataloader
+    # substitutes [1.0] when file_list has a single item, so this value is
+    # unused until a second label file is added
+    ratio_list: [ 0.4,0.6 ]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecAug:
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    batch_size: 256
+    shuffle: True
+    drop_last: True
+    num_workers: 6
+# Evaluation data pipeline: same layout as TRAIN, without augmentation
+# (no RecAug step) and without shuffling or dropping the last batch.
+EVAL:
+  dataset:
+    name: SimpleDataSet
+    # NOTE(review): machine-specific absolute paths - adjust to the local
+    # dataset location
+    data_dir: /home/zhoujun20/rec
+    file_list:
+      - /home/zhoujun20/rec/label_val_all.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [ 3,32,320 ]
+      - keepKeys:
+          keep_keys: [ 'image','label','length' ] # the dataloader returns a list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size: 256
+    num_workers: 6
--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
-Global:
-  algorithm: Rosetta
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_Rosetta
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: reshape
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
-Global:
-  algorithm: RARE
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_RARE
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: attention
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-Head:
-  function: ppocr.modeling.heads.rec_attention_head,AttentionPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-  Attention:
-    decoder_size: 128
-    word_vector_dim: 128
-Loss:
-  function: ppocr.modeling.losses.rec_attention_loss,AttentionLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
-Global:
-  algorithm: STARNet
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_STARNet
-  save_epoch_step: 3
-  eval_batch_step: 2000
-  train_batch_size_per_card: 256
-  test_batch_size_per_card: 256
-  image_shape: [3, 32, 100]
-  max_text_length: 25
-  character_type: en
-  loss_type: ctc
-  tps: true
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights:
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-TPS:
-  function: ppocr.modeling.stns.tps,TPS
-  num_fiducial: 20
-  loc_lr: 0.1
-  model_name: large
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_vd,ResNet
-  layers: 34
-Head:
-  function: ppocr.modeling.heads.rec_ctc_head,CTCPredict
-  encoder_type: rnn
-  SeqRNN:
-    hidden_size: 256
-Loss:
-  function: ppocr.modeling.losses.rec_ctc_loss,CTCLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.001
-  beta1: 0.9
-  beta2: 0.999
--- a/configs/rec/rec_r50fpn_vd_none_srn.yml
+++ b/configs/rec/rec_r50fpn_vd_none_srn.yml
-Global:
-  algorithm: SRN
-  use_gpu: true
-  epoch_num: 72
-  log_smooth_window: 20
-  print_batch_step: 10
-  save_model_dir: output/rec_pvam_withrotate
-  save_epoch_step: 1
-  eval_batch_step: 8000
-  train_batch_size_per_card: 64
-  test_batch_size_per_card: 1
-  image_shape: [1, 64, 256]
-  max_text_length: 25
-  character_type: en
-  loss_type: srn
-  num_heads: 8
-  average_window: 0.15
-  max_average_window: 15625
-  min_average_window: 10000
-  reader_yml: ./configs/rec/rec_benchmark_reader.yml
-  pretrain_weights: 
-  checkpoints:
-  save_inference_dir:
-  infer_img:
-Architecture:
-  function: ppocr.modeling.architectures.rec_model,RecModel
-Backbone:
-  function: ppocr.modeling.backbones.rec_resnet_fpn,ResNet
-  layers: 50
-Head:
-  function: ppocr.modeling.heads.rec_srn_all_head,SRNPredict
-  encoder_type: rnn
-  num_encoder_TUs: 2
-  num_decoder_TUs: 4
-  hidden_dims: 512
-  SeqRNN:
-    hidden_size: 256
-Loss:
-  function: ppocr.modeling.losses.rec_srn_loss,SRNLoss
-Optimizer:
-  function: ppocr.optimizer,AdamDecay
-  base_lr: 0.0001
-  beta1: 0.9
-  beta2: 0.999
--- a/ppocr/data/__init__.py
+++ b/ppocr/data/__init__.py
@@ -11,3 +11,114 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import os
+import sys
+import numpy as np
+import paddle
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
+import copy
+from paddle.io import DataLoader, DistributedBatchSampler, BatchSampler
+import paddle.distributed as dist
+from ppocr.data.imaug import transform, create_operators
+__all__ = ['build_dataloader', 'transform', 'create_operators']
+def build_dataset(config, global_config):
+    """
+    Instantiate the dataset class selected by ``config['name']``.
+    Args:
+        config(dict): dataset section of the configuration; must contain
+            ``name`` (one of the supported class names). It is copied before
+            use, so the caller's dict is left untouched and can be reused to
+            build several datasets.
+        global_config(dict): global section, forwarded to the dataset class.
+    Returns:
+        the constructed dataset instance
+    Raises:
+        KeyError: if ``config`` has no ``name`` entry
+        AssertionError: if the name is not a supported dataset
+    """
+    from ppocr.data.dataset import SimpleDataSet, LMDBDateSet
+    # Explicit name -> class table instead of eval() on a string from the config.
+    dataset_classes = {
+        'SimpleDataSet': SimpleDataSet,
+        'LMDBDateSet': LMDBDateSet,
+    }
+    support_dict = list(dataset_classes)
+    # Work on a private copy: popping 'name' from the caller's dict made a
+    # second build_dataset() call with the same config fail with KeyError.
+    config = copy.deepcopy(config)
+    module_name = config.pop('name')
+    assert module_name in support_dict, Exception(
+        'DataSet only support {}'.format(support_dict))
+    dataset = dataset_classes[module_name](config, global_config)
+    return dataset
+def build_dataloader(config, device, distributed=False, global_config=None):
+    """
+    Build one dataset per label file and wrap them in a BatchBalancedDataLoader.
+    Args:
+        config(dict): a pipeline section with a ``dataset`` mapping
+            (``name``, ``file_list``, optional ``ratio_list``, plus the keys
+            the dataset class reads) and a ``loader`` mapping. It is
+            deep-copied, so the caller's configuration is not modified.
+        device: place handed to the underlying data loaders.
+        distributed(bool): forwarded to BatchBalancedDataLoader.
+        global_config(dict): global section, forwarded to every dataset.
+    Returns:
+        tuple: ``(data_loader, info_dict)`` where ``info_dict`` is taken from
+        the last dataset that was built
+    Raises:
+        ValueError: if ``file_list`` is empty
+        KeyError: if several files are given without a ``ratio_list``
+    """
+    from ppocr.data.dataset import BatchBalancedDataLoader
+    config = copy.deepcopy(config)
+    dataset_config = config['dataset']
+    file_list = dataset_config.pop('file_list')
+    # Accept a single path as well as a list of paths.
+    if isinstance(file_list, str):
+        file_list = [file_list]
+    if len(file_list) == 0:
+        raise ValueError('file_list must contain at least one label file')
+    # Always take ratio_list out so it never leaks into the dataset config.
+    ratio_list = dataset_config.pop('ratio_list', None)
+    if len(file_list) == 1:
+        ratio_list = [1.0]
+    elif ratio_list is None:
+        raise KeyError('ratio_list')
+    _dataset_list = []
+    for file in file_list:
+        # build_dataset may consume entries (e.g. 'name') of the dict it is
+        # given, so every dataset gets its own copy of the shared settings.
+        single_config = copy.deepcopy(dataset_config)
+        single_config['file_list'] = file
+        _dataset_list.append(build_dataset(single_config, global_config))
+    data_loader = BatchBalancedDataLoader(_dataset_list, ratio_list,
+                                          distributed, device, config['loader'])
+    return data_loader, _dataset_list[-1].info_dict
+def test_loader():
+    """
+    Manual smoke test: build the TRAIN loader from the config given on the
+    command line, iterate over one epoch on CPU and print per-batch timings.
+    The matplotlib visualisation that used to follow the unconditional
+    ``continue`` was unreachable and has been dropped.
+    """
+    import time
+    from tools.program import load_config, ArgsParser
+    FLAGS = ArgsParser().parse_args()
+    config = load_config(FLAGS.config)
+    place = paddle.CPUPlace()
+    paddle.disable_static(place)
+    data_loader, _ = build_dataloader(
+        config['TRAIN'], place, global_config=config['Global'])
+    start = time.time()
+    num_batches = len(data_loader)
+    print(num_batches)
+    for epoch in range(1):
+        print('epoch {} ****************'.format(epoch))
+        for i, batch in enumerate(data_loader):
+            # The balanced loader restarts exhausted sub-loaders instead of
+            # stopping, so the epoch has to be ended explicitly after
+            # exactly len(data_loader) batches.
+            if i >= num_batches:
+                break
+            t = time.time() - start
+            start = time.time()
+            print('{}, batch : {} ,time {}'.format(i, len(batch[0]), t))
+if __name__ == '__main__':
+    test_loader()
--- a/ppocr/data/dataset.py
+++ b/ppocr/data/dataset.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import copy
+import numpy as np
+import os
+import lmdb
+import random
+import signal
+import paddle
+from paddle.io import Dataset, DataLoader, DistributedBatchSampler, BatchSampler
+from .imaug import transform, create_operators
+from ppocr.utils.logging import get_logger
+def term_mp(sig_num, frame):
+    """
+    Signal handler: report the current process and send SIGKILL to its
+    whole process group (which includes this process).
+    """
+    current_pid = os.getpid()
+    group_id = os.getpgid(os.getpid())
+    notice = "main proc {} exit, kill process group {}".format(current_pid,
+                                                               group_id)
+    print(notice)
+    os.killpg(group_id, signal.SIGKILL)
+signal.signal(signal.SIGINT, term_mp)
+signal.signal(signal.SIGTERM, term_mp)
+class ModeException(Exception):
+    """
+    Error for an unsupported mode; the message always ends with the list of
+    accepted modes and the mode that was given.
+    """
+    def __init__(self, message='', mode=''):
+        supported_hint = ("\nOnly the following 3 modes are supported: "
+                          "train, valid, test. Given mode is {}").format(mode)
+        message += supported_hint
+        super(ModeException, self).__init__(message)
+class SampleNumException(Exception):
+    """
+    Error for a dataset that holds fewer samples than one batch while
+    drop_last is enabled; the message reports both numbers.
+    """
+    def __init__(self, message='', sample_num=0, batch_size=1):
+        template = ("\nError: The number of the whole data ({}) is smaller "
+                    "than the batch_size ({}), and drop_last is turnning on, "
+                    "so nothing  will feed in program, Terminated now. "
+                    "Please reset batch_size to a smaller number or feed "
+                    "more data!")
+        message += template.format(sample_num, batch_size)
+        super(SampleNumException, self).__init__(message)
+def get_file_list(file_list, data_dir, delimiter='\t'):
+    """
+    Read label files and build the list of samples, in file order.
+    Args:
+        file_list(str|list): path of one label file, or a list of such paths
+        data_dir(str): directory every image path from the files is joined onto
+        delimiter(str): separator between image path and label on each line
+    Returns:
+        list of dict, one ``{'img_path': ..., 'label': ...}`` per valid line.
+        Lines that do not split into exactly two fields are reported with a
+        warning and skipped.
+    """
+    if isinstance(file_list, str):
+        file_list = [file_list]
+    logger = None
+    data_source_list = []
+    for file in file_list:
+        # Labels may contain non-ASCII text, so do not depend on the
+        # platform's default encoding.
+        with open(file, encoding='utf-8') as f:
+            for raw_line in f:
+                line = raw_line.strip()
+                try:
+                    img_path, label = line.split(delimiter)
+                except ValueError:
+                    # Malformed line: skip it. Falling through would reuse
+                    # the previous sample's values, or fail with a NameError
+                    # when the very first line is malformed.
+                    if logger is None:
+                        logger = get_logger()
+                    logger.warning('label error in {}'.format(line))
+                    continue
+                img_path = os.path.join(data_dir, img_path)
+                data_source_list.append({'img_path': img_path, 'label': label})
+    return data_source_list
+class LMDBDateSet(Dataset):
+    """
+    Dataset backed by an lmdb database that stores samples under the keys
+    ``label-%09d`` / ``image-%09d`` and the sample count under ``num-samples``.
+    """
+    def __init__(self, config, global_config):
+        """
+        Args:
+            config(dict): dataset section; ``file_list`` is the lmdb directory
+                and ``transforms`` the operator list.
+            global_config(dict): global section; ``max_text_length`` is read here.
+        """
+        super(LMDBDateSet, self).__init__()
+        # data_list holds the lmdb indices of the usable samples
+        self.data_list = self.load_lmdb_dataset(
+            config['file_list'], global_config['max_text_length'])
+        random.shuffle(self.data_list)
+        self.ops = create_operators(config['transforms'], global_config)
+        # for rec: expose the `character` attribute of the last operator that has one
+        character = ''
+        for op in self.ops:
+            if hasattr(op, 'character'):
+                character = getattr(op, 'character')
+        self.info_dict = {'character': character}
+    def load_lmdb_dataset(self, data_dir, max_text_length):
+        """
+        Open the lmdb environment (kept as ``self.env``) and return the
+        indices of all samples whose label is at most ``max_text_length``
+        characters long.
+        """
+        self.env = lmdb.open(
+            data_dir,
+            max_readers=32,
+            readonly=True,
+            lock=False,
+            readahead=False,
+            meminit=False)
+        if not self.env:
+            # NOTE(review): terminates the interpreter with status 0 even
+            # though this is a failure path
+            print('cannot create lmdb from %s' % (data_dir))
+            exit(0)
+        filtered_index_list = []
+        with self.env.begin(write=False) as txn:
+            nSamples = int(txn.get('num-samples'.encode()))
+            self.nSamples = nSamples
+            for index in range(self.nSamples):
+                index += 1  # lmdb starts with 1
+                label_key = 'label-%09d'.encode() % index
+                # NOTE(review): a missing label key makes txn.get return None
+                # and this line fail with AttributeError
+                label = txn.get(label_key).decode('utf-8')
+                if len(label) > max_text_length:
+                    # over-long labels are dropped silently
+                    continue
+                # Only the label length is checked here; labels are not
+                # filtered by character set in this method.
+                filtered_index_list.append(index)
+        return filtered_index_list
+    def print_lmdb_sets_info(self, lmdb_sets):
+        """
+        Log a one-line summary built from the ``dirpath`` and ``num_samples``
+        entries of every item in ``lmdb_sets``.
+        """
+        lmdb_info_strs = []
+        for dataset_idx in range(len(lmdb_sets)):
+            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
+                                   lmdb_sets[dataset_idx]['num_samples'])
+            lmdb_info_strs.append(tmp_str)
+        lmdb_info_strs = ''.join(lmdb_info_strs)
+        logger = get_logger()
+        logger.info("DataSummary:" + lmdb_info_strs)
+        return
+    def __getitem__(self, idx):
+        """
+        Return the transformed sample at position ``idx`` of the shuffled
+        index list. If the label is missing or the transforms return None,
+        a randomly chosen other sample is returned instead.
+        """
+        # translate the dataset position into the lmdb index
+        idx = self.data_list[idx]
+        with self.env.begin(write=False) as txn:
+            label_key = 'label-%09d'.encode() % idx
+            label = txn.get(label_key)
+            if label is not None:
+                label = label.decode('utf-8')
+                img_key = 'image-%09d'.encode() % idx
+                imgbuf = txn.get(img_key)
+                # the image is passed on as the raw value read from lmdb
+                data = {'image': imgbuf, 'label': label}
+                outs = transform(data, self.ops)
+            else:
+                outs = None
+            if outs is None:
+                # retry with a random position; recursion ends as soon as
+                # one sample passes the transforms
+                return self.__getitem__(np.random.randint(self.__len__()))
+            return outs
+    def __len__(self):
+        """Number of usable (length-filtered) samples."""
+        return len(self.data_list)
+class SimpleDataSet(Dataset):
+    """
+    Dataset that reads ``image path <delimiter> label`` lines from label
+    files and loads the raw image bytes from disk on access.
+    """
+    def __init__(self, config, global_config):
+        """
+        Args:
+            config(dict): dataset section; reads ``file_list``, ``data_dir``,
+                ``transforms`` and the optional ``delimiter`` (default tab).
+            global_config(dict): global section, forwarded to the operators.
+        """
+        super(SimpleDataSet, self).__init__()
+        samples = get_file_list(config['file_list'], config['data_dir'],
+                                config.get('delimiter', '\t'))
+        random.shuffle(samples)
+        self.data_list = samples
+        self.ops = create_operators(config['transforms'], global_config)
+        # for rec: expose the `character` attribute of the last operator that has one
+        self.info_dict = {'character': ''}
+        for op in self.ops:
+            if hasattr(op, 'character'):
+                self.info_dict['character'] = getattr(op, 'character')
+    def __getitem__(self, idx):
+        """
+        Return the transformed sample at ``idx``; when the transforms return
+        None, a randomly chosen other sample is returned instead.
+        """
+        sample = copy.deepcopy(self.data_list[idx])
+        with open(sample['img_path'], 'rb') as img_file:
+            sample['image'] = img_file.read()
+        result = transform(sample, self.ops)
+        if result is not None:
+            return result
+        return self.__getitem__(np.random.randint(self.__len__()))
+    def __len__(self):
+        """Number of samples read from the label files."""
+        return len(self.data_list)
+class BatchBalancedDataLoader(object):
+    """
+    Iterator that draws one sub-batch from a data loader per dataset and
+    concatenates them, so every batch mixes the datasets in fixed proportions.
+    A sub-loader that is exhausted is restarted, so iteration does not end on
+    its own; ``len()`` gives the batch count of the shortest sub-loader.
+    """
+    def __init__(self,
+                 dataset_list: list,
+                 ratio_list: list,
+                 distributed,
+                 device,
+                 loader_args: dict):
+        """
+        Combine the datasets in ``dataset_list`` according to the matching
+        proportions in ``ratio_list``, so that the samples of every batch are
+        drawn in those proportions.
+        :param dataset_list: list of datasets
+        :param ratio_list: list of proportions, one per dataset, summing to 1
+        :param distributed: use DistributedBatchSampler instead of BatchSampler
+        :param device: place handed to every DataLoader
+        :param loader_args: loader settings; reads ``batch_size``, ``shuffle``,
+            ``drop_last`` and ``num_workers`` (the dict itself is not modified)
+        """
+        # Compare with a tolerance: e.g. 0.1 + 0.2 + 0.7 is not exactly 1.0
+        # in floating point.
+        assert abs(sum(ratio_list) - 1) < 1e-6 and len(dataset_list) == len(
+            ratio_list)
+        # Shallow copy so that 'batch_size' is not removed from the caller's dict.
+        loader_args = dict(loader_args)
+        self.dataset_len = 0
+        self.data_loader_list = []
+        self.dataloader_iter_list = []
+        all_batch_size = loader_args.pop('batch_size')
+        # Every dataset contributes at least one sample per batch.
+        batch_size_list = list(
+            map(int, [max(1.0, all_batch_size * x) for x in ratio_list]))
+        # Rounding leftovers go to the dataset with the largest ratio.
+        remain_num = all_batch_size - sum(batch_size_list)
+        batch_size_list[np.argmax(ratio_list)] += remain_num
+        for _dataset, _batch_size in zip(dataset_list, batch_size_list):
+            if distributed:
+                batch_sampler_class = DistributedBatchSampler
+            else:
+                batch_sampler_class = BatchSampler
+            batch_sampler = batch_sampler_class(
+                dataset=_dataset,
+                batch_size=_batch_size,
+                shuffle=loader_args['shuffle'],
+                drop_last=loader_args['drop_last'], )
+            _data_loader = DataLoader(
+                dataset=_dataset,
+                batch_sampler=batch_sampler,
+                places=device,
+                num_workers=loader_args['num_workers'],
+                return_list=True, )
+            self.data_loader_list.append(_data_loader)
+            self.dataloader_iter_list.append(iter(_data_loader))
+            self.dataset_len += len(_dataset)
+    def __iter__(self):
+        return self
+    def __len__(self):
+        return min([len(x) for x in self.data_loader_list])
+    def __next__(self):
+        """
+        Fetch one sub-batch from every sub-loader and concatenate them field
+        by field along axis 0.
+        :raises StopIteration: if no sub-loader delivered a batch
+        """
+        batch = []
+        for i, data_loader_iter in enumerate(self.dataloader_iter_list):
+            try:
+                _batch_i = next(data_loader_iter)
+                batch.append(_batch_i)
+            except StopIteration:
+                # This sub-loader is exhausted: restart it and keep going.
+                self.dataloader_iter_list[i] = iter(self.data_loader_list[i])
+                _batch_i = next(self.dataloader_iter_list[i])
+                batch.append(_batch_i)
+            except ValueError:
+                # Deliberate best effort kept from the original: a sub-loader
+                # failing with ValueError is left out of this batch.
+                pass
+        if len(batch) == 0:
+            # Nothing could be fetched (previously an IndexError on batch[0]).
+            raise StopIteration
+        batch_list = []
+        batch_item_size = len(batch[0])
+        for i in range(batch_item_size):
+            cur_item_list = [batch_i[i] for batch_i in batch]
+            batch_list.append(paddle.concat(cur_item_list, axis=0))
+        return batch_list
+def fill_batch(batch):
+    """
+    2020.09.08: The current paddle version only supports returning data with the same length.
+                Therefore, fill in the batches with inconsistent lengths.
+                this method is currently only useful for text detection
+    Every field of every sample is zero-padded along its first axis to the
+    longest length of that field in the batch, and the list of original
+    lengths is appended to each sample.
+    Args:
+        batch(list): samples, each a list of numpy arrays (same number of
+            fields per sample); modified in place
+    Returns:
+        the same ``batch`` object
+    """
+    if len(batch) == 0:
+        return batch
+    keys = list(range(len(batch[0])))
+    v_max_len_dict = {}
+    for k in keys:
+        v_max_len_dict[k] = max([len(item[k]) for item in batch])
+    for item in batch:
+        length = []
+        for k in keys:
+            v = item[k]
+            length.append(len(v))
+            assert isinstance(v, np.ndarray)
+            if len(v) == v_max_len_dict[k]:
+                continue
+            # Take trailing shape and dtype from the array itself rather than
+            # from v[0], so that empty arrays can be padded as well.
+            tmp_shape = [v_max_len_dict[k] - len(v)] + list(v.shape[1:])
+            tmp_array = np.zeros(tmp_shape, dtype=v.dtype)
+            new_array = np.concatenate([v, tmp_array])
+            item[k] = new_array
+        item.append(length)
+    return batch
--- a/ppocr/data/det/data_augment.py
+++ b/ppocr/data/det/data_augment.py
-# -*- coding:utf-8 -*- 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-import numpy as np
-import random
-import cv2
-import math
-import imgaug
-import imgaug.augmenters as iaa
-def AugmentData(data):
-    img = data['image']
-    shape = img.shape
-    aug = iaa.Sequential(
-        [iaa.Fliplr(0.5), iaa.Affine(rotate=(-10, 10)), iaa.Resize(
-            (0.5, 3))]).to_deterministic()
-    def may_augment_annotation(aug, data, shape):
-        if aug is None:
-            return data
-        line_polys = []
-        for poly in data['polys']:
-            new_poly = may_augment_poly(aug, shape, poly)
-            line_polys.append(new_poly)
-        data['polys'] = np.array(line_polys)
-        return data
-    def may_augment_poly(aug, img_shape, poly):
-        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
-        keypoints = aug.augment_keypoints(
-            [imgaug.KeypointsOnImage(
-                keypoints, shape=img_shape)])[0].keypoints
-        poly = [(p.x, p.y) for p in keypoints]
-        return poly
-    img_aug = aug.augment_image(img)
-    data['image'] = img_aug
-    data = may_augment_annotation(aug, data, shape)
-    return data
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-import os
-import sys
-import math
-import random
-import functools
-import numpy as np
-import cv2
-import string
-from ppocr.utils.utility import initial_logger
-logger = initial_logger()
-from ppocr.utils.utility import create_module
-from ppocr.utils.utility import get_image_file_list
-import time
-class TrainReader(object):
-    def __init__(self, params):
-        self.num_workers = params['num_workers']
-        self.label_file_path = params['label_file_path']
-        print(self.label_file_path)
-        self.use_mul_data = False
-        if isinstance(self.label_file_path, list):
-            self.use_mul_data = True
-            self.data_ratio_list = params['data_ratio_list']
-        self.batch_size = params['train_batch_size_per_card']
-        assert 'process_function' in params,\
-            "absence process_function in Reader"
-        self.process = create_module(params['process_function'])(params)
-    def __call__(self, process_id):     
-        def sample_iter_reader():
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
-            random.shuffle(img_id_list)
-            if sys.platform == "win32" and self.num_workers != 1:
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-            for img_id in range(process_id, img_num, self.num_workers):
-                label_infor = label_infor_list[img_id_list[img_id]]
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-        def sample_iter_reader_mul():
-            batch_size = 1000
-            data_source_list = self.label_file_path
-            batch_size_list = list(map(int, [max(1.0, batch_size * x) for x in self.data_ratio_list]))
-            print(self.data_ratio_list, batch_size_list)
-            data_filename_list, data_size_list, fetch_record_list = [], [], []
-            for data_source in data_source_list:
-                image_files = open(data_source, "rb").readlines()
-                random.shuffle(image_files)
-                data_filename_list.append(image_files)
-                data_size_list.append(len(image_files))
-                fetch_record_list.append(0)
-            image_batch = []
-            # get a batch of img_fns and poly_fns
-            for i in range(0, len(batch_size_list)):
-                bs = batch_size_list[i]
-                ds = data_size_list[i]
-                image_names = data_filename_list[i]
-                fetch_record = fetch_record_list[i]
-                data_path = data_source_list[i]
-                for j in range(fetch_record, fetch_record + bs):
-                    index = j % ds
-                    image_batch.append(image_names[index])
-                if (fetch_record + bs) > ds:
-                    fetch_record_list[i] = 0
-                    random.shuffle(data_filename_list[i])
-                else:
-                    fetch_record_list[i] = fetch_record + bs
-            if sys.platform == "win32":
-                print("multiprocess is not fully compatible with Windows."
-                      "num_workers will be 1.")
-                self.num_workers = 1
-            for label_infor in image_batch:
-                outs = self.process(label_infor)
-                if outs is None:
-                    continue
-                yield outs
-        def batch_iter_reader():
-            batch_outs = []
-            if self.use_mul_data:
-                print("Sample date from multiple datasets!")
-                for outs in sample_iter_reader_mul():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []                
-            else:
-                for outs in sample_iter_reader():
-                    batch_outs.append(outs)
-                    if len(batch_outs) == self.batch_size:
-                        yield batch_outs
-                        batch_outs = []
-        return batch_iter_reader
-class EvalTestReader(object):
-    def __init__(self, params):
-        self.params = params
-        assert 'process_function' in params,\
-            "absence process_function in EvalTestReader"
-    def __call__(self, mode):
-        process_function = create_module(self.params['process_function'])(
-            self.params)
-        batch_size = self.params['test_batch_size_per_card']
-        img_list = []
-        if mode != "test":
-            img_set_dir = self.params['img_set_dir']
-            img_name_list_path = self.params['label_file_path']
-            with open(img_name_list_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    img_name = line.decode().strip("\n").split("\t")[0]
-                    img_path = os.path.join(img_set_dir, img_name)
-                    img_list.append(img_path)
-        else:
-            img_path = self.params['infer_img']
-            img_list = get_image_file_list(img_path)
-        def batch_iter_reader():
-            batch_outs = []
-            for img_path in img_list:
-                img = cv2.imread(img_path)
-                if img is None:
-                    logger.info("{} does not exist!".format(img_path))
-                    continue
-                elif len(list(img.shape)) == 2 or img.shape[2] == 1:
-                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                outs = process_function(img)
-                outs.append(img_path)
-                batch_outs.append(outs)
-                if len(batch_outs) == batch_size:
-                    yield batch_outs
-                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
-        return batch_iter_reader
--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-import math
-import cv2
-import numpy as np
-import json
-import sys
-from ppocr.utils.utility import initial_logger, check_and_read_gif
-logger = initial_logger()
-from .data_augment import AugmentData
-from .random_crop_data import RandomCropData
-from .make_shrink_map import MakeShrinkMap
-from .make_border_map import MakeBorderMap
-class DBProcessTrain(object):
-    """
-    DB pre-process for Train mode
-    """
-    def __init__(self, params):
-        self.img_set_dir = params['img_set_dir']
-        self.image_shape = params['image_shape']
-    def order_points_clockwise(self, pts):
-        rect = np.zeros((4, 2), dtype="float32")
-        s = pts.sum(axis=1)
-        rect[0] = pts[np.argmin(s)]
-        rect[2] = pts[np.argmax(s)]
-        diff = np.diff(pts, axis=1)
-        rect[1] = pts[np.argmin(diff)]
-        rect[3] = pts[np.argmax(diff)]
-        return rect
-    def make_data_dict(self, imgvalue, entry):
-        boxes = []
-        texts = []
-        ignores = []
-        for rect in entry:
-            points = rect['points']
-            transcription = rect['transcription']
-            try:
-                box = self.order_points_clockwise(
-                    np.array(points).reshape(-1, 2))
-                if cv2.contourArea(box) > 0:
-                    boxes.append(box)
-                    texts.append(transcription)
-                    ignores.append(transcription in ['*', '###'])
-            except:
-                print('load label failed!')
-        data = {
-            'image': imgvalue,
-            'shape': [imgvalue.shape[0], imgvalue.shape[1]],
-            'polys': np.array(boxes),
-            'texts': texts,
-            'ignore_tags': ignores,
-        }
-        return data
-    def NormalizeImage(self, data):
-        im = data['image']
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im -= img_mean
-        im /= img_std
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        data['image'] = im
-        return data
-    def FilterKeys(self, data):
-        filter_keys = ['polys', 'texts', 'ignore_tags', 'shape']
-        for key in filter_keys:
-            if key in data:
-                del data[key]
-        return data
-    def convert_label_infor(self, label_infor):
-        label_infor = label_infor.decode()
-        label_infor = label_infor.encode('utf-8').decode('utf-8-sig')
-        substr = label_infor.strip("\n").split("\t")
-        img_path = self.img_set_dir + substr[0]
-        label = json.loads(substr[1])
-        return img_path, label
-    def __call__(self, label_infor):
-        img_path, gt_label = self.convert_label_infor(label_infor)
-        imgvalue, flag = check_and_read_gif(img_path)
-        if not flag:
-            imgvalue = cv2.imread(img_path)
-        if imgvalue is None:
-            logger.info("{} does not exist!".format(img_path))
-            return None
-        if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
-            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
-        data = self.make_data_dict(imgvalue, gt_label)
-        data = AugmentData(data)
-        data = RandomCropData(data, self.image_shape[1:])
-        data = MakeShrinkMap(data)
-        data = MakeBorderMap(data)
-        data = self.NormalizeImage(data)
-        data = self.FilterKeys(data)
-        return data['image'], data['shrink_map'], data['shrink_mask'], data[
-            'threshold_map'], data['threshold_mask']
-class DBProcessTest(object):
-    """
-    DB pre-process for Test mode
-    """
-    def __init__(self, params):
-        super(DBProcessTest, self).__init__()
-        self.resize_type = 0
-        if 'test_image_shape' in params:
-            self.image_shape = params['test_image_shape']
-            # print(self.image_shape)
-            self.resize_type = 1
-        if 'max_side_len' in params:
-            self.max_side_len = params['max_side_len']
-        else:
-            self.max_side_len = 2400
-    def resize_image_type0(self, im):
-        """
-        resize image to a size multiple of 32 which is required by the network
-        args:
-            img(array): array with shape [h, w, c]
-        return(tuple):
-            img, (ratio_h, ratio_w)
-        """
-        max_side_len = self.max_side_len
-        h, w, _ = im.shape
-        resize_w = w
-        resize_h = h
-        # limit the max side
-        if max(resize_h, resize_w) > max_side_len:
-            if resize_h > resize_w:
-                ratio = float(max_side_len) / resize_h
-            else:
-                ratio = float(max_side_len) / resize_w
-        else:
-            ratio = 1.
-        resize_h = int(resize_h * ratio)
-        resize_w = int(resize_w * ratio)
-        if resize_h % 32 == 0:
-            resize_h = resize_h
-        elif resize_h // 32 <= 1:
-            resize_h = 32
-        else:
-            resize_h = (resize_h // 32 - 1) * 32
-        if resize_w % 32 == 0:
-            resize_w = resize_w
-        elif resize_w // 32 <= 1:
-            resize_w = 32
-        else:
-            resize_w = (resize_w // 32 - 1) * 32
-        try:
-            if int(resize_w) <= 0 or int(resize_h) <= 0:
-                return None, (None, None)
-            im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        except:
-            print(im.shape, resize_w, resize_h)
-            sys.exit(0)
-        ratio_h = resize_h / float(h)
-        ratio_w = resize_w / float(w)
-        return im, (ratio_h, ratio_w)
-    def resize_image_type1(self, im):
-        resize_h, resize_w = self.image_shape
-        ori_h, ori_w = im.shape[:2]  # (h, w, c)
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
-        ratio_h = float(resize_h) / ori_h
-        ratio_w = float(resize_w) / ori_w
-        return im, (ratio_h, ratio_w)
-    def normalize(self, im):
-        img_mean = [0.485, 0.456, 0.406]
-        img_std = [0.229, 0.224, 0.225]
-        im = im.astype(np.float32, copy=False)
-        im = im / 255
-        im[:, :, 0] -= img_mean[0]
-        im[:, :, 1] -= img_mean[1]
-        im[:, :, 2] -= img_mean[2]
-        im[:, :, 0] /= img_std[0]
-        im[:, :, 1] /= img_std[1]
-        im[:, :, 2] /= img_std[2]
-        channel_swap = (2, 0, 1)
-        im = im.transpose(channel_swap)
-        return im
-    def __call__(self, im):
-        if self.resize_type == 0:
-            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
-        else:
-            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
-        im = self.normalize(im)
-        im = im[np.newaxis, :]
-        return [im, (ratio_h, ratio_w)]
--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
--- a/ppocr/data/det/make_border_map.py
+++ b/ppocr/data/det/make_border_map.py
-# -*- coding:utf-8 -*- 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-import numpy as np
-import cv2
-np.seterr(divide='ignore', invalid='ignore')
-import pyclipper
-from shapely.geometry import Polygon
-import sys
-import warnings
-warnings.simplefilter("ignore")
-def draw_border_map(polygon, canvas, mask, shrink_ratio):
-    polygon = np.array(polygon)
-    assert polygon.ndim == 2
-    assert polygon.shape[1] == 2
-    polygon_shape = Polygon(polygon)
-    if polygon_shape.area <= 0:
-        return
-    distance = polygon_shape.area * (
-        1 - np.power(shrink_ratio, 2)) / polygon_shape.length
-    subject = [tuple(l) for l in polygon]
-    padding = pyclipper.PyclipperOffset()
-    padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-    padded_polygon = np.array(padding.Execute(distance)[0])
-    cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
-    xmin = padded_polygon[:, 0].min()
-    xmax = padded_polygon[:, 0].max()
-    ymin = padded_polygon[:, 1].min()
-    ymax = padded_polygon[:, 1].max()
-    width = xmax - xmin + 1
-    height = ymax - ymin + 1
-    polygon[:, 0] = polygon[:, 0] - xmin
-    polygon[:, 1] = polygon[:, 1] - ymin
-    xs = np.broadcast_to(
-        np.linspace(
-            0, width - 1, num=width).reshape(1, width), (height, width))
-    ys = np.broadcast_to(
-        np.linspace(
-            0, height - 1, num=height).reshape(height, 1), (height, width))
-    distance_map = np.zeros((polygon.shape[0], height, width), dtype=np.float32)
-    for i in range(polygon.shape[0]):
-        j = (i + 1) % polygon.shape[0]
-        absolute_distance = _distance(xs, ys, polygon[i], polygon[j])
-        distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
-    distance_map = distance_map.min(axis=0)
-    xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
-    xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
-    ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
-    ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
-    canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
-        1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
-                         xmin_valid - xmin:xmax_valid - xmax + width],
-        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
-def _distance(xs, ys, point_1, point_2):
-    '''
-    compute the distance from point to a line
-    ys: coordinates in the first axis
-    xs: coordinates in the second axis
-    point_1, point_2: (x, y), the end of the line
-    '''
-    height, width = xs.shape[:2]
-    square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[1])
-    square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[1])
-    square_distance = np.square(point_1[0] - point_2[0]) + np.square(point_1[
-        1] - point_2[1])
-    cosin = (square_distance - square_distance_1 - square_distance_2) / (
-        2 * np.sqrt(square_distance_1 * square_distance_2))
-    square_sin = 1 - np.square(cosin)
-    square_sin = np.nan_to_num(square_sin)
-    result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
-                     square_distance)
-    result[cosin <
-           0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin <
-                                                                       0]
-    # self.extend_line(point_1, point_2, result)
-    return result
-def extend_line(point_1, point_2, result, shrink_ratio):
-    ex_point_1 = (
-        int(
-            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_1[1] + (point_1[1] - point_2[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_1),
-        tuple(point_1),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    ex_point_2 = (
-        int(
-            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
-        int(
-            round(point_2[1] + (point_2[1] - point_1[1]) * (1 + shrink_ratio))))
-    cv2.line(
-        result,
-        tuple(ex_point_2),
-        tuple(point_2),
-        4096.0,
-        1,
-        lineType=cv2.LINE_AA,
-        shift=0)
-    return ex_point_1, ex_point_2
-def MakeBorderMap(data):
-    shrink_ratio = 0.4
-    thresh_min = 0.3
-    thresh_max = 0.7
-    im = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-    canvas = np.zeros(im.shape[:2], dtype=np.float32)
-    mask = np.zeros(im.shape[:2], dtype=np.float32)
-    for i in range(len(text_polys)):
-        if ignore_tags[i]:
-            continue
-        draw_border_map(
-            text_polys[i], canvas, mask=mask, shrink_ratio=shrink_ratio)
-    canvas = canvas * (thresh_max - thresh_min) + thresh_min
-    data['threshold_map'] = canvas
-    data['threshold_mask'] = mask
-    return data
--- a/ppocr/data/det/make_shrink_map.py
+++ b/ppocr/data/det/make_shrink_map.py
-# -*- coding:utf-8 -*- 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-import numpy as np
-import cv2
-from shapely.geometry import Polygon
-import pyclipper
-def validate_polygons(polygons, ignore_tags, h, w):
-    '''
-    polygons (numpy.array, required): of shape (num_instances, num_points, 2)
-    '''
-    if len(polygons) == 0:
-        return polygons, ignore_tags
-    assert len(polygons) == len(ignore_tags)
-    for polygon in polygons:
-        polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
-        polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
-    for i in range(len(polygons)):
-        area = polygon_area(polygons[i])
-        if abs(area) < 1:
-            ignore_tags[i] = True
-        if area > 0:
-            polygons[i] = polygons[i][::-1, :]
-    return polygons, ignore_tags
-def polygon_area(polygon):
-    edge = 0
-    for i in range(polygon.shape[0]):
-        next_index = (i + 1) % polygon.shape[0]
-        edge += (polygon[next_index, 0] - polygon[i, 0]) * (
-            polygon[next_index, 1] - polygon[i, 1])
-    return edge / 2.
-def MakeShrinkMap(data):
-    min_text_size = 8
-    shrink_ratio = 0.4
-    image = data['image']
-    text_polys = data['polys']
-    ignore_tags = data['ignore_tags']
-    h, w = image.shape[:2]
-    text_polys, ignore_tags = validate_polygons(text_polys, ignore_tags, h, w)
-    gt = np.zeros((h, w), dtype=np.float32)
-    # gt = np.zeros((1, h, w), dtype=np.float32)
-    mask = np.ones((h, w), dtype=np.float32)
-    for i in range(len(text_polys)):
-        polygon = text_polys[i]
-        height = max(polygon[:, 1]) - min(polygon[:, 1])
-        width = max(polygon[:, 0]) - min(polygon[:, 0])
-        # height = min(np.linalg.norm(polygon[0] - polygon[3]),
-        #             np.linalg.norm(polygon[1] - polygon[2]))
-        # width = min(np.linalg.norm(polygon[0] - polygon[1]),
-        #             np.linalg.norm(polygon[2] - polygon[3]))
-        if ignore_tags[i] or min(height, width) < min_text_size:
-            cv2.fillPoly(mask, polygon.astype(np.int32)[np.newaxis, :, :], 0)
-            ignore_tags[i] = True
-        else:
-            polygon_shape = Polygon(polygon)
-            distance = polygon_shape.area * (
-                1 - np.power(shrink_ratio, 2)) / polygon_shape.length
-            subject = [tuple(l) for l in text_polys[i]]
-            padding = pyclipper.PyclipperOffset()
-            padding.AddPath(subject, pyclipper.JT_ROUND,
-                            pyclipper.ET_CLOSEDPOLYGON)
-            shrinked = padding.Execute(-distance)
-            if shrinked == []:
-                cv2.fillPoly(mask,
-                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
-                ignore_tags[i] = True
-                continue
-            shrinked = np.array(shrinked[0]).reshape(-1, 2)
-            cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
-            # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
-    data['shrink_map'] = gt
-    data['shrink_mask'] = mask
-    return data
--- a/ppocr/data/det/sast_process.py
+++ b/ppocr/data/det/sast_process.py
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+from .iaa_augment import IaaAugment
+from .make_border_map import MakeBorderMap
+from .make_shrink_map import MakeShrinkMap
+from .random_crop_data import EastRandomCropData, PSERandomCrop
+from .rec_img_aug import RecAug, RecResizeImg
+from .operators import *
+from .label_ops import *
+def transform(data, ops=None):
+    """
+    Run ``data`` through every operator in ``ops``, in order.
+    Args:
+        data: the sample handed to the first operator.
+        ops(list): callables applied one after another; None means no
+            operators at all.
+    Returns:
+        The value produced by the last operator, or None as soon as any
+        operator returns None (the remaining operators are not called).
+    """
+    result = data
+    for apply_op in ([] if ops is None else ops):
+        result = apply_op(result)
+        if result is None:
+            return None
+    return result
+def create_operators(op_param_list, global_config=None):
+    """
+    create operators based on the config
+    Args:
+        op_param_list(list): a list of single-key dicts, each mapping an
+            operator class name to its keyword arguments (or to None when
+            the operator takes no arguments)
+        global_config(dict): optional keyword arguments merged into every
+            operator's arguments; on a key clash the global value wins
+    Returns:
+        list: the instantiated operators, in config order
+    """
+    assert isinstance(op_param_list, list), ('operator config should be a list')
+    ops = []
+    for operator in op_param_list:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        # Work on a copy so that merging global_config below does not write
+        # back into the caller's config dict.
+        param = {} if operator[op_name] is None else dict(operator[op_name])
+        if global_config is not None:
+            param.update(global_config)
+        # NOTE(review): eval() resolves op_name in this module's namespace and
+        # would execute any expression it is given; only pass trusted configs.
+        op = eval(op_name)(**param)
+        ops.append(op)
+    return ops
--- a/ppocr/data/imaug/iaa_augment.py
+++ b/ppocr/data/imaug/iaa_augment.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+import imgaug
+import imgaug.augmenters as iaa
+class AugmenterBuilder(object):
+    """ Build imgaug augmenters from list / dict style configuration """
+    def __init__(self):
+        pass
+    def build(self, args, root=True):
+        """
+        Turn a configuration value into an augmenter.
+        Args:
+            args: None or an empty container (nothing to build), a list, or
+                a dict with the keys 'type' and 'args'
+            root(bool): True for the outermost call, where a list is read as
+                a sequence of augmenter configs; False for nested calls,
+                where a list is read as [augmenter name, *positional args]
+        Returns:
+            None when args is None or empty, otherwise the augmenter
+        Raises:
+            RuntimeError: args is non-empty but neither a list nor a dict
+        """
+        if args is None or len(args) == 0:
+            return None
+        if isinstance(args, list):
+            if not root:
+                factory = getattr(iaa, args[0])
+                positional = [self.to_tuple_if_list(item) for item in args[1:]]
+                return factory(*positional)
+            children = [self.build(entry, root=False) for entry in args]
+            return iaa.Sequential(children)
+        if isinstance(args, dict):
+            factory = getattr(iaa, args['type'])
+            keywords = {}
+            for key, value in args['args'].items():
+                keywords[key] = self.to_tuple_if_list(value)
+            return factory(**keywords)
+        raise RuntimeError('unknown augmenter arg: ' + str(args))
+    def to_tuple_if_list(self, obj):
+        """ Return a tuple copy of obj when it is a list, else obj itself """
+        return tuple(obj) if isinstance(obj, list) else obj
+class IaaAugment():
+    """ Apply an imgaug pipeline to data['image'] and data['polys'] """
+    def __init__(self, augmenter_args=None, **kwargs):
+        """
+        Args:
+            augmenter_args: augmenter configuration passed to
+                AugmenterBuilder.build; None selects the default pipeline
+                below (Fliplr, Affine rotate, Resize)
+            **kwargs: accepted and ignored
+        """
+        if augmenter_args is None:
+            augmenter_args = [
+                {'type': 'Fliplr', 'args': {'p': 0.5}},
+                {'type': 'Affine', 'args': {'rotate': [-10, 10]}},
+                {'type': 'Resize', 'args': {'size': [0.5, 3]}},
+            ]
+        self.augmenter = AugmenterBuilder().build(augmenter_args)
+    def __call__(self, data):
+        """
+        Augment data['image'] and data['polys'] in place and return data.
+        data is returned untouched when no augmenter is configured.
+        """
+        image = data['image']
+        original_shape = image.shape
+        if not self.augmenter:
+            return data
+        # One deterministic copy is used for both the image and the
+        # polygons, so both go through the same augmenter instance.
+        frozen = self.augmenter.to_deterministic()
+        data['image'] = frozen.augment_image(image)
+        return self.may_augment_annotation(frozen, data, original_shape)
+    def may_augment_annotation(self, aug, data, shape):
+        """ Replace data['polys'] with the augmented polygons (no-op when aug is None) """
+        if aug is None:
+            return data
+        data['polys'] = np.array(
+            [self.may_augment_poly(aug, shape, poly) for poly in data['polys']])
+        return data
+    def may_augment_poly(self, aug, img_shape, poly):
+        """ Pass the points of one polygon through aug; returns a list of (x, y) tuples """
+        points = [imgaug.Keypoint(p[0], p[1]) for p in poly]
+        on_image = imgaug.KeypointsOnImage(points, shape=img_shape)
+        moved = aug.augment_keypoints([on_image])[0].keypoints
+        return [(p.x, p.y) for p in moved]
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+from ppocr.utils.logging import get_logger
+class DetLabelEncode(object):
+    """ Decode a JSON detection label into polygons, texts and ignore flags """
+    def __init__(self, **kwargs):
+        pass
+    def __call__(self, data):
+        """
+        Args:
+            data(dict): data['label'] is a JSON string holding a list of
+                objects, each with the keys 'points' and 'transcription'
+        Returns:
+            dict: the same dict with three keys added:
+                'polys' (float32 array built from every 'points' entry),
+                'texts' (list of transcriptions) and
+                'ignore_tags' (bool array, True where the transcription is
+                '*' or '###')
+        """
+        import json
+        label = json.loads(data['label'])
+        boxes, txts, txt_tags = [], [], []
+        for item in label:
+            box = item['points']
+            txt = item['transcription']
+            boxes.append(box)
+            txts.append(txt)
+            txt_tags.append(txt in ['*', '###'])
+        boxes = np.array(boxes, dtype=np.float32)
+        # The np.bool alias was removed in NumPy 1.24; the builtin bool gives
+        # the same dtype on every NumPy version.
+        txt_tags = np.array(txt_tags, dtype=bool)
+        data['polys'] = boxes
+        data['texts'] = txts
+        data['ignore_tags'] = txt_tags
+        return data
+    def order_points_clockwise(self, pts):
+        """
+        Pick four rows of pts by coordinate sum and difference.
+        Args:
+            pts(array): 2-D array with two columns
+                (presumably x, y -- confirm against callers)
+        Returns:
+            array: float32, shape (4, 2); row 0 has the smallest column sum,
+                row 2 the largest, row 1 the smallest (col1 - col0) and
+                row 3 the largest (col1 - col0)
+        """
+        rect = np.zeros((4, 2), dtype="float32")
+        s = pts.sum(axis=1)
+        rect[0] = pts[np.argmin(s)]
+        rect[2] = pts[np.argmax(s)]
+        diff = np.diff(pts, axis=1)
+        rect[1] = pts[np.argmin(diff)]
+        rect[3] = pts[np.argmax(diff)]
+        return rect
+class BaseRecLabelEncode(object):
+    """ Convert between text-label and text-index """
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False):
+        """
+        Args:
+            max_text_length(int): longest label, in characters, that
+                encode() accepts
+            character_dict_path(str): dictionary file whose lines, stripped
+                of line endings, are concatenated into the character set;
+                required when character_type is 'ch'
+            character_type(str): one of 'ch', 'en', 'en_sensitive'
+            use_space_char(bool): append a space to the character set; only
+                consulted when character_type is 'ch'
+        """
+        support_character_type = ['ch', 'en', 'en_sensitive']
+        # Report the rejected character_type itself: self.character_str is not
+        # assigned yet here, so formatting it would raise AttributeError
+        # instead of the intended AssertionError.
+        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
+            support_character_type, character_type)
+        self.max_text_len = max_text_length
+        if character_type == "en":
+            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
+            dict_character = list(self.character_str)
+        elif character_type == "ch":
+            self.character_str = ""
+            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
+            with open(character_dict_path, "rb") as fin:
+                lines = fin.readlines()
+                for line in lines:
+                    line = line.decode('utf-8').strip("\n").strip("\r\n")
+                    self.character_str += line
+            if use_space_char:
+                self.character_str += " "
+            dict_character = list(self.character_str)
+        elif character_type == "en_sensitive":
+            # same with ASTER setting (use 94 char).
+            import string
+            self.character_str = string.printable[:-6]
+            dict_character = list(self.character_str)
+        self.character_type = character_type
+        # Subclasses prepend their special tokens through this hook.
+        dict_character = self.add_special_char(dict_character)
+        self.dict = {}
+        for i, char in enumerate(dict_character):
+            self.dict[char] = i
+        self.character = dict_character
+    def add_special_char(self, dict_character):
+        """ Hook for subclasses to add special tokens; the base class adds none """
+        return dict_character
+    def encode(self, text):
+        """convert one text-label into a list of text-index.
+        input:
+            text(str): the label of a single image
+        output:
+            list of int: the index of every character of text found in the
+                dictionary; characters missing from the dictionary are
+                dropped. None when text is longer than max_text_len or when
+                no character of text is in the dictionary.
+        """
+        if len(text) > self.max_text_len:
+            return None
+        if self.character_type == "en":
+            text = text.lower()
+        text_list = []
+        for char in text:
+            if char not in self.dict:
+                # logger = get_logger()
+                # logger.warning('{} is not in dict'.format(char))
+                continue
+            text_list.append(self.dict[char])
+        if len(text_list) == 0:
+            return None
+        return text_list
+    def get_ignored_tokens(self):
+        return [0]  # for ctc blank
+class CTCLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index """
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        super(CTCLabelEncode, self).__init__(
+            max_text_length, character_dict_path, character_type,
+            use_space_char)
+    def __call__(self, data):
+        """
+        Encode data['label'] and pad it with zeros up to max_text_len.
+        Returns None when the label cannot be encoded; otherwise data, with
+        'length' set to the unpadded length and 'label' to the padded
+        index array.
+        """
+        encoded = self.encode(data['label'])
+        if encoded is None:
+            return None
+        n_chars = len(encoded)
+        data['length'] = np.array(n_chars)
+        zero_pad = [0] * (self.max_text_len - n_chars)
+        data['label'] = np.array(encoded + zero_pad)
+        return data
+    def add_special_char(self, dict_character):
+        """ Put the 'blank' token at index 0, in front of the dictionary """
+        return ['blank'] + dict_character
+class AttnLabelEncode(BaseRecLabelEncode):
+    """ Convert between text-label and text-index """
+    def __init__(self,
+                 max_text_length,
+                 character_dict_path=None,
+                 character_type='ch',
+                 use_space_char=False,
+                 **kwargs):
+        # The begin/end tokens must exist before the base initializer runs:
+        # it calls add_special_char(), which reads both attributes.
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        super(AttnLabelEncode,
+              self).__init__(max_text_length, character_dict_path,
+                             character_type, use_space_char)
+    def add_special_char(self, dict_character):
+        """ Put the begin and end tokens at indices 0 and 1 of the dictionary """
+        dict_character = [self.beg_str, self.end_str] + dict_character
+        return dict_character
+    def __call__(self, text):
+        """ Encode a label string; returns a list of indices or None (see encode) """
+        text = self.encode(text)
+        return text
+    def get_ignored_tokens(self):
+        """ Return the indices of the begin and end tokens """
+        beg_idx = self.get_beg_end_flag_idx("beg")
+        end_idx = self.get_beg_end_flag_idx("end")
+        return [beg_idx, end_idx]
+    def get_beg_end_flag_idx(self, beg_or_end):
+        """
+        Args:
+            beg_or_end(str): "beg" or "end"
+        Returns:
+            0-d array holding the dictionary index of the requested token
+        """
+        if beg_or_end == "beg":
+            idx = np.array(self.dict[self.beg_str])
+        elif beg_or_end == "end":
+            idx = np.array(self.dict[self.end_str])
+        else:
+            assert False, "Unsupport type %s in get_beg_end_flag_idx" \
+                          % beg_or_end
+        return idx
--- a/ppocr/data/imaug/make_border_map.py
+++ b/ppocr/data/imaug/make_border_map.py
+# -*- coding:utf-8 -*- 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+import cv2
+np.seterr(divide='ignore', invalid='ignore')
+import pyclipper
+from shapely.geometry import Polygon
+import sys
+import warnings
+warnings.simplefilter("ignore")
+__all__ = ['MakeBorderMap']
+class MakeBorderMap(object):
+    def __init__(self,
+                 shrink_ratio=0.4,
+                 thresh_min=0.3,
+                 thresh_max=0.7,
+                 **kwargs):
+        """
+        Args:
+            shrink_ratio(float): ratio used by draw_border_map to derive the
+                offset distance of every polygon
+            thresh_min(float): value the threshold map takes where the
+                border canvas is 0
+            thresh_max(float): value the threshold map takes where the
+                border canvas is 1
+            **kwargs: accepted and ignored
+        """
+        self.thresh_max = thresh_max
+        self.thresh_min = thresh_min
+        self.shrink_ratio = shrink_ratio
+    def __call__(self, data: dict) -> dict:
+        """
+        Build the threshold map and its mask for one sample.
+        Reads data['image'], data['polys'] and data['ignore_tags']; every
+        polygon whose ignore tag is falsy is drawn with draw_border_map.
+        Writes data['threshold_map'] (the border canvas rescaled from
+        [0, 1] to [thresh_min, thresh_max]) and data['threshold_mask'],
+        then returns data.
+        """
+        map_shape = data['image'].shape[:2]
+        polys = data['polys']
+        skip_flags = data['ignore_tags']
+        border = np.zeros(map_shape, dtype=np.float32)
+        region = np.zeros(map_shape, dtype=np.float32)
+        for idx in range(len(polys)):
+            if not skip_flags[idx]:
+                self.draw_border_map(polys[idx], border, mask=region)
+        border = border * (self.thresh_max - self.thresh_min) + self.thresh_min
+        data['threshold_map'] = border
+        data['threshold_mask'] = region
+        return data
+    def draw_border_map(self, polygon, canvas, mask):
+        """
+        Draw the border response of one polygon into canvas and mask.
+        Both canvas and mask are modified in place; nothing is returned.
+        Args:
+            polygon: points convertible to a 2-D array with two columns
+                (column 0 is used as x, column 1 as y)
+            canvas(array): 2-D map; every pixel inside the offset polygon's
+                bounding box keeps the maximum of its current value and
+                1 - (normalized distance to the nearest polygon edge)
+            mask(array): 2-D map; the offset polygon is filled with 1.0
+        """
+        # np.array() copies, so the in-place shifts below act on a local copy.
+        polygon = np.array(polygon)
+        assert polygon.ndim == 2
+        assert polygon.shape[1] == 2
+        polygon_shape = Polygon(polygon)
+        # Polygons without area are skipped: nothing is drawn for them.
+        if polygon_shape.area <= 0:
+            return
+        # Offset distance: area * (1 - shrink_ratio^2) / perimeter.
+        distance = polygon_shape.area * (
+            1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+        subject = [tuple(l) for l in polygon]
+        padding = pyclipper.PyclipperOffset()
+        padding.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
+        # Offset by +distance; only the first returned path is used.
+        padded_polygon = np.array(padding.Execute(distance)[0])
+        cv2.fillPoly(mask, [padded_polygon.astype(np.int32)], 1.0)
+        # Bounding box of the offset polygon.
+        xmin = padded_polygon[:, 0].min()
+        xmax = padded_polygon[:, 0].max()
+        ymin = padded_polygon[:, 1].min()
+        ymax = padded_polygon[:, 1].max()
+        width = xmax - xmin + 1
+        height = ymax - ymin + 1
+        # Move the polygon into bounding-box-local coordinates.
+        polygon[:, 0] = polygon[:, 0] - xmin
+        polygon[:, 1] = polygon[:, 1] - ymin
+        # xs / ys hold the local x / y coordinate of every pixel of the box.
+        xs = np.broadcast_to(
+            np.linspace(
+                0, width - 1, num=width).reshape(1, width), (height, width))
+        ys = np.broadcast_to(
+            np.linspace(
+                0, height - 1, num=height).reshape(height, 1), (height, width))
+        # One distance layer per polygon edge (point i to point i + 1,
+        # wrapping around), normalized by distance and clipped to [0, 1].
+        distance_map = np.zeros(
+            (polygon.shape[0], height, width), dtype=np.float32)
+        for i in range(polygon.shape[0]):
+            j = (i + 1) % polygon.shape[0]
+            absolute_distance = self._distance(xs, ys, polygon[i], polygon[j])
+            distance_map[i] = np.clip(absolute_distance / distance, 0, 1)
+        # Keep, per pixel, the smallest value over all edges.
+        distance_map = distance_map.min(axis=0)
+        # Clamp the bounding box to the canvas.
+        xmin_valid = min(max(0, xmin), canvas.shape[1] - 1)
+        xmax_valid = min(max(0, xmax), canvas.shape[1] - 1)
+        ymin_valid = min(max(0, ymin), canvas.shape[0] - 1)
+        ymax_valid = min(max(0, ymax), canvas.shape[0] - 1)
+        # Merge the part of the local map that overlaps the canvas, keeping
+        # the larger of the new response and what the canvas already holds.
+        canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1] = np.fmax(
+            1 - distance_map[ymin_valid - ymin:ymax_valid - ymax + height,
+                             xmin_valid - xmin:xmax_valid - xmax + width],
+            canvas[ymin_valid:ymax_valid + 1, xmin_valid:xmax_valid + 1])
+    def _distance(self, xs, ys, point_1, point_2):
+        '''
+        compute, for every pixel, a distance to the line through two points
+        xs: array of x coordinates (the second axis), one per pixel
+        ys: array of y coordinates (the first axis), same shape as xs
+        point_1, point_2: (x, y), the ends of the line
+        return: array shaped like xs. Each entry is the perpendicular
+            distance from the pixel to the line through point_1 and
+            point_2, except where cosin < 0, where it is the distance to
+            the nearer of the two points.
+        '''
+        height, width = xs.shape[:2]
+        # Squared distance from every pixel to point_1 and to point_2.
+        square_distance_1 = np.square(xs - point_1[0]) + np.square(ys - point_1[
+            1])
+        square_distance_2 = np.square(xs - point_2[0]) + np.square(ys - point_2[
+            1])
+        # Squared length of the segment point_1 -> point_2.
+        square_distance = np.square(point_1[0] - point_2[0]) + np.square(
+            point_1[1] - point_2[1])
+        # Law of cosines with the sign flipped: cosin is the negated cosine of
+        # the angle at the pixel between the directions to the two points.
+        cosin = (square_distance - square_distance_1 - square_distance_2) / (
+            2 * np.sqrt(square_distance_1 * square_distance_2))
+        square_sin = 1 - np.square(cosin)
+        # A pixel lying exactly on one of the points gives 0 / 0 above;
+        # nan_to_num turns the resulting NaN into 0.
+        square_sin = np.nan_to_num(square_sin)
+        # d1 * d2 * sin / base: the height of the triangle (pixel, point_1,
+        # point_2) over its base, i.e. the distance to the line.
+        result = np.sqrt(square_distance_1 * square_distance_2 * square_sin /
+                         square_distance)
+        result[cosin <
+               0] = np.sqrt(np.fmin(square_distance_1, square_distance_2))[cosin
+                                                                           < 0]
+        # self.extend_line(point_1, point_2, result)
+        return result
+    def extend_line(self, point_1, point_2, result, shrink_ratio):
+        ex_point_1 = (int(
+            round(point_1[0] + (point_1[0] - point_2[0]) * (1 + shrink_ratio))),
+                      int(
+                          round(point_1[1] + (point_1[1] - point_2[1]) * (
+                              1 + shrink_ratio))))
+        cv2.line(
+            result,
+            tuple(ex_point_1),
+            tuple(point_1),
+            4096.0,
+            1,
+            lineType=cv2.LINE_AA,
+            shift=0)
+        ex_point_2 = (int(
+            round(point_2[0] + (point_2[0] - point_1[0]) * (1 + shrink_ratio))),
+                      int(
+                          round(point_2[1] + (point_2[1] - point_1[1]) * (
+                              1 + shrink_ratio))))
+        cv2.line(
+            result,
+            tuple(ex_point_2),
+            tuple(point_2),
+            4096.0,
+            1,
+            lineType=cv2.LINE_AA,
+            shift=0)
+        return ex_point_1, ex_point_2
--- a/ppocr/data/imaug/make_shrink_map.py
+++ b/ppocr/data/imaug/make_shrink_map.py
+# -*- coding:utf-8 -*- 
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import numpy as np
+import cv2
+from shapely.geometry import Polygon
+import pyclipper
+__all__ = ['MakeShrinkMap']
+class MakeShrinkMap(object):
+    r'''
+    Making binary mask from detection data with ICDAR format.
+    Typically following the process of class `MakeICDARData`.
+    '''
+    def __init__(self, min_text_size=8, shrink_ratio=0.4, **kwargs):
+        self.min_text_size = min_text_size
+        self.shrink_ratio = shrink_ratio
+    def __call__(self, data):
+        image = data['image']
+        text_polys = data['polys']
+        ignore_tags = data['ignore_tags']
+        h, w = image.shape[:2]
+        text_polys, ignore_tags = self.validate_polygons(text_polys,
+                                                         ignore_tags, h, w)
+        gt = np.zeros((h, w), dtype=np.float32)
+        # gt = np.zeros((1, h, w), dtype=np.float32)
+        mask = np.ones((h, w), dtype=np.float32)
+        for i in range(len(text_polys)):
+            polygon = text_polys[i]
+            height = max(polygon[:, 1]) - min(polygon[:, 1])
+            width = max(polygon[:, 0]) - min(polygon[:, 0])
+            if ignore_tags[i] or min(height, width) < self.min_text_size:
+                cv2.fillPoly(mask,
+                             polygon.astype(np.int32)[np.newaxis, :, :], 0)
+                ignore_tags[i] = True
+            else:
+                polygon_shape = Polygon(polygon)
+                distance = polygon_shape.area * (
+                    1 - np.power(self.shrink_ratio, 2)) / polygon_shape.length
+                subject = [tuple(l) for l in text_polys[i]]
+                padding = pyclipper.PyclipperOffset()
+                padding.AddPath(subject, pyclipper.JT_ROUND,
+                                pyclipper.ET_CLOSEDPOLYGON)
+                shrinked = padding.Execute(-distance)
+                if shrinked == []:
+                    cv2.fillPoly(mask,
+                                 polygon.astype(np.int32)[np.newaxis, :, :], 0)
+                    ignore_tags[i] = True
+                    continue
+                shrinked = np.array(shrinked[0]).reshape(-1, 2)
+                cv2.fillPoly(gt, [shrinked.astype(np.int32)], 1)
+                # cv2.fillPoly(gt[0], [shrinked.astype(np.int32)], 1)
+        data['shrink_map'] = gt
+        data['shrink_mask'] = mask
+        return data
+    def validate_polygons(self, polygons, ignore_tags, h, w):
+        '''
+        polygons (numpy.array, required): of shape (num_instances, num_points, 2)
+        '''
+        if len(polygons) == 0:
+            return polygons, ignore_tags
+        assert len(polygons) == len(ignore_tags)
+        for polygon in polygons:
+            polygon[:, 0] = np.clip(polygon[:, 0], 0, w - 1)
+            polygon[:, 1] = np.clip(polygon[:, 1], 0, h - 1)
+        for i in range(len(polygons)):
+            area = self.polygon_area(polygons[i])
+            if abs(area) < 1:
+                ignore_tags[i] = True
+            if area > 0:
+                polygons[i] = polygons[i][::-1, :]
+        return polygons, ignore_tags
+    def polygon_area(self, polygon):
+        # return cv2.contourArea(polygon.astype(np.float32))
+        edge = 0
+        for i in range(polygon.shape[0]):
+            next_index = (i + 1) % polygon.shape[0]
+            edge += (polygon[next_index, 0] - polygon[i, 0]) * (
+                polygon[next_index, 1] - polygon[i, 1])
+        return edge / 2.
--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
+"""
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import sys
+import six
+import cv2
+import numpy as np
+class DecodeImage(object):
+    """ decode image """
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+    def __call__(self, data):
+        img = data['image']
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+        img = cv2.imdecode(img, 1)
+        if self.img_mode == 'GRAY':
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        elif self.img_mode == 'RGB':
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+            img = img[:, :, ::-1]
+        if self.channel_first:
+            img = img.transpose((2, 0, 1))
+        data['image'] = img
+        return data
+class NormalizeImage(object):
+    """ normalize image such as substract mean, divide std
+    """
+    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+        if isinstance(scale, str):
+            scale = eval(scale)
+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
+        std = std if std is not None else [0.229, 0.224, 0.225]
+        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+        self.mean = np.array(mean).reshape(shape).astype('float32')
+        self.std = np.array(std).reshape(shape).astype('float32')
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+        assert isinstance(img,
+                          np.ndarray), "invalid input 'img' in NormalizeImage"
+        data['image'] = (
+            img.astype('float32') * self.scale - self.mean) / self.std
+        return data
+class ToCHWImage(object):
+    """ convert hwc image to chw image
+    """
+    def __init__(self, **kwargs):
+        pass
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+        data['image'] = img.transpose((2, 0, 1))
+        return data
+class keepKeys(object):
+    def __init__(self, keep_keys, **kwargs):
+        self.keep_keys = keep_keys
+    def __call__(self, data):
+        data_list = []
+        for key in self.keep_keys:
+            data_list.append(data[key])
+        return data_list
+class DetResizeForTest(object):
+    def __init__(self, **kwargs):
+        super(DetResizeForTest, self).__init__()
+        self.resize_type = 0
+        if 'image_shape' in kwargs:
+            self.image_shape = kwargs['image_shape']
+            self.resize_type = 1
+        if 'limit_side_len' in kwargs:
+            self.limit_side_len = kwargs['limit_side_len']
+            self.limit_type = kwargs.get('limit_type', 'min')
+        else:
+            self.limit_side_len = 736
+            self.limit_type = 'min'
+    def __call__(self, data):
+        img = data['image']
+        if self.resize_type == 0:
+            img, shape = self.resize_image_type0(img)
+        else:
+            img, shape = self.resize_image_type1(img)
+        data['image'] = img
+        data['shape'] = shape
+        return data
+    def resize_image_type1(self, img):
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        return img, np.array([ori_h, ori_w])
+    def resize_image_type0(self, img):
+        """
+        resize image to a size multiple of 32 which is required by the network
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
+        """
+        limit_side_len = self.limit_side_len
+        h, w, _ = img.shape
+        # limit the max side
+        if self.limit_type == 'max':
+            if max(h, w) > limit_side_len:
+                if h > w:
+                    ratio = float(limit_side_len) / h
+                else:
+                    ratio = float(limit_side_len) / w
+            else:
+                ratio = 1.
+        else:
+            if min(h, w) < limit_side_len:
+                if h < w:
+                    ratio = float(limit_side_len) / h
+                else:
+                    ratio = float(limit_side_len) / w
+            else:
+                ratio = 1.
+        resize_h = int(h * ratio)
+        resize_w = int(w * ratio)
+        resize_h = int(round(resize_h / 32) * 32)
+        resize_w = int(round(resize_w / 32) * 32)
+        try:
+            if int(resize_w) <= 0 or int(resize_h) <= 0:
+                return None, (None, None)
+            img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        except:
+            print(img.shape, resize_w, resize_h)
+            sys.exit(0)
+        return img, np.array([h, w])
--- a/ppocr/data/det/random_crop_data.py
+++ b/ppocr/data/det/random_crop_data.py
@@ -108,48 +108,103 @@ def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
    return 0, 0, w, h
-def RandomCropData(data, size):
+class EastRandomCropData(object):
+    """Crop a region of the image, resize it to `size`, and remap the
+    text polygons into the cropped frame.
+
+    size: (width, height) of the output image.
+    max_tries, min_crop_side_ratio: forwarded to crop_area (defined
+        elsewhere in this file), which picks the crop rectangle.
+    keep_ratio: when true the crop is scaled uniformly and placed in the
+        top-left corner of a zero-filled canvas of the output size;
+        otherwise it is resized directly to `size`.
+    """
-    max_tries = 10
+    def __init__(self,
-    min_crop_side_ratio = 0.1
+                 size=(640, 640),
-    require_original_image = False
+                 max_tries=10,
-    keep_ratio = True
+                 min_crop_side_ratio=0.1,
+                 keep_ratio=True,
-    im = data['image']
+                 **kwargs):
-    text_polys = data['polys']
+        self.size = size
-    ignore_tags = data['ignore_tags']
+        self.max_tries = max_tries
-    texts = data['texts']
+        self.min_crop_side_ratio = min_crop_side_ratio
-    all_care_polys = [
+        self.keep_ratio = keep_ratio
-        text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
-    ]
+    def __call__(self, data):
+        """Replace data['image'], data['polys'], data['ignore_tags'] and
+        data['texts'] with their cropped counterparts and return data.
+        """
-    # 计算crop区域
+        img = data['image']
-    crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys,
+        text_polys = data['polys']
-                                               min_crop_side_ratio, max_tries)
+        ignore_tags = data['ignore_tags']
-    # crop 图片 保持比例填充
+        texts = data['texts']
-    scale_w = size[0] / crop_w
+        all_care_polys = [
-    scale_h = size[1] / crop_h
+            text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
-    scale = min(scale_w, scale_h)
+        ]
-    h = int(crop_h * scale)
+        # Compute the crop region
-    w = int(crop_w * scale)
+        crop_x, crop_y, crop_w, crop_h = crop_area(
-    if keep_ratio:
+            img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
-        padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype)
+        # Crop the image, keeping the aspect ratio and padding the rest
-        padimg[:h, :w] = cv2.resize(
+        scale_w = self.size[0] / crop_w
-            im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
+        scale_h = self.size[1] / crop_h
-        img = padimg
+        scale = min(scale_w, scale_h)
-    else:
+        h = int(crop_h * scale)
-        img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
+        w = int(crop_w * scale)
-                         tuple(size))
+        if self.keep_ratio:
-    # crop 文本框
+            padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
-    text_polys_crop = []
+                              img.dtype)
-    ignore_tags_crop = []
+            padimg[:h, :w] = cv2.resize(
-    texts_crop = []
+                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
-    for poly, text, tag in zip(text_polys, texts, ignore_tags):
+            img = padimg
-        poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+        else:
+            # NOTE(review): this branch resizes to `size` with independent
+            # x/y factors, while the polygons below are scaled by the single
+            # uniform `scale` - confirm that this is intended.
-        if not is_poly_outside_rect(poly, 0, 0, w, h):
+            img = cv2.resize(
-            text_polys_crop.append(poly)
+                img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
-            ignore_tags_crop.append(tag)
+                tuple(self.size))
-            texts_crop.append(text)
+        # Crop the text boxes
-    data['image'] = img
+        text_polys_crop = []
-    data['polys'] = np.array(text_polys_crop)
+        ignore_tags_crop = []
-    data['ignore_tags'] = ignore_tags_crop
+        texts_crop = []
-    data['texts'] = texts_crop
+        for poly, text, tag in zip(text_polys, texts, ignore_tags):
-    return data
+            # Shift into the crop's frame, then apply the uniform scale.
+            poly = ((poly - (crop_x, crop_y)) * scale).tolist()
+            # Keep only polygons that is_poly_outside_rect (defined elsewhere
+            # in this file) does not report as outside the scaled crop.
+            if not is_poly_outside_rect(poly, 0, 0, w, h):
+                text_polys_crop.append(poly)
+                ignore_tags_crop.append(tag)
+                texts_crop.append(text)
+        data['image'] = img
+        data['polys'] = np.array(text_polys_crop)
+        data['ignore_tags'] = ignore_tags_crop
+        data['texts'] = texts_crop
+        return data
+class PSERandomCrop(object):
+    def __init__(self, size, **kwargs):
+        self.size = size
+    def __call__(self, data):
+        imgs = data['imgs']
+        h, w = imgs[0].shape[0:2]
+        th, tw = self.size
+        if w == tw and h == th:
+            return imgs
+        # label中存在文本实例，并且按照概率进行裁剪，使用threshold_label_map控制
+        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
+            # 文本实例的左上角点
+            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
+            tl[tl < 0] = 0
+            # 文本实例的右下角点
+            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
+            br[br < 0] = 0
+            # 保证选到右下角点时，有足够的距离进行crop
+            br[0] = min(br[0], h - th)
+            br[1] = min(br[1], w - tw)
+            for _ in range(50000):
+                i = random.randint(tl[0], br[0])
+                j = random.randint(tl[1], br[1])
+                # 保证shrink_label_map有文本
+                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
+                    continue
+                else:
+                    break
+        else:
+            i = random.randint(0, h - th)
+            j = random.randint(0, w - tw)
+        # return i, j, th, tw
+        for idx in range(len(imgs)):
+            if len(imgs[idx].shape) == 3:
+                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
+            else:
+                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
+        data['imgs'] = imgs
+        return data
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
--- a/ppocr/data/rec/__init__.py
+++ b/ppocr/data/rec/__init__.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#     http://www.apache.org/licenses/LICENSE-2.0
+#    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from .augment import tia_perspective, tia_distort, tia_stretch
+__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective']
--- a/ppocr/data/imaug/text_image_aug/augment.py
+++ b/ppocr/data/imaug/text_image_aug/augment.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+from .warp_mls import WarpMLS
+def tia_distort(src, segment=4):
+    img_h, img_w = src.shape[:2]
+    cut = img_w // segment
+    thresh = cut // 3
+    src_pts = list()
+    dst_pts = list()
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+    dst_pts.append([np.random.randint(thresh), np.random.randint(thresh)])
+    dst_pts.append(
+        [img_w - np.random.randint(thresh), np.random.randint(thresh)])
+    dst_pts.append(
+        [img_w - np.random.randint(thresh), img_h - np.random.randint(thresh)])
+    dst_pts.append(
+        [np.random.randint(thresh), img_h - np.random.randint(thresh)])
+    half_thresh = thresh * 0.5
+    for cut_idx in np.arange(1, segment, 1):
+        src_pts.append([cut * cut_idx, 0])
+        src_pts.append([cut * cut_idx, img_h])
+        dst_pts.append([
+            cut * cut_idx + np.random.randint(thresh) - half_thresh,
+            np.random.randint(thresh) - half_thresh
+        ])
+        dst_pts.append([
+            cut * cut_idx + np.random.randint(thresh) - half_thresh,
+            img_h + np.random.randint(thresh) - half_thresh
+        ])
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+    return dst
+def tia_stretch(src, segment=4):
+    img_h, img_w = src.shape[:2]
+    cut = img_w // segment
+    thresh = cut * 4 // 5
+    src_pts = list()
+    dst_pts = list()
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+    dst_pts.append([0, 0])
+    dst_pts.append([img_w, 0])
+    dst_pts.append([img_w, img_h])
+    dst_pts.append([0, img_h])
+    half_thresh = thresh * 0.5
+    for cut_idx in np.arange(1, segment, 1):
+        move = np.random.randint(thresh) - half_thresh
+        src_pts.append([cut * cut_idx, 0])
+        src_pts.append([cut * cut_idx, img_h])
+        dst_pts.append([cut * cut_idx + move, 0])
+        dst_pts.append([cut * cut_idx + move, img_h])
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+    return dst
+def tia_perspective(src):
+    img_h, img_w = src.shape[:2]
+    thresh = img_h // 2
+    src_pts = list()
+    dst_pts = list()
+    src_pts.append([0, 0])
+    src_pts.append([img_w, 0])
+    src_pts.append([img_w, img_h])
+    src_pts.append([0, img_h])
+    dst_pts.append([0, np.random.randint(thresh)])
+    dst_pts.append([img_w, np.random.randint(thresh)])
+    dst_pts.append([img_w, img_h - np.random.randint(thresh)])
+    dst_pts.append([0, img_h - np.random.randint(thresh)])
+    trans = WarpMLS(src, src_pts, dst_pts, img_w, img_h)
+    dst = trans.generate()
+    return dst
\ No newline at end of file
--- a/ppocr/data/imaug/text_image_aug/warp_mls.py
+++ b/ppocr/data/imaug/text_image_aug/warp_mls.py
--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
--- a/ppocr/metrics/DetMetric.py
+++ b/ppocr/metrics/DetMetric.py
--- a/ppocr/metrics/RecMetric.py
+++ b/ppocr/metrics/RecMetric.py
--- a/ppocr/metrics/__init__.py
+++ b/ppocr/metrics/__init__.py
--- a/tools/eval_utils/eval_det_iou.py
+++ b/tools/eval_utils/eval_det_iou.py
--- a/ppocr/modeling/__init__.py
+++ b/ppocr/modeling/__init__.py
--- a/ppocr/modeling/architectures/__init__.py
+++ b/ppocr/modeling/architectures/__init__.py
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
--- a/ppocr/modeling/architectures/model.py
+++ b/ppocr/modeling/architectures/model.py
--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
--- a/ppocr/modeling/backbones/det_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/det_mobilenet_v3.py
--- a/ppocr/modeling/backbones/det_resnet_vd.py
+++ b/ppocr/modeling/backbones/det_resnet_vd.py
--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
--- a/ppocr/modeling/backbones/rec_resnet_fpn.py
+++ b/ppocr/modeling/backbones/rec_resnet_fpn.py
--- a/ppocr/modeling/backbones/rec_resnet_vd.py
+++ b/ppocr/modeling/backbones/rec_resnet_vd.py
--- a/ppocr/modeling/common_functions.py
+++ b/ppocr/modeling/common_functions.py
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ b/ppocr/modeling/heads/rec_attention_head.py
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
--- a/ppocr/modeling/heads/rec_seq_encoder.py
+++ b/ppocr/modeling/heads/rec_seq_encoder.py
--- a/ppocr/modeling/heads/rec_srn_all_head.py
+++ b/ppocr/modeling/heads/rec_srn_all_head.py
--- a/ppocr/modeling/heads/self_attention/__init__.py
+++ b/ppocr/modeling/heads/self_attention/__init__.py
--- a/ppocr/modeling/heads/self_attention/model.py
+++ b/ppocr/modeling/heads/self_attention/model.py
--- a/ppocr/modeling/losses/__init__.py
+++ b/ppocr/modeling/losses/__init__.py
--- a/ppocr/modeling/losses/det_basic_loss.py
+++ b/ppocr/modeling/losses/det_basic_loss.py
--- a/ppocr/modeling/losses/det_db_loss.py
+++ b/ppocr/modeling/losses/det_db_loss.py
--- a/ppocr/modeling/losses/det_east_loss.py
+++ b/ppocr/modeling/losses/det_east_loss.py
--- a/ppocr/modeling/losses/det_sast_loss.py
+++ b/ppocr/modeling/losses/det_sast_loss.py
--- a/ppocr/modeling/losses/rec_attention_loss.py
+++ b/ppocr/modeling/losses/rec_attention_loss.py
--- a/ppocr/modeling/losses/rec_ctc_loss.py
+++ b/ppocr/modeling/losses/rec_ctc_loss.py
--- a/ppocr/modeling/losses/rec_srn_loss.py
+++ b/ppocr/modeling/losses/rec_srn_loss.py
--- a/tools/eval_utils/__init__.py
+++ b/tools/eval_utils/__init__.py
--- a/ppocr/modeling/necks/fpn.py
+++ b/ppocr/modeling/necks/fpn.py
--- a/ppocr/modeling/necks/rnn.py
+++ b/ppocr/modeling/necks/rnn.py
--- a/ppocr/modeling/stns/tps.py
+++ b/ppocr/modeling/stns/tps.py
--- a/ppocr/modeling/stns/__init__.py
+++ b/ppocr/modeling/stns/__init__.py
--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
--- a/ppocr/optimizer/__init__.py
+++ b/ppocr/optimizer/__init__.py
--- a/ppocr/optimizer/learning_rate.py
+++ b/ppocr/optimizer/learning_rate.py
--- a/ppocr/optimizer/optimizer.py
+++ b/ppocr/optimizer/optimizer.py
--- a/ppocr/optimizer/regularizer.py
+++ b/ppocr/optimizer/regularizer.py
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
--- a/ppocr/postprocess/db_postprocess_torch.py
+++ b/ppocr/postprocess/db_postprocess_torch.py
--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
--- a/ppocr/postprocess/lanms/.gitignore
+++ b/ppocr/postprocess/lanms/.gitignore
-adaptor.so
--- a/ppocr/postprocess/lanms/.ycm_extra_conf.py
+++ b/ppocr/postprocess/lanms/.ycm_extra_conf.py
--- a/ppocr/postprocess/lanms/Makefile
+++ b/ppocr/postprocess/lanms/Makefile
--- a/ppocr/postprocess/lanms/__init__.py
+++ b/ppocr/postprocess/lanms/__init__.py
--- a/ppocr/postprocess/lanms/__main__.py
+++ b/ppocr/postprocess/lanms/__main__.py
--- a/ppocr/postprocess/lanms/adaptor.cpp
+++ b/ppocr/postprocess/lanms/adaptor.cpp
--- a/ppocr/postprocess/lanms/include/clipper/clipper.cpp
+++ b/ppocr/postprocess/lanms/include/clipper/clipper.cpp
--- a/ppocr/postprocess/lanms/include/clipper/clipper.hpp
+++ b/ppocr/postprocess/lanms/include/clipper/clipper.hpp
--- a/ppocr/postprocess/lanms/include/pybind11/attr.h
+++ b/ppocr/postprocess/lanms/include/pybind11/attr.h
--- a/ppocr/postprocess/lanms/include/pybind11/buffer_info.h
+++ b/ppocr/postprocess/lanms/include/pybind11/buffer_info.h
--- a/ppocr/postprocess/lanms/include/pybind11/cast.h
+++ b/ppocr/postprocess/lanms/include/pybind11/cast.h
--- a/ppocr/postprocess/lanms/include/pybind11/chrono.h
+++ b/ppocr/postprocess/lanms/include/pybind11/chrono.h
--- a/ppocr/postprocess/lanms/include/pybind11/class_support.h
+++ b/ppocr/postprocess/lanms/include/pybind11/class_support.h
--- a/ppocr/postprocess/lanms/include/pybind11/common.h
+++ b/ppocr/postprocess/lanms/include/pybind11/common.h
--- a/ppocr/postprocess/lanms/include/pybind11/complex.h
+++ b/ppocr/postprocess/lanms/include/pybind11/complex.h
--- a/ppocr/postprocess/lanms/include/pybind11/descr.h
+++ b/ppocr/postprocess/lanms/include/pybind11/descr.h
--- a/ppocr/postprocess/lanms/include/pybind11/eigen.h
+++ b/ppocr/postprocess/lanms/include/pybind11/eigen.h
--- a/ppocr/postprocess/lanms/include/pybind11/embed.h
+++ b/ppocr/postprocess/lanms/include/pybind11/embed.h
--- a/ppocr/postprocess/lanms/include/pybind11/eval.h
+++ b/ppocr/postprocess/lanms/include/pybind11/eval.h
--- a/ppocr/postprocess/lanms/include/pybind11/functional.h
+++ b/ppocr/postprocess/lanms/include/pybind11/functional.h
--- a/ppocr/postprocess/lanms/include/pybind11/numpy.h
+++ b/ppocr/postprocess/lanms/include/pybind11/numpy.h
--- a/ppocr/postprocess/lanms/include/pybind11/operators.h
+++ b/ppocr/postprocess/lanms/include/pybind11/operators.h
--- a/ppocr/postprocess/lanms/include/pybind11/options.h
+++ b/ppocr/postprocess/lanms/include/pybind11/options.h
--- a/ppocr/postprocess/lanms/include/pybind11/pybind11.h
+++ b/ppocr/postprocess/lanms/include/pybind11/pybind11.h
--- a/ppocr/postprocess/lanms/include/pybind11/pytypes.h
+++ b/ppocr/postprocess/lanms/include/pybind11/pytypes.h
--- a/ppocr/postprocess/lanms/include/pybind11/stl.h
+++ b/ppocr/postprocess/lanms/include/pybind11/stl.h
--- a/ppocr/postprocess/lanms/include/pybind11/stl_bind.h
+++ b/ppocr/postprocess/lanms/include/pybind11/stl_bind.h
--- a/ppocr/postprocess/lanms/include/pybind11/typeid.h
+++ b/ppocr/postprocess/lanms/include/pybind11/typeid.h
--- a/ppocr/postprocess/lanms/lanms.h
+++ b/ppocr/postprocess/lanms/lanms.h
--- a/ppocr/postprocess/locality_aware_nms.py
+++ b/ppocr/postprocess/locality_aware_nms.py
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
--- a/ppocr/utils/check.py
+++ b/ppocr/utils/check.py
--- a/ppocr/utils/logging.py
+++ b/ppocr/utils/logging.py
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
--- a/ppocr/utils/stats.py
+++ b/ppocr/utils/stats.py
--- a/ppocr/utils/utility.py
+++ b/ppocr/utils/utility.py
--- a/requirments.txt
+++ b/requirments.txt
--- a/setup.py
+++ b/setup.py
--- a/tools/eval.py
+++ b/tools/eval.py
--- a/tools/eval_utils/eval_det_utils.py
+++ b/tools/eval_utils/eval_det_utils.py
--- a/tools/eval_utils/eval_rec_utils.py
+++ b/tools/eval_utils/eval_rec_utils.py
--- a/tools/export_model.py
+++ b/tools/export_model.py
--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
--- a/tools/infer_det.py
+++ b/tools/infer_det.py
--- a/tools/infer_rec.py
+++ b/tools/infer_rec.py
--- a/tools/program.py
+++ b/tools/program.py
--- a/tools/train.py
+++ b/tools/train.py
--- a/train.sh
+++ b/train.sh