fix conflicts

e7ad27c3 · LDOUBLEV · c0b4cefd · 91f5ab5c · e7ad27c3 · e7ad27c3
55 changed files
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@@ -45,9 +45,7 @@ Optimizer:
  beta1: 0.9
  beta2: 0.999
  lr:
-#    name: Cosine
    learning_rate: 0.001
-#    warmup_epoch: 0
  regularizer:
    name: 'L2'
    factor: 0

--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_mv3/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: EASTFPN
+    model_name: small
+  Head:
+    name: EASTHead
+    model_name: small
+Loss:
+  name: EASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_db.yml
+++ b/configs/det/det_r50_vd_db.yml
+Global:
+  use_gpu: true
+  epoch_num: 1200
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/det_rc/det_r50_vd/
+  save_epoch_step: 1200
+  # evaluation is run every 4000 iterations after the 5000th iteration
+  eval_batch_step: [5000,4000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_db/predicts_db.txt
+Architecture:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: DBFPN
+    out_channels: 256
+  Head:
+    name: DBHead
+    k: 50
+Loss:
+  name: DBLoss
+  balance_loss: true
+  main_loss_type: DiceLoss
+  alpha: 5
+  beta: 10
+  ohem_ratio: 3
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: DBPostProcess
+  thresh: 0.3
+  box_thresh: 0.7
+  max_candidates: 1000
+  unclip_ratio: 1.5
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [0.5]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+            - { 'type': Affine, 'args': { 'rotate': [-10, 10] } }
+            - { 'type': Resize, 'args': { 'size': [0.5, 3] } }
+      - EastRandomCropData:
+          size: [640, 640]
+          max_tries: 50
+          keep_ratio: true
+      - MakeBorderMap:
+          shrink_ratio: 0.4
+          thresh_min: 0.3
+          thresh_max: 0.7
+      - MakeShrinkMap:
+          shrink_ratio: 0.4
+          min_text_size: 8
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'threshold_map', 'threshold_mask', 'shrink_map', 'shrink_mask'] # the order of the dataloader list
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          image_shape: [736, 1280]
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 8
\ No newline at end of file
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_r50_vd/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: EASTFPN
+    model_name: large
+  Head:
+    name: EASTHead
+    model_name: large
+Loss:
+  name: EASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_ic15/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+Loss:
+  name: SASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 2
+  nms_thresh: 0.2
+  expand_scale: 1.0
+  shrink_ratio_of_width: 0.3
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    # NOTE(review): these are ArT / Total-Text label lists although Eval below uses ICDAR2015 —
+    # confirm they were not swapped with det_r50_vd_sast_totaltext.yml.
+    # Keys renamed from label_file_path / data_ratio_list to the names every other
+    # SimpleDataSet section uses (label_file_list / ratio_list).
+    label_file_list: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
+    # NOTE(review): presumably one sampling ratio per label file above — confirm against SimpleDataSet
+    ratio_list: [0.5, 0.5]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 1536
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_tt/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+Loss:
+  name: SASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 6
+  nms_thresh: 0.2
+  expand_scale: 1.2
+  shrink_ratio_of_width: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
+    ratio_list: [0.1, 0.45, 0.3, 0.15]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: 
+        - ./train_data/total_text_icdar_14pt/test_label_json.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 768
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -5,7 +5,7 @@ Global:
  print_batch_step: 10
  save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
  save_epoch_step: 3
-  # evaluation is run every 5000 iterations after the 4000th iteration
+  # evaluation is run every 2000 iterations
  eval_batch_step: [0, 2000]
  # if pretrained_model is saved in static mode, load_static_weights must set to True
  cal_metric_during_train: True
@@ -13,7 +13,7 @@ Global:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: 
  character_type: en
@@ -21,7 +21,6 @@ Global:
  infer_mode: False
  use_space_char: False
 Optimizer:
  name: Adam
  beta1: 0.9

--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
+Global:
+  use_gpu: True
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/mv3_none_none_ctc/
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path:
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0005
+  regularizer:
+    name: 'L2'
+    factor: 0
+Architecture:
+  model_type: rec
+  algorithm: Rosetta
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: SequenceEncoder
+    encoder_type: reshape
+  Head:
+    name: CTCHead
+    fc_decay: 0.0004
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+Train:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+Eval:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 8
--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+Global:
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/mv3_tps_bilstm_ctc/
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path:
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0005
+  regularizer:
+    name: 'L2'
+    factor: 0
+Architecture:
+  model_type: rec
+  algorithm: STARNet
+  Transform:
+    name: TPS
+    num_fiducial: 20
+    loc_lr: 0.1
+    model_name: small
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: SequenceEncoder
+    encoder_type: rnn
+    hidden_size: 96
+  Head:
+    name: CTCHead
+    fc_decay: 0.0004
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+Train:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+Eval:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 4
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -5,7 +5,7 @@ Global:
  print_batch_step: 10
  save_model_dir: ./output/rec/r34_vd_none_bilstm_ctc/
  save_epoch_step: 3
-  # evaluation is run every 5000 iterations after the 4000th iteration
+  # evaluation is run every 2000 iterations
  eval_batch_step: [0, 2000]
  # if pretrained_model is saved in static mode, load_static_weights must set to True
  cal_metric_during_train: True
@@ -13,7 +13,7 @@ Global:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: 
  character_type: en
@@ -21,7 +21,6 @@ Global:
  infer_mode: False
  use_space_char: False
 Optimizer:
  name: Adam
  beta1: 0.9
@@ -71,7 +70,7 @@ Train:
      - KeepKeys:
          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
  loader:
-    shuffle: False
+    shuffle: True
    batch_size_per_card: 256
    drop_last: True
    num_workers: 8

--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
+Global:
+  use_gpu: true
+  epoch_num: 72
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/r34_vd_none_none_ctc/
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  # if pretrained_model is saved in static mode, load_static_weights must be set to True
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path:
+  character_type: en
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: False
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0005
+  regularizer:
+    name: 'L2'
+    factor: 0
+Architecture:
+  model_type: rec
+  algorithm: Rosetta
+  Backbone:
+    name: ResNet
+    layers: 34
+  Neck:
+    name: SequenceEncoder
+    encoder_type: reshape
+  Head:
+    name: CTCHead
+    fc_decay: 0.0004
+Loss:
+  name: CTCLoss
+PostProcess:
+  name: CTCLabelDecode
+Metric:
+  name: RecMetric
+  main_indicator: acc
+Train:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 8
+Eval:
+  dataset:
+    name: LMDBDateSet
+    data_dir: ./train_data/data_lmdb_release/validation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - CTCLabelEncode: # Class handling label
+      - RecResizeImg:
+          image_shape: [3, 32, 100]
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 4
--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -5,7 +5,7 @@ Global:
  print_batch_step: 10
  save_model_dir: ./output/rec/r34_vd_tps_bilstm_ctc/
  save_epoch_step: 3
-  # evaluation is run every 5000 iterations after the 4000th iteration
+  # evaluation is run every 2000 iterations
  eval_batch_step: [0, 2000]
  # if pretrained_model is saved in static mode, load_static_weights must set to True
  cal_metric_during_train: True
@@ -13,7 +13,7 @@ Global:
  checkpoints:
  save_inference_dir:
  use_visualdl: False
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: 
  character_type: en
@@ -21,7 +21,6 @@ Global:
  infer_mode: False
  use_space_char: False
 Optimizer:
  name: Adam
  beta1: 0.9
@@ -34,7 +33,7 @@ Optimizer:
 Architecture:
  model_type: rec
-  algorithm: CRNN
+  algorithm: STARNet
  Transform:
    name: TPS
    num_fiducial: 20

--- a/deploy/hubserving/ocr_cls/__init__.py
+++ b/deploy/hubserving/ocr_cls/__init__.py
--- a/deploy/hubserving/ocr_cls/config.json
+++ b/deploy/hubserving/ocr_cls/config.json
+{
+    "modules_info": {
+        "ocr_cls": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8866,
+    "use_multiprocess": false,
+    "workers": 2
+}
--- a/deploy/hubserving/ocr_cls/module.py
+++ b/deploy/hubserving/ocr_cls/module.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+sys.path.insert(0, ".")
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, runnable, serving
+import cv2
+import paddlehub as hub
+from tools.infer.utility import base64_to_cv2
+from tools.infer.predict_cls import TextClassifier
+# Hub module wrapping TextClassifier: takes images (arrays or file paths) and
+# reports an angle and a confidence for each classifier result.
+@moduleinfo(
+    name="ocr_cls",
+    version="1.0.0",
+    summary="ocr recognition service",
+    author="paddle-dev",
+    author_email="paddle-dev@baidu.com",
+    type="cv/text_recognition")
+class OCRCls(hub.Module):
+    def _initialize(self, use_gpu=False, enable_mkldnn=False):
+        """
+        Build the TextClassifier from the settings returned by read_params().
+        Args:
+            use_gpu (bool): stored on the config; when True, CUDA_VISIBLE_DEVICES must be set and start with a digit.
+            enable_mkldnn (bool): stored on the config as enable_mkldnn.
+        Raises:
+            RuntimeError: use_gpu is True but CUDA_VISIBLE_DEVICES is unset, empty, or does not start with a digit.
+        """
+        from ocr_cls.params import read_params
+        cfg = read_params()
+        cfg.use_gpu = use_gpu
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                # Only the first character is validated; it must parse as an integer.
+                int(_places[0])
+                print("use gpu: ", use_gpu)
+                print("CUDA_VISIBLE_DEVICES: ", _places)
+                cfg.gpu_mem = 8000
+            # KeyError: variable unset; IndexError: empty value; ValueError: non-digit first character.
+            # Narrowed from a bare except so unrelated errors (e.g. KeyboardInterrupt) are not relabelled.
+            except (KeyError, IndexError, ValueError):
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
+                )
+        cfg.ir_optim = True
+        cfg.enable_mkldnn = enable_mkldnn
+        self.text_classifier = TextClassifier(cfg)
+    def read_images(self, paths=[]):
+        """
+        Load images from disk with cv2.imread.
+        Args:
+            paths (list[str]): image file paths; each must be an existing file (asserted).
+        Returns:
+            images (list): the successfully decoded images, in input order; unreadable files are logged and skipped.
+        """
+        images = []
+        for img_path in paths:
+            assert os.path.isfile(
+                img_path), "The {} isn't a valid file.".format(img_path)
+            img = cv2.imread(img_path)
+            if img is None:
+                logger.info("error in loading image:{}".format(img_path))
+                continue
+            images.append(img)
+        return images
+    def predict(self, images=[], paths=[]):
+        """
+        Get the text angle in the predicted images.
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. Pass either images or paths, not both.
+            paths (list[str]): The paths of images. Pass either paths or images, not both.
+        Returns:
+            res (list): a one-element list holding a list of {'angle', 'confidence'} dicts,
+                one per classifier result; [[]] if the classifier call raises.
+        Raises:
+            TypeError: the inputs are not exactly one of a non-empty images list or a non-empty paths list.
+            AssertionError: no image is left to predict after loading.
+        """
+        if images != [] and isinstance(images, list) and paths == []:
+            predicted_data = images
+        elif images == [] and isinstance(paths, list) and paths != []:
+            predicted_data = self.read_images(paths)
+        else:
+            raise TypeError("The input data is inconsistent with expectations.")
+        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
+        # Drop entries that failed to decode before handing the batch to the classifier.
+        img_list = []
+        for img in predicted_data:
+            if img is None:
+                continue
+            img_list.append(img)
+        rec_res_final = []
+        try:
+            img_list, cls_res, predict_time = self.text_classifier(img_list)
+            for dno in range(len(cls_res)):
+                angle, score = cls_res[dno]
+                rec_res_final.append({
+                    'angle': angle,
+                    'confidence': float(score),
+                })
+        except Exception as e:
+            # Best-effort: a classifier failure is printed and reported as an empty result.
+            print(e)
+            return [[]]
+        return [rec_res_final]
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service: decode each entry of images with base64_to_cv2, then call predict.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.predict(images_decode, **kwargs)
+        return results
+if __name__ == '__main__':
+    # Manual smoke run: classify three sample word images by path and print the result.
+    ocr = OCRCls()
+    image_path = [
+        './doc/imgs_words/ch/word_1.jpg',
+        './doc/imgs_words/ch/word_2.jpg',
+        './doc/imgs_words/ch/word_3.jpg',
+    ]
+    res = ocr.predict(paths=image_path)
+    print(res)
--- a/deploy/hubserving/ocr_cls/params.py
+++ b/deploy/hubserving/ocr_cls/params.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+# Plain attribute holder; read_params() assigns the settings onto an instance.
+class Config(object):
+    pass
+def read_params():
+    """Return a Config instance populated with the text-classifier settings below."""
+    cfg = Config()
+    # params for text classifier
+    cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v1.1_cls_infer/"
+    # NOTE(review): presumably "channels, height, width" as a comma-separated string — confirm against TextClassifier
+    cfg.cls_image_shape = "3, 48, 192"
+    # NOTE(review): presumably the angle labels the classifier can emit — confirm
+    cfg.label_list = ['0', '180']
+    cfg.cls_batch_num = 30
+    cfg.cls_thresh = 0.9
+    cfg.use_zero_copy_run = False
+    cfg.use_pdserving = False
+    return cfg
--- a/deploy/hubserving/ocr_det/config.json
+++ b/deploy/hubserving/ocr_det/config.json
@@ -9,7 +9,7 @@
            }
        }
    },
-    "port": 8866,
+    "port": 8865,
    "use_multiprocess": false,
    "workers": 2    
 }
--- a/deploy/hubserving/ocr_det/module.py
+++ b/deploy/hubserving/ocr_det/module.py
@@ -3,20 +3,14 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import argparse
-import ast
-import copy
-import math
 import os
-import time
+import sys
+sys.path.insert(0, ".")
-from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
 from paddlehub.common.logger import logger
 from paddlehub.module.module import moduleinfo, runnable, serving
-from PIL import Image
 import cv2
 import numpy as np
-import paddle.fluid as fluid
 import paddlehub as hub
 from tools.infer.utility import base64_to_cv2
@@ -67,9 +61,7 @@ class OCRDet(hub.Module):
            images.append(img)
        return images
-    def predict(self,
+    def predict(self, images=[], paths=[]):
-                images=[],
-                paths=[]):
        """
        Get the text box in the predicted images.
        Args:
@@ -87,7 +79,7 @@ class OCRDet(hub.Module):
            raise TypeError("The input data is inconsistent with expectations.")
        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
        all_results = []
        for img in predicted_data:
            if img is None:
@@ -99,11 +91,9 @@ class OCRDet(hub.Module):
            rec_res_final = []
            for dno in range(len(dt_boxes)):
-                rec_res_final.append(
+                rec_res_final.append({
-                    {
+                    'text_region': dt_boxes[dno].astype(np.int).tolist()
-                        'text_region': dt_boxes[dno].astype(np.int).tolist()
+                })
-                    }
-                )
            all_results.append(rec_res_final)
        return all_results
@@ -116,7 +106,7 @@ class OCRDet(hub.Module):
        results = self.predict(images_decode, **kwargs)
        return results
 if __name__ == '__main__':
    ocr = OCRDet()
    image_path = [
@@ -124,4 +114,4 @@ if __name__ == '__main__':
        './doc/imgs/12.jpg',
    ]
    res = ocr.predict(paths=image_path)
    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
@@ -10,16 +10,17 @@ class Config(object):
 def read_params():
    cfg = Config()
    #params for text detector
    cfg.det_algorithm = "DB"
-    cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
-    cfg.det_max_side_len = 960
+    cfg.det_limit_side_len = 960
+    cfg.det_limit_type = 'max'
    #DB parmas
-    cfg.det_db_thresh =0.3
+    cfg.det_db_thresh = 0.3
-    cfg.det_db_box_thresh =0.5
+    cfg.det_db_box_thresh = 0.5
-    cfg.det_db_unclip_ratio =2.0
+    cfg.det_db_unclip_ratio = 2.0
    # #EAST parmas
    # cfg.det_east_score_thresh = 0.8
@@ -37,5 +38,6 @@ def read_params():
    # cfg.use_space_char = True
    cfg.use_zero_copy_run = False
+    cfg.use_pdserving = False
    return cfg
--- a/deploy/hubserving/ocr_rec/module.py
+++ b/deploy/hubserving/ocr_rec/module.py
@@ -3,20 +3,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import argparse
-import ast
-import copy
-import math
 import os
-import time
+import sys
+sys.path.insert(0, ".")
-from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
 from paddlehub.common.logger import logger
 from paddlehub.module.module import moduleinfo, runnable, serving
-from PIL import Image
 import cv2
-import numpy as np
-import paddle.fluid as fluid
 import paddlehub as hub
 from tools.infer.utility import base64_to_cv2
@@ -67,9 +60,7 @@ class OCRRec(hub.Module):
            images.append(img)
        return images
-    def predict(self,
+    def predict(self, images=[], paths=[]):
-                images=[],
-                paths=[]):
        """
        Get the text box in the predicted images.
        Args:
@@ -87,31 +78,28 @@ class OCRRec(hub.Module):
            raise TypeError("The input data is inconsistent with expectations.")
        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
        img_list = []
        for img in predicted_data:
            if img is None:
                continue
            img_list.append(img)
        rec_res_final = []
        try:
            rec_res, predict_time = self.text_recognizer(img_list)
            for dno in range(len(rec_res)):
                text, score = rec_res[dno]
-                rec_res_final.append(
+                rec_res_final.append({
-                    {
+                    'text': text,
-                        'text': text,
+                    'confidence': float(score),
-                        'confidence': float(score),
+                })
-                    }
-                )
        except Exception as e:
            print(e)
            return [[]]
        return [rec_res_final]
    @serving
    def serving_method(self, images, **kwargs):
        """
@@ -121,7 +109,7 @@ class OCRRec(hub.Module):
        results = self.predict(images_decode, **kwargs)
        return results
 if __name__ == '__main__':
    ocr = OCRRec()
    image_path = [
@@ -130,4 +118,4 @@ if __name__ == '__main__':
        './doc/imgs_words/ch/word_3.jpg',
    ]
    res = ocr.predict(paths=image_path)
    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_rec/params.py
+++ b/deploy/hubserving/ocr_rec/params.py
@@ -10,25 +10,10 @@ class Config(object):
 def read_params():
    cfg = Config()
-    # #params for text detector
-    # cfg.det_algorithm = "DB"
-    # cfg.det_model_dir = "./inference/ch_det_mv3_db/"
-    # cfg.det_max_side_len = 960
-    # #DB parmas
-    # cfg.det_db_thresh =0.3
-    # cfg.det_db_box_thresh =0.5
-    # cfg.det_db_unclip_ratio =2.0
-    # #EAST parmas
-    # cfg.det_east_score_thresh = 0.8
-    # cfg.det_east_cover_thresh = 0.1
-    # cfg.det_east_nms_thresh = 0.2
    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
-    cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'
@@ -39,5 +24,6 @@ def read_params():
    cfg.use_space_char = True
    cfg.use_zero_copy_run = False
+    cfg.use_pdserving = False
    return cfg
--- a/deploy/hubserving/ocr_system/module.py
+++ b/deploy/hubserving/ocr_system/module.py
@@ -3,20 +3,16 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-import argparse
-import ast
-import copy
-import math
 import os
+import sys
+sys.path.insert(0, ".")
 import time
-from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
 from paddlehub.common.logger import logger
 from paddlehub.module.module import moduleinfo, runnable, serving
-from PIL import Image
 import cv2
 import numpy as np
-import paddle.fluid as fluid
 import paddlehub as hub
 from tools.infer.utility import base64_to_cv2
@@ -52,7 +48,7 @@ class OCRSystem(hub.Module):
                )
        cfg.ir_optim = True
        cfg.enable_mkldnn = enable_mkldnn
        self.text_sys = TextSystem(cfg)
    def read_images(self, paths=[]):
@@ -67,9 +63,7 @@ class OCRSystem(hub.Module):
            images.append(img)
        return images
-    def predict(self,
+    def predict(self, images=[], paths=[]):
-                       images=[],
-                       paths=[]):
        """
        Get the chinese texts in the predicted images.
        Args:
@@ -104,13 +98,11 @@ class OCRSystem(hub.Module):
            for dno in range(dt_num):
                text, score = rec_res[dno]
-                rec_res_final.append(
+                rec_res_final.append({
-                    {
+                    'text': text,
-                        'text': text,
+                    'confidence': float(score),
-                        'confidence': float(score),
+                    'text_region': dt_boxes[dno].astype(np.int).tolist()
-                        'text_region': dt_boxes[dno].astype(np.int).tolist()
+                })
-                    }
-                )
            all_results.append(rec_res_final)
        return all_results
@@ -123,7 +115,7 @@ class OCRSystem(hub.Module):
        results = self.predict(images_decode, **kwargs)
        return results
 if __name__ == '__main__':
    ocr = OCRSystem()
    image_path = [
@@ -131,4 +123,4 @@ if __name__ == '__main__':
        './doc/imgs/12.jpg',
    ]
    res = ocr.predict(paths=image_path)
    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
@@ -10,16 +10,17 @@ class Config(object):
 def read_params():
    cfg = Config()
    #params for text detector
    cfg.det_algorithm = "DB"
-    cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v1.1_det_infer/"
-    cfg.det_max_side_len = 960
+    cfg.det_limit_side_len = 960
+    cfg.det_limit_type = 'max'
    #DB parmas
-    cfg.det_db_thresh =0.3
+    cfg.det_db_thresh = 0.3
-    cfg.det_db_box_thresh =0.5
+    cfg.det_db_box_thresh = 0.5
-    cfg.det_db_unclip_ratio =2.0
+    cfg.det_db_unclip_ratio = 2.0
    #EAST parmas
    cfg.det_east_score_thresh = 0.8
@@ -28,7 +29,7 @@ def read_params():
    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
-    cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v1.1_rec_infer/"
    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'
@@ -38,6 +39,15 @@ def read_params():
    cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
    cfg.use_space_char = True
+    #params for text classifier
+    cfg.use_angle_cls = True
+    cfg.cls_model_dir = "./inference/ch_ppocr_mobile_v1.1_cls_infer/"
+    cfg.cls_image_shape = "3, 48, 192"
+    cfg.label_list = ['0', '180']
+    cfg.cls_batch_num = 30
+    cfg.cls_thresh = 0.9
    cfg.use_zero_copy_run = False
+    cfg.use_pdserving = False
    return cfg
--- a/deploy/hubserving/readme.md
+++ b/deploy/hubserving/readme.md
+[English](readme_en.md) | 简体中文
+PaddleOCR提供2种服务部署方式：
+- 基于PaddleHub Serving的部署：代码路径为"`./deploy/hubserving`"，按照本教程使用；
+- 基于PaddleServing的部署：代码路径为"`./deploy/pdserving`"，使用方法参考[文档](../../deploy/pdserving/readme.md)。
+# 基于PaddleHub Serving的服务部署
+hubserving服务部署目录下包括检测、方向分类、识别、2阶段串联四种服务包，请根据需求选择相应的服务包进行安装和启动。目录结构如下：
+```
+deploy/hubserving/
+  └─  ocr_cls     分类模块服务包
+  └─  ocr_det     检测模块服务包
+  └─  ocr_rec     识别模块服务包
+  └─  ocr_system  检测+识别串联服务包
+```
+每个服务包下包含4个文件。以2阶段串联服务包为例，目录如下：
+```
+deploy/hubserving/ocr_system/
+  └─  __init__.py    空文件，必选
+  └─  config.json    配置文件，可选，使用配置启动服务时作为参数传入
+  └─  module.py      主模块，必选，包含服务的完整逻辑
+  └─  params.py      参数文件，必选，包含模型路径、前后处理参数等参数
+```
+## 快速启动服务
+以下步骤以检测+识别2阶段串联服务为例，如果只需要检测服务或识别服务，替换相应文件路径即可。
+### 1. 准备环境
+```shell
+# 安装paddlehub  
+pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+### 2. 下载推理模型
+安装服务模块前，需要准备推理模型并放到正确路径。默认使用的是v1.1版的超轻量模型，默认模型路径为：
+```
+检测模型：./inference/ch_ppocr_mobile_v1.1_det_infer/
+识别模型：./inference/ch_ppocr_mobile_v1.1_rec_infer/
+方向分类器：./inference/ch_ppocr_mobile_v1.1_cls_infer/
+```  
+**模型路径可在`params.py`中查看和修改。** 更多模型可以从PaddleOCR提供的[模型库](../../doc/doc_ch/models_list.md)下载，也可以替换成自己训练转换好的模型。
+### 3. 安装服务模块
+PaddleOCR提供4种服务模块，根据需要安装所需模块。
+* 在Linux环境下，安装示例如下：
+```shell
+# 安装检测服务模块：  
+hub install deploy/hubserving/ocr_det/
+# 或，安装分类服务模块：  
+hub install deploy/hubserving/ocr_cls/
+# 或，安装识别服务模块：  
+hub install deploy/hubserving/ocr_rec/
+# 或，安装检测+识别串联服务模块：  
+hub install deploy/hubserving/ocr_system/
+```
+* 在Windows环境下(文件夹的分隔符为`\`)，安装示例如下：
+```shell
+# 安装检测服务模块：  
+hub install deploy\hubserving\ocr_det\
+# 或，安装分类服务模块：  
+hub install deploy\hubserving\ocr_cls\
+# 或，安装识别服务模块：  
+hub install deploy\hubserving\ocr_rec\
+# 或，安装检测+识别串联服务模块：
+hub install deploy\hubserving\ocr_system\
+```
+### 4. 启动服务
+#### 方式1. 命令行命令启动（仅支持CPU）
+**启动命令：**  
+```shell
+$ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
+                    --port XXXX \
+                    --use_multiprocess \
+                    --workers \
+```  
+**参数：**  
+|参数|用途|  
+|-|-|  
+|--modules/-m|PaddleHub Serving预安装模型，以多个Module==Version键值对的形式列出<br>*`当不指定Version时，默认选择最新版本`*|  
+|--port/-p|服务端口，默认为8866|  
+|--use_multiprocess|是否启用并发方式，默认为单进程方式，推荐多核CPU机器使用此方式<br>*`Windows操作系统只支持单进程方式`*|
+|--workers|在并发方式下指定的并发任务数，默认为`2*cpu_count-1`，其中`cpu_count`为CPU核数|  
+如启动串联服务：  ```hub serving start -m ocr_system```  
+这样就完成了一个服务化API的部署，使用默认端口号8866。
+#### 方式2. 配置文件启动（支持CPU、GPU）
+**启动命令：**  
+```hub serving start -c config.json```  
+其中，`config.json`格式如下：
+```python
+{
+    "modules_info": {
+        "ocr_system": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8868,
+    "use_multiprocess": false,
+    "workers": 2
+}
+```
+- `init_args`中的可配参数与`module.py`中的`_initialize`函数接口一致。其中，**当`use_gpu`为`true`时，表示使用GPU启动服务**。  
+- `predict_args`中的可配参数与`module.py`中的`predict`函数接口一致。
+**注意:**  
+- 使用配置文件启动服务时，其他参数会被忽略。
+- 如果使用GPU预测(即，`use_gpu`置为`true`)，则需要在启动服务之前，设置CUDA_VISIBLE_DEVICES环境变量，如：```export CUDA_VISIBLE_DEVICES=0```，否则不用设置。
+- **`use_gpu`不可与`use_multiprocess`同时为`true`**。
+如，使用GPU 3号卡启动串联服务：  
+```shell
+export CUDA_VISIBLE_DEVICES=3
+hub serving start -c deploy/hubserving/ocr_system/config.json
+```  
+## 发送预测请求
+配置好服务端，可使用以下命令发送预测请求，获取预测结果:  
+```python tools/test_hubserving.py server_url image_path```  
+需要给脚本传递2个参数：  
+- **server_url**：服务地址，格式为  
+`http://[ip_address]:[port]/predict/[module_name]`  
+例如，如果使用配置文件启动检测、分类、识别、检测+分类+识别3阶段服务，那么发送请求的url将分别是：  
+`http://127.0.0.1:8865/predict/ocr_det`  
+`http://127.0.0.1:8866/predict/ocr_cls`  
+`http://127.0.0.1:8867/predict/ocr_rec`  
+`http://127.0.0.1:8868/predict/ocr_system`  
+- **image_path**：测试图像路径，可以是单张图片路径，也可以是图像集合目录路径  
+访问示例：  
+```python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/```
+## 返回结果格式说明
+返回结果为列表（list），列表中的每一项为词典（dict），词典一共可能包含4种字段，信息如下：
+|字段名称|数据类型|意义|
+|----|----|----|
+|angle|str|文本角度|
+|text|str|文本内容|
+|confidence|float| 文本识别置信度或文本角度分类置信度|
+|text_region|list|文本位置坐标|
+不同模块返回的字段不同，如，文本识别服务模块返回结果不含`text_region`字段，具体信息如下：
+| 字段名/模块名 | ocr_det | ocr_cls | ocr_rec | ocr_system |
+|  ----  |  ----  |  ----  |  ----  |  ----  |
+|angle| | ✔ | | ✔ |
+|text| | |✔|✔|
+|confidence| |✔ |✔|✔|
+|text_region| ✔| | |✔ |
+**说明：** 如果需要增加、删除、修改返回字段，可在相应模块的`module.py`文件中进行修改，完整流程参考下一节自定义修改服务模块。
+## 自定义修改服务模块
+如果需要修改服务逻辑，你一般需要操作以下步骤（以修改`ocr_system`为例）：  
+- 1、 停止服务  
+```hub serving stop --port/-p XXXX```  
+- 2、 到相应的`module.py`和`params.py`等文件中根据实际需求修改代码。  
+例如，如果需要替换部署服务所用模型，则需要到`params.py`中修改模型路径参数`det_model_dir`和`rec_model_dir`，如果需要关闭文本方向分类器，则将参数`use_angle_cls`置为`False`，当然，同时可能还需要修改其他相关参数，请根据实际情况修改调试。 **强烈建议修改后先直接运行`module.py`调试，能正确运行预测后再启动服务测试。**
+- 3、 卸载旧服务包  
+```hub uninstall ocr_system```  
+- 4、 安装修改后的新服务包  
+```hub install deploy/hubserving/ocr_system/```  
+- 5、重新启动服务  
+```hub serving start -m ocr_system```  
--- a/deploy/hubserving/readme_en.md
+++ b/deploy/hubserving/readme_en.md
+English | [简体中文](readme.md)
+PaddleOCR provides 2 service deployment methods:
+- Based on **PaddleHub Serving**: Code path is "`./deploy/hubserving`". Please follow this tutorial.
+- Based on **PaddleServing**: Code path is "`./deploy/pdserving`". Please refer to the [tutorial](../../deploy/pdserving/readme.md) for usage.
+# Service deployment based on PaddleHub Serving  
+The hubserving service deployment directory includes four service packages: detection, angle classification, recognition, and two-stage series connection. Please select the corresponding service package to install and start the service according to your needs. The directory is as follows:  
+```
+deploy/hubserving/
+  └─  ocr_det     detection module service package
+  └─  ocr_cls     angle class module service package
+  └─  ocr_rec     recognition module service package
+  └─  ocr_system  two-stage series connection service package
+```
+Each service package contains 4 files. Taking the 2-stage series connection service package as an example, the directory is as follows:  
+```
+deploy/hubserving/ocr_system/
+  └─  __init__.py    Empty file, required
+  └─  config.json    Configuration file, optional, passed in as a parameter when using configuration to start the service
+  └─  module.py      Main module file, required, contains the complete logic of the service
+  └─  params.py      Parameter file, required, including parameters such as model path, pre- and post-processing parameters
+```
+## Quick start service
+The following steps take the 2-stage series service as an example. If only the detection service or recognition service is needed, replace the corresponding file path.
+### 1. Prepare the environment
+```shell
+# Install paddlehub  
+pip3 install paddlehub --upgrade -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+### 2. Download inference model
+Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v1.1 is used, and the default model path is:  
+```
+detection model: ./inference/ch_ppocr_mobile_v1.1_det_infer/
+recognition model: ./inference/ch_ppocr_mobile_v1.1_rec_infer/
+text direction classifier: ./inference/ch_ppocr_mobile_v1.1_cls_infer/
+```  
+**The model path can be found and modified in `params.py`.** More models provided by PaddleOCR can be obtained from the [model library](../../doc/doc_en/models_list_en.md). You can also use models trained by yourself.
+### 3. Install Service Module
+PaddleOCR provides 4 kinds of service modules; install the ones you need.
+* On Linux platform, the examples are as follows.
+```shell
+# Install the detection service module:
+hub install deploy/hubserving/ocr_det/
+# Or, install the angle class service module:
+hub install deploy/hubserving/ocr_cls/
+# Or, install the recognition service module:
+hub install deploy/hubserving/ocr_rec/
+# Or, install the 2-stage series service module:
+hub install deploy/hubserving/ocr_system/
+```
+* On Windows platform, the examples are as follows.
+```shell
+# Install the detection service module:
+hub install deploy\hubserving\ocr_det\
+# Or, install the angle class service module:
+hub install deploy\hubserving\ocr_cls\
+# Or, install the recognition service module:
+hub install deploy\hubserving\ocr_rec\
+# Or, install the 2-stage series service module:
+hub install deploy\hubserving\ocr_system\
+```
+### 4. Start service
+#### Way 1. Start with command line parameters (CPU only)
+**start command：**  
+```shell
+$ hub serving start --modules [Module1==Version1, Module2==Version2, ...] \
+                    --port XXXX \
+                    --use_multiprocess \
+                    --workers \
+```  
+**parameters：**  
+|parameters|usage|  
+|-|-|  
+|--modules/-m|PaddleHub Serving pre-installed model, listed in the form of multiple Module==Version key-value pairs<br>*`When Version is not specified, the latest version is selected by default`*|
+|--port/-p|Service port, default is 8866|  
+|--use_multiprocess|Enable concurrent mode, the default is single-process mode, this mode is recommended for multi-core CPU machines<br>*`Windows operating system only supports single-process mode`*|
+|--workers|The number of concurrent tasks specified in concurrent mode, the default is `2*cpu_count-1`, where `cpu_count` is the number of CPU cores|  
+For example, start the 2-stage series service:  
+```shell
+hub serving start -m ocr_system
+```  
+This completes the deployment of a service API, using the default port number 8866.  
+#### Way 2. Start with configuration file（CPU、GPU）
+**start command：**  
+```shell
+hub serving start --config/-c config.json
+```  
+Wherein, the format of `config.json` is as follows:
+```python
+{
+    "modules_info": {
+        "ocr_system": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8868,
+    "use_multiprocess": false,
+    "workers": 2
+}
+```
+- The configurable parameters in `init_args` are consistent with the `_initialize` function interface in `module.py`. Among them, **when `use_gpu` is `true`, it means that the GPU is used to start the service**.
+- The configurable parameters in `predict_args` are consistent with the `predict` function interface in `module.py`.
+**Note:**  
+- When using the configuration file to start the service, other parameters will be ignored.
+- If you use GPU prediction (that is, `use_gpu` is set to `true`), you need to set the environment variable CUDA_VISIBLE_DEVICES before starting the service, such as: ```export CUDA_VISIBLE_DEVICES=0```, otherwise you do not need to set it.
+- **`use_gpu` and `use_multiprocess` cannot be `true` at the same time.**  
+For example, use GPU card No. 3 to start the 2-stage series service:
+```shell
+export CUDA_VISIBLE_DEVICES=3
+hub serving start -c deploy/hubserving/ocr_system/config.json
+```  
+## Send prediction requests
+After the service starts, you can use the following command to send a prediction request to obtain the prediction result:  
+```shell
+python tools/test_hubserving.py server_url image_path
+```  
+Two parameters need to be passed to the script:
+- **server_url**：service address，format of which is
+`http://[ip_address]:[port]/predict/[module_name]`  
+For example, if the detection, angle classification, recognition and 2-stage serial services are started with the provided configuration files, the respective `server_url` values would be:  
+`http://127.0.0.1:8865/predict/ocr_det`  
+`http://127.0.0.1:8866/predict/ocr_cls`  
+`http://127.0.0.1:8867/predict/ocr_rec`  
+`http://127.0.0.1:8868/predict/ocr_system`  
+- **image_path**：Test image path, can be a single image path or an image directory path
+**Eg.**
+```shell
+python tools/test_hubserving.py http://127.0.0.1:8868/predict/ocr_system ./doc/imgs/
+```
+## Returned result format
+The returned result is a list. Each item in the list is a dict. The dict may contain up to four fields, described below:
+|field name|data type|description|
+|----|----|----|
+|angle|str|text angle|
+|text|str|text content|
+|confidence|float|text recognition confidence or text angle classification confidence|
+|text_region|list|text location coordinates|
+The fields returned by different modules are different. For example, the results returned by the text recognition service module do not contain `text_region`. The details are as follows:
+| field name/module name | ocr_det | ocr_cls | ocr_rec | ocr_system |
+|  ----  |  ----  |  ----  |  ----  |  ----  |
+|angle| | ✔ | | ✔ |
+|text| | |✔|✔|
+|confidence| |✔ |✔|✔|
+|text_region| ✔| | |✔ |
+**Note：** If you need to add, delete or modify the returned fields, you can modify the file `module.py` of the corresponding module. For the complete process, refer to the user-defined modification service module in the next section.
+## User defined service module modification
+If you need to modify the service logic, the following steps are generally required (take the modification of `ocr_system` for example):
+- 1. Stop service
+```shell
+hub serving stop --port/-p XXXX
+```
+- 2. Modify the code in the corresponding files, like `module.py` and `params.py`, according to the actual needs.  
+For example, if you need to replace the model used by the deployed service, you need to modify model path parameters `det_model_dir` and `rec_model_dir` in `params.py`. If you want to turn off the text direction classifier, set the parameter `use_angle_cls` to `False`. Of course, other related parameters may need to be modified at the same time. Please modify and debug according to the actual situation. It is suggested to run `module.py` directly for debugging after modification before starting the service test.  
+- 3. Uninstall old service module
+```shell
+hub uninstall ocr_system
+```
+- 4. Install modified service module
+```shell
+hub install deploy/hubserving/ocr_system/
+```
+- 5. Restart service
+```shell
+hub serving start -m ocr_system
+```
--- a/doc/doc_ch/add_new_algorithm.md
+++ b/doc/doc_ch/add_new_algorithm.md
+# 添加新算法
+PaddleOCR将一个算法分解为以下几个部分，并对各部分进行模块化处理，方便快速组合出新的算法。
+* 数据加载和处理
+* 网络
+* 后处理
+* 损失函数
+* 指标评估
+* 优化器
+下面将分别对每个部分进行介绍，并介绍如何在该部分里添加新算法所需模块。
+## 数据加载和处理
+数据加载和处理由不同的模块(module)组成，其完成了图片的读取、数据增强和label的制作。这一部分在[ppocr/data](../../ppocr/data)下。 各个文件及文件夹作用说明如下:
+```bash
+ppocr/data/
+├── imaug             # 图片的读取、数据增强和label制作相关的文件
+│   ├── label_ops.py  # 对label进行变换的modules
+│   ├── operators.py  # 对image进行变换的modules
+│   ├──.....
+├── __init__.py
+├── lmdb_dataset.py   # 读取lmdb的数据集的dataset
+└── simple_dataset.py # 读取以`image_path\tgt`形式保存的数据集的dataset
+```
+PaddleOCR内置了大量图像操作相关模块，对于没有内置的模块可通过如下步骤添加:
+1. 在 [ppocr/data/imaug](../../ppocr/data/imaug) 文件夹下新建文件，如my_module.py。
+2. 在 my_module.py 文件内添加相关代码，示例代码如下:
+```python
+class MyModule:
+    def __init__(self, *args, **kwargs):
+        # your init code
+        pass
+    def __call__(self, data):
+        img = data['image']
+        label = data['label']
+        # your process code
+        data['image'] = img
+        data['label'] = label
+        return data
+```
+3. 在 [ppocr/data/imaug/\__init\__.py](../../ppocr/data/imaug/__init__.py) 文件内导入添加的模块。
+数据处理的所有处理步骤由不同的模块顺序执行而成，在config文件中按照列表的形式组合并执行。如:
+```yaml
+# angle class data process
+transforms:
+  - DecodeImage: # load image
+      img_mode: BGR
+      channel_first: False
+  - MyModule:
+      args1: args1
+      args2: args2
+  - KeepKeys:
+      keep_keys: [ 'image', 'label' ] # dataloader will return list in this order
+```
+## 网络
+网络部分完成了网络的组网操作，PaddleOCR将网络划分为四部分，这一部分在[ppocr/modeling](../../ppocr/modeling)下。 进入网络的数据将按照顺序(transforms->backbones->
+necks->heads)依次通过这四个部分。
+```bash
+├── architectures # 网络的组网代码
+├── transforms    # 网络的图像变换模块
+├── backbones     # 网络的特征提取模块
+├── necks         # 网络的特征增强模块
+└── heads         # 网络的输出模块
+```
+PaddleOCR内置了DB,EAST,SAST,CRNN和Attention等算法相关的常用模块，对于没有内置的模块可通过如下步骤添加，四个部分添加步骤一致，以backbones为例:
+1. 在 [ppocr/modeling/backbones](../../ppocr/modeling/backbones) 文件夹下新建文件，如my_backbone.py。
+2. 在 my_backbone.py 文件内添加相关代码，示例代码如下:
+```python
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+class MyBackbone(nn.Layer):
+    def __init__(self, *args, **kwargs):
+        super(MyBackbone, self).__init__()
+        # your init code
+        self.conv = nn.xxxx
+    def forward(self, inputs):
+        # your network forward
+        y = self.conv(inputs)
+        return y
+```
+3. 在 [ppocr/modeling/backbones/\__init\__.py](../../ppocr/modeling/backbones/__init__.py)文件内导入添加的模块。
+在完成网络的四部分模块添加之后，只需要配置文件中进行配置即可使用，如:
+```yaml
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+    name: MyTransform
+    args1: args1
+    args2: args2
+  Backbone:
+    name: MyBackbone
+    args1: args1
+  Neck:
+    name: MyNeck
+    args1: args1
+  Head:
+    name: MyHead
+    args1: args1
+```
+## 后处理
+后处理实现解码网络输出获得文本框或者识别到的文字。这一部分在[ppocr/postprocess](../../ppocr/postprocess)下。
+PaddleOCR内置了DB,EAST,SAST,CRNN和Attention等算法相关的后处理模块，对于没有内置的组件可通过如下步骤添加:
+1. 在 [ppocr/postprocess](../../ppocr/postprocess) 文件夹下新建文件，如 my_postprocess.py。
+2. 在 my_postprocess.py 文件内添加相关代码，示例代码如下:
+```python
+import paddle
+class MyPostProcess:
+    def __init__(self, *args, **kwargs):
+        # your init code
+        pass
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if isinstance(preds, paddle.Tensor):
+            preds = preds.numpy()
+        # you preds decode code
+        preds = self.decode_preds(preds)
+        if label is None:
+            return preds
+        # you label decode code
+        label = self.decode_label(label)
+        return preds, label
+    def decode_preds(self, preds):
+        # you preds decode code
+        pass
+    def decode_label(self, preds):
+        # you label decode code
+        pass
+```
+3. 在 [ppocr/postprocess/\__init\__.py](../../ppocr/postprocess/__init__.py)文件内导入添加的模块。
+在后处理模块添加之后，只需要配置文件中进行配置即可使用，如:
+```yaml
+PostProcess:
+  name: MyPostProcess
+  args1: args1
+  args2: args2
+```
+## 损失函数
+损失函数用于计算网络输出和label之间的距离。这一部分在[ppocr/losses](../../ppocr/losses)下。
+PaddleOCR内置了DB,EAST,SAST,CRNN和Attention等算法相关的损失函数模块，对于没有内置的模块可通过如下步骤添加:
+1. 在 [ppocr/losses](../../ppocr/losses) 文件夹下新建文件，如 my_loss.py。
+2. 在 my_loss.py 文件内添加相关代码，示例代码如下:
+```python
+import paddle
+from paddle import nn
+class MyLoss(nn.Layer):
+    def __init__(self, **kwargs):
+        super(MyLoss, self).__init__()
+        # you init code
+        pass
+    def __call__(self, predicts, batch):
+        label = batch[1]
+        # your loss code
+        loss = self.loss(input=predicts, label=label)
+        return {'loss': loss}
+```
+3. 在 [ppocr/losses/\__init\__.py](../../ppocr/losses/__init__.py)文件内导入添加的模块。
+在损失函数添加之后，只需要配置文件中进行配置即可使用，如:
+```yaml
+Loss:
+  name: MyLoss
+  args1: args1
+  args2: args2
+```
+## 指标评估
+指标评估用于计算网络在当前batch上的性能。这一部分在[ppocr/metrics](../../ppocr/metrics)下。 PaddleOCR内置了检测，分类和识别等算法相关的指标评估模块，对于没有内置的模块可通过如下步骤添加:
+1. 在 [ppocr/metrics](../../ppocr/metrics) 文件夹下新建文件，如my_metric.py。
+2. 在 my_metric.py 文件内添加相关代码，示例代码如下:
+```python
+class MyMetric(object):
+    def __init__(self, main_indicator='acc', **kwargs):
+        # main_indicator is used for select best model
+        self.main_indicator = main_indicator
+        self.reset()
+    def __call__(self, preds, batch, *args, **kwargs):
+        # preds is out of postprocess
+        # batch is out of dataloader
+        labels = batch[1]
+        cur_correct_num = 0
+        cur_all_num = 0
+        # you metric code
+        self.correct_num += cur_correct_num
+        self.all_num += cur_all_num
+        return {'acc': cur_correct_num / cur_all_num, }
+    def get_metric(self):
+        """
+        return metrics {
+                 'acc': 0,
+                 'norm_edit_dis': 0,
+            }
+        """
+        acc = self.correct_num / self.all_num
+        self.reset()
+        return {'acc': acc}
+    def reset(self):
+        # reset metric
+        self.correct_num = 0
+        self.all_num = 0
+```
+3. 在 [ppocr/metrics/\__init\__.py](../../ppocr/metrics/__init__.py)文件内导入添加的模块。
+在指标评估模块添加之后，只需要配置文件中进行配置即可使用，如:
+```yaml
+Metric:
+  name: MyMetric
+  main_indicator: acc
+```
+## 优化器
+优化器用于训练网络。优化器内部还包含了网络正则化和学习率衰减模块。 这一部分在[ppocr/optimizer](../../ppocr/optimizer)下。 PaddleOCR内置了`Momentum`,`Adam`
+和`RMSProp`等常用的优化器模块，`Linear`,`Cosine`,`Step`和`Piecewise`等常用的学习率衰减模块与`L1Decay`和`L2Decay`等常用的正则化模块。
+对于没有内置的模块可通过如下步骤添加，以`optimizer`为例:
+1. 在 [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) 文件内创建自己的优化器，示例代码如下:
+```python
+from paddle import optimizer as optim
+class MyOptim(object):
+    def __init__(self, learning_rate=0.001, *args, **kwargs):
+        self.learning_rate = learning_rate
+    def __call__(self, parameters):
+        # It is recommended to wrap the built-in optimizer of paddle
+        opt = optim.XXX(
+            learning_rate=self.learning_rate,
+            parameters=parameters)
+        return opt
+```
+在优化器模块添加之后，只需要配置文件中进行配置即可使用，如:
+```yaml
+Optimizer:
+  name: MyOptim
+  args1: args1
+  args2: args2
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0
+```
\ No newline at end of file
--- a/doc/doc_ch/config.md
+++ b/doc/doc_ch/config.md
-# 可选参数列表
+## 可选参数列表
 以下列表可以通过`--help`查看
@@ -8,65 +8,115 @@
 |          -o              |      ALL       |  设置配置文件里的参数内容  |  None  |  使用-o配置相较于-c选择的配置文件具有更高的优先级。例如：`-o Global.use_gpu=false`  |  
-## 配置文件 Global 参数介绍
+## 配置文件参数介绍
 以 `rec_chinese_lite_train_v1.1.yml ` 为例
+### Global 
 |         字段             |            用途                |      默认值       |            备注            |
 | :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|      algorithm           |    设置算法                    |  与配置文件同步   |     选择模型，支持模型请参考[简介](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/README.md) |
+|      use_gpu             |    设置代码是否在gpu运行           |       true        |                \                 |
-|      use_gpu             |    设置代码运行场所            |       true        |                \                 |
+|      epoch_num           |    最大训练epoch数             |       500        |                \                 |
-|      epoch_num           |    最大训练epoch数             |       3000        |                \                 |
 |      log_smooth_window   |    滑动窗口大小            |       20          |                \                 |
 |      print_batch_step    |    设置打印log间隔         |       10          |                \                 |
 |      save_model_dir      |    设置模型保存路径        |  output/{算法名称}  |                \                 |
 |      save_epoch_step     |    设置模型保存间隔        |       3           |                \                 |
 |      eval_batch_step     |    设置模型评估间隔        | 2000 或 [1000, 2000]        | 2000 表示每2000次迭代评估一次，[1000， 2000]表示从1000次迭代开始，每2000次评估一次   |
-|train_batch_size_per_card |  设置训练时单卡batch size    |         256         |                \                 |
+|      cal_metric_during_train     |    设置是否在训练过程中评估指标，此时评估的是模型在当前batch下的指标        |       true         |                \                 |
-| test_batch_size_per_card |  设置评估时单卡batch size    |         256         |                \                 |
+|      load_static_weights     |   设置预训练模型是否是静态图模式保存(目前仅检测算法需要)        |       true         |                \                 |
-|      image_shape         |    设置输入图片尺寸        |   [3, 32, 100]    |                \                 |
+|      pretrained_model    |    设置加载预训练模型路径      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
+|      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
+|      use_visualdl  |    设置是否启用visualdl进行可视化log展示 |          False        |    [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) |
+|      infer_img            |    设置预测图像路径或文件夹路径     |       ./infer_img | \|
+|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ppocr_keys_v1.txt  |    \                 |
 |      max_text_length     |    设置文本最大长度        |       25          |                \                 |
 |      character_type      |    设置字符类型            |       ch          |    en/ch, en时将使用默认dict，ch时使用自定义dict|
-|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ic15_dict.txt  |    \                 |
+|      use_space_char     |    设置是否识别空格             |        True      |          仅在 character_type=ch 时支持空格                 |
-|      loss_type           |    设置 loss 类型              |       ctc         |    支持两种loss： ctc / attention |
-|       distort            |    设置是否使用数据增强          |       false       |  设置为true时，将在训练时随机进行扰动，支持的扰动操作可阅读[img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)                 |
-|       use_space_char     |    设置是否识别空格             |        false      |          仅在 character_type=ch 时支持空格                 |
 |      label_list          |    设置方向分类器支持的角度       |    ['0','180']    |     仅在方向分类器中生效 |
-|      average_window      |    ModelAverage优化器中的窗口长度计算比例 |  0.15       |       目前仅应用与SRN |
+|      save_res_path          |    设置检测模型的结果保存地址       |    ./output/det_db/predicts_db.txt    |     仅在检测模型中生效 |
-|      max_average_window  |    平均值计算窗口长度的最大值   |   15625              | 推荐设置为一轮训练中mini-batchs的数目|
-|      min_average_window  |    平均值计算窗口长度的最小值  |    10000              |      \          |
-|      reader_yml          |    设置reader配置文件          |  ./configs/rec/rec_icdar15_reader.yml  |  \          |
-|      pretrain_weights    |    加载预训练模型路径      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
-|      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
-|      save_inference_dir  |    inference model 保存路径 |          None        |    用于保存inference model |
-## 配置文件 Reader 系列参数介绍
+### Optimizer ([ppocr/optimizer](../../ppocr/optimizer))
-以 `rec_chinese_reader.yml` 为例
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         优化器类名          |  Adam  |  目前支持`Momentum`,`Adam`,`RMSProp`, 见[ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py)  |
+|      beta1           |    设置一阶矩估计的指数衰减率  |       0.9         |               \             |
+|      beta2           |    设置二阶矩估计的指数衰减率  |     0.999         |               \             |
+|      **lr**                |         设置学习率decay方式       |   -    |       \  |
+|        name    |      学习率decay类名   |         Cosine       | 目前支持`Linear`,`Cosine`,`Step`,`Piecewise`, 见[ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
+|        learning_rate      |    基础学习率        |       0.001      |  \        |
+|      **regularizer**      |  设置网络正则化方式        |       -      | \        |
+|        name      |    正则化类名      |       L2     | 目前支持`L1`,`L2`, 见[ppocr/optimizer/regularizer.py](../../ppocr/optimizer/regularizer.py)        |
+|        factor      |    正则化系数       |       0.00004     |  \        |
-|         字段             |            用途                |      默认值       |            备注            |
-| :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|      reader_function     |    选择数据读取方式        |  ppocr.data.rec.dataset_traversal,SimpleReader  | 支持SimpleReader / LMDBReader 两种数据读取方式 |
-|      num_workers             |    设置数据读取线程数            |       8        |                \                 |
-|      img_set_dir          |    数据集路径             |       ./train_data        |                \                 |
-|      label_file_path      |    数据标签路径           |       ./train_data/rec_gt_train.txt| \    |
-|      infer_img            |    预测图像文件夹路径     |       ./infer_img | \|
-## 配置文件 Optimizer 系列参数介绍
+### Architecture ([ppocr/modeling](../../ppocr/modeling))
+在ppocr中，网络被划分为Transform,Backbone,Neck和Head四个阶段
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      model_type        |         网络类型          |  rec  |  目前支持`rec`,`det`,`cls`  |
+|      algorithm           |    模型名称  |       CRNN         |               支持列表见[algorithm_overview](./algorithm_overview.md)             |
+|      **Transform**           |    设置变换方式  |       -       |               目前仅rec类型的算法支持, 具体见[ppocr/modeling/transform](../../ppocr/modeling/transform)              |
+|        name    |      变换方式类名   |         TPS       | 目前支持`TPS` |
+|        num_fiducial      |    TPS控制点数        |       20      |  上下边各十个       |
+|        loc_lr      |    定位网络学习率        |       0.1      |  \      |
+|        model_name      |    定位网络大小        |       small      |  目前支持`small`,`large`       |
+|      **Backbone**      |  设置网络backbone类名        |       -      | 具体见[ppocr/modeling/backbones](../../ppocr/modeling/backbones)        |
+|        name      |    backbone类名       |       ResNet     | 目前支持`MobileNetV3`,`ResNet`        |
+|        layers      |    resnet层数       |       34     |  支持18,34,50,101,152,200       |
+|        model_name      |    MobileNetV3 网络大小       |       small     |  支持`small`,`large`       |
+|      **Neck**      |  设置网络neck        |       -      | 具体见[ppocr/modeling/necks](../../ppocr/modeling/necks)        |
+|        name      |    neck类名       |       SequenceEncoder     | 目前支持`SequenceEncoder`,`DBFPN`        |
+|        encoder_type      |    SequenceEncoder编码器类型       |       rnn     |  支持`reshape`,`fc`,`rnn`       |
+|        hidden_size      |   rnn内部单元数       |       48     |  \      |
+|        out_channels      |   DBFPN输出通道数       |       256     |  \      |
+|      **Head**      |  设置网络Head        |       -      | 具体见[ppocr/modeling/heads](../../ppocr/modeling/heads)        |
+|        name      |    head类名       |       CTCHead     | 目前支持`CTCHead`,`DBHead`,`ClsHead`        |
+|        fc_decay      |    CTCHead正则化系数       |       0.0004     |  \      |
+|        k      |   DBHead二值化系数       |       50     |  \      |
+|        class_dim      |   ClsHead输出分类数       |       2     |  \      |
-以 `rec_icdar15_train.yml` 为例
+### Loss ([ppocr/losses](../../ppocr/losses))
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         网络loss类名          |  CTCLoss  |  目前支持`CTCLoss`,`DBLoss`,`ClsLoss`  |
+|      balance_loss        |        DBLoss中是否对正负样本数量进行均衡(使用OHEM)         |  True  |  \  |
+|      ohem_ratio        |        DBLoss中的OHEM的负正样本比例         |  3  |  \  |
+|      main_loss_type        |        DBLoss中shrink_map所采用的loss        |  DiceLoss  |  支持`DiceLoss`,`BCELoss`  |
+|      alpha        |        DBLoss中shrink_map_loss的系数       |  5  |  \  |
+|      beta        |        DBLoss中threshold_map_loss的系数       |  10  |  \  |
+### PostProcess ([ppocr/postprocess](../../ppocr/postprocess))
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         后处理类名          |  CTCLabelDecode  |  目前支持`CTCLabelDecode`,`AttnLabelDecode`,`DBPostProcess`,`ClsPostProcess`  |
+|      thresh        |        DBPostProcess中分割图进行二值化的阈值         |  0.3  |  \  |
+|      box_thresh        |        DBPostProcess中对输出框进行过滤的阈值，低于此阈值的框不会输出         |  0.7  |  \  |
+|      max_candidates        |        DBPostProcess中输出的最大文本框数量        |  1000  |   |
+|      unclip_ratio        |        DBPostProcess中对文本框进行放大的比例       |  2.0  |  \  |
+### Metric ([ppocr/metrics](../../ppocr/metrics))
+|         字段             |            用途            |      默认值        |            备注             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         指标评估方法名称          |  RecMetric  |  目前支持`DetMetric`,`RecMetric`,`ClsMetric`  |
+|      main_indicator        |        主要指标,用于选取最优模型         |  acc |  对于检测方法为hmean，识别和分类方法为acc  |
+### Dataset  ([ppocr/data](../../ppocr/data))
 |         字段             |            用途            |      默认值        |            备注             |
 | :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|         function        |         选择优化器          |  pocr.optimizer,AdamDecay  |  目前只支持Adam方式  |
+|      **dataset**        |         每次迭代返回一个样本          |  -  |  -  |
-|         base_lr         |      设置初始学习率          |       0.0005      |               \             |
+|      name        |        dataset类名         |  SimpleDataSet |  目前支持`SimpleDataSet`和`LMDBDateSet`  |
-|         beta1           |    设置一阶矩估计的指数衰减率  |       0.9         |               \             |
+|      data_dir        |        数据集图片存放路径         |  ./train_data |  \  |
-|         beta2           |    设置二阶矩估计的指数衰减率  |     0.999         |               \             |
+|      label_file_list        |        数据标签路径         |  ["./train_data/train_list.txt"] | dataset为LMDBDateSet时不需要此参数   |
-|         decay           |         是否使用decay       |    \              |               \             |
+|      ratio_list        |        数据集的比例         |  [1.0] | 若label_file_list中有两个train_list，且ratio_list为[0.4,0.6]，则从train_list1中采样40%，从train_list2中采样60%组合整个dataset   |
-|      function(decay)    |         设置decay方式       |   -    |       目前支持cosine_decay, cosine_decay_warmup与piecewise_decay  |
+|      transforms        |        对图片和标签进行变换的方法列表         |  [DecodeImage,CTCLabelEncode,RecResizeImg,KeepKeys] |   见[ppocr/data/imaug](../../ppocr/data/imaug)  |
-|      step_each_epoch    |      每个epoch包含多少次迭代, cosine_decay/cosine_decay_warmup时有效   |         20       | 计算方式：total_image_num / (batch_size_per_card * card_size) |
+|      **loader**        |        dataloader相关         |  - |   |
-|        total_epoch      |    总共迭代多少个epoch, cosine_decay/cosine_decay_warmup时有效        |       1000      | 与Global.epoch_num 一致        |
+|      shuffle        |        每个epoch是否将数据集顺序打乱         |  True | \  |
-|        warmup_minibatch      |  线性warmup的迭代次数, cosine_decay_warmup时有效        |       1000      | \        |
+|      batch_size_per_card        |        训练时单卡batch size         |  256 | \  |
-|        boundaries      |    学习率下降时的迭代次数间隔, piecewise_decay时有效       |       -      | 参数为列表形式        |
+|      drop_last        |        是否丢弃因数据集样本数不能被 batch_size 整除而产生的最后一个不完整的mini-batch        |  True | \  |
-|        decay_rate      |    学习率衰减系数, piecewise_decay时有效       |       -      |  \        |
+|      num_workers        |        用于加载数据的子进程个数，若为0即为不开启子进程，在主进程中进行数据加载        |  8 | \  |
\ No newline at end of file
--- a/doc/doc_ch/tree.md
+++ b/doc/doc_ch/tree.md
--- a/doc/doc_en/add_new_algorithm_en.md
+++ b/doc/doc_en/add_new_algorithm_en.md
+# Add new algorithm
+PaddleOCR decomposes an algorithm into the following parts, and modularizes each part to make it more convenient to develop new algorithms.
+* Data loading and processing
+* Network
+* Post-processing
+* Loss
+* Metric
+* Optimizer
+The following will introduce each part separately, and introduce how to add the modules required for the new algorithm.
+## Data loading and processing
+Data loading and processing are composed of different modules, which complete the image reading, data augment and label production. This part is under [ppocr/data](../../ppocr/data). The explanation of each file and folder are as follows:
+```bash
+ppocr/data/
+├── imaug             # Scripts for image reading, data augment and label production
+│   ├── label_ops.py  # Modules that transform the label
+│   ├── operators.py  # Modules that transform the image
+│   ├──.....
+├── __init__.py
+├── lmdb_dataset.py   # The dataset that reads the lmdb
+└── simple_dataset.py # Read the dataset saved in the form of `image_path\tgt`
+```
+PaddleOCR has a large number of built-in image operation related modules. For modules that are not built-in, you can add them through the following steps:
+1. Create a new file under the [ppocr/data/imaug](../../ppocr/data/imaug) folder, such as my_module.py.
+2. Add code in the my_module.py file, the sample code is as follows:
+```python
+class MyModule:
+    def __init__(self, *args, **kwargs):
+        # your init code
+        pass
+    def __call__(self, data):
+        img = data['image']
+        label = data['label']
+        # your process code
+        data['image'] = img
+        data['label'] = label
+        return data
+```
+3. Import the added module in the [ppocr/data/imaug/\__init\__.py](../../ppocr/data/imaug/__init__.py) file.
+All different modules of data processing are executed by sequence, combined and executed in the form of a list in the config file. Such as:
+```yaml
+# angle class data process
+transforms:
+  - DecodeImage: # load image
+      img_mode: BGR
+      channel_first: False
+  - MyModule:
+      args1: args1
+      args2: args2
+  - KeepKeys:
+      keep_keys: [ 'image', 'label' ] # dataloader will return list in this order
+```
+## Network
+The network part completes the construction of the network, and PaddleOCR divides the network into four parts, which are under [ppocr/modeling](../../ppocr/modeling). The data entering the network will pass through these four parts in sequence(transforms->backbones->
+necks->heads).
+```bash
+├── architectures # Code for building network
+├── transforms    # Image Transformation Module
+├── backbones     # Feature extraction module
+├── necks         # Feature enhancement module
+└── heads         # Output module
+```
+PaddleOCR has built-in commonly used modules related to algorithms such as DB, EAST, SAST, CRNN and Attention. For modules that are not built-in, you can add them through the following steps. All four parts are added in the same way; take backbones as an example:
+1. Create a new file under the [ppocr/modeling/backbones](../../ppocr/modeling/backbones) folder, such as my_backbone.py.
+2. Add code in the my_backbone.py file, the sample code is as follows:
+```python
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+class MyBackbone(nn.Layer):
+    def __init__(self, *args, **kwargs):
+        super(MyBackbone, self).__init__()
+        # your init code
+        self.conv = nn.xxxx
+    def forward(self, inputs):
+        # your network forward
+        y = self.conv(inputs)
+        return y
+```
+3. Import the added module in the [ppocr/modeling/backbones/\__init\__.py](../../ppocr/modeling/backbones/__init__.py) file.
+After adding the four-part modules of the network, you only need to configure them in the configuration file to use, such as:
+```yaml
+Architecture:
+  model_type: rec
+  algorithm: CRNN
+  Transform:
+    name: MyTransform
+    args1: args1
+    args2: args2
+  Backbone:
+    name: MyBackbone
+    args1: args1
+  Neck:
+    name: MyNeck
+    args1: args1
+  Head:
+    name: MyHead
+    args1: args1
+```
+## Post-processing
+Post-processing realizes decoding network output to obtain text box or recognized text. This part is under [ppocr/postprocess](../../ppocr/postprocess).
+PaddleOCR has built-in post-processing modules related to algorithms such as DB, EAST, SAST, CRNN and Attention. For components that are not built-in, they can be added through the following steps:
+1. Create a new file under the [ppocr/postprocess](../../ppocr/postprocess) folder, such as my_postprocess.py.
+2. Add code in the my_postprocess.py file, the sample code is as follows:
+```python
+import paddle
+class MyPostProcess:
+    def __init__(self, *args, **kwargs):
+        # your init code
+        pass
+    def __call__(self, preds, label=None, *args, **kwargs):
+        if isinstance(preds, paddle.Tensor):
+            preds = preds.numpy()
+        # you preds decode code
+        preds = self.decode_preds(preds)
+        if label is None:
+            return preds
+        # you label decode code
+        label = self.decode_label(label)
+        return preds, label
+    def decode_preds(self, preds):
+        # you preds decode code
+        pass
+    def decode_label(self, preds):
+        # you label decode code
+        pass
+```
+3. Import the added module in the [ppocr/postprocess/\__init\__.py](../../ppocr/postprocess/__init__.py) file.
+After the post-processing module is added, you only need to configure it in the configuration file to use, such as:
+```yaml
+PostProcess:
+  name: MyPostProcess
+  args1: args1
+  args2: args2
+```
+## Loss
+The loss function is used to calculate the distance between the network output and the label. This part is under [ppocr/losses](../../ppocr/losses).
+PaddleOCR has built-in loss function modules related to algorithms such as DB, EAST, SAST, CRNN and Attention. For modules that are not built-in, you can add them through the following steps:
+1. Create a new file in the [ppocr/losses](../../ppocr/losses) folder, such as my_loss.py.
+2. Add code in the my_loss.py file, the sample code is as follows:
+```python
+import paddle
+from paddle import nn
+class MyLoss(nn.Layer):
+    def __init__(self, **kwargs):
+        super(MyLoss, self).__init__()
+        # you init code
+        pass
+    def __call__(self, predicts, batch):
+        label = batch[1]
+        # your loss code
+        loss = self.loss(input=predicts, label=label)
+        return {'loss': loss}
+```
+3. Import the added module in the [ppocr/losses/\__init\__.py](../../ppocr/losses/__init__.py) file.
+After the loss function module is added, you only need to configure it in the configuration file to use it, such as:
+```yaml
+Loss:
+  name: MyLoss
+  args1: args1
+  args2: args2
+```
+## Metric
+Metric is used to calculate the performance of the network on the current batch. This part is under [ppocr/metrics](../../ppocr/metrics). PaddleOCR has built-in evaluation modules related to algorithms such as detection, classification and recognition. For modules that are not built-in, you can add them through the following steps:
+1. Create a new file under the [ppocr/metrics](../../ppocr/metrics) folder, such as my_metric.py.
+2. Add code in the my_metric.py file, the sample code is as follows:
+```python
+class MyMetric(object):
+    def __init__(self, main_indicator='acc', **kwargs):
+        # main_indicator is used for select best model
+        self.main_indicator = main_indicator
+        self.reset()
+    def __call__(self, preds, batch, *args, **kwargs):
+        # preds is out of postprocess
+        # batch is out of dataloader
+        labels = batch[1]
+        cur_correct_num = 0
+        cur_all_num = 0
+        # you metric code
+        self.correct_num += cur_correct_num
+        self.all_num += cur_all_num
+        return {'acc': cur_correct_num / cur_all_num, }
+    def get_metric(self):
+        """
+        return metrics {
+                 'acc': 0,
+                 'norm_edit_dis': 0,
+            }
+        """
+        acc = self.correct_num / self.all_num
+        self.reset()
+        return {'acc': acc}
+    def reset(self):
+        # reset metric
+        self.correct_num = 0
+        self.all_num = 0
+```
+3. Import the added module in the [ppocr/metrics/\__init\__.py](../../ppocr/metrics/__init__.py) file.
+After the metric module is added, you only need to configure it in the configuration file to use it, such as:
+```yaml
+Metric:
+  name: MyMetric
+  main_indicator: acc
+```
+## Optimizer
+The optimizer is used to train the network. The optimizer also contains network regularization and learning rate decay modules. This part is under [ppocr/optimizer](../../ppocr/optimizer). PaddleOCR has built-in
+commonly used optimizer modules such as `Momentum`, `Adam` and `RMSProp`, common learning rate decay modules such as `Linear`, `Cosine`, `Step` and `Piecewise`, and common regularization modules such as `L1Decay` and `L2Decay`.
+Modules that are not built-in can be added through the following steps, taking `optimizer` as an example:
+1. Create your own optimizer in the [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py) file, the sample code is as follows:
+```python
+from paddle import optimizer as optim
+class MyOptim(object):
+    def __init__(self, learning_rate=0.001, *args, **kwargs):
+        self.learning_rate = learning_rate
+    def __call__(self, parameters):
+        # It is recommended to wrap the built-in optimizer of paddle
+        opt = optim.XXX(
+            learning_rate=self.learning_rate,
+            parameters=parameters)
+        return opt
+```
+After the optimizer module is added, you only need to configure it in the configuration file to use, such as:
+```yaml
+Optimizer:
+  name: MyOptim
+  args1: args1
+  args2: args2
+  lr:
+    name: Cosine
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0
+```
\ No newline at end of file
--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
-# OPTIONAL PARAMETERS LIST
+## Optional parameter list
-The following list can be viewed via `--help`
+The following list can be viewed through `--help`
 |         FLAG             |     Supported script    |        Use        |      Defaults       |         Note         |
 | :----------------------: | :------------: | :---------------: | :--------------: | :-----------------: |
-|          -c              |      ALL       |  Specify configuration file to use |  None  |  **Please refer to the parameter introduction for configuration file usage** |
+|          -c              |      ALL       |  Specify configuration file to use  |  None  |  **Please refer to the parameter introduction for configuration file usage** |
-|          -o              |      ALL       |  set configuration options  |  None  |  Configuration using -o has higher priority than the configuration file selected with -c. E.g: `-o Global.use_gpu=false`  |  
+|          -o              |      ALL       |  set configuration options  |  None  |  Configuration using -o has higher priority than the configuration file selected with -c. E.g: -o Global.use_gpu=false |
 ## INTRODUCTION TO GLOBAL PARAMETERS OF CONFIGURATION FILE
-Take `rec_chinese_lite_train_v1.1.yml` as an example
+Take rec_chinese_lite_train_v1.1.yml as an example
+### Global 
-|         Parameter             |            Use                |      Default       |            Note            |
+|         Parameter             |            Use                |      Defaults       |            Note            |
 | :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|      algorithm           |    Select algorithm to use                    |  Synchronize with configuration file   |     For selecting model, please refer to the supported model [list](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/README_en.md) |
+|      use_gpu             |    Set using GPU or not           |       true        |                \                 |
-|      use_gpu             |    Set using GPU or not            |       true        |                \                 |
+|      epoch_num           |    Maximum training epoch number             |       500        |                \                 |
-|      epoch_num           |    Maximum training epoch number             |       3000        |                \                 |
 |      log_smooth_window   |    Sliding window size            |       20          |                \                 |
 |      print_batch_step    |    Set print log interval         |       10          |                \                 |
-|      save_model_dir      |    Set model save path        |  output/{model_name}  |                \                 |
+|      save_model_dir      |    Set model save path        |  output/{algorithm_name}  |                \                 |
 |      save_epoch_step     |    Set model save interval        |       3           |                \                 |
-|      eval_batch_step     |    Set the model evaluation interval        |2000 or [1000, 2000] |runing evaluation every 2000 iters or evaluation is run every 2000 iterations after the 1000th iteration  |
+|      eval_batch_step     |    Set the model evaluation interval        | 2000 or [1000, 2000]        | 2000 means running evaluation every 2000 iterations; [1000, 2000] means running evaluation every 2000 iterations after the 1000th iteration   |
-|train_batch_size_per_card |  Set the batch size during training   |         256         |                \                 |
+|      cal_metric_during_train     |    Set whether to evaluate the metric during the training process. At this time, the metric of the model under the current batch is evaluated        |       true         |                \                 |
-| test_batch_size_per_card |  Set the batch size during testing    |         256         |                \                 |
+|      load_static_weights     |   Set whether the pre-training model is saved in static graph mode (currently only required by the detection algorithm)        |       true         |                \                 |
-|      image_shape         |    Set input image size        |   [3, 32, 100]    |                \                 |
+|      pretrained_model    |    Set the path of the pre-trained model      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
-|      max_text_length     |    Set the maximum text length        |       25          |                \                 |
+|      checkpoints         |    Set model parameter path            |       None        |   Used to load parameters after interruption to continue training |
-|      character_type      |    Set character type            |       ch          |    en/ch, the default dict will be used for en, and the custom dict will be used for ch|
+|      use_visualdl  |    Set whether to enable visualdl for visual log display |          False        |    [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) |
-|      character_dict_path |    Set dictionary path            |  ./ppocr/utils/ic15_dict.txt  |    \                 |
+|      infer_img            |    Set inference image path or folder path     |       ./infer_img | \|
-|      loss_type           |    Set loss type              |       ctc         |    Supports two types of loss: ctc / attention |
+|      character_dict_path |    Set dictionary path            |  ./ppocr/utils/ppocr_keys_v1.txt  |    \                 |
-|       distort            |    Set use distort          |       false       |  Support distort type ,read [img_tools.py](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/ppocr/data/rec/img_tools.py)                 |
+|      max_text_length     |    Set the maximum length of text        |       25          |                \                 |
-|      use_space_char          |    Wether to recognize space             |        false      |         Only support in character_type=ch mode                 |
+|      character_type      |    Set character type            |       ch          |    en/ch, the default dict will be used for en, and the custom dict will be used for ch |
-     label_list          | Set the angle supported by the direction classifier | ['0','180'] | Only valid in the direction classifier |
+|      use_space_char     |    Set whether to recognize spaces             |        True      |          Only support in character_type=ch mode                 |
-|      reader_yml          |    Set the reader configuration file          |  ./configs/rec/rec_icdar15_reader.yml  |  \          |
+|      label_list          |    Set the angle supported by the direction classifier       |    ['0','180']    |     Only valid in angle classifier model |
-|      pretrain_weights    |    Load pre-trained model path      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
+|      save_res_path          |    Set the save path of the detection model results       |    ./output/det_db/predicts_db.txt    |     Only valid in the text detection model |
-|      checkpoints         |    Load saved model path            |       None        |    Used to load saved parameters to continue training after interruption |
-|      save_inference_dir  |   path to save model for inference |          None        |   Use to save inference model |
+### Optimizer ([ppocr/optimizer](../../ppocr/optimizer))
-## INTRODUCTION TO READER PARAMETERS OF CONFIGURATION FILE
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-Take `rec_chinese_reader.yml` as an example:
+|      name        |         Optimizer class name          |  Adam  |  Currently supports`Momentum`,`Adam`,`RMSProp`, see [ppocr/optimizer/optimizer.py](../../ppocr/optimizer/optimizer.py)  |
+|      beta1           |    Set the exponential decay rate for the 1st moment estimates  |       0.9         |               \             |
-|         Parameter             |            Use                |      Default       |            Note            |
+|      beta2           |    Set the exponential decay rate for the 2nd moment estimates  |     0.999         |               \             |
-| :----------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      **lr**                |         Set the learning rate decay method       |   -    |       \  |
-|      reader_function     |    Select data reading method        |  ppocr.data.rec.dataset_traversal,SimpleReader  | Support two data reading methods: SimpleReader / LMDBReader  |
+|        name    |      Learning rate decay class name   |         Cosine       | Currently supports `Linear`, `Cosine`, `Step`, `Piecewise`, see [ppocr/optimizer/learning_rate.py](../../ppocr/optimizer/learning_rate.py) |
-|      num_workers             |    Set the number of data reading threads            |       8        |                \                 |
+|        learning_rate      |    Set the base learning rate        |       0.001      |  \        |
-|      img_set_dir          |    Image folder path             |       ./train_data        |                \                 |
+|      **regularizer**      |  Set network regularization method        |       -      | \        |
-|      label_file_path      |    Groundtruth file path           |       ./train_data/rec_gt_train.txt| \    |
+|        name      |    Regularizer class name      |       L2     |  Currently support`L1`,`L2`, see[ppocr/optimizer/regularizer.py](../../ppocr/optimizer/regularizer.py)        |
-|      infer_img            |    Result folder path     |       ./infer_img | \|
+|        factor      |    Regularization coefficient       |       0.00004     |  \        |
+### Architecture ([ppocr/modeling](../../ppocr/modeling))
+In ppocr, the network is divided into four stages: Transform, Backbone, Neck and Head
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      model_type        |         Network Type          |  rec  |  Currently support`rec`,`det`,`cls`  |
+|      algorithm           |    Model name  |       CRNN         |               See [algorithm_overview](./algorithm_overview.md) for the support list             |
+|      **Transform**           |    Set the transformation method  |       -       |               Currently only recognition algorithms are supported, see [ppocr/modeling/transform](../../ppocr/modeling/transform) for details            |
+|        name    |      Transformation class name   |         TPS       | Currently supports `TPS` |
+|        num_fiducial      |   Number of TPS control points        |       20      |  Ten on the top and bottom       |
+|        loc_lr      |    Localization network learning rate        |       0.1      |  \      |
+|        model_name      |    Localization network size        |       small      |  Currently support`small`,`large`       |
+|      **Backbone**      |  Set the network backbone class name        |       -      | see [ppocr/modeling/backbones](../../ppocr/modeling/backbones)        |
+|        name      |    backbone class name       |       ResNet     | Currently support`MobileNetV3`,`ResNet`        |
+|        layers      |    resnet layers       |       34     |  Currently support18,34,50,101,152,200       |
+|        model_name      |    MobileNetV3 network size       |       small     |  Currently support`small`,`large`       |
+|      **Neck**      |  Set network neck        |       -      | see[ppocr/modeling/necks](../../ppocr/modeling/necks)        |
+|        name      |    neck class name       |       SequenceEncoder     | Currently support`SequenceEncoder`,`DBFPN`        |
+|        encoder_type      |    SequenceEncoder encoder type       |       rnn     |  Currently support`reshape`,`fc`,`rnn`       |
+|        hidden_size      |   rnn number of internal units       |       48     |  \      |
+|        out_channels      |   Number of DBFPN output channels       |       256     |  \      |
+|      **Head**      |  Set the network head        |       -      | see[ppocr/modeling/heads](../../ppocr/modeling/heads)        |
+|        name      |    head class name       |       CTCHead     | Currently support`CTCHead`,`DBHead`,`ClsHead`        |
+|        fc_decay      |    CTCHead regularization coefficient       |       0.0004     |  \      |
+|        k      |   DBHead binarization coefficient       |       50     |  \      |
+|        class_dim      |   ClsHead output category number       |       2     |  \      |
+### Loss ([ppocr/losses](../../ppocr/losses))
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         loss class name          |  CTCLoss  |  Currently support`CTCLoss`,`DBLoss`,`ClsLoss`  |
+|      balance_loss        |        Whether to balance the number of positive and negative samples in DBLoss (using OHEM)         |  True  |  \  |
+|      ohem_ratio        |        The negative and positive sample ratio of OHEM in DBLoss         |  3  |  \  |
+|      main_loss_type        |        The loss used by shrink_map in DBLoss        |  DiceLoss  |  Currently support `DiceLoss`, `BCELoss`  |
+|      alpha        |        The coefficient of shrink_map_loss in DBLoss       |  5  |  \  |
+|      beta        |        The coefficient of threshold_map_loss in DBLoss       |  10  |  \  |
-## INTRODUCTION TO OPTIMIZER PARAMETERS OF CONFIGURATION FILE
+### PostProcess ([ppocr/postprocess](../../ppocr/postprocess))
-Take `rec_icdar15_train.yml` as an example:
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         Post-processing class name          |  CTCLabelDecode  |  Currently support `CTCLabelDecode`, `AttnLabelDecode`, `DBPostProcess`, `ClsPostProcess`  |
+|      thresh        |        The threshold for binarization of the segmentation map in DBPostProcess         |  0.3  |  \  |
+|      box_thresh        |        The threshold for filtering output boxes in DBPostProcess. Boxes below this threshold will not be output         |  0.7  |  \  |
+|      max_candidates        |        The maximum number of text boxes output in DBPostProcess        |  1000  |   |
+|      unclip_ratio        |        The unclip ratio of the text box in DBPostProcess       |  2.0  |  \  |
+### Metric ([ppocr/metrics](../../ppocr/metrics))
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|      name        |         Metric method name          |  RecMetric  |  Currently support `DetMetric`, `RecMetric`, `ClsMetric`  |
+|      main_indicator        |        Main indicators, used to select the best model        |  acc |  For the detection method is hmean, the recognition and classification method is acc  |
-|         Parameter             |            Use          |      Default        |            None             |
+### Dataset  ([ppocr/data](../../ppocr/data))
+|         Parameter             |            Use            |      Defaults        |            Note             |
 | :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
-|         function        |         Select Optimizer function          |  pocr.optimizer,AdamDecay  |  Only support Adam  |
+|      **dataset**        |         Return one sample per iteration          |  -  |  -  |
-|         base_lr         |      Set the base lr          |       0.0005      |               \             |
+|      name        |        dataset class name         |  SimpleDataSet |   Currently support`SimpleDataSet`,`LMDBDateSet`  |
-|         beta1           |    Set the exponential decay rate for the 1st moment estimates  |       0.9         |               \             |
+|      data_dir        |        Image folder path        |  ./train_data |  \  |
-|         beta2           |    Set the exponential decay rate for the 2nd moment estimates  |     0.999         |               \             |
+|      label_file_list        |        Groundtruth file path         |  ["./train_data/train_list.txt"] | This parameter is not required when dataset is LMDBDateSet   |
-|         decay           |         Whether to use decay       |    \              |               \             |
+|      ratio_list        |        Ratio of data set         |  [1.0] | If there are two train_lists in label_file_list and ratio_list is [0.4,0.6], 40% will be sampled from train_list1, and 60% will be sampled from train_list2 to combine the entire dataset   |
-|      function(decay)    |         Set the decay function       |   cosine_decay    |         Support cosine_decay, cosine_decay_warmup and piecewise_decay            |
+|      transforms        |        List of methods to transform images and labels         |  [DecodeImage,CTCLabelEncode,RecResizeImg,KeepKeys] |   see[ppocr/data/imaug](../../ppocr/data/imaug)  |
-|      step_each_epoch    |      The number of steps in an epoch. Used in cosine_decay/cosine_decay_warmup  |         20       | Calculation: total_image_num / (batch_size_per_card * card_size) |
+|      **loader**        |        dataloader related         |  - |   |
-|        total_epoch      |    The number of epochs. Used in cosine_decay/cosine_decay_warmup      |       1000      | Consistent with Global.epoch_num      |
+|      shuffle        |        Whether to shuffle the order of the data set at each epoch         |  True | \  |
-|        warmup_minibatch      |  Number of steps for linear warmup. Used in cosine_decay_warmup        |       1000      | \        |
+|      batch_size_per_card        |        Single card batch size during training         |  256 | \  |
-|        boundaries      |    The step intervals to reduce learning rate. Used in piecewise_decay       |       -      |  The format is list        |
+|      drop_last        |        Whether to discard the last incomplete mini-batch because the number of samples in the data set cannot be divisible by batch_size        |  True | \  |
-|        decay_rate      |    Learning rate decay rate. Used in piecewise_decay       |       -      |  \        |
+|      num_workers        |        The number of sub-processes used to load data, if it is 0, the sub-process is not started, and the data is loaded in the main process       |  8 | \  |
\ No newline at end of file
--- a/doc/doc_en/tree_en.md
+++ b/doc/doc_en/tree_en.md
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -26,6 +26,9 @@ from .randaugment import RandAugment
 from .operators import *
 from .label_ops import *
+from .east_process import *
+from .sast_process import *
 def transform(data, ops=None):
    """ transform """

--- a/ppocr/data/imaug/east_process.py
+++ b/ppocr/data/imaug/east_process.py
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import math
+import cv2
+import numpy as np
+import json
+import sys
+import os
+__all__ = ['EASTProcessTrain']
+class EASTProcessTrain(object):
+    def __init__(self,
+                 image_shape=[512, 512],
+                 background_ratio=0.125,
+                 min_crop_side_ratio=0.1,
+                 min_text_size=10,
+                 **kwargs):
+        """
+        Store the augmentation settings used to build EAST training samples.
+        :param image_shape: two-element sequence; only element 1 is read and
+            kept as the side length of the square network input
+        :param background_ratio: threshold compared against np.random.rand()
+            to choose the background-crop branch
+        :param min_crop_side_ratio: smallest accepted crop side, expressed
+            as a fraction of the image side
+        :param min_text_size: polygons whose shorter side is below this are
+            zeroed in the training mask
+        :param kwargs: accepted and ignored
+        """
+        # thresholds consumed by the cropping / masking helpers
+        self.min_text_size = min_text_size
+        self.min_crop_side_ratio = min_crop_side_ratio
+        self.background_ratio = background_ratio
+        # candidate factors for the random rescale step
+        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
+        self.input_size = image_shape[1]
+    def preprocess(self, im):
+        """
+        Resize, normalize and zero-pad an image to the square network input.
+        :param im: H x W x 3 image array
+        :return: (padded image of shape 1 x 3 x S x S in float32, scale
+            applied by the resize), with S = self.input_size
+        """
+        side = self.input_size
+        # scale so that the longer image side becomes `side`
+        longer_side = np.max(im.shape[0:2])
+        im_scale = float(side) / float(longer_side)
+        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        # map to [0, 1], then subtract the mean and divide by the std
+        im = (im / 255 - img_mean) / img_std
+        new_h, new_w, _ = im.shape
+        canvas = np.zeros((side, side, 3), dtype=np.float32)
+        # resized image sits in the top-left corner, the rest stays zero
+        canvas[:new_h, :new_w, :] = im
+        # HWC -> CHW, then add a leading axis
+        return canvas.transpose((2, 0, 1))[np.newaxis, :], im_scale
+    def rotate_im_poly(self, im, text_polys):
+        """
+        Rotate the image by a random 90 / 180 / 270 degrees and map the text
+        polygons into the rotated frame.
+        :param im: image array, first two axes are (height, width)
+        :param text_polys: N x 4 x 2 array of quadrangle vertices (x, y)
+        :return: (rotated image, rotated polygons as float32 array)
+        """
+        src_h, src_w = im.shape[0], im.shape[1]
+        # a single draw selects the number of quarter turns: 1, 2 or 3
+        draw = np.random.rand()
+        if draw > 0.666:
+            quarter_turns = 3
+        elif 0.333 < draw < 0.666:
+            quarter_turns = 2
+        else:
+            quarter_turns = 1
+        rotated = im.copy()
+        for _ in range(quarter_turns):
+            rotated = np.rot90(rotated)
+        angle = (-90 * quarter_turns) * math.pi / 180.0
+        cos_a, sin_a = math.cos(angle), math.sin(angle)
+        # vertices pivot on the source centre and are re-centred on the
+        # rotated image
+        src_cx, src_cy = 0.5 * src_w, 0.5 * src_h
+        dst_cx, dst_cy = 0.5 * rotated.shape[1], 0.5 * rotated.shape[0]
+        rotated_polys = []
+        for idx in range(text_polys.shape[0]):
+            quad = text_polys[idx]
+            rotated_polys.append([[
+                cos_a * (quad[k][0] - src_cx)
+                - sin_a * (quad[k][1] - src_cy) + dst_cx,
+                sin_a * (quad[k][0] - src_cx)
+                + cos_a * (quad[k][1] - src_cy) + dst_cy
+            ] for k in range(4)])
+        return rotated, np.array(rotated_polys, dtype=np.float32)
+    def polygon_area(self, poly):
+        """
+        Signed area of a quadrangle, computed as half the sum of one term
+        per edge; the sign depends on the vertex order.
+        :param poly: four (x, y) vertices
+        :return: signed area
+        """
+        # edge k runs from vertex k to vertex (k + 1) % 4
+        edge = [(poly[(k + 1) % 4][0] - poly[k][0]) *
+                (poly[(k + 1) % 4][1] + poly[k][1]) for k in range(4)]
+        return np.sum(edge) / 2.
+    def check_and_validate_polys(self, polys, tags, img_height, img_width):
+        """
+        check so that the text poly is in the same direction,
+        and also filter some invalid polygons
+        :param polys: N x 4 x 2 array of quadrangle vertices (x, y); the
+            coordinates are clipped to the image in place
+        :param tags: N ignore flags, one per polygon
+        :param img_height: image height, y is clipped to [0, img_height - 1]
+        :param img_width: image width, x is clipped to [0, img_width - 1]
+        :return: (validated polys, validated tags) as numpy arrays; always a
+            pair, also for empty input
+        """
+        h, w = img_height, img_width
+        if polys.shape[0] == 0:
+            # return a pair here as well, callers unpack two values
+            return polys, np.array(tags)
+        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
+        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
+        validated_polys = []
+        validated_tags = []
+        for poly, tag in zip(polys, tags):
+            p_area = self.polygon_area(poly)
+            # invalid poly: (almost) no area left after clipping
+            if abs(p_area) < 1:
+                continue
+            if p_area > 0:
+                # poly in wrong direction: mark it as ignored and flip the
+                # vertex order so every kept poly has the same direction
+                if not tag:
+                    tag = True  # reversed cases should be ignored
+                poly = poly[(0, 3, 2, 1), :]
+            validated_polys.append(poly)
+            validated_tags.append(tag)
+        return np.array(validated_polys), np.array(validated_tags)
+    def draw_img_polys(self, img, polys):
+        """
+        Debug helper: draw the polygons on the image and dump it to disk.
+        Writes "tmp.jpg" and "tmp_<k>.jpg" (k random in [0, 100]) into the
+        current working directory.
+        :param img: image array; a leading axis of a 4-d input is squeezed
+            and a channel-first input is transposed to channel-last
+        :param polys: iterable of vertex arrays to outline
+        :return: None
+        """
+        import random
+        if len(img.shape) == 4:
+            img = np.squeeze(img, axis=0)
+        if img.shape[0] == 3:
+            # CHW -> HWC, then add the per-channel offsets in place
+            img = img.transpose((1, 2, 0))
+            for channel, offset in ((2, 123.68), (1, 116.78), (0, 103.94)):
+                img[:, :, channel] += offset
+        # round-trip through a JPEG file before drawing
+        cv2.imwrite("tmp.jpg", img)
+        img = cv2.imread("tmp.jpg")
+        for quad in polys:
+            pts = quad.astype(np.int32).reshape((-1, 1, 2))
+            cv2.polylines(img, [pts], True, color=(255, 255, 0), thickness=2)
+        cv2.imwrite("tmp_%d.jpg" % random.randint(0, 100), img)
+        return
+    def shrink_poly(self, poly, r):
+        """
+        Fit a poly inside the original poly (in place); used to generate the
+        score map. The longer pair of opposite edges is shortened first.
+        :param poly: 4 x 2 array of vertices (x, y), modified in place
+        :param r: four per-vertex lengths (r in the paper)
+        :return: the same poly object after shrinking
+        """
+        # shrink ratio
+        R = 0.3
+        def pull_p0p1_style(head, tail):
+            # move both ends of edge head->tail towards each other, edge
+            # angle taken as arctan2(dy, dx)
+            theta = np.arctan2((poly[tail][1] - poly[head][1]),
+                               (poly[tail][0] - poly[head][0]))
+            poly[head][0] += R * r[head] * np.cos(theta)
+            poly[head][1] += R * r[head] * np.sin(theta)
+            poly[tail][0] -= R * r[tail] * np.cos(theta)
+            poly[tail][1] -= R * r[tail] * np.sin(theta)
+        def pull_p0p3_style(head, tail):
+            # same move, edge angle taken as arctan2(dx, dy)
+            theta = np.arctan2((poly[tail][0] - poly[head][0]),
+                               (poly[tail][1] - poly[head][1]))
+            poly[head][0] += R * r[head] * np.sin(theta)
+            poly[head][1] += R * r[head] * np.cos(theta)
+            poly[tail][0] -= R * r[tail] * np.sin(theta)
+            poly[tail][1] -= R * r[tail] * np.cos(theta)
+        # find the longer pair of opposite edges
+        len_p0p1_p2p3 = np.linalg.norm(poly[0] - poly[1]) + \
+            np.linalg.norm(poly[2] - poly[3])
+        len_p0p3_p1p2 = np.linalg.norm(poly[0] - poly[3]) + \
+            np.linalg.norm(poly[1] - poly[2])
+        if len_p0p1_p2p3 > len_p0p3_p1p2:
+            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
+            pull_p0p1_style(0, 1)
+            pull_p0p1_style(3, 2)
+            pull_p0p3_style(0, 3)
+            pull_p0p3_style(1, 2)
+        else:
+            # first move (p0, p3), (p1, p2), then (p0, p1), (p2, p3)
+            pull_p0p3_style(0, 3)
+            pull_p0p3_style(1, 2)
+            pull_p0p1_style(0, 1)
+            pull_p0p1_style(3, 2)
+        return poly
+    def generate_quad(self, im_size, polys, tags):
+        """
+        Generate the quadrangle training targets for one image.
+        :param im_size: (h, w) of the maps to build
+        :param polys: sequence of 4 x 2 vertex arrays (x, y); the clamped
+            coordinates are written back into each poly
+        :param tags: per-polygon flags; a truthy tag zeroes the polygon in
+            the training mask
+        :return: (score_map, geo_map, training_mask)
+            score_map: h x w uint8, 1 inside every shrunk polygon
+            geo_map: h x w x 9 float32, channels 0-7 hold the pixel-to-vertex
+                offsets, channel 8 the inverse short-side length
+            training_mask: h x w uint8, 0 where a polygon is masked out
+        """
+        h, w = im_size
+        # per-pixel label (poly_idx + 1) of the shrunk polygon covering it
+        # NOTE(review): the mask is uint8, so labels above 255 cannot be
+        # stored as such - confirm inputs never carry that many polygons
+        poly_mask = np.zeros((h, w), dtype=np.uint8)
+        score_map = np.zeros((h, w), dtype=np.uint8)
+        # (x1, y1, ..., x4, y4, short_edge_norm)
+        geo_map = np.zeros((h, w, 9), dtype=np.float32)
+        # mask used during training, to ignore some hard areas
+        training_mask = np.ones((h, w), dtype=np.uint8)
+        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
+            poly = poly_tag[0]
+            tag = poly_tag[1]
+            # r[i]: length of the shorter of the two edges meeting at vertex i
+            r = [None, None, None, None]
+            for i in range(4):
+                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
+                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
+                r[i] = min(dist1, dist2)
+            # score map
+            # shrink a copy so the original vertices stay available below
+            shrinked_poly = self.shrink_poly(
+                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
+            cv2.fillPoly(score_map, shrinked_poly, 1)
+            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
+            # if the poly is too small, then ignore it during training
+            poly_h = min(
+                np.linalg.norm(poly[0] - poly[3]),
+                np.linalg.norm(poly[1] - poly[2]))
+            poly_w = min(
+                np.linalg.norm(poly[0] - poly[1]),
+                np.linalg.norm(poly[2] - poly[3]))
+            if min(poly_h, poly_w) < self.min_text_size:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+            # tagged polygons are masked out over their full (unshrunk) area
+            if tag:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+            # pixels labelled with this polygon; argwhere yields (row, col)
+            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
+            # geo map.
+            y_in_poly = xy_in_poly[:, 0]
+            x_in_poly = xy_in_poly[:, 1]
+            # clamp the vertices to [0, w] x [0, h] before taking offsets
+            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
+            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
+            for pno in range(4):
+                # channels 2 * pno and 2 * pno + 1: pixel minus vertex pno
+                geo_channel_beg = pno * 2
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
+                    x_in_poly - poly[pno, 0]
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
+                    y_in_poly - poly[pno, 1]
+            # channel 8: inverse of the short side, floored at 1.0 pixel
+            geo_map[y_in_poly, x_in_poly, 8] = \
+                1.0 / max(min(poly_h, poly_w), 1.0)
+        return score_map, geo_map, training_mask
+    def crop_area(self,
+                  im,
+                  polys,
+                  tags,
+                  crop_background=False,
+                  max_tries=50):
+        """
+        make random crop from the input image
+        :param im: H x W x C image array
+        :param polys: N x 4 x 2 array of quadrangle vertices (x, y)
+        :param tags: N flags, indexed together with polys
+        :param crop_background: when True, a crop without any text is
+            returned (with empty lists for polys and tags) instead of
+            being skipped
+        :param max_tries: number of random crops to try
+        :return: (image, polys, tags); the inputs are returned unchanged
+            when no crop was accepted
+        """
+        h, w, _ = im.shape
+        # occupancy arrays are padded by a tenth of the side on both ends
+        pad_h = h // 10
+        pad_w = w // 10
+        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
+        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
+        # mark the x / y extent of every polygon as occupied
+        for poly in polys:
+            poly = np.round(poly, decimals=0).astype(np.int32)
+            minx = np.min(poly[:, 0])
+            maxx = np.max(poly[:, 0])
+            w_array[minx + pad_w:maxx + pad_w] = 1
+            miny = np.min(poly[:, 1])
+            maxy = np.max(poly[:, 1])
+            h_array[miny + pad_h:maxy + pad_h] = 1
+        # ensure the cropped area not across a text
+        h_axis = np.where(h_array == 0)[0]
+        w_axis = np.where(w_array == 0)[0]
+        # no free coordinate on one of the axes: nothing to crop
+        if len(h_axis) == 0 or len(w_axis) == 0:
+            return im, polys, tags
+        for i in range(max_tries):
+            # draw two free coordinates per axis, undo the padding offset
+            # and clip them to the image
+            xx = np.random.choice(w_axis, size=2)
+            xmin = np.min(xx) - pad_w
+            xmax = np.max(xx) - pad_w
+            xmin = np.clip(xmin, 0, w - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            yy = np.random.choice(h_axis, size=2)
+            ymin = np.min(yy) - pad_h
+            ymax = np.max(yy) - pad_h
+            ymin = np.clip(ymin, 0, h - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+            if xmax - xmin < self.min_crop_side_ratio * w or \
+               ymax - ymin < self.min_crop_side_ratio * h:
+                # area too small
+                continue
+            if polys.shape[0] != 0:
+                # per-vertex test, a polygon is kept only when all four of
+                # its vertices lie inside the crop
+                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
+                    & (polys[:, :, 0] <= xmax)\
+                    & (polys[:, :, 1] >= ymin)\
+                    & (polys[:, :, 1] <= ymax)
+                selected_polys = np.where(
+                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
+            else:
+                selected_polys = []
+            if len(selected_polys) == 0:
+                # no text in this area
+                if crop_background:
+                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+                    polys = []
+                    tags = []
+                    return im, polys, tags
+                else:
+                    continue
+            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+            polys = polys[selected_polys]
+            tags = tags[selected_polys]
+            # shift the kept polygons into the crop's coordinate frame
+            polys[:, :, 0] -= xmin
+            polys[:, :, 1] -= ymin
+            return im, polys, tags
+        # every try was rejected
+        return im, polys, tags
+    def crop_background_infor(self, im, text_polys, text_tags):
+        """
+        Build a text-free (background) training sample.
+        :param im: image array
+        :param text_polys: polygons handed to crop_area
+        :param text_tags: flags handed to crop_area
+        :return: None when the crop still contains polygons, otherwise
+            (image, score_map, geo_map, training_mask) with an all-zero
+            score / geo map and an all-one training mask
+        """
+        cropped, kept_polys, _ = self.crop_area(
+            im, text_polys, text_tags, crop_background=True)
+        # a crop that still holds text is not a background sample
+        if len(kept_polys) > 0:
+            return None
+        # pad and resize image
+        side = self.input_size
+        cropped, _ = self.preprocess(cropped)
+        map_shape = (side, side)
+        score_map = np.zeros(map_shape, dtype=np.float32)
+        geo_map = np.zeros(map_shape + (9, ), dtype=np.float32)
+        training_mask = np.ones(map_shape, dtype=np.float32)
+        return cropped, score_map, geo_map, training_mask
+    def crop_foreground_infor(self, im, text_polys, text_tags):
+        """
+        Build a training sample from a crop that contains text.
+        :param im: image array
+        :param text_polys: N x 4 x 2 array of quadrangle vertices (x, y)
+        :param text_tags: N flags array
+        :return: None when no polygon is left or every polygon is flagged,
+            otherwise (image, score_map, geo_map, training_mask)
+        """
+        im, text_polys, text_tags = self.crop_area(
+            im, text_polys, text_tags, crop_background=False)
+        # nothing left, or all remaining polygons are ignore cases
+        if text_polys.shape[0] == 0 or \
+                np.sum((text_tags * 1.0)) >= text_tags.size:
+            return None
+        # pad and resize image, then bring the polygons to the same scale
+        im, ratio = self.preprocess(im)
+        text_polys[:, :, 0] *= ratio
+        text_polys[:, :, 1] *= ratio
+        map_h, map_w = im.shape[2], im.shape[3]
+        score_map, geo_map, training_mask = self.generate_quad(
+            (map_h, map_w), text_polys, text_tags)
+        return im, score_map, geo_map, training_mask
+    def __call__(self, data):
+        """
+        Turn one labelled image into an EAST training sample.
+        :param data: dict with 'image', 'polys' and 'ignore_tags'
+        :return: the same dict with 'image' replaced and 'score_map',
+            'geo_map', 'training_mask' added, or None when the sample is
+            dropped
+        """
+        im = data['image']
+        text_polys = data['polys']
+        text_tags = data['ignore_tags']
+        if im is None or text_polys.shape[0] == 0:
+            return None
+        # add rotate cases
+        if np.random.rand() < 0.5:
+            im, text_polys = self.rotate_im_poly(im, text_polys)
+        h, w, _ = im.shape
+        text_polys, text_tags = self.check_and_validate_polys(
+            text_polys, text_tags, h, w)
+        if text_polys.shape[0] == 0:
+            return None
+        # random scale this image together with its polygons
+        rd_scale = np.random.choice(self.random_scale)
+        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+        text_polys *= rd_scale
+        # pick the background or the foreground crop branch
+        if np.random.rand() < self.background_ratio:
+            build_sample = self.crop_background_infor
+        else:
+            build_sample = self.crop_foreground_infor
+        outs = build_sample(im, text_polys, text_tags)
+        if outs is None:
+            return None
+        im, score_map, geo_map, training_mask = outs
+        # all maps keep every 4th row / column; geo_map goes HWC -> CHW
+        data['image'] = im[0]
+        data['score_map'] = score_map[np.newaxis, ::4, ::4].astype(
+            np.float32)
+        data['geo_map'] = geo_map.transpose((2, 0, 1))[:, ::4, ::4].astype(
+            np.float32)
+        data['training_mask'] = training_mask[np.newaxis, ::4, ::4].astype(
+            np.float32)
+        return data
\ No newline at end of file
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -52,6 +52,7 @@ class DetLabelEncode(object):
                txt_tags.append(True)
            else:
                txt_tags.append(False)
+        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)
@@ -70,6 +71,17 @@ class DetLabelEncode(object):
        rect[3] = pts[np.argmax(diff)]
        return rect
+    def expand_points_num(self, boxes):
+        """
+        Pad every box to the same number of points by repeating its last
+        point.
+        :param boxes: list of boxes, each a list of points
+        :return: new list of boxes, each as long as the longest input box
+        """
+        target_len = max([len(box) for box in boxes] + [0])
+        return [
+            box + [box[-1]] * (target_len - len(box)) for box in boxes
+        ]
 class BaseRecLabelEncode(object):
    """ Convert between text-label and text-index """
@@ -83,7 +95,7 @@ class BaseRecLabelEncode(object):
            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
-            support_character_type, self.character_str)
+            support_character_type, character_type)
        self.max_text_len = max_text_length
        if character_type == "en":

--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -122,26 +122,37 @@ class DetResizeForTest(object):
        if 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
+        if 'resize_long' in kwargs:
+            self.resize_type = 2
+            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'
    def __call__(self, data):
        img = data['image']
+        src_h, src_w, _ = img.shape
        if self.resize_type == 0:
-            img, shape = self.resize_image_type0(img)
+            # img, shape = self.resize_image_type0(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+        elif self.resize_type == 2:
+            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
-            img, shape = self.resize_image_type1(img)
+            # img, shape = self.resize_image_type1(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
-        data['shape'] = shape
+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data
    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        return img, np.array([ori_h, ori_w])
+        # return img, np.array([ori_h, ori_w])
+        return img, [ratio_h, ratio_w]
    def resize_image_type0(self, img):
        """
@@ -184,4 +195,31 @@ class DetResizeForTest(object):
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
-        return img, np.array([h, w])
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        # return img, np.array([h, w])
+        return img, [ratio_h, ratio_w]
+    def resize_image_type2(self, img):
+        """Resize img so its longer side is scaled to self.resize_long.
+
+        Both target sides are then rounded up to the next multiple of 128, so
+        the final scale can differ slightly between the two axes.
+
+        Returns the resized image and [ratio_h, ratio_w], the final size
+        divided by the original size along each axis.
+        """
+        h, w, _ = img.shape
+        # One common scale factor, taken from whichever side is longer.
+        scale = float(self.resize_long) / max(h, w)
+        stride = 128
+        # Truncate the scaled size, then round up to a multiple of the stride.
+        target_h = (int(h * scale) + stride - 1) // stride * stride
+        target_w = (int(w * scale) + stride - 1) // stride * stride
+        img = cv2.resize(img, (int(target_w), int(target_h)))
+        return img, [target_h / float(h), target_w / float(w)]
--- a/ppocr/data/imaug/sast_process.py
+++ b/ppocr/data/imaug/sast_process.py
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -18,6 +18,8 @@ import copy
 def build_loss(config):
    # det loss
    from .det_db_loss import DBLoss
+    from .det_east_loss import EASTLoss
+    from .det_sast_loss import SASTLoss
    # rec loss
    from .rec_ctc_loss import CTCLoss
@@ -25,7 +27,7 @@ def build_loss(config):
    # cls loss
    from .cls_loss import ClsLoss
-    support_dict = ['DBLoss', 'CTCLoss', 'ClsLoss']
+    support_dict = ['DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss']
    config = copy.deepcopy(config)
    module_name = config.pop('name')

--- a/ppocr/losses/det_east_loss.py
+++ b/ppocr/losses/det_east_loss.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+class EASTLoss(nn.Layer):
+    """Loss for the EAST detector: dice loss on the score map plus a
+    smooth-L1 style loss on the geometry maps.
+    """
+
+    def __init__(self, eps=1e-6, **kwargs):
+        # Extra keyword arguments are accepted and ignored.
+        super(EASTLoss, self).__init__()
+        self.dice_loss = DiceLoss(eps=eps)
+
+    def forward(self, predicts, labels):
+        """Compute the EAST training losses.
+
+        Args:
+            predicts: mapping with the entries 'f_score' and 'f_geo'.
+            labels: sequence whose items after the first unpack to
+                (l_score, l_geo, l_mask).
+
+        Returns:
+            dict with "loss" (total), "dice_loss" (already scaled by 0.01)
+            and "smooth_l1_loss".
+        """
+        l_score, l_geo, l_mask = labels[1:]
+        f_score, f_geo = predicts['f_score'], predicts['f_geo']
+        dice_loss = self.dice_loss(f_score, l_score, l_mask)
+
+        # Geometry loss: the label has one more section along axis 1 than
+        # the prediction; that last section weights every per-channel term.
+        num_geo = 8
+        label_parts = paddle.split(
+            l_geo, num_or_sections=num_geo + 1, axis=1)
+        pred_parts = paddle.split(f_geo, num_or_sections=num_geo, axis=1)
+        smooth_l1 = 0
+        # zip stops after the num_geo prediction sections, so the extra
+        # label section is never paired with a prediction.
+        for label_ch, pred_ch in zip(label_parts, pred_parts):
+            abs_diff = paddle.abs(label_ch - pred_ch)
+            # The quadratic/linear switch point is the score label itself.
+            below = paddle.cast(
+                paddle.less_than(abs_diff, l_score), dtype='float32')
+            in_loss = abs_diff * abs_diff * below + \
+                (abs_diff - 0.5) * (1.0 - below)
+            smooth_l1 += label_parts[-1] / num_geo * in_loss * l_score
+        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)
+
+        dice_loss = dice_loss * 0.01
+        total_loss = dice_loss + smooth_l1_loss
+        return {
+            "loss": total_loss,
+            "dice_loss": dice_loss,
+            "smooth_l1_loss": smooth_l1_loss,
+        }
--- a/ppocr/losses/det_sast_loss.py
+++ b/ppocr/losses/det_sast_loss.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+import paddle.fluid as fluid
+import numpy as np
+class SASTLoss(nn.Layer):
+    """Loss for the SAST detector: a dice-style score loss plus weighted
+    smooth-L1 losses on the border, tvo and tco maps.
+    """
+
+    def __init__(self, eps=1e-6, **kwargs):
+        # Extra keyword arguments are accepted and ignored.
+        super(SASTLoss, self).__init__()
+        self.dice_loss = DiceLoss(eps=eps)
+
+    @staticmethod
+    def _weighted_smooth_l1(label, pred, channels, l_score, l_mask):
+        """Masked, per-position weighted smooth-L1 loss for one output map.
+
+        `label` is split along axis 1 into `channels` target sections and
+        one trailing weight section. The weight, `l_score` and `l_mask` are
+        expanded to the weight's shape with axis 1 multiplied by `channels`.
+        The result is normalised by the sum of score * mask.
+        """
+        target, weight = paddle.split(
+            label, num_or_sections=[channels, 1], axis=1)
+        ex_shape = weight.shape * np.array([1, channels, 1, 1])
+        weight = paddle.expand(x=weight, shape=ex_shape)
+        score = paddle.expand(x=l_score, shape=ex_shape)
+        mask = paddle.expand(x=l_mask, shape=ex_shape)
+
+        abs_diff = paddle.abs(target - pred)
+        # 1.0 where the quadratic branch applies; kept out of the gradient.
+        near = paddle.cast(abs_diff < 1.0, dtype='float32')
+        near.stop_gradient = True
+        in_loss = 0.5 * abs_diff * abs_diff * near + \
+            (abs_diff - 0.5) * (1.0 - near)
+        out_loss = weight * in_loss
+        return paddle.sum(out_loss * score * mask) / \
+            (paddle.sum(score * mask) + 1e-5)
+
+    def forward(self, predicts, labels):
+        """Compute the SAST training losses.
+
+        Args:
+            predicts: mapping with the entries 'f_score', 'f_border',
+                'f_tvo' and 'f_tco'.
+            labels: sequence whose items after the first unpack to
+                (l_score, l_border, l_mask, l_tvo, l_tco).
+
+        Returns:
+            dict with 'loss' (weighted total), "score_loss", "border_loss",
+            'tvo_loss' and 'tco_loss'.
+        """
+        f_score = predicts['f_score']
+        f_border = predicts['f_border']
+        f_tvo = predicts['f_tvo']
+        f_tco = predicts['f_tco']
+        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]
+
+        # Score loss: one minus a masked dice coefficient.
+        intersection = paddle.sum(f_score * l_score * l_mask)
+        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
+        score_loss = 1.0 - 2 * intersection / (union + 1e-5)
+
+        # Regression losses: 4, 8 and 2 target sections respectively.
+        border_loss = self._weighted_smooth_l1(
+            l_border, f_border, 4, l_score, l_mask)
+        tvo_loss = self._weighted_smooth_l1(l_tvo, f_tvo, 8, l_score, l_mask)
+        tco_loss = self._weighted_smooth_l1(l_tco, f_tco, 2, l_score, l_mask)
+
+        # Fixed weights of the individual terms in the total loss.
+        tvo_lw, tco_lw = 1.5, 1.5
+        score_lw, border_lw = 1.0, 1.0
+        total_loss = score_loss * score_lw + border_loss * border_lw + \
+            tvo_loss * tvo_lw + tco_loss * tco_lw
+        return {
+            'loss': total_loss,
+            "score_loss": score_loss,
+            "border_loss": border_loss,
+            'tvo_loss': tvo_loss,
+            'tco_loss': tco_loss,
+        }
\ No newline at end of file
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -19,6 +19,7 @@ def build_backbone(config, model_type):
    if model_type == 'det':
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
+        from .det_resnet_vd_sast import ResNet_SAST
        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
    elif model_type == 'rec' or model_type == 'cls':
        from .rec_mobilenet_v3 import MobileNetV3

--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
--- a/ppocr/modeling/necks/east_fpn.py
+++ b/ppocr/modeling/necks/east_fpn.py
--- a/ppocr/modeling/necks/sast_fpn.py
+++ b/ppocr/modeling/necks/sast_fpn.py
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
--- a/ppocr/postprocess/locality_aware_nms.py
+++ b/ppocr/postprocess/locality_aware_nms.py
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
--- a/ppocr/utils/check.py
+++ b/ppocr/utils/check.py
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py