Merge pull request #7049 from WenmuZhou/tipc1

add benchmark

Merge pull request #7049 from WenmuZhou/tipc1
add benchmark
342522ab · u010070587 · GitHub · 1696b36b · dd1e17fd · 342522ab
28 changed file
--- a/configs/det/det_mv3_db.yml
+++ b/configs/det/det_mv3_db.yml
@@ -101,7 +101,7 @@ Train:
    drop_last: False
    batch_size_per_card: 16
    num_workers: 8
-    use_shared_memory: False
+    use_shared_memory: True

 Eval:
  dataset:
@@ -129,4 +129,4 @@ Eval:
    drop_last: False
    batch_size_per_card: 1 # must be 1
    num_workers: 8
-    use_shared_memory: False
+    use_shared_memory: True
--- a/test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_PP-OCRv2_det/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o 
+norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ null:null
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml
+++ b/test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml
@@ -6,7 +6,7 @@ Global:
  print_batch_step: 10
  save_model_dir: ./output/rec_pp-OCRv2_distillation
  save_epoch_step: 3
-  eval_batch_step: [0, 2000]
+  eval_batch_step: [0, 200000]
  cal_metric_during_train: true
  pretrained_model:
  checkpoints:
@@ -114,7 +114,7 @@ Train:
    name: SimpleDataSet
    data_dir: ./train_data/ic15_data/
    label_file_list:
-    - ./train_data/ic15_data/rec_gt_train.txt
+    - ./train_data/ic15_data/rec_gt_train4w.txt
    transforms:
    - DecodeImage:
        img_mode: BGR

--- a/test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt
+++ b/test_tipc/configs/ch_PP-OCRv2_rec/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o Global.print_batch_step=4 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ null:null
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,32,320]}]
+===========================train_benchmark_params==========================
+batch_size:64
+fp_items:fp32|fp16
+epoch:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_PP-OCRv3_det/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o 
+norm_train:tools/train.py -c configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml -o Global.print_batch_step=1 Train.loader.shuffle=false Global.eval_batch_step=[4000,400]
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ null:null
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml
+++ b/test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml
@@ -153,7 +153,7 @@ Train:
    data_dir: ./train_data/ic15_data/
    ext_op_transform_idx: 1
    label_file_list:
-    - ./train_data/ic15_data/rec_gt_train_lite.txt
+    - ./train_data/ic15_data/rec_gt_train4w.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
@@ -183,7 +183,7 @@ Eval:
    name: SimpleDataSet
    data_dir: ./train_data/ic15_data
    label_file_list:
-    - ./train_data/ic15_data/rec_gt_test_lite.txt
+    - ./train_data/ic15_data/rec_gt_test.txt
    transforms:
    - DecodeImage:
        img_mode: BGR

--- a/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt
+++ b/test_tipc/configs/ch_PP-OCRv3_rec/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/ch_PP-OCRv3_rec/ch_PP-OCRv3_rec_distillation.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ null:null
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,48,320]}]
+===========================train_benchmark_params==========================
+batch_size:128
+fp_items:fp32|fp16
+epoch:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+norm_train:tools/train.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -50,4 +50,10 @@ null:null
 --benchmark:True
 null:null
 ===========================infer_benchmark_params==========================
-random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/rec/rec_icdar15_train.yml -o
+norm_train:tools/train.py -c configs/rec/rec_icdar15_train.yml -o Global.print_batch_step=4 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ inference:tools/infer/predict_rec.py
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,32,100]}]
+===========================train_benchmark_params==========================
+batch_size:256
+fp_items:fp32|fp16
+epoch:3
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml
+++ b/test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml
@@ -2,13 +2,13 @@ Global:
  use_gpu: false
  epoch_num: 5
  log_smooth_window: 20
-  print_batch_step: 1
+  print_batch_step: 2
  save_model_dir: ./output/db_mv3/
  save_epoch_step: 1200
  # evaluation is run every 2000 iterations
-  eval_batch_step: [0, 400]
+  eval_batch_step: [0, 30000]
  cal_metric_during_train: False
-  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: False

--- a/test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_server_v2.0_det/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_det/det_r50_vd_db.yml -o
 quant_train:null
 fpgm_train:null
 distill_train:null
@@ -50,4 +50,10 @@ inference:tools/infer/predict_det.py
 --benchmark:True
 null:null
 ===========================infer_benchmark_params==========================
-random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
+norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o Global.print_batch_step=4 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -51,3 +51,9 @@ inference:tools/infer/predict_rec.py
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,32,100]}]
+===========================train_benchmark_params==========================
+batch_size:256
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt
+++ b/test_tipc/configs/det_mv3_db_v2_0/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -52,8 +52,8 @@ null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
 ===========================train_benchmark_params==========================
-batch_size:8|16
+batch_size:16
 fp_items:fp32|fp16
-epoch:15
+epoch:4
 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
-flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
--- a/test_tipc/configs/det_r50_db_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_db_v2.0/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c configs/det/det_r50_vd_db.yml -o 
+norm_train:tools/train.py -c configs/det/det_r50_vd_db.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
 quant_export:null
 fpgm_export:null
 distill_train:null
@@ -50,4 +50,10 @@ inference:tools/infer/predict_det.py
 --benchmark:True
 null:null
 ===========================infer_benchmark_params==========================
-random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
\ No newline at end of file
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:8
+fp_items:fp32|fp16
+epoch:2
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
--- a/test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/det_r50_vd_dcn_fce_ctw.yml
+++ b/test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/det_r50_vd_dcn_fce_ctw.yml
+Global:
+  use_gpu: true
+  epoch_num: 1500
+  log_smooth_window: 20
+  print_batch_step: 20
+  save_model_dir: ./output/det_r50_dcn_fce_ctw/
+  save_epoch_step: 100
+  # evaluation is run every 4000 iterations after the 0th iteration
+  eval_batch_step: [0, 4000]
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained 
+  checkpoints: 
+  save_inference_dir: 
+  use_visualdl: False
+  infer_img: doc/imgs_en/img_10.jpg
+  save_res_path: ./output/det_fce/predicts_fce.txt
+
+
+Architecture:
+  model_type: det
+  algorithm: FCE
+  Transform:
+  Backbone:
+    name: ResNet_vd
+    layers: 50
+    dcn_stage: [False, True, True, True]
+    out_indices: [1,2,3]
+  Neck:
+    name: FCEFPN
+    out_channels: 256
+    has_extra_convs: False
+    extra_stage: 0
+  Head:
+    name: FCEHead
+    fourier_degree: 5
+Loss:
+  name: FCELoss
+  fourier_degree: 5
+  num_sample: 50
+  
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    learning_rate: 0.0001
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+PostProcess:
+  name: FCEPostProcess
+  scales: [8, 16, 32]
+  alpha: 1.0
+  beta: 1.0
+  fourier_degree: 5
+  box_type: 'poly'
+
+Metric:
+  name: DetFCEMetric
+  main_indicator: hmean
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+          ignore_orientation: True
+      - DetLabelEncode: # Class handling label
+      - ColorJitter: 
+          brightness: 0.142
+          saturation: 0.5
+          contrast: 0.5
+      - RandomScaling: 
+      - RandomCropFlip:
+          crop_ratio: 0.5
+      - RandomCropPolyInstances:
+          crop_ratio: 0.8
+          min_side_ratio: 0.3
+      - RandomRotatePolyInstances:
+          rotate_ratio: 0.5
+          max_angle: 30
+          pad_with_fixed_color: False
+      - SquareResizePad:
+          target_size: 800
+          pad_ratio: 0.6
+      - IaaAugment:
+          augmenter_args:
+            - { 'type': Fliplr, 'args': { 'p': 0.5 } }
+      - FCENetTargets:
+          fourier_degree: 5
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'p3_maps', 'p4_maps', 'p5_maps'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 6
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+          ignore_orientation: True
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_type: 'min'
+          limit_side_len: 736
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - Pad: 
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/train_infer_python.txt
+===========================train_params===========================
+model_name:det_r50_dcn_fce_ctw_v2.0
+python:python3.7
+gpu_list:0
+Global.use_gpu:True|True
+Global.auto_cast:fp32
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=500
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
+Global.pretrained_model:null
+train_model_name:latest
+train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
+null:null
+##
+trainer:norm_train
+norm_train:tools/train.py -c test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/det_r50_vd_dcn_fce_ctw.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
+pact_train:null
+fpgm_train:null
+distill_train:null
+null:null
+null:null
+##
+===========================eval_params=========================== 
+eval:null
+null:null
+##
+===========================infer_params===========================
+Global.save_inference_dir:./output/
+Global.checkpoints:
+norm_export:tools/export_model.py -c test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/det_r50_vd_dcn_fce_ctw.yml -o 
+quant_export:null 
+fpgm_export:null
+distill_export:null
+export1:null
+export2:null
+##
+train_model:./inference/det_r50_dcn_fce_ctw_v2.0_train/best_accuracy
+infer_export:tools/export_model.py -c test_tipc/configs/det_r50_dcn_fce_ctw_v2.0/det_r50_vd_dcn_fce_ctw.yml  -o
+infer_quant:False
+inference:tools/infer/predict_det.py
+--use_gpu:True|False
+--enable_mkldnn:False
+--cpu_threads:6
+--rec_batch_num:1
+--use_tensorrt:False
+--precision:fp32
+--det_model_dir:
+--image_dir:./inference/ch_det_data_50/all-sum-510/
+--save_log_path:null
+--benchmark:True
+--det_algorithm:FCE
+===========================infer_benchmark_params==========================
+random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
+===========================train_benchmark_params==========================
+batch_size:6
+fp_items:fp32|fp16
+epoch:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
--- a/test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml
+++ b/test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml
@@ -20,7 +20,7 @@ Architecture:
  algorithm: EAST
  Transform:
  Backbone:
-    name: ResNet
+    name: ResNet_vd
    layers: 50
  Neck:
    name: EASTFPN

--- a/test_tipc/configs/det_r50_vd_east_v2_0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_vd_east_v2_0/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_east_v2_0/det_r50_vd_east.yml -o Global.pretrained_model=pretrain_models/det_r50_vd_east_v2.0_train/best_accuracy.pdparams Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -55,4 +55,5 @@ random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
 batch_size:8
 fp_items:fp32|fp16
 epoch:2
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
\ No newline at end of file
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
--- a/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml
+++ b/test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml
@@ -8,7 +8,7 @@ Global:
  # evaluation is run every 125 iterations
  eval_batch_step: [ 0,1000 ]
  cal_metric_during_train: False
-  pretrained_model: 
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained
  checkpoints: #./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
  save_inference_dir:
  use_visualdl: False
@@ -20,7 +20,7 @@ Architecture:
  algorithm: PSE
  Transform:
  Backbone:
-    name: ResNet
+    name: ResNet_vd
    layers: 50
  Neck:
    name: FPN

--- a/test_tipc/configs/det_r50_vd_pse_v2_0/train_infer_python.txt
+++ b/test_tipc/configs/det_r50_vd_pse_v2_0/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/det_r50_vd_pse_v2_0/det_r50_vd_pse.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -54,5 +54,6 @@ random_infer_input:[{float32,[3,640,640]}];[{float32,[3,960,960]}]
 ===========================train_benchmark_params==========================
 batch_size:8
 fp_items:fp32|fp16
-epoch:10
+epoch:2
 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
--- a/test_tipc/configs/en_table_structure/table_mv3.yml
+++ b/test_tipc/configs/en_table_structure/table_mv3.yml
@@ -6,7 +6,7 @@ Global:
  save_model_dir: ./output/table_mv3/
  save_epoch_step: 3
  # evaluation is run every 400 iterations after the 0th iteration
-  eval_batch_step: [0, 400]
+  eval_batch_step: [0, 40000]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:

--- a/test_tipc/configs/en_table_structure/train_infer_python.txt
+++ b/test_tipc/configs/en_table_structure/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./ppstructure/docs/table/table.jpg
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/en_table_structure/table_mv3.yml -o 
+norm_train:tools/train.py -c test_tipc/configs/en_table_structure/table_mv3.yml -o Global.print_batch_step=1 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -27,7 +27,7 @@ null:null
 ===========================infer_params===========================
 Global.save_inference_dir:./output/
 Global.checkpoints:
-norm_export:tools/export_model.py -c test_tipc/configs/en_table_structure/table_mv3.yml -o 
+norm_export:tools/export_model.py -c test_tipc/configs/en_table_structure/table_mv3.yml -o
 quant_export:
 fpgm_export: 
 distill_export:null
@@ -51,3 +51,9 @@ null:null
 null:null
 ===========================infer_benchmark_params==========================
 random_infer_input:[{float32,[3,488,488]}]
+===========================train_benchmark_params==========================
+batch_size:32
+fp_items:fp32|fp16
+epoch:1
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml
+++ b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml
@@ -6,7 +6,7 @@ Global:
  save_model_dir: ./output/rec/mv3_none_bilstm_ctc/
  save_epoch_step: 3
  # evaluation is run every 2000 iterations
-  eval_batch_step: [0, 2000]
+  eval_batch_step: [0, 20000]
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:

--- a/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt
+++ b/test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt
@@ -13,7 +13,7 @@ train_infer_img_dir:./inference/rec_inference
 null:null
 ##
 trainer:norm_train
-norm_train:tools/train.py -c test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o
+norm_train:tools/train.py -c test_tipc/configs/rec_mv3_none_bilstm_ctc_v2.0/rec_icdar15_train.yml -o Global.print_batch_step=4 Train.loader.shuffle=false
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -50,4 +50,10 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic
 --benchmark:True
 null:null
 ===========================infer_benchmark_params==========================
-random_infer_input:[{float32,[3,32,100]}]
\ No newline at end of file
+random_infer_input:[{float32,[3,32,100]}]
+===========================train_benchmark_params==========================
+batch_size:256
+fp_items:fp32|fp16
+epoch:4
+--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
+flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
--- a/test_tipc/docs/benchmark_train.md
+++ b/test_tipc/docs/benchmark_train.md
@@ -51,3 +51,25 @@ train_log/
    ├── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C1_log
    └── PaddleOCR_det_mv3_db_v2_0_bs8_fp32_SingleP_DP_N1C4_log
 ```
+## 3. 各模型单卡性能数据一览
+
+*注：本节中的速度指标均使用单卡（1块Nvidia V100 16G GPU）测得。表中 diff 列为小数据集三次测试结果的相对波动，即 (最大值 - 最小值) / 最大值。
+
+
+|模型名称|配置文件|大数据集 float32 fps |小数据集 float32 fps |diff |大数据集 float16 fps|小数据集 float16 fps| diff | 大数据集大小 | 小数据集大小 |
+|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
+| ch_ppocr_mobile_v2.0_det |[config](../configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt) | 53.836 | 53.343 / 53.914 / 52.785 |0.020940758 | 45.574 | 45.57 / 46.292 / 46.213 | 0.015596647 | 10,000| 2,000|
+| ch_ppocr_mobile_v2.0_rec |[config](../configs/ch_ppocr_mobile_v2.0_rec/train_infer_python.txt) | 2083.311 | 2043.194	/ 2066.372 / 2093.317 |0.023944295 | 2153.261 | 2167.561 /	2165.726 /	2155.614| 0.005511725 | 600,000| 160,000|
+| ch_ppocr_server_v2.0_det |[config](../configs/ch_ppocr_server_v2.0_det/train_infer_python.txt) | 20.716 | 20.739 /	20.807 /	20.755 |0.003268131 | 20.592 | 20.498 /	20.993 /	20.75| 0.023579288 | 10,000| 2,000|
+| ch_ppocr_server_v2.0_rec |[config](../configs/ch_ppocr_server_v2.0_rec/train_infer_python.txt) | 528.56 | 528.386 /	528.991 /	528.391 |0.001143687 | 1189.788 | 1190.007 /	1176.332 /	1192.084| 0.013213834 |  600,000| 160,000|
+| ch_PP-OCRv2_det	 |[config](../configs/ch_PP-OCRv2_det/train_infer_python.txt) | 13.87 | 13.386 /	13.529 /	13.428 |0.010569887 | 17.847 | 17.746 /	17.908 /	17.96| 0.011915367 | 10,000| 2,000|
+| ch_PP-OCRv2_rec	 |[config](../configs/ch_PP-OCRv2_rec/train_infer_python.txt) | 109.248 | 106.32 /	106.318 /	108.587 |0.020895687 | 117.491 | 117.62 /	117.757 /	117.726| 0.001163413 | 140,000| 40,000|
+| det_mv3_db_v2.0	 |[config](../configs/det_mv3_db_v2_0/train_infer_python.txt) | 61.802 | 62.078 /	61.802 /	62.008 |0.00444602 | 82.947 | 84.294 /	84.457 /	84.005| 0.005351836 | 10,000| 2,000|
+| det_r50_vd_db_v2.0	 |[config](../configs/det_r50_db_v2.0/train_infer_python.txt) | 29.955 | 29.092 /	29.31 /	28.844 |0.015899011 | 51.097 |50.367 /	50.879 /	50.227| 0.012814717 | 10,000| 2,000|
+| det_r50_vd_east_v2.0	 |[config](../configs/det_r50_vd_east_v2_0/train_infer_python.txt) | 42.485 | 42.624 /	42.663 /	42.561 |0.00239083 | 67.61 |67.825/ 	68.299/ 	68.51| 0.00999854 | 10,000| 2,000|
+| det_r50_vd_pse_v2.0	 |[config](../configs/det_r50_vd_pse_v2_0/train_infer_python.txt) | 16.455 | 16.517 / 16.555 /	16.353 |0.012201752 | 27.02 |27.288 /	27.152 /	27.408| 0.009340339 | 10,000| 2,000|
+| rec_mv3_none_bilstm_ctc_v2.0	 |[config](../configs/rec_mv3_none_bilstm_ctc_v2.0/train_infer_python.txt) | 2288.358 | 2291.906 /	2293.725 /	2290.05 |0.001602197 | 2336.17 |2327.042 /	2328.093 /	2344.915| 0.007622025 | 600,000| 160,000|
+| PP-Structure-table	 |[config](../configs/en_table_structure/train_infer_python.txt) | 14.151 | 14.077 /	14.23 /	14.25 |0.012140351 | 16.285 | 16.595 /	16.878 /	16.531 | 0.020559308 | 20,000| 5,000|
+| det_r50_dcn_fce_ctw_v2.0	 |[config](../configs/det_r50_dcn_fce_ctw_v2.0/train_infer_python.txt) | 14.057 | 14.029 /	14.02 /	14.014 |0.001069214 | 18.298 |18.411 /	18.376 /	18.331| 0.004345228 | 10,000| 2,000|
+| ch_PP-OCRv3_det	 |[config](../configs/ch_PP-OCRv3_det/train_infer_python.txt) | 8.622 | 8.431 /	8.423 /	8.479|0.006604552 | 14.203 |14.346 / 14.468 / 14.23| 0.016450097 | 10,000| 2,000|
+| ch_PP-OCRv3_rec	 |[config](../configs/ch_PP-OCRv3_rec/train_infer_python.txt) | 73.627 | 72.46 /	73.575 /	73.704|0.016878324 | | |  | 160,000| 40,000|
\ No newline at end of file
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -22,27 +22,79 @@ trainer_list=$(func_parser_value "${lines[14]}")

 if [ ${MODE} = "benchmark_train" ];then
    pip install -r requirements.txt
-    if [[ ${model_name} =~ "det_mv3_db_v2_0" || ${model_name} =~ "det_r50_vd_pse_v2_0" || ${model_name} =~ "det_r18_db_v2_0" ]];then
-        rm -rf ./train_data/icdar2015
+    if [[ ${model_name} =~ "ch_ppocr_mobile_v2.0_det" || ${model_name} =~ "det_mv3_db_v2_0" ]];then
        wget -nc -P  ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams  --no-check-certificate
-        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
-        cd ./train_data/ && tar xf icdar2015.tar && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
+    fi
+    if [[ ${model_name} =~ "ch_ppocr_server_v2.0_det" || ${model_name} =~ "ch_PP-OCRv3_det" ]];then
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
+    fi
+    if [[ ${model_name} =~ "ch_PP-OCRv2_det" ]];then
+        wget  -nc -P  ./pretrain_models/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf ch_ppocr_server_v2.0_det_train.tar  && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
    fi
    if [[ ${model_name} =~ "det_r50_vd_east_v2_0" ]]; then
        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar --no-check-certificate
        cd ./pretrain_models/ && tar xf det_r50_vd_east_v2.0_train.tar && cd ../
-        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
-        cd ./train_data/ && tar xf icdar2015.tar && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
    fi
-    if [[ ${model_name} =~ "det_r50_vd_pse_v2_0" ]];then
+    if [[ ${model_name} =~ "det_r50_db_v2.0" || ${model_name} =~ "det_r50_vd_pse_v2_0" ]];then
        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/pretrained/ResNet50_vd_ssld_pretrained.pdparams --no-check-certificate
-        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
-        cd ./train_data/ && tar xf icdar2015.tar && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
    fi
    if [[ ${model_name} =~ "det_r18_db_v2_0" ]];then
        wget -nc -P ./pretrain_models/  https://paddleocr.bj.bcebos.com/pretrained/ResNet18_vd_pretrained.pdparams  --no-check-certificate
-        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar --no-check-certificate
-        cd ./train_data/ && tar xf icdar2015.tar && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
+    fi
+    if [[ ${model_name} =~ "ch_ppocr_mobile_v2.0_rec" || ${model_name} =~ "ch_ppocr_server_v2.0_rec" || ${model_name} =~ "ch_PP-OCRv2_rec" || ${model_name} =~ "rec_mv3_none_bilstm_ctc_v2.0" || ${model_name} =~ "ch_PP-OCRv3_rec" ]];then
+        rm -rf ./train_data/ic15_data_benckmark
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ic15_data_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf ic15_data_benckmark.tar
+        ln -s ./ic15_data_benckmark ./ic15_data
+        cd ../
+    fi
+    if [[ ${model_name} == "en_table_structure" ]];then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.1/table/en_ppocr_mobile_v2.0_table_structure_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf en_ppocr_mobile_v2.0_table_structure_train.tar  && cd ../
+        rm -rf ./train_data/pubtabnet
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/pubtabnet_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf pubtabnet_benckmark.tar
+        ln -s ./pubtabnet_benckmark ./pubtabnet
+        cd ../
+    fi
+    if [[ ${model_name} == "det_r50_dcn_fce_ctw_v2.0" ]]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../
+        rm -rf ./train_data/icdar2015
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/icdar2015_benckmark.tar --no-check-certificate
+        cd ./train_data/ && tar xf icdar2015_benckmark.tar
+        ln -s ./icdar2015_benckmark ./icdar2015
+        cd ../
    fi
 fi

@@ -137,6 +189,10 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar --no-check-certificate
        cd ./pretrain_models/ && tar xf det_r50_vd_east_v2.0_train.tar && cd ../
    fi
+    if [ ${model_name} == "det_r50_dcn_fce_ctw_v2.0" ]; then
+        wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate
+        cd ./pretrain_models/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../  # '&&' (not '&'): extract in the foreground, then return to the previous directory
+    fi

 elif [ ${MODE} = "whole_train_whole_infer" ];then
    wget -nc -P  ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams --no-check-certificate
@@ -363,6 +419,10 @@ elif [ ${MODE} = "whole_infer" ];then
        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_r50_vd_east_v2.0_train.tar --no-check-certificate
        cd ./inference/ && tar xf det_r50_vd_east_v2.0_train.tar & cd ../
    fi
+    if [ ${model_name} == "det_r50_dcn_fce_ctw_v2.0" ]; then
+        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/contribution/det_r50_dcn_fce_ctw_v2.0_train.tar --no-check-certificate
+        cd ./inference/ && tar xf det_r50_dcn_fce_ctw_v2.0_train.tar && cd ../  # '&&' (not '&'): extract in the foreground, then return to the previous directory
+    fi
    if [[ ${model_name} =~ "en_table_structure" ]];then
        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar --no-check-certificate
        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar --no-check-certificate

--- a/tools/program.py
+++ b/tools/program.py
@@ -154,6 +154,24 @@ def check_xpu(use_xpu):
    except Exception as e:
        pass

+def to_float32(preds):
+    """Recursively cast every paddle.Tensor inside ``preds`` to float32.
+
+    ``preds`` may be a tensor or an arbitrarily nested dict/list of them.
+    Dicts and lists are updated in place; any other non-tensor leaf (for
+    example None) is returned unchanged instead of failing on ``.astype``.
+    """
+    if isinstance(preds, dict):
+        for k in preds:
+            preds[k] = to_float32(preds[k])
+    elif isinstance(preds, list):
+        for k in range(len(preds)):
+            preds[k] = to_float32(preds[k])
+    elif isinstance(preds, paddle.Tensor):
+        # Used after the AMP forward pass so the loss sees float32 values.
+        preds = preds.astype(paddle.float32)
+    return preds
+

 def train(config,
          train_dataloader,
@@ -252,13 +270,19 @@ def train(config,

            # use amp
            if scaler:
-                with paddle.amp.auto_cast():
+                with paddle.amp.auto_cast(level='O2'):
                    if model_type == 'table' or extra_input:
                        preds = model(images, data=batch[1:])
                    elif model_type in ["kie", 'vqa']:
                        preds = model(batch)
                    else:
                        preds = model(images)
+                preds = to_float32(preds)
+                loss = loss_class(preds, batch)
+                avg_loss = loss['loss']
+                scaled_avg_loss = scaler.scale(avg_loss)
+                scaled_avg_loss.backward()
+                scaler.minimize(optimizer, scaled_avg_loss)
            else:
                if model_type == 'table' or extra_input:
                    preds = model(images, data=batch[1:])
@@ -266,15 +290,8 @@ def train(config,
                    preds = model(batch)
                else:
                    preds = model(images)
-
-            loss = loss_class(preds, batch)
-            avg_loss = loss['loss']
-
-            if scaler:
-                scaled_avg_loss = scaler.scale(avg_loss)
-                scaled_avg_loss.backward()
-                scaler.minimize(optimizer, scaled_avg_loss)
-            else:
+                loss = loss_class(preds, batch)
+                avg_loss = loss['loss']
                avg_loss.backward()
                optimizer.step()
            optimizer.clear_grad()

--- a/tools/train.py
+++ b/tools/train.py
@@ -157,6 +157,7 @@ def main(config, device, logger, vdl_writer):
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
+        model, optimizer = paddle.amp.decorate(models=model, optimizers=optimizer, level='O2', master_weight=True)
    else:
        scaler = None