From 20cb993cb6bef73b2134ffd81f645510f9e9629d Mon Sep 17 00:00:00 2001 From: zhoujun Date: Mon, 20 Mar 2023 17:26:19 +0800 Subject: [PATCH] add d2t train support of svtr (#9479) * add d2s train for slanet and v3 * fix bug * update tipc to_static * update db * remove_print * update benchmark_train.sh * update maybe_download_params * add d2t train support of svtr --- configs/rec/rec_svtrnet.yml | 1 + configs/rec/rec_svtrnet_ch.yml | 1 + ppocr/modeling/architectures/__init__.py | 11 ++++++++++- .../modeling/transforms/tps_spatial_transformer.py | 6 ++++-- test_tipc/configs/rec_svtrnet/rec_svtrnet.yml | 1 + .../configs/rec_svtrnet/train_infer_python.txt | 8 ++++++++ test_tipc/prepare.sh | 13 +++++++++++++ 7 files changed, 38 insertions(+), 3 deletions(-) diff --git a/configs/rec/rec_svtrnet.yml b/configs/rec/rec_svtrnet.yml index 82b8273a..4657c5c8 100644 --- a/configs/rec/rec_svtrnet.yml +++ b/configs/rec/rec_svtrnet.yml @@ -20,6 +20,7 @@ Global: infer_mode: False use_space_char: False save_res_path: ./output/rec/predicts_svtr_tiny.txt + d2s_train_image_shape: [3, 64, 256] Optimizer: diff --git a/configs/rec/rec_svtrnet_ch.yml b/configs/rec/rec_svtrnet_ch.yml index 597e57fb..bb82cfc5 100644 --- a/configs/rec/rec_svtrnet_ch.yml +++ b/configs/rec/rec_svtrnet_ch.yml @@ -19,6 +19,7 @@ Global: infer_mode: false use_space_char: true save_res_path: ./output/rec/predicts_svtr_tiny_ch_all.txt + d2s_train_image_shape: [3, 32, 320] Optimizer: name: AdamW beta1: 0.9 diff --git a/ppocr/modeling/architectures/__init__.py b/ppocr/modeling/architectures/__init__.py index 1059af23..00220d28 100755 --- a/ppocr/modeling/architectures/__init__.py +++ b/ppocr/modeling/architectures/__init__.py @@ -40,7 +40,9 @@ def apply_to_static(model, config, logger): return model assert "d2s_train_image_shape" in config[ "Global"], "d2s_train_image_shape must be assigned for static training mode..." 
- supported_list = ["DB", "SVTR_LCNet", "TableMaster", "LayoutXLM", "SLANet"] + supported_list = [ + "DB", "SVTR_LCNet", "TableMaster", "LayoutXLM", "SLANet", "SVTR" + ] if config["Architecture"]["algorithm"] in ["Distillation"]: algo = list(config["Architecture"]["Models"].values())[0]["algorithm"] else: @@ -104,6 +106,13 @@ def apply_to_static(model, config, logger): InputSpec( [None, 6], dtype='float64'), ]) + elif algo == "SVTR": + specs.append([ + InputSpec( + [None, config["Global"]["max_text_length"]], dtype='int64'), + InputSpec( + [None], dtype='int64') + ]) model = to_static(model, input_spec=specs) logger.info("Successfully to apply @to_static with specs: {}".format(specs)) return model diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py index e7ec2c84..a409d786 100644 --- a/ppocr/modeling/transforms/tps_spatial_transformer.py +++ b/ppocr/modeling/transforms/tps_spatial_transformer.py @@ -140,7 +140,9 @@ class TPSSpatialTransformer(nn.Layer): padding_matrix = paddle.expand( self.padding_matrix, shape=[batch_size, 3, 2]) - Y = paddle.concat([source_control_points, padding_matrix], 1) + Y = paddle.concat([ + source_control_points.astype(padding_matrix.dtype), padding_matrix + ], 1) mapping_matrix = paddle.matmul(self.inverse_kernel, Y) source_coordinate = paddle.matmul(self.target_coordinate_repr, mapping_matrix) @@ -153,4 +155,4 @@ class TPSSpatialTransformer(nn.Layer): # the input to grid_sample is normalized [-1, 1], but what we get is [0, 1] grid = 2.0 * grid - 1.0 output_maps = grid_sample(input, grid, canvas=None) - return output_maps, source_coordinate \ No newline at end of file + return output_maps, source_coordinate diff --git a/test_tipc/configs/rec_svtrnet/rec_svtrnet.yml b/test_tipc/configs/rec_svtrnet/rec_svtrnet.yml index 140b17e0..6e22bc78 100644 --- a/test_tipc/configs/rec_svtrnet/rec_svtrnet.yml +++ b/test_tipc/configs/rec_svtrnet/rec_svtrnet.yml @@ -20,6 +20,7 @@ 
Global: infer_mode: False use_space_char: False save_res_path: ./output/rec/predicts_svtr_tiny.txt + d2s_train_image_shape: [3, 64, 256] Optimizer: diff --git a/test_tipc/configs/rec_svtrnet/train_infer_python.txt b/test_tipc/configs/rec_svtrnet/train_infer_python.txt index 5508c041..63e6b908 100644 --- a/test_tipc/configs/rec_svtrnet/train_infer_python.txt +++ b/test_tipc/configs/rec_svtrnet/train_infer_python.txt @@ -51,3 +51,11 @@ inference:tools/infer/predict_rec.py --rec_char_dict_path=./ppocr/utils/ic15_dic null:null ===========================infer_benchmark_params========================== random_infer_input:[{float32,[3,64,256]}] +===========================train_benchmark_params========================== +batch_size:512 +fp_items:fp32|fp16 +epoch:2 +--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile +flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 +===========================to_static_train_benchmark_params=========================== +to_static_train:Global.to_static=true \ No newline at end of file diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh index da3ef905..0600005e 100644 --- a/test_tipc/prepare.sh +++ b/test_tipc/prepare.sh @@ -145,6 +145,19 @@ if [ ${MODE} = "benchmark_train" ];then cd ./train_data/ && tar xf StructureLabel_val_500.tar cd ../ fi + if [ ${model_name} == "rec_svtrnet" ]; then + wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dataset/ic15_data_benckmark.tar --no-check-certificate + cd ./train_data/ && tar xf ic15_data_benckmark.tar + ln -s ./ic15_data_benckmark ./ic15_data + cd ic15_data + mv rec_gt_train4w.txt rec_gt_train.txt + + for i in `seq 10`;do cp rec_gt_train.txt dup$i.txt;done + cat dup* > rec_gt_train.txt && rm -rf dup* + + cd ../ + cd ../ + fi fi if [ ${MODE} = "lite_train_lite_infer" ];then -- GitLab