From 509202a339e1cd8050e194b38c207ca3997f70c8 Mon Sep 17 00:00:00 2001 From: xiongkun Date: Tue, 22 Nov 2022 10:24:07 +0000 Subject: [PATCH] 1. add dystatic config file for 6 additional models. 2. fix some bugs in test_train_dy2static_python.sh --- ...former_tiny_224_train_dy2static_python.txt | 22 +++++++++ .../MobileNetV1_train_dy2static_python.txt | 4 +- .../MobileNetV2_train_dy2static_python.txt | 4 +- ...etV3_large_x1_0_train_dy2static_python.txt | 2 +- .../MobileViT_S_train_dy2static_python.txt | 23 ++++++++++ .../PPHGNet_small_train_dy2static_python.txt | 44 ++++++++++++++++++ .../ResNet152_train_dy2static_python.txt | 22 +++++++++ .../ResNet50_train_dy2static_python.txt | 44 ++++++++++++++++++ ...uffleNetV2_x1_0_train_dy2static_python.txt | 45 +++++++++++++++++++ test_tipc/test_train_dy2static_python.sh | 11 ++--- 10 files changed, 209 insertions(+), 12 deletions(-) create mode 100644 test_tipc/configs/CSWinTransformer/CSWinTransformer_tiny_224_train_dy2static_python.txt create mode 100644 test_tipc/configs/MobileViT/MobileViT_S_train_dy2static_python.txt create mode 100644 test_tipc/configs/PPHGNet/PPHGNet_small_train_dy2static_python.txt create mode 100644 test_tipc/configs/ResNet/ResNet152_train_dy2static_python.txt create mode 100644 test_tipc/configs/ResNet/ResNet50_train_dy2static_python.txt create mode 100644 test_tipc/configs/ShuffleNet/ShuffleNetV2_x1_0_train_dy2static_python.txt diff --git a/test_tipc/configs/CSWinTransformer/CSWinTransformer_tiny_224_train_dy2static_python.txt b/test_tipc/configs/CSWinTransformer/CSWinTransformer_tiny_224_train_dy2static_python.txt new file mode 100644 index 00000000..d36d3fdf --- /dev/null +++ b/test_tipc/configs/CSWinTransformer/CSWinTransformer_tiny_224_train_dy2static_python.txt @@ -0,0 +1,22 @@ +=========================== base_train =========================== +model_name:CSWinTransformer_tiny_224 +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o 
Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:norm_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/CSWinTransformer/CSWinTransformer_tiny_224.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1 -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt b/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt index dc9e01b7..4edaef2d 100644 --- a/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt +++ b/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt @@ -12,7 +12,7 @@ train_model_name:latest train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## -trainer:to_static_train +trainer:norm_train norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 pact_train:null fpgm_train:null @@ -34,7 +34,7 @@ train_model_name:latest train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## -trainer:to_static_train +trainer:amp_train amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o 
AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 pact_train:null fpgm_train:null diff --git a/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt b/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt index 8ea5fd5a..a236b2da 100644 --- a/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt +++ b/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt @@ -12,7 +12,7 @@ train_model_name:latest train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## -trainer:to_static_train +trainer:norm_train norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 pact_train:null fpgm_train:null @@ -34,7 +34,7 @@ train_model_name:latest train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## -trainer:to_static_train +trainer:amp_train amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 pact_train:null fpgm_train:null diff --git a/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt b/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt index 1dba90c0..f26af7a4 100644 --- a/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt +++ b/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt @@ -12,7 +12,7 @@ 
train_model_name:latest train_infer_img_dir:./dataset/ILSVRC2012/val null:null ## -trainer:to_static_train +trainer:norm_train norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 -o Global.print_batch_step=1 pact_train:null fpgm_train:null diff --git a/test_tipc/configs/MobileViT/MobileViT_S_train_dy2static_python.txt b/test_tipc/configs/MobileViT/MobileViT_S_train_dy2static_python.txt new file mode 100644 index 00000000..3d2ebdc8 --- /dev/null +++ b/test_tipc/configs/MobileViT/MobileViT_S_train_dy2static_python.txt @@ -0,0 +1,23 @@ +=========================== base_train =========================== +model_name:MobileViT_S +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:norm_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileViT/MobileViT_S.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.print_batch_step=1 -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## + diff --git a/test_tipc/configs/PPHGNet/PPHGNet_small_train_dy2static_python.txt 
b/test_tipc/configs/PPHGNet/PPHGNet_small_train_dy2static_python.txt new file mode 100644 index 00000000..759f01fd --- /dev/null +++ b/test_tipc/configs/PPHGNet/PPHGNet_small_train_dy2static_python.txt @@ -0,0 +1,44 @@ +=========================== base_train =========================== +model_name:PPHGNet_small +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:norm_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/PPHGNet/PPHGNet_small.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o AMP=None +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== amp_train =========================== +model_name:PPHGNet_small +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:amp_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/PPHGNet/PPHGNet_small.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=65536 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Optimizer.multi_precision=True -o Global.eval_during_train=False +pact_train:null +fpgm_train:null +distill_train:null 
+to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/ResNet/ResNet152_train_dy2static_python.txt b/test_tipc/configs/ResNet/ResNet152_train_dy2static_python.txt new file mode 100644 index 00000000..0bd202b6 --- /dev/null +++ b/test_tipc/configs/ResNet/ResNet152_train_dy2static_python.txt @@ -0,0 +1,22 @@ +=========================== amp_train =========================== +model_name:ResNet152 +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:amp_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/ResNet/ResNet152.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/ResNet/ResNet50_train_dy2static_python.txt b/test_tipc/configs/ResNet/ResNet50_train_dy2static_python.txt new file mode 100644 index 00000000..a2d10983 --- /dev/null +++ b/test_tipc/configs/ResNet/ResNet50_train_dy2static_python.txt @@ -0,0 +1,44 @@ +=========================== base_train =========================== +model_name:ResNet50 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:norm_train 
+norm_train:tools/train.py -c ppcls/configs/ImageNet/ResNet/ResNet50.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== amp_train =========================== +model_name:ResNet50 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:amp_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/ResNet/ResNet50_amp_O1.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O1 -o Global.print_batch_step=1 -o Global.use_dali=False -o Global.eval_during_train=False +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/ShuffleNet/ShuffleNetV2_x1_0_train_dy2static_python.txt b/test_tipc/configs/ShuffleNet/ShuffleNetV2_x1_0_train_dy2static_python.txt new file mode 100644 index 00000000..9b770b82 --- /dev/null +++ b/test_tipc/configs/ShuffleNet/ShuffleNetV2_x1_0_train_dy2static_python.txt @@ -0,0 +1,45 @@ +=========================== base_train =========================== +model_name:ShuffleNetV2_x1_0 +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o 
DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:norm_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== amp_train =========================== +model_name:ShuffleNetV2_x1_0 +python:python3.7 +gpu_list:0|0,1 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:amp_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/ShuffleNet/ShuffleNetV2_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## + diff --git a/test_tipc/test_train_dy2static_python.sh b/test_tipc/test_train_dy2static_python.sh index 864bc768..e7c35f66 100644 --- a/test_tipc/test_train_dy2static_python.sh +++ b/test_tipc/test_train_dy2static_python.sh @@ -32,14 +32,11 @@ do echo ${config_lines} > $FILENAME sed -i 's/gpu_list.*$/gpu_list:0/g' $FILENAME sed -i '16s/$/ -o Global.print_batch_step=1/' ${FILENAME} - IFS=$'\n' - # start dygraph train 
dygraph_output=$LOG_PATH/${config_name}_python_train_infer_dygraph_output.txt dygraph_loss=$LOG_PATH/${config_name}_dygraph_loss.txt - sed -i '15ctrainer:norm_train' ${FILENAME} cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dygraph_output 2>&1" echo $cmd eval $cmd @@ -63,10 +60,10 @@ do last_status=$? cat $diff_log if [ "$dyout" = "" ]; then + status_check 1 $diff_cmd $status_log $model_name $diff_log + elif [ "$stout" = "" ]; then status_check 2 $diff_cmd $status_log $model_name $diff_log + else + status_check $last_status $diff_cmd $status_log $model_name $diff_log fi - if [ "$stout" = "" ]; then - status_check 2 $diff_cmd $status_log $model_name $diff_log - fi - status_check $last_status $diff_cmd $status_log $model_name $diff_log done -- GitLab