From 0e5cbd2b6aaf1b44258e36bf7088f29b2d2ddd6e Mon Sep 17 00:00:00 2001 From: xiongkun Date: Mon, 21 Nov 2022 16:58:39 +0000 Subject: [PATCH] [dy2static-tipc] add txt config for dy2static test --- .../MobileNetV1_train_dy2static_python.txt | 44 +++++++++ .../MobileNetV2_train_dy2static_python.txt | 44 +++++++++ ...etV3_large_x1_0_train_dy2static_python.txt | 44 +++++++++ test_tipc/test_train_dy2static_python.sh | 95 +++++++++++-------- 4 files changed, 186 insertions(+), 41 deletions(-) create mode 100644 test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt create mode 100644 test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt create mode 100644 test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt diff --git a/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt b/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt new file mode 100644 index 00000000..62974275 --- /dev/null +++ b/test_tipc/configs/MobileNetV1/MobileNetV1_train_dy2static_python.txt @@ -0,0 +1,44 @@ +=========================== base_train =========================== +model_name:MobileNetV1 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== 
amp_train =========================== +model_name:MobileNetV1 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt b/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt new file mode 100644 index 00000000..8ea5fd5a --- /dev/null +++ b/test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static_python.txt @@ -0,0 +1,44 @@ +=========================== base_train =========================== +model_name:MobileNetV2 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o 
Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== amp_train =========================== +model_name:MobileNetV2 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt b/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt new file mode 100644 index 00000000..1dba90c0 --- /dev/null +++ b/test_tipc/configs/MobileNetV3/MobileNetV3_large_x1_0_train_dy2static_python.txt @@ -0,0 +1,44 @@ +=========================== base_train =========================== +model_name:MobileNetV3_large_x1_0 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +norm_train:tools/train.py -c 
ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## +=========================== amp_train =========================== +model_name:MobileNetV3_large_x1_0 +python:python3.7 +gpu_list:0 +-o Global.device:gpu +-o Global.auto_cast:null +-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120 +-o Global.output_dir:./output/ +-o DataLoader.Train.sampler.batch_size:8 +-o Global.pretrained_model:null +train_model_name:latest +train_infer_img_dir:./dataset/ILSVRC2012/val +null:null +## +trainer:to_static_train +amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 +pact_train:null +fpgm_train:null +distill_train:null +to_static_train:-o Global.to_static=True +null:null +## diff --git a/test_tipc/test_train_dy2static_python.sh b/test_tipc/test_train_dy2static_python.sh index 7769bb8a..864bc768 100644 --- a/test_tipc/test_train_dy2static_python.sh +++ b/test_tipc/test_train_dy2static_python.sh @@ -1,20 +1,14 @@ #!/bin/bash source test_tipc/common_func.sh +IFS=$'\n' +BASE_CONFIG_FILE=$1 # always use the lite_train_lite_infer mode to speed. Modify the config file. 
MODE=lite_train_lite_infer BASEDIR=$(dirname "$0") -FILENAME=$1 -sed -i 's/gpu_list.*$/gpu_list:0/g' $FILENAME -sed -i '23,$d' $FILENAME -#sed -i 's/-o Global.device:.*$/-o Global.device:cpu/g' $FILENAME -sed -i '16s/$/ -o Global.print_batch_step=1/' ${FILENAME} - - # get the log path. -IFS=$'\n' -dataline=$(cat ${FILENAME}) +dataline=$(cat ${BASE_CONFIG_FILE}) lines=(${dataline}) model_name=$(func_parser_value "${lines[1]}") LOG_PATH="./test_tipc/output/${model_name}/${MODE}" @@ -25,35 +19,54 @@ status_log="${LOG_PATH}/results_python.log" # make cudnn algorithm deterministic, such as conv. export FLAGS_cudnn_deterministic=True -# start dygraph train -dygraph_output=$LOG_PATH/python_train_infer_dygraph_output.txt -dygraph_loss=$LOG_PATH/dygraph_loss.txt -sed -i '15ctrainer:norm_train' ${FILENAME} -cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dygraph_output 2>&1" -echo $cmd -eval $cmd - -# start dy2static train -dy2static_output=$LOG_PATH/python_train_infer_dy2static_output.txt -dy2static_loss=$LOG_PATH/dy2static_loss.txt -sed -i '15ctrainer:to_static_train' ${FILENAME} -cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dy2static_output 2>&1" -echo $cmd -eval $cmd - -# analysis and compare the losses. -dyout=`cat $dy2static_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},'` -stout=`cat $dygraph_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},' ` -echo $dyout > $dygraph_loss -echo $stout > $dy2static_loss -diff_log=$LOG_PATH/diff_log.txt -diff_cmd="diff -w $dygraph_loss $dy2static_loss | tee $diff_log" -eval $diff_cmd -last_status=$? 
-if [ "$dyout" = "" ]; then - status_check 2 $diff_cmd $status_log $model_name $diff_log -fi -if [ "$stout" = "" ]; then - status_check 2 $diff_cmd $status_log $model_name $diff_log -fi -status_check $last_status $diff_cmd $status_log $model_name $diff_log +# read the base config, then parse and run each sub-config in turn +config_line_numbers=`cat ${BASE_CONFIG_FILE} | grep -n "============" | cut -d':' -f1` +for cln in $config_line_numbers +do + # clear IFS so that \n is not treated as a delimiter and the newlines in config_lines are preserved. + IFS="" + config_lines=$(cat ${BASE_CONFIG_FILE} | sed -n "${cln},\$p" | head -n 22) + config_name=`echo ${config_lines} | grep '=====' | cut -d' ' -f2` + FILENAME=$LOG_PATH/dy2static_$config_name.txt + echo "[Start dy2static]" "${config_name} : ${FILENAME}" + echo ${config_lines} > $FILENAME + sed -i 's/gpu_list.*$/gpu_list:0/g' $FILENAME + sed -i '16s/$/ -o Global.print_batch_step=1/' ${FILENAME} + + IFS=$'\n' + + + # start dygraph train + dygraph_output=$LOG_PATH/${config_name}_python_train_infer_dygraph_output.txt + dygraph_loss=$LOG_PATH/${config_name}_dygraph_loss.txt + sed -i '15ctrainer:norm_train' ${FILENAME} + cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dygraph_output 2>&1" + echo $cmd + eval $cmd + + # start dy2static train + dy2static_output=$LOG_PATH/${config_name}_python_train_infer_dy2static_output.txt + dy2static_loss=$LOG_PATH/${config_name}_dy2static_loss.txt + sed -i '15ctrainer:to_static_train' ${FILENAME} + cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dy2static_output 2>&1" + echo $cmd + eval $cmd + + # analyze and compare the losses. 
+ dyout=`cat $dy2static_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},'` + stout=`cat $dygraph_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},' ` + echo $dyout > $dygraph_loss + echo $stout > $dy2static_loss + diff_log=$LOG_PATH/${config_name}_diff_log.txt + diff_cmd="diff -w $dygraph_loss $dy2static_loss > $diff_log" + eval $diff_cmd + last_status=$? + cat $diff_log + if [ "$dyout" = "" ]; then + status_check 2 $diff_cmd $status_log $model_name $diff_log + fi + if [ "$stout" = "" ]; then + status_check 2 $diff_cmd $status_log $model_name $diff_log + fi + status_check $last_status $diff_cmd $status_log $model_name $diff_log +done -- GitLab