Commit 0e5cbd2b authored by xiongkun, committed by Wei Shengyu

[dy2static-tipc] add txt config for dy2static test

Parent f04eb47f
=========================== base_train ===========================
model_name:MobileNetV2
python:python3.7
gpu_list:0
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
=========================== amp_train ===========================
model_name:MobileNetV1
python:python3.7
gpu_list:0
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV1/MobileNetV1.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
=========================== base_train ===========================
model_name:MobileNetV2
python:python3.7
gpu_list:0
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
=========================== amp_train ===========================
model_name:MobileNetV2
python:python3.7
gpu_list:0
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV2/MobileNetV2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
=========================== base_train ===========================
model_name:MobileNetV3_large_x1_0
python:python3.7
gpu_list:0
-o Global.device:cpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
norm_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o Global.eval_during_train=False -o Global.save_interval=2 -o Global.print_batch_step=1
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
=========================== amp_train ===========================
model_name:MobileNetV3_large_x1_0
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:to_static_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/MobileNetV3/MobileNetV3_large_x1_0.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2 -o Global.eval_during_train=False -o Global.save_interval=2
pact_train:null
fpgm_train:null
distill_train:null
to_static_train:-o Global.to_static=True
null:null
##
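Each "===========" header above opens a standalone 22-line TIPC sub-config: the header, the run-time fields (model name, device, epochs, batch size, and so on), a "##" separator, and the trainer section. The driver script below depends on the fixed layout of that block: line 15 is the trainer: field and line 16 the training command, which is why it can rewrite them with sed by line number. As a minimal sketch (not part of the commit), the second sub-config, which starts at line 23 of the base file, can be previewed like this:

# minimal sketch, assuming the config above is saved at the path in $BASE_CONFIG_FILE
sed -n '23,$p' "$BASE_CONFIG_FILE" | head -n 22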
#!/bin/bash
source test_tipc/common_func.sh
IFS=$'\n'
BASE_CONFIG_FILE=$1
# always use the lite_train_lite_infer mode to speed up the test; the per-section config files are modified accordingly below.
MODE=lite_train_lite_infer
BASEDIR=$(dirname "$0")
# get the log path.
IFS=$'\n'
dataline=$(cat ${BASE_CONFIG_FILE})
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
LOG_PATH="./test_tipc/output/${model_name}/${MODE}"
status_log="${LOG_PATH}/results_python.log"
# make cuDNN algorithms (such as conv) deterministic.
export FLAGS_cudnn_deterministic=True
# read the base config, then parse and run each sub-config in turn
config_line_numbers=`cat ${BASE_CONFIG_FILE} | grep -n "============" | cut -d':' -f1`
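# for the base config above, this yields the starting line of every "=====" header: 1 23 45 67 89 111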
for cln in $config_line_numbers
do
# change IFS so that \n is not treated as a delimiter while slicing out the block.
IFS=""
config_lines=$(cat ${BASE_CONFIG_FILE} | sed -n "${cln},\$p" | head -n 22)
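# every sub-config is exactly 22 lines, so head -n 22 captures one whole block starting at its header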
config_name=`echo ${config_lines} | grep '=====' | cut -d' ' -f2`
FILENAME=$LOG_PATH/dy2static_$config_name.txt
echo "[Start dy2static]" "${config_name} : ${FILENAME}"
echo ${config_lines} > $FILENAME
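# the block has a fixed layout: line 15 is the trainer: field and line 16 the training command; the seds below and the '15c' edits further down rely on those positions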
sed -i 's/gpu_list.*$/gpu_list:0/g' $FILENAME
sed -i '16s/$/ -o Global.print_batch_step=1/' ${FILENAME}
IFS=$'\n'
# start dygraph train
dygraph_output=$LOG_PATH/${config_name}_python_train_infer_dygraph_output.txt
dygraph_loss=$LOG_PATH/${config_name}_dygraph_loss.txt
sed -i '15ctrainer:norm_train' ${FILENAME}
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dygraph_output 2>&1"
echo $cmd
eval $cmd
# start dy2static train
dy2static_output=$LOG_PATH/${config_name}_python_train_infer_dy2static_output.txt
dy2static_loss=$LOG_PATH/${config_name}_dy2static_loss.txt
sed -i '15ctrainer:to_static_train' ${FILENAME}
cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} $MODE >$dy2static_output 2>&1"
echo $cmd
eval $cmd
# analyze and compare the losses.
dyout=`cat $dy2static_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},'`
stout=`cat $dygraph_output | python test_tipc/extract_loss.py -v 'Iter:' -e 'loss: {%f},' `
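# extract_loss.py is shipped in test_tipc; presumably it keeps lines matching 'Iter:' and prints the float captured by 'loss: {%f},' (an assumption; that script is not shown in this diff)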
echo $stout > $dygraph_loss
echo $dyout > $dy2static_loss
diff_log=$LOG_PATH/${config_name}_diff_log.txt
diff_cmd="diff -w $dygraph_loss $dy2static_loss > $diff_log"
eval $diff_cmd
last_status=$?
cat $diff_log
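# an empty extraction means the corresponding train log had no parsable loss lines; force a failure status (2) in that case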
if [ "$dyout" = "" ]; then
status_check 2 $diff_cmd $status_log $model_name $diff_log
fi
if [ "$stout" = "" ]; then
status_check 2 $diff_cmd $status_log $model_name $diff_log
fi
status_check $last_status $diff_cmd $status_log $model_name $diff_log
done
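This diff does not show the path under which the driver script or the base config is saved, so the invocation below is only a hypothetical usage sketch with assumed file names:

# hypothetical paths; substitute the actual script/config locations in test_tipc,
# and run from the repository root so that test_tipc/common_func.sh resolves
bash test_tipc/train_dy2static_python.sh test_tipc/configs/MobileNetV2/MobileNetV2_train_dy2static.txt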