From 494f381f213b3ca863e74048d193e246916b767b Mon Sep 17 00:00:00 2001 From: shangliang Xu Date: Mon, 8 Aug 2022 16:24:26 +0800 Subject: [PATCH] [TIPC] fix random seed in train benchmark (#6603) --- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- ...gpu_normal_amp_infer_python_linux_gpu_cpu.txt | 2 +- test_tipc/test_train_inference_python.sh | 16 ++++++++++++---- 12 files changed, 23 insertions(+), 15 deletions(-) diff --git a/test_tipc/configs/keypoint/tinypose_128x96_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/keypoint/tinypose_128x96_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index b91057703..7be6cfed1 100644 --- a/test_tipc/configs/keypoint/tinypose_128x96_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/keypoint/tinypose_128x96_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/keypoint/tinypose_128x96.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c test_tipc/configs/keypoint/tinypose_128x96.yml -o diff --git a/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index af061e055..796fd008c 100644 --- a/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml -o diff --git a/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 28272ad58..19ff2c7b8 100644 --- a/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/picodet_lcnet_1_5x_416_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_5x_416_coco.yml -o diff --git a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index cac5a70bb..cf61a5ab1 100644 --- a/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/picodet/picodet_s_320_coco_lcnet_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/picodet_s_320_coco_lcnet.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/picodet/picodet_s_320_coco_lcnet.yml -o diff --git a/test_tipc/configs/picodet/picodet_s_320_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/picodet/picodet_s_320_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index b1a87cdd7..4c3cf24d8 100644 --- a/test_tipc/configs/picodet/picodet_s_320_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/picodet/picodet_s_320_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/picodet_s_320_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/picodet/legacy_model/picodet_s_320_coco.yml -o diff --git a/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 6ee39190d..e89918d47 100644 --- a/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml -o diff --git a/test_tipc/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 6a4235a0b..2aac9ba9f 100644 --- a/test_tipc/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml -o diff --git a/test_tipc/configs/ppyolo/ppyolo_tiny_650e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ppyolo/ppyolo_tiny_650e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index cb0659a7b..dcd3252fc 100644 --- a/test_tipc/configs/ppyolo/ppyolo_tiny_650e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ppyolo/ppyolo_tiny_650e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyolo_tiny_650e_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/ppyolo/ppyolo_tiny_650e_coco.yml -o diff --git a/test_tipc/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 721db09e0..b6423b95a 100644 --- a/test_tipc/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml -o diff --git a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index 815a23937..f11713ba9 100644 --- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml -o diff --git a/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt b/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt index b3142b6ea..7987def92 100644 --- a/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt +++ b/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_train_linux_gpu_normal_amp_infer_python_linux_gpu_cpu.txt @@ -10,7 +10,7 @@ TrainReader.batch_size:lite_train_lite_infer=2|lite_train_whole_infer=2|whole_tr pretrain_weights:https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams trained_model_name:model_final.pdparams train_infer_img_dir:./dataset/coco/test2017/ -amp_level:O2 +null:null ## trainer:norm_train norm_train:tools/train.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml -o diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh index d92ef2c3e..6da0b030f 100644 --- a/test_tipc/test_train_inference_python.sh +++ b/test_tipc/test_train_inference_python.sh @@ -271,17 +271,25 @@ else save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}" if [ ${autocast} = "amp" ] || [ ${autocast} = "fp16" ]; then set_autocast="--amp" - set_train_params1="amp_level=O2" + set_amp_level="amp_level=O2" else set_autocast=" " + set_amp_level=" " + fi + if [ ${MODE} = "benchmark_train" ]; then + set_shuffle="TrainReader.shuffle=False" + set_enable_ce="--enable_ce=True" + else + set_shuffle=" " + set_enable_ce=" " fi set_save_model=$(func_set_params "${save_model_key}" "${save_log}") nodes="1" if [ ${#gpu} -le 2 ];then # train with cpu or single gpu - cmd="${python} ${run_train} LearningRate.base_lr=0.0001 log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_train_params1} ${set_autocast}" + cmd="${python} ${run_train} LearningRate.base_lr=0.0001 log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_shuffle} ${set_amp_level} ${set_enable_ce} ${set_autocast} ${set_train_params1}" elif [ ${#ips} -le 15 ];then # train with multi-gpu - cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_train_params1} ${set_autocast}" + cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_shuffle} ${set_amp_level} ${set_enable_ce} ${set_autocast} ${set_train_params1}" else # train with multi-machine IFS="," ips_array=(${ips}) @@ -289,7 +297,7 @@ else save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}" IFS="|" set_save_model=$(func_set_params "${save_model_key}" "${save_log}") - cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_train_params1} ${set_autocast}" + cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} log_iter=1 ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_filename} ${set_shuffle} ${set_amp_level} ${set_enable_ce} ${set_autocast} ${set_train_params1}" fi # run train train_log_path="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}.log" -- GitLab