From e039650ef7fefc2acad74eb9c1b945b619fa7594 Mon Sep 17 00:00:00 2001 From: LDOUBLEV Date: Tue, 8 Feb 2022 02:06:45 +0000 Subject: [PATCH] add benchmark_train.sh v2 --- test_tipc/benchmark_trainv2.sh | 286 ++++++++++-------- .../det_mv3_db_v2/train_infer_python.txt | 9 +- tools/program.py | 2 +- 3 files changed, 165 insertions(+), 132 deletions(-) diff --git a/test_tipc/benchmark_trainv2.sh b/test_tipc/benchmark_trainv2.sh index 27518a49..e1050e62 100644 --- a/test_tipc/benchmark_trainv2.sh +++ b/test_tipc/benchmark_trainv2.sh @@ -1,6 +1,14 @@ #!/bin/bash source test_tipc/common_func.sh +# set env +python=python3.7 +export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3` +export model_commit=$(git log|head -n1|awk '{print $2}') +export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`) +export frame_version=${str_tmp%%.post*} +export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`) + # run benchmark sh # Usage: # bash run_benchmark_train.sh config.txt params @@ -55,30 +63,15 @@ function get_repo_name(){ } FILENAME=$1 +cp FILENAME as new FILENAME +new_filename="./test_tipc/benchmark_train.txt" +cmd=`yes|cp $FILENAME $new_filename` +FILENAME=$new_filename # MODE be one of ['benchmark_train'] MODE=$2 -params=$3 +PARAMS=$3 # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2.0/train_benchmark.txt benchmark_train dynamic_bs8_null_SingleP_DP_N1C1 -IFS="\n" - -# parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_process_type}_${run_mode}_${device_num} -IFS="_" -params_list=(${params}) -model_type=${params_list[0]} -batch_size=${params_list[1]} -batch_size=`echo ${batch_size} | tr -cd "[0-9]" ` -precision=${params_list[2]} -run_process_type=${params_list[3]} -run_mode=${params_list[4]} -device_num=${params_list[5]} -device_num_copy=$device_num -IFS=";" - - -# sed batchsize and precision -func_sed_params "$FILENAME" "6" "$precision" -func_sed_params "$FILENAME" "9" "$batch_size" - +IFS=$'\n' # parser params from train_benchmark.txt dataline=`cat $FILENAME` # parser params @@ -87,24 +80,22 @@ lines=(${dataline}) model_name=$(func_parser_value "${lines[1]}") # 获取benchmark_params所在的行数 -line_num=`grep -n "benchmark_params" $FILENAME | cut -d ":" -f 1` +line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1` # for train log parser +batch_size=$(func_parser_value "${lines[line_num]}") +line_num=`expr $line_num + 1` +fp_items=$(func_parser_value "${lines[line_num]}") line_num=`expr $line_num + 1` +epoch=$(func_parser_value "${lines[line_num]}") +line_num=`expr $line_num + 1` profile_option_key=$(func_parser_key "${lines[line_num]}") profile_option_params=$(func_parser_value "${lines[line_num]}") profile_option="${profile_option_key}:${profile_option_params}" line_num=`expr $line_num + 1` flags_value=$(func_parser_value "${lines[line_num]}") - -gpu_id=$(set_gpu_id $device_num) -repo_name=$(get_repo_name ) - -SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log -status_log="${SAVE_LOG}/benchmark_log/results.log" - -# set export +# set flags IFS=";" flags_list=(${flags_value}) for _flag in ${flags_list[*]}; do @@ -112,112 +103,151 @@ for _flag in ${flags_list[*]}; do eval $cmd done -if [ ${precision} = "null" ];then - precision="fp32" -fi - -# set env -python=python -export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3` -export model_commit=$(git log|head -n1|awk '{print $2}') -export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`) -export frame_version=${str_tmp%%.post*} -export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`) +# set log_name +repo_name=$(get_repo_name ) +SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log +mkdir -p "${SAVE_LOG}/benchmark_log/" +status_log="${SAVE_LOG}/benchmark_log/results.log" # set eval and export as null # line eval_py: 24 # line export_py: 30 func_sed_params "$FILENAME" "24" "null" func_sed_params "$FILENAME" "30" "null" -func_sed_params "$FILENAME" "3" "python" - - -if [ ${#gpu_id} -le 1 ];then - log_path="$SAVE_LOG/profiling_log" - mkdir -p $log_path - log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling" - func_sed_params "$FILENAME" "4" "0" # sed used gpu_id - # set profile_option params +func_sed_params "$FILENAME" "3" "$python" + +# if params +if [ ! -n "$PARAMS" ] ;then + # PARAMS input is not a word. + IFS="|" + batch_size_list=(${batch_size}) + fp_items_list=(${fp_items}) + device_num_list=(N1C4) + run_mode="DP" + echo "batchsize list: $batch_size_list ${batch_size_list[1]}" + echo "fp_item_lists: $fp_items_list ${fp_items_list[1]}" +else + # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_process_type}_${run_mode}_${device_num} + IFS="_" + params_list=(${PARAMS}) + model_type=${params_list[0]} + batch_size=${params_list[1]} + batch_size=`echo ${batch_size} | tr -cd "[0-9]" ` + precision=${params_list[2]} + run_process_type=${params_list[3]} + run_mode=${params_list[4]} + device_num=${params_list[5]} IFS=";" - cmd="sed -i '13s/.*/${profile_option}/' '${FILENAME}'" - eval $cmd - # run test_train_inference_python.sh - cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " - echo $cmd - eval $cmd - eval "cat ${log_path}/${log_name}" - - # without profile - log_path="$SAVE_LOG/train_log" - speed_log_path="$SAVE_LOG/index" - mkdir -p $log_path - mkdir -p $speed_log_path - log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log" - speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed" - func_sed_params "$FILENAME" "13" "null" # sed profile_id as null - cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " - echo $cmd - job_bt=`date '+%Y%m%d%H%M%S'` - eval $cmd - job_et=`date '+%Y%m%d%H%M%S'` - export model_run_time=$((${job_et}-${job_bt})) - eval "cat ${log_path}/${log_name}" - - # parser log - _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}" - cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ - --speed_log_file '${speed_log_path}/${speed_log_name}' \ - --model_name ${_model_name} \ - --base_batch_size ${batch_size} \ - --run_mode ${run_mode} \ - --run_process_type ${run_process_type} \ - --fp_item ${precision} \ - --keyword samples/s: \ - --skip_steps 2 \ - --device_num ${device_num} \ - --speed_unit images/s \ - --convergence_key loss: " - echo $cmd - eval $cmd - last_status=${PIPESTATUS[0]} - status_check $last_status "${cmd}" "${status_log}" + if [ ${precision} = "null" ];then + precision="fp32" + fi -else - unset_env=`unset CUDA_VISIBLE_DEVICES` - log_path="$SAVE_LOG/train_log" - speed_log_path="$SAVE_LOG/index" - mkdir -p $log_path - mkdir -p $speed_log_path - log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log" - speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed" - func_sed_params "$FILENAME" "4" "$gpu_id" # sed used gpu_id - func_sed_params "$FILENAME" "13" "null" # sed --profile_option as null - cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " - echo $cmd - job_bt=`date '+%Y%m%d%H%M%S'` - eval $cmd - job_et=`date '+%Y%m%d%H%M%S'` - export model_run_time=$((${job_et}-${job_bt})) - eval "cat ${log_path}/${log_name}" - # parser log - _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}" - - cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ - --speed_log_file '${speed_log_path}/${speed_log_name}' \ - --model_name ${_model_name} \ - --base_batch_size ${batch_size} \ - --run_mode ${run_mode} \ - --run_process_type ${run_process_type} \ - --fp_item ${precision} \ - --keyword samples/s: \ - --skip_steps 2 \ - --device_num ${device_num} \ - --speed_unit images/s \ - --convergence_key loss: " - echo $cmd - eval $cmd - last_status=${PIPESTATUS[0]} - status_check $last_status "${cmd}" "${status_log}" + fp_items_list=($precision) + batch_size_list=($batch_size) + device_num_list=($device_num) fi +IFS="|" +for batch_size in ${batch_size_list[*]}; do + for precision in ${fp_items_list[*]}; do + for device_num in ${device_num_list[*]}; do + echo "for $batch_size $precision $device_num $epoch" + # sed batchsize and precision + func_sed_params "$FILENAME" "6" "$precision" + func_sed_params "$FILENAME" "9" "$MODE=$batch_size" + func_sed_params "$FILENAME" "7" "$MODE=$epoch" + gpu_id=$(set_gpu_id $device_num) + + if [ ${#gpu_id} -le 1 ];then + run_process_type="SingleP" + log_path="$SAVE_LOG/profiling_log" + mkdir -p $log_path + log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling" + func_sed_params "$FILENAME" "4" "0" # sed used gpu_id + # set profile_option params + echo "profile_option: ${profile_option}" + tmp=`sed -i "13s/.*/${profile_option}/" "${FILENAME}"` + + # run test_train_inference_python.sh + cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " + echo $cmd + eval $cmd + eval "cat ${log_path}/${log_name}" + + # without profile + log_path="$SAVE_LOG/train_log" + speed_log_path="$SAVE_LOG/index" + mkdir -p $log_path + mkdir -p $speed_log_path + log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log" + speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed" + func_sed_params "$FILENAME" "13" "null" # sed profile_id as null + cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " + echo $cmd + job_bt=`date '+%Y%m%d%H%M%S'` + eval $cmd + job_et=`date '+%Y%m%d%H%M%S'` + export model_run_time=$((${job_et}-${job_bt})) + eval "cat ${log_path}/${log_name}" + + # parser log + _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}" + cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ + --speed_log_file '${speed_log_path}/${speed_log_name}' \ + --model_name ${_model_name} \ + --base_batch_size ${batch_size} \ + --run_mode ${run_mode} \ + --run_process_type ${run_process_type} \ + --fp_item ${precision} \ + --keyword ips: \ + --skip_steps 2 \ + --device_num ${device_num} \ + --speed_unit samples/s \ + --convergence_key loss: " + echo $cmd + eval $cmd + last_status=${PIPESTATUS[0]} + status_check $last_status "${cmd}" "${status_log}" + else + IFS=";" + unset_env=`unset CUDA_VISIBLE_DEVICES` + run_process_type="MultiP" + log_path="$SAVE_LOG/train_log" + speed_log_path="$SAVE_LOG/index" + mkdir -p $log_path + mkdir -p $speed_log_path + log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log" + speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed" + func_sed_params "$FILENAME" "4" "$gpu_id" # sed used gpu_id + func_sed_params "$FILENAME" "13" "null" # sed --profile_option as null + cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " + echo $cmd + job_bt=`date '+%Y%m%d%H%M%S'` + eval $cmd + job_et=`date '+%Y%m%d%H%M%S'` + export model_run_time=$((${job_et}-${job_bt})) + eval "cat ${log_path}/${log_name}" + # parser log + _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}" + + cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \ + --speed_log_file '${speed_log_path}/${speed_log_name}' \ + --model_name ${_model_name} \ + --base_batch_size ${batch_size} \ + --run_mode ${run_mode} \ + --run_process_type ${run_process_type} \ + --fp_item ${precision} \ + --keyword ips: \ + --skip_steps 2 \ + --device_num ${device_num} \ + --speed_unit images/s \ + --convergence_key loss: " + echo $cmd + eval $cmd + last_status=${PIPESTATUS[0]} + status_check $last_status "${cmd}" "${status_log}" + fi + done + done +done \ No newline at end of file diff --git a/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt b/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt index 6f510a4b..aba4d7fd 100644 --- a/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt +++ b/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt @@ -4,9 +4,9 @@ python:python3.7 gpu_list:0|0,1 Global.use_gpu:True|True Global.auto_cast:null -Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300|benchmark_train=2 +Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300 Global.save_model_dir:./output/ -Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4|benchmark_train=16 +Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4 Global.pretrained_model:null train_model_name:latest train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/ @@ -49,6 +49,9 @@ inference:tools/infer/predict_det.py null:null --benchmark:True null:null -===========================benchmark_params========================== +===========================train_benchmark_params========================== +batch_size:8|16 +fp_items:fp32|fp16 +epoch:2 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096 \ No newline at end of file diff --git a/tools/program.py b/tools/program.py index f5d36ce1..f253e581 100755 --- a/tools/program.py +++ b/tools/program.py @@ -283,7 +283,7 @@ def train(config, eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec))) strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \ '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \ - 'samples/s: {:.5f}, eta: {}'.format( + 'ips: {:.5f} , eta: {}'.format( epoch, epoch_num, global_step, logs, train_reader_cost / print_batch_step, train_batch_cost / print_batch_step, -- GitLab