From e039650ef7fefc2acad74eb9c1b945b619fa7594 Mon Sep 17 00:00:00 2001
From: LDOUBLEV <liuvv0203@outlook.com>
Date: Tue, 8 Feb 2022 02:06:45 +0000
Subject: [PATCH] add benchmark_train.sh v2

---
 test_tipc/benchmark_trainv2.sh                | 286 ++++++++++--------
 .../det_mv3_db_v2/train_infer_python.txt      |   9 +-
 tools/program.py                              |   2 +-
 3 files changed, 165 insertions(+), 132 deletions(-)

diff --git a/test_tipc/benchmark_trainv2.sh b/test_tipc/benchmark_trainv2.sh
index 27518a49..e1050e62 100644
--- a/test_tipc/benchmark_trainv2.sh
+++ b/test_tipc/benchmark_trainv2.sh
@@ -1,6 +1,14 @@
 #!/bin/bash
 source test_tipc/common_func.sh
 
+# set env: python interpreter, plus exported git branch/commit and paddlepaddle-gpu version/commit
+python=python3.7
+export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
+export model_commit=$(git log|head -n1|awk '{print $2}') 
+export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
+export frame_version=${str_tmp%%.post*}
+export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
+
 # run benchmark sh 
 # Usage:
 # bash run_benchmark_train.sh config.txt params
@@ -55,30 +63,15 @@ function get_repo_name(){
 }
 
 FILENAME=$1
+# copy FILENAME to a working file and use that copy as the new FILENAME
+new_filename="./test_tipc/benchmark_train.txt"
+cmd=`yes|cp $FILENAME $new_filename`
+FILENAME=$new_filename
 # MODE be one of ['benchmark_train']
 MODE=$2
-params=$3
+PARAMS=$3
 # bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2.0/train_benchmark.txt  benchmark_train dynamic_bs8_null_SingleP_DP_N1C1
-IFS="\n"
-
-# parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_process_type}_${run_mode}_${device_num}
-IFS="_"
-params_list=(${params})
-model_type=${params_list[0]}
-batch_size=${params_list[1]}
-batch_size=`echo  ${batch_size} | tr -cd "[0-9]" `
-precision=${params_list[2]}
-run_process_type=${params_list[3]}
-run_mode=${params_list[4]}
-device_num=${params_list[5]}
-device_num_copy=$device_num
-IFS=";"
-
-
-# sed batchsize and precision
-func_sed_params "$FILENAME" "6" "$precision"
-func_sed_params "$FILENAME" "9" "$batch_size"
-
+IFS=$'\n'
 # parser params from train_benchmark.txt
 dataline=`cat $FILENAME`
 # parser params
@@ -87,24 +80,22 @@ lines=(${dataline})
 model_name=$(func_parser_value "${lines[1]}")
 
 # 获取benchmark_params所在的行数
-line_num=`grep -n "benchmark_params" $FILENAME  | cut -d ":" -f 1`
+line_num=`grep -n "train_benchmark_params" $FILENAME  | cut -d ":" -f 1`
 # for train log parser
+batch_size=$(func_parser_value "${lines[line_num]}")
+line_num=`expr $line_num + 1`
+fp_items=$(func_parser_value "${lines[line_num]}")
 line_num=`expr $line_num + 1`
+epoch=$(func_parser_value "${lines[line_num]}")
 
+line_num=`expr $line_num + 1`
 profile_option_key=$(func_parser_key "${lines[line_num]}")
 profile_option_params=$(func_parser_value "${lines[line_num]}")
 profile_option="${profile_option_key}:${profile_option_params}"
 
 line_num=`expr $line_num + 1`
 flags_value=$(func_parser_value "${lines[line_num]}")
-
-gpu_id=$(set_gpu_id $device_num)
-repo_name=$(get_repo_name )
-
-SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
-status_log="${SAVE_LOG}/benchmark_log/results.log"
-
-# set export 
+# set flags
 IFS=";"
 flags_list=(${flags_value})
 for _flag in ${flags_list[*]}; do
@@ -112,112 +103,151 @@ for _flag in ${flags_list[*]}; do
     eval $cmd
 done
 
-if [ ${precision} = "null" ];then
-    precision="fp32"
-fi
-
-# set env
-python=python
-export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
-export model_commit=$(git log|head -n1|awk '{print $2}') 
-export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
-export frame_version=${str_tmp%%.post*}
-export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
+# set log_name
+repo_name=$(get_repo_name )
+SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
+mkdir -p "${SAVE_LOG}/benchmark_log/"
+status_log="${SAVE_LOG}/benchmark_log/results.log"
 
 # set eval and export as null
 # line eval_py: 24
 # line export_py: 30
 func_sed_params "$FILENAME" "24" "null"
 func_sed_params "$FILENAME" "30" "null"
-func_sed_params "$FILENAME" "3"  "python"
-
-
-if [ ${#gpu_id} -le 1 ];then
-    log_path="$SAVE_LOG/profiling_log"
-    mkdir -p $log_path
-    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
-    func_sed_params "$FILENAME" "4" "0"  # sed used gpu_id 
-    # set profile_option params
+func_sed_params "$FILENAME" "3"  "$python"
+
+# build the sweep lists: from the config file when PARAMS is empty, otherwise from the PARAMS string
+if  [ ! -n "$PARAMS" ] ;then
+    # PARAMS is empty: sweep every batch size and precision listed in the config file.
+    IFS="|"
+    batch_size_list=(${batch_size})
+    fp_items_list=(${fp_items})
+    device_num_list=(N1C4)
+    run_mode="DP"
+    echo "batchsize list: $batch_size_list ${batch_size_list[1]}"
+    echo "fp_item_lists: $fp_items_list ${fp_items_list[1]}"
+else
+    # parser params from input: modeltype_bs${bs_item}_${fp_item}_${run_process_type}_${run_mode}_${device_num}
+    IFS="_"
+    params_list=(${PARAMS})
+    model_type=${params_list[0]}
+    batch_size=${params_list[1]}
+    batch_size=`echo  ${batch_size} | tr -cd "[0-9]" `
+    precision=${params_list[2]}
+    run_process_type=${params_list[3]}
+    run_mode=${params_list[4]}
+    device_num=${params_list[5]}
     IFS=";"
-    cmd="sed -i '13s/.*/${profile_option}/' '${FILENAME}'"
-    eval $cmd
 
-    # run test_train_inference_python.sh
-    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
-    echo $cmd
-    eval $cmd
-    eval "cat ${log_path}/${log_name}"
-
-    # without profile
-    log_path="$SAVE_LOG/train_log"
-    speed_log_path="$SAVE_LOG/index"
-    mkdir -p $log_path
-    mkdir -p $speed_log_path
-    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
-    speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
-    func_sed_params "$FILENAME" "13" "null"  # sed profile_id as null
-    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
-    echo $cmd
-    job_bt=`date '+%Y%m%d%H%M%S'`
-    eval $cmd
-    job_et=`date '+%Y%m%d%H%M%S'`
-    export model_run_time=$((${job_et}-${job_bt}))
-    eval "cat ${log_path}/${log_name}"
-
-    # parser log
-    _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
-    cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
-            --speed_log_file '${speed_log_path}/${speed_log_name}' \
-            --model_name ${_model_name} \
-            --base_batch_size ${batch_size} \
-            --run_mode ${run_mode} \
-            --run_process_type ${run_process_type} \
-            --fp_item ${precision} \
-            --keyword samples/s: \
-            --skip_steps 2 \
-            --device_num ${device_num} \
-            --speed_unit images/s \
-            --convergence_key loss: "
-    echo $cmd
-    eval $cmd
-    last_status=${PIPESTATUS[0]}
-    status_check $last_status "${cmd}" "${status_log}"
+    if [ ${precision} = "null" ];then
+        precision="fp32"
+    fi
 
-else
-    unset_env=`unset CUDA_VISIBLE_DEVICES`
-    log_path="$SAVE_LOG/train_log"
-    speed_log_path="$SAVE_LOG/index"
-    mkdir -p $log_path
-    mkdir -p $speed_log_path
-    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
-    speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
-    func_sed_params "$FILENAME" "4" "$gpu_id"  # sed used gpu_id 
-    func_sed_params "$FILENAME" "13" "null"  # sed --profile_option as null
-    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
-    echo $cmd
-    job_bt=`date '+%Y%m%d%H%M%S'`
-    eval $cmd
-    job_et=`date '+%Y%m%d%H%M%S'`
-    export model_run_time=$((${job_et}-${job_bt}))
-    eval "cat ${log_path}/${log_name}"
-    # parser log
-    _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
-    
-    cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
-            --speed_log_file '${speed_log_path}/${speed_log_name}' \
-            --model_name ${_model_name} \
-            --base_batch_size ${batch_size} \
-            --run_mode ${run_mode} \
-            --run_process_type ${run_process_type} \
-            --fp_item ${precision} \
-            --keyword samples/s: \
-            --skip_steps 2 \
-            --device_num ${device_num} \
-            --speed_unit images/s \
-            --convergence_key loss: "
-    echo $cmd
-    eval $cmd
-    last_status=${PIPESTATUS[0]}
-    status_check $last_status "${cmd}" "${status_log}"
+    fp_items_list=($precision)
+    batch_size_list=($batch_size)
+    device_num_list=($device_num)
 fi
 
+IFS="|"
+for batch_size in ${batch_size_list[*]}; do 
+    for precision in ${fp_items_list[*]}; do
+        for device_num in ${device_num_list[*]}; do
+            echo "for $batch_size $precision $device_num $epoch"
+            # sed precision (line 6), epoch (line 7) and batch size (line 9) in $FILENAME for this run
+            func_sed_params "$FILENAME" "6" "$precision"
+            func_sed_params "$FILENAME" "9" "$MODE=$batch_size"
+            func_sed_params "$FILENAME" "7" "$MODE=$epoch"
+            gpu_id=$(set_gpu_id $device_num)
+
+            if [ ${#gpu_id} -le 1 ];then
+                run_process_type="SingleP"
+                log_path="$SAVE_LOG/profiling_log"
+                mkdir -p $log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
+                func_sed_params "$FILENAME" "4" "0"  # sed used gpu_id 
+                # set profile_option params
+                echo "profile_option: ${profile_option}"
+                tmp=`sed -i "13s/.*/${profile_option}/" "${FILENAME}"`
+
+                # run test_train_inference_python.sh
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                eval $cmd
+                eval "cat ${log_path}/${log_name}"
+
+                # without profile
+                log_path="$SAVE_LOG/train_log"
+                speed_log_path="$SAVE_LOG/index"
+                mkdir -p $log_path
+                mkdir -p $speed_log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                func_sed_params "$FILENAME" "13" "null"  # sed profile_id as null
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                job_bt=`date '+%s'`  # epoch seconds, so the subtraction below is a real duration
+                eval $cmd
+                job_et=`date '+%s'`
+                export model_run_time=$((${job_et}-${job_bt}))
+                eval "cat ${log_path}/${log_name}"
+
+                # parser log
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
+                        --model_name ${_model_name} \
+                        --base_batch_size ${batch_size} \
+                        --run_mode ${run_mode} \
+                        --run_process_type ${run_process_type} \
+                        --fp_item ${precision} \
+                        --keyword ips: \
+                        --skip_steps 2 \
+                        --device_num ${device_num} \
+                        --speed_unit samples/s \
+                        --convergence_key loss: "
+                echo $cmd
+                eval $cmd
+                last_status=${PIPESTATUS[0]}
+                status_check $last_status "${cmd}" "${status_log}"
+            else
+                IFS=";"
+                unset_env=`unset CUDA_VISIBLE_DEVICES`  # NOTE(review): runs in a subshell, so CUDA_VISIBLE_DEVICES stays set in this shell - confirm intent
+                run_process_type="MultiP"
+                log_path="$SAVE_LOG/train_log"
+                speed_log_path="$SAVE_LOG/index"
+                mkdir -p $log_path
+                mkdir -p $speed_log_path
+                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
+                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
+                func_sed_params "$FILENAME" "4" "$gpu_id"  # sed used gpu_id 
+                func_sed_params "$FILENAME" "13" "null"  # sed --profile_option as null
+                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                echo $cmd
+                job_bt=`date '+%s'`  # epoch seconds, so the subtraction below is a real duration
+                eval $cmd
+                job_et=`date '+%s'`
+                export model_run_time=$((${job_et}-${job_bt}))
+                eval "cat ${log_path}/${log_name}"
+                # parser log
+                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
+                
+                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
+                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
+                        --model_name ${_model_name} \
+                        --base_batch_size ${batch_size} \
+                        --run_mode ${run_mode} \
+                        --run_process_type ${run_process_type} \
+                        --fp_item ${precision} \
+                        --keyword ips: \
+                        --skip_steps 2 \
+                        --device_num ${device_num} \
+                        --speed_unit samples/s \
+                        --convergence_key loss: "
+                echo $cmd
+                eval $cmd
+                last_status=${PIPESTATUS[0]}
+                status_check $last_status "${cmd}" "${status_log}"
+            fi
+        done
+    done
+done
\ No newline at end of file
diff --git a/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt b/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt
index 6f510a4b..aba4d7fd 100644
--- a/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt
+++ b/test_tipc/configs/det_mv3_db_v2/train_infer_python.txt
@@ -4,9 +4,9 @@ python:python3.7
 gpu_list:0|0,1
 Global.use_gpu:True|True
 Global.auto_cast:null
-Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300|benchmark_train=2
+Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
 Global.save_model_dir:./output/
-Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4|benchmark_train=16
+Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
 Global.pretrained_model:null
 train_model_name:latest
 train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
@@ -49,6 +49,9 @@ inference:tools/infer/predict_det.py
 null:null
 --benchmark:True
 null:null
-===========================benchmark_params==========================
+===========================train_benchmark_params==========================
+batch_size:8|16
+fp_items:fp32|fp16
+epoch:2
 --profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
 flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
\ No newline at end of file
diff --git a/tools/program.py b/tools/program.py
index f5d36ce1..f253e581 100755
--- a/tools/program.py
+++ b/tools/program.py
@@ -283,7 +283,7 @@ def train(config,
                 eta_sec_format = str(datetime.timedelta(seconds=int(eta_sec)))
                 strs = 'epoch: [{}/{}], global_step: {}, {}, avg_reader_cost: ' \
                        '{:.5f} s, avg_batch_cost: {:.5f} s, avg_samples: {}, ' \
-                       'samples/s: {:.5f}, eta: {}'.format(
+                       'ips: {:.5f} , eta: {}'.format(
                     epoch, epoch_num, global_step, logs,
                     train_reader_cost / print_batch_step,
                     train_batch_cost / print_batch_step,
-- 
GitLab