未验证 提交 14d82854 编写于 作者: W Walter 提交者: GitHub

Merge pull request #1679 from RainFrost1/benchmark

add benchmark for tipc
...@@ -48,7 +48,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id): ...@@ -48,7 +48,7 @@ def log_info(trainer, batch_size, epoch_id, iter_id):
for key in trainer.time_info for key in trainer.time_info
]) ])
ips_msg = "ips: {:.5f} images/sec".format( ips_msg = "ips: {:.5f} samples/s".format(
batch_size / trainer.time_info["batch_cost"].avg) batch_size / trainer.time_info["batch_cost"].avg)
eta_sec = ((trainer.config["Global"]["epochs"] - epoch_id + 1 eta_sec = ((trainer.config["Global"]["epochs"] - epoch_id + 1
) * len(trainer.train_dataloader) - iter_id ) * len(trainer.train_dataloader) - iter_id
......
#!/bin/bash
# Benchmark driver: runs the training benchmark described by a TIPC config
# file and parses the resulting logs.
# Usage:
#   bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]
# e.g.
#   bash test_tipc/benchmark_train.sh <config.txt> benchmark_train dynamic_bs8_null_DP_N1C1
source test_tipc/common_func.sh

# set env
# Interpreter used for the paddle version probe and for analysis.py.
python=python
# Branch name = everything after "refs/heads/". "-f 3-" (instead of "-f 3")
# keeps branch names that themselves contain "/" (e.g. feature/foo) intact.
# Empty when HEAD is detached (git symbolic-ref fails and stderr is dropped).
export model_branch=$(git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3-)
# First line of `git log` is "commit <sha>"; keep the sha.
export model_commit=$(git log|head -n1|awk '{print $2}')
# Installed paddlepaddle-gpu version as printed by `pip list` (2nd column).
export str_tmp=$(echo $(pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'))
# Strip a trailing ".post*" build suffix from the version string.
export frame_version=${str_tmp%%.post*}
# Commit id the installed paddle build reports for itself.
export frame_commit=$(echo $(${python} -c "import paddle;print(paddle.version.commit)"))
#######################################
# Print the second "="-separated field of a string, i.e. the value of a
# "key=value" pair. Prints an empty line when the string has no "=".
# Uses parameter expansion instead of IFS word splitting, so the caller's
# IFS is left untouched and values such as "*" are not glob-expanded.
# Arguments: $1 - string of the form key=value
# Outputs:   the value on stdout
#######################################
function func_parser_params(){
    local strs=$1
    local value=""
    if [[ "${strs}" == *=* ]]; then
        value=${strs#*=}     # drop everything up to and including the first "="
        value=${value%%=*}   # keep only the second field, as the original split did
    fi
    printf '%s\n' "${value}"
}
#######################################
# Replace the value of a "key:value" line of a config file in place.
# The key (text before the first ":") of the given line is kept and the rest
# of the line becomes the new value.
# The replacement text is escaped for sed, so values containing "/", "&" or
# "\" (e.g. an absolute interpreter path) are written literally. No eval is
# used and the caller's IFS is not modified.
# Arguments: $1 - path of the config file
#            $2 - 1-based line number to rewrite
#            $3 - new value
# Returns:   exit status of the in-place sed
#######################################
function func_sed_params(){
    local filename=$1
    local line=$2
    local param_value=$3
    local params key new_params escaped
    params=$(sed -n "${line}p" "${filename}")
    key=${params%%:*}
    new_params="${key}:${param_value}"
    # Backslash-escape the characters that are special in the replacement
    # part of s/.../.../ ("\", "/" delimiter and "&").
    escaped=$(printf '%s' "${new_params}" | sed -e 's|[\\/&]|\\&|g')
    sed -i "${line}s/.*/${escaped}/" "${filename}"
}
#######################################
# Turn a device spec "N<nodes>C<cards>" into the comma separated list of GPU
# ids to use: 0 .. (cards - 1) / nodes  (integer division).
#   N1C1 -> 0        N1C4 -> 0,1,2,3        N2C16 -> 0,1,...,7
# The first character of the spec is skipped without being checked, as before.
# The caller's IFS is not modified.
# Arguments: $1 - device spec, e.g. N1C4
# Outputs:   the id list on stdout; a diagnostic on stderr for malformed input
# Returns:   0 on success, 1 when the spec cannot be parsed or nodes is 0
#######################################
function set_gpu_id(){
    local string=$1
    local _str=${string:1}
    local nodes cards last_id
    if [[ ! "${_str}" =~ ^([0-9]+)C([0-9]+)$ ]]; then
        echo "set_gpu_id: malformed device spec '${string}', expected N<nodes>C<cards>" >&2
        return 1
    fi
    # 10# forces base 10 so zero-padded numbers are not read as octal.
    nodes=$((10#${BASH_REMATCH[1]}))
    cards=$((10#${BASH_REMATCH[2]}))
    if (( nodes == 0 )); then
        echo "set_gpu_id: node count must be positive in '${string}'" >&2
        return 1
    fi
    last_id=$(( (cards - 1) / nodes ))
    seq -s "," 0 "${last_id}"
}
#######################################
# Print the last path component of the current working directory (used as
# the repository name in log file names). Prints an empty line in "/".
# Implemented with parameter expansion: no IFS changes leak to the caller,
# no glob expansion of the path, no negative array subscripts.
# Outputs: directory name on stdout
#######################################
function get_repo_name(){
    local cur_dir
    cur_dir=$(pwd)
    printf '%s\n' "${cur_dir##*/}"
}
FILENAME=$1
# Fail fast when the config file is missing: otherwise every later step would
# silently operate on a stale or absent scratch copy.
if [ -z "$FILENAME" ] || [ ! -f "$FILENAME" ]; then
    echo "Usage: bash test_tipc/benchmark_train.sh <config.txt> benchmark_train [params]" >&2
    exit 1
fi
# Work on a scratch copy so the original config is never modified.
new_filename="./test_tipc/benchmark_train.txt"
# Skip the copy when the caller passed the scratch file itself
# (cp would refuse with "are the same file").
if [ ! "$FILENAME" -ef "$new_filename" ]; then
    # "yes |" answers any overwrite prompt (e.g. cp aliased to cp -i).
    if ! yes | cp -- "$FILENAME" "$new_filename"; then
        echo "benchmark_train.sh: cannot copy '$FILENAME' to '$new_filename'" >&2
        exit 1
    fi
fi
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
# Optional single configuration to run: modeltype_bs<N>_<fp_item>_<run_mode>_<device_num>
PARAMS=$3
# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
IFS=$'\n'
# parser params from train_benchmark.txt
# Adjust the training command options in the scratch copy: remove the
# shuffle=False and num_workers=0 overrides, and replace the
# use_shared_memory=False override with Global.eval_during_train=False.
sed -i 's/ -o DataLoader.Train.sampler.shuffle=False//g' "$FILENAME"
sed -i 's/ -o DataLoader.Train.loader.num_workers=0//g' "$FILENAME"
sed -i 's/-o DataLoader.Train.loader.use_shared_memory=False/-o Global.eval_during_train=False/g' "$FILENAME"
dataline=$(cat "$FILENAME")
# Parse the benchmark section of the config.
# The file content is split on newlines only; word splitting drops empty
# lines, so the indexes of `lines` count non-empty lines, starting at 0.
# func_parser_value / func_parser_key are not defined in this file
# (presumably they come from the sourced test_tipc/common_func.sh and return
# the value / key part of a "key:value" line -- TODO confirm).
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
# Line number (1-based, from grep -n) of the "train_benchmark_params" marker line.
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
# for train log parser
# grep -n counts from 1 while `lines` is indexed from 0, so lines[line_num]
# is the entry right after the marker (assuming no empty lines precede it).
# The entries are read in order: batch_size, fp_items, epoch, profile option, flags.
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
epoch=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
# The profile option is kept as a whole "key:params" string; it is later
# written verbatim into the profile line of the config.
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"
line_num=`expr $line_num + 1`
flags_value=$(func_parser_value "${lines[line_num]}")
# set flags
# flags_value is a ";"-separated list of NAME=VALUE pairs; each one is
# exported into the environment of this script and its children.
IFS=";"
flags_list=(${flags_value})
for _flag in ${flags_list[*]}; do
cmd="export ${_flag}"
eval $cmd
done
# Log destinations: everything is written below BENCHMARK_LOG_DIR when it is
# set, otherwise below the current directory.
repo_name=$(get_repo_name)
SAVE_LOG="${BENCHMARK_LOG_DIR:-$(pwd)}"   # */benchmark_log
status_log="${SAVE_LOG}/benchmark_log/results.log"
mkdir -p "${SAVE_LOG}/benchmark_log/"

# 1-based line numbers of the config entries that this script rewrites.
line_python=3
line_gpuid=4
line_precision=6
line_epoch=7
line_batchsize=9
line_profile=13
line_norm_train=16
line_eval_py=24
line_export_py=30

# Set the eval and export entries to "null", and write the chosen python
# interpreter into the config.
func_sed_params "$FILENAME" "${line_eval_py}" "null"
func_sed_params "$FILENAME" "${line_export_py}" "null"
func_sed_params "$FILENAME" "${line_python}" "$python"
# Build the lists of batch sizes, precisions and device specs to benchmark.
if [ -n "$PARAMS" ]; then
    # A single configuration was requested on the command line:
    # modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    model_type=${params_list[0]}
    # Keep only the digits of the batch size field ("bs8" -> "8").
    batch_size=$(echo ${params_list[1]} | tr -cd "[0-9]")
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"
    # "null" precision means the default, fp32.
    if [ ${precision} = "null" ]; then
        precision="fp32"
    fi
    fp_items_list=($precision)
    batch_size_list=($batch_size)
    device_num_list=($device_num)
else
    # No PARAMS given: sweep every "|"-separated batch size and precision
    # from the config on the default device spec.
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
fi
IFS="|"

#######################################
# Run one timed training pass (profiling disabled) for the current
# batch_size / precision / device_num and parse its log with analysis.py.
# Shared by the single-process and multi-process branches below, so both
# report the same speed unit ("samples/s", matching the "ips: ... samples/s"
# line of the training log).
# Globals read: FILENAME SAVE_LOG BENCHMARK_ROOT python repo_name model_name
#               batch_size precision run_process_type run_mode device_num
#               line_profile status_log
# Globals written: model_run_time (exported, elapsed wall-clock seconds)
#######################################
function run_timed_train_and_analysis(){
    local log_path="${SAVE_LOG}/train_log"
    local speed_log_path="${SAVE_LOG}/index"
    local run_tag="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}"
    local log_name="${run_tag}_log"
    local speed_log_name="${run_tag}_speed"
    local cmd job_bt job_et last_status _model_name
    mkdir -p "${log_path}" "${speed_log_path}"
    func_sed_params "$FILENAME" "${line_profile}" "null"  # sed --profile_option as null
    cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
    echo "$cmd"
    # Epoch seconds: subtracting %Y%m%d%H%M%S stamps is not a duration
    # (…0059 -> …0100 would yield 41 instead of 1).
    job_bt=$(date '+%s')
    eval "$cmd"
    job_et=$(date '+%s')
    export model_run_time=$((job_et - job_bt))
    cat "${log_path}/${log_name}"
    # parser log
    _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
    cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
        --speed_log_file '${speed_log_path}/${speed_log_name}' \
        --model_name ${_model_name} \
        --base_batch_size ${batch_size} \
        --run_mode ${run_mode} \
        --run_process_type ${run_process_type} \
        --fp_item ${precision} \
        --keyword ips: \
        --skip_steps 2 \
        --device_num ${device_num} \
        --speed_unit samples/s \
        --convergence_key loss: "
    echo "$cmd"
    eval "$cmd"
    last_status=${PIPESTATUS[0]}
    status_check "$last_status" "${cmd}" "${status_log}"
}

for batch_size in "${batch_size_list[@]}"; do
    for precision in "${fp_items_list[@]}"; do
        for device_num in "${device_num_list[@]}"; do
            # sed batchsize and precision
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$epoch"
            gpu_id=$(set_gpu_id "$device_num")

            # A one-character id list ("0") means a single card.
            if [ ${#gpu_id} -le 1 ]; then
                run_process_type="SingleP"
                # First pass: run with the profiler option enabled.
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p "$log_path"
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
                # set profile_option params
                sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"
                # run test_train_inference_python.sh
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo "$cmd"
                eval "$cmd"
                cat "${log_path}/${log_name}"
                # Second pass: without profile, timed and analysed.
                run_timed_train_and_analysis
            else
                # Must run in the current shell: inside `...` the unset
                # happened in a subshell and had no effect.
                unset CUDA_VISIBLE_DEVICES
                run_process_type="MultiP"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id
                run_timed_train_and_analysis
            fi
        done
    done
done
# Regroup the artifacts so that everything ends up below <log root>/train_log:
#   train_log/train_log/      per-run training logs
#   train_log/index/          parsed speed files
#   train_log/profiling_log/  profiler runs
# Absolute paths below the log root are used instead of cd/cd .., so a missing
# directory can never make the script wander out of the repository and move
# unrelated "index" or "profiling_log" directories.
log_root=${SAVE_LOG:-$(pwd)}
# Training logs are named "<repo_name>_..."; fall back to the historical
# "Paddle" prefix when repo_name is not available.
log_prefix=${repo_name:+${repo_name}_}
log_prefix=${log_prefix:-Paddle}
if [ -d "${log_root}/train_log" ]; then
    mkdir -p "${log_root}/train_log/train_log"
    for log_file in "${log_root}/train_log/${log_prefix}"*; do
        # Unmatched glob stays literal; skip it.
        [ -e "${log_file}" ] || continue
        mv -- "${log_file}" "${log_root}/train_log/train_log/"
    done
    for log_dir in index profiling_log; do
        # profiling_log only exists when a single-card run happened.
        if [ -d "${log_root}/${log_dir}" ]; then
            mv -- "${log_root}/${log_dir}" "${log_root}/train_log/"
        fi
    done
fi
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64|128
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64|128
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64|128
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:256|640
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:32
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:128
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:256|1536
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64|104
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml ...@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.benchmark:True -o Global.benchmark:True
null:null null:null
null:null null:null
===========================train_benchmark_params==========================
batch_size:64|176
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
# TIPC Linux端Benchmark测试文档
该文档为Benchmark测试说明,Benchmark预测功能测试的主程序为`benchmark_train.sh`,用于验证监控模型训练的性能。
# 1. 测试流程
## 1.1 准备数据和环境安装
运行`test_tipc/prepare.sh`,完成训练数据准备和安装环境流程。
```shell
# 运行格式:bash test_tipc/prepare.sh train_benchmark.txt mode
bash test_tipc/prepare.sh test_tipc/configs/MobileNetV2/MobileNetV2_train_infer_python.txt benchmark_train
```
## 1.2 功能测试
执行`test_tipc/benchmark_train.sh`,完成模型训练和日志解析
```shell
# 运行格式:bash test_tipc/benchmark_train.sh train_benchmark.txt mode
bash test_tipc/benchmark_train.sh test_tipc/configs/MobileNetV2/MobileNetV2_train_infer_python.txt benchmark_train
```
`test_tipc/benchmark_train.sh`支持根据传入的第三个参数实现只运行某一个训练配置,如下:
```shell
# 运行格式:bash test_tipc/benchmark_train.sh train_benchmark.txt mode params
bash test_tipc/benchmark_train.sh test_tipc/configs/MobileNetV2/MobileNetV2_train_infer_python.txt benchmark_train dynamic_bs8_fp32_DP_N1C1
```
dynamic_bs8_fp32_DP_N1C1为test_tipc/benchmark_train.sh传入的参数,格式如下:
`${modeltype}_${batch_size}_${fp_item}_${run_mode}_${device_num}`
包含的信息有:模型类型、batchsize大小、训练精度如fp32,fp16等、分布式运行模式以及分布式训练使用的机器信息如单机单卡(N1C1)。
# 2. 日志输出
运行后将保存模型的训练日志和解析日志,使用 `test_tipc/configs/MobileNetV2/MobileNetV2_train_infer_python.txt` 参数文件的训练日志解析结果是:
```
{"model_branch": "dygaph", "model_commit": "7c39a1996b19087737c05d883fd346d2f39dbcc0", "model_name": "cls_MobileNetV2_bs8_fp32_SingleP_DP", "batch_size": 8, "fp_item": "fp32", "run_process_type": "SingleP", "run_mode": "DP", "convergence_value": "5.413110", "convergence_key": "loss:", "ips": 19.333, "speed_unit": "samples/s", "device_num": "N1C1", "model_run_time": "0", "frame_commit": "8cc09552473b842c651ead3b9848d41827a3dbab", "frame_version": "0.0.0"}
```
训练日志和日志解析结果保存在benchmark_log目录下,文件组织格式如下:
```
train_log/
├── index
│   ├── PaddleClas_cls_MobileNetV2_bs8_fp32_SingleP_DP_N1C1_speed
│   └── PaddleClas_cls_MobileNetV2_bs8_fp32_SingleP_DP_N1C4_speed
├── profiling_log
│   └── PaddleClas_cls_MobileNetV2_bs8_fp32_SingleP_DP_N1C1_profiling
└── train_log
├── PaddleClas_cls_MobileNetV2_bs8_fp32_SingleP_DP_N1C1_log
└── PaddleClas_cls_MobileNetV2_bs8_fp32_SingleP_DP_N1C4_log
```
...@@ -182,3 +182,15 @@ if [ ${MODE} = "paddle2onnx_infer" ];then ...@@ -182,3 +182,15 @@ if [ ${MODE} = "paddle2onnx_infer" ];then
wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar && tar xf ResNet50_vd_infer.tar wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar && tar xf ResNet50_vd_infer.tar
cd ../../ cd ../../
fi fi
# benchmark_train: install requirements and prepare the dataset. The
# ImageNet validation archive is downloaded and linked as ILSVRC2012, and its
# val_list.txt is also exposed under the name train_list.txt.
if [ "${MODE}" = "benchmark_train" ];then
    pip install -r requirements.txt
    # Stop if the directory is missing: the rm -rf / download / "cd ../../"
    # below must never run from the wrong place.
    cd dataset || exit 1
    rm -rf ILSVRC2012
    wget -nc https://paddle-imagenet-models-name.bj.bcebos.com/data/ImageNet1k/ILSVRC2012_val.tar
    tar xf ILSVRC2012_val.tar
    ln -s ILSVRC2012_val ILSVRC2012
    cd ILSVRC2012 || exit 1
    # -f keeps re-runs idempotent (the link survives inside ILSVRC2012_val).
    ln -sf val_list.txt train_list.txt
    cd ../../
fi
...@@ -90,6 +90,10 @@ infer_value1=$(func_parser_value "${lines[50]}") ...@@ -90,6 +90,10 @@ infer_value1=$(func_parser_value "${lines[50]}")
if [ ! $epoch_num ]; then if [ ! $epoch_num ]; then
epoch_num=2 epoch_num=2
fi fi
# Benchmark runs always train for a single epoch. MODE is quoted so that an
# empty or unset MODE compares as false instead of making `[` fail with
# "unary operator expected".
if [ "$MODE" = 'benchmark_train' ]; then
    epoch_num=1
fi
LOG_PATH="./test_tipc/output" LOG_PATH="./test_tipc/output"
mkdir -p ${LOG_PATH} mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log" status_log="${LOG_PATH}/results_python.log"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册