未验证 提交 79b2287d 编写于 作者: L lzzyzlbb 提交者: GitHub

Add profile (#463)

* fix benchmark

* Add profile
上级 bc27b744
...@@ -42,6 +42,13 @@ nvidia-docker run --name test_paddlegan -i \ ...@@ -42,6 +42,13 @@ nvidia-docker run --name test_paddlegan -i \
${ImageName} /bin/bash -c "${run_cmd}"
```
如果需要打开 profile 选项,可以直接将 `run_cmd` 替换为:
```
run_cmd="set -xe;
cd /workspace ;
bash -x benchmark/run_all.sh on"
```
## 输出
执行完成后,在 PaddleGAN 目录会产出模型训练性能数据的文件,比如 `esrgan_mp_bs32_fp32_8` 等文件。
...@@ -53,6 +53,7 @@ function parse_yaml { ...@@ -53,6 +53,7 @@ function parse_yaml {
} }
eval $(parse_yaml "benchmark/benchmark.yaml") eval $(parse_yaml "benchmark/benchmark.yaml")
profile=${1:-"off"}
for model_mode in ${model_mode_list[@]}; do for model_mode in ${model_mode_list[@]}; do
eval fp_item_list='$'"${model_mode}_fp_item" eval fp_item_list='$'"${model_mode}_fp_item"
...@@ -82,15 +83,15 @@ for model_mode in ${model_mode_list[@]}; do ...@@ -82,15 +83,15 @@ for model_mode in ${model_mode_list[@]}; do
do do
echo "index is speed, 1gpus, begin, ${model_name}" echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp run_mode=sp
CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} # (5min) CUDA_VISIBLE_DEVICES=0 benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile} # (5min)
sleep 60 sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp run_mode=mp
basicvsr_name=basicvsr basicvsr_name=basicvsr
if [ ${model_mode} = ${basicvsr_name} ]; then if [ ${model_mode} = ${basicvsr_name} ]; then
CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} CUDA_VISIBLE_DEVICES=0,1,2,3 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
else else
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} ${mode} ${max_iter} ${model_mode} ${config} ${log_interval} ${profile}
fi fi
sleep 60 sleep 60
done done
......
...@@ -12,6 +12,7 @@ function _set_params(){ ...@@ -12,6 +12,7 @@ function _set_params(){
config=${7:-"config"} config=${7:-"config"}
log_interval=${8:-"1"} log_interval=${8:-"1"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 后续QA设置该参数 run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 后续QA设置该参数
need_profile=${9:-"off"}
# 以下不用修改 # 以下不用修改
device=${CUDA_VISIBLE_DEVICES//,/ } device=${CUDA_VISIBLE_DEVICES//,/ }
...@@ -19,6 +20,7 @@ function _set_params(){ ...@@ -19,6 +20,7 @@ function _set_params(){
num_gpu_devices=${#arr[*]} num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices} log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
res_log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed res_log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed
log_profile=${run_log_path}/${model_name}_model.profile
} }
function _analysis_log(){ function _analysis_log(){
...@@ -29,7 +31,14 @@ function _train(){ ...@@ -29,7 +31,14 @@ function _train(){
echo "Train on ${num_gpu_devices} GPUs" echo "Train on ${num_gpu_devices} GPUs"
echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
train_cmd="--config-file=${config} profiler_cmd=""
profiler_options="batch_range=[10,20];profile_path=${log_profile}"
if [ $need_profile = "on" ]; then
profiler_cmd="--profiler_options=${profiler_options}"
fi
train_cmd="${profiler_cmd}
--config-file=${config}
-o dataset.train.batch_size=${batch_size} -o dataset.train.batch_size=${batch_size}
log_config.interval=${log_interval} log_config.interval=${log_interval}
${mode}=${max_iter} " ${mode}=${max_iter} "
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册