From 05c0bf46e95cafe4f1e4523e8541aa34ca55d1f0 Mon Sep 17 00:00:00 2001 From: gmm <38800877+mmglove@users.noreply.github.com> Date: Wed, 14 Jun 2023 20:32:11 +0800 Subject: [PATCH] fix profile (#794) --- ppgan/utils/profiler.py | 43 +++++++++++++++++------- test_tipc/benchmark_train.sh | 35 +++++++++++-------- test_tipc/test_train_inference_python.sh | 6 ++-- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/ppgan/utils/profiler.py b/ppgan/utils/profiler.py index b735973..629ef4e 100644 --- a/ppgan/utils/profiler.py +++ b/ppgan/utils/profiler.py @@ -13,7 +13,7 @@ # limitations under the License. import sys -import paddle +import paddle.profiler as profiler # A global variable to record the number of calling times for profiler # functions. It is used to specify the tracing range of training steps. @@ -21,7 +21,7 @@ _profiler_step_id = 0 # A global variable to avoid parsing from string every time. _profiler_options = None - +_prof = None class ProfilerOptions(object): ''' @@ -31,6 +31,7 @@ class ProfilerOptions(object): "profile_path=model.profile" "batch_range=[50, 60]; profile_path=model.profile" "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile" + ProfilerOptions supports following key-value pair: batch_range - a integer list, e.g. [100, 110]. state - a string, the optional values are 'CPU', 'GPU' or 'All'. @@ -52,7 +53,8 @@ class ProfilerOptions(object): 'sorted_key': 'total', 'tracer_option': 'Default', 'profile_path': '/tmp/profile', - 'exit_on_finished': True + 'exit_on_finished': True, + 'timer_only': True } self._parse_from_string(options_str) @@ -71,6 +73,8 @@ class ProfilerOptions(object): 'state', 'sorted_key', 'tracer_option', 'profile_path' ]: self._options[key] = value + elif key == 'timer_only': + self._options[key] = value def __getitem__(self, name): if self._options.get(name, None) is None: @@ -84,28 +88,41 @@ def add_profiler_step(options_str=None): Enable the operator-level timing using PaddlePaddle's profiler. The profiler uses a independent variable to count the profiler steps. One call of this function is treated as a profiler step. - Args: - options_str - a string to initialize the ProfilerOptions. + profiler_options - a string to initialize the ProfilerOptions. Default is None, and the profiler is disabled. ''' if options_str is None: return + global _prof global _profiler_step_id global _profiler_options if _profiler_options is None: _profiler_options = ProfilerOptions(options_str) - - if _profiler_step_id == _profiler_options['batch_range'][0]: - paddle.utils.profiler.start_profiler( - _profiler_options['state'], _profiler_options['tracer_option']) - elif _profiler_step_id == _profiler_options['batch_range'][1]: - paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'], - _profiler_options['profile_path']) + # profile : https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/performance_improving/profiling_model.html#chakanxingnengshujudetongjibiaodan + # timer_only = True only the model's throughput and time overhead are displayed + # timer_only = False calling summary can print a statistical form that presents performance data from different perspectives. + # timer_only = False the output Timeline information can be found in the profiler_log directory + if _prof is None: + _timer_only = str(_profiler_options['timer_only']) == str(True) + _prof = profiler.Profiler( + scheduler = (_profiler_options['batch_range'][0], _profiler_options['batch_range'][1]), + on_trace_ready = profiler.export_chrome_tracing('./profiler_log'), + timer_only = _timer_only) + _prof.start() + else: + _prof.step() + + if _profiler_step_id == _profiler_options['batch_range'][1]: + _prof.stop() + _prof.summary( + op_detail=True, + thread_sep=False, + time_unit='ms') + _prof = None if _profiler_options['exit_on_finished']: sys.exit(0) _profiler_step_id += 1 - diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh index 76e1413..fd25613 100644 --- a/test_tipc/benchmark_train.sh +++ b/test_tipc/benchmark_train.sh @@ -113,7 +113,8 @@ repo_name=$(get_repo_name ) SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log mkdir -p "${SAVE_LOG}/benchmark_log/" status_log="${SAVE_LOG}/benchmark_log/results.log" - +# get benchmark profiling params : PROFILING_TIMER_ONLY=no|True|False +PROFILING_TIMER_ONLY=${PROFILING_TIMER_ONLY:-"True"} # The number of lines in which train params can be replaced. line_python=3 line_gpuid=4 @@ -175,19 +176,25 @@ for batch_size in ${batch_size_list[*]}; do gpu_id=$(set_gpu_id $device_num) if [ ${#gpu_id} -le 1 ];then - log_path="$SAVE_LOG/profiling_log" - mkdir -p $log_path - log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling" - func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id - # set profile_option params - tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"` - - # run test_train_inference_python.sh - cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " - echo $cmd - eval $cmd - eval "cat ${log_path}/${log_name}" - + func_sed_params "$FILENAME" "${line_gpuid}" "0" # sed used gpu_id + if [[ ${PROFILING_TIMER_ONLY} != "no" ]];then + echo "run profile" + # The default value of profile_option's timer_only parameter is True + if [[ ${PROFILING_TIMER_ONLY} = "False" ]];then + profile_option="${profile_option};timer_only=False" + fi + log_path="$SAVE_LOG/profiling_log" + mkdir -p $log_path + log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling" + # set profile_option params + tmp=`sed -i "${line_profile}s/.*/\"${profile_option}\"/" "${FILENAME}"` + # run test_train_inference_python.sh + cmd="timeout 5m bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 " + echo $cmd + eval ${cmd} + eval "cat ${log_path}/${log_name}" + fi + echo "run without profile" # without profile log_path="$SAVE_LOG/train_log" speed_log_path="$SAVE_LOG/index" diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh index c5cdce8..40b9eb0 100644 --- a/test_tipc/test_train_inference_python.sh +++ b/test_tipc/test_train_inference_python.sh @@ -254,11 +254,11 @@ else fi set_save_model=$(func_set_params "${save_model_key}" "${save_log}") if [ ${#gpu} -le 2 ];then # train with cpu or single gpu - cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_train_params1} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level}" + cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1} " elif [ ${#ips} -le 26 ];then # train with multi-gpu - cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_train_params1} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level}" + cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1}" else # train with multi-machine - cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_train_params1} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_amp_config} ${set_amp_level}" + cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1}" fi # run train export FLAGS_cudnn_deterministic=True -- GitLab