From 05c0bf46e95cafe4f1e4523e8541aa34ca55d1f0 Mon Sep 17 00:00:00 2001
From: gmm <38800877+mmglove@users.noreply.github.com>
Date: Wed, 14 Jun 2023 20:32:11 +0800
Subject: [PATCH] fix profile (#794)

---
 ppgan/utils/profiler.py                  | 43 +++++++++++++++++-------
 test_tipc/benchmark_train.sh             | 35 +++++++++++--------
 test_tipc/test_train_inference_python.sh |  6 ++--
 3 files changed, 54 insertions(+), 30 deletions(-)

diff --git a/ppgan/utils/profiler.py b/ppgan/utils/profiler.py
index b735973..629ef4e 100644
--- a/ppgan/utils/profiler.py
+++ b/ppgan/utils/profiler.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import sys
-import paddle
+import paddle.profiler as profiler
 
 # A global variable to record the number of calling times for profiler
 # functions. It is used to specify the tracing range of training steps.
@@ -21,7 +21,7 @@ _profiler_step_id = 0
 
 # A global variable to avoid parsing from string every time.
 _profiler_options = None
-
+_prof = None
 
 class ProfilerOptions(object):
     '''
@@ -31,6 +31,7 @@ class ProfilerOptions(object):
       "profile_path=model.profile"
       "batch_range=[50, 60]; profile_path=model.profile"
       "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+
     ProfilerOptions supports following key-value pair:
       batch_range      - a integer list, e.g. [100, 110].
       state            - a string, the optional values are 'CPU', 'GPU' or 'All'. 
@@ -52,7 +53,8 @@ class ProfilerOptions(object):
             'sorted_key': 'total',
             'tracer_option': 'Default',
             'profile_path': '/tmp/profile',
-            'exit_on_finished': True
+            'exit_on_finished': True,
+            'timer_only': True
         }
         self._parse_from_string(options_str)
 
@@ -71,6 +73,8 @@ class ProfilerOptions(object):
                     'state', 'sorted_key', 'tracer_option', 'profile_path'
             ]:
                 self._options[key] = value
+            elif key == 'timer_only':
+                self._options[key] = value
 
     def __getitem__(self, name):
         if self._options.get(name, None) is None:
@@ -84,28 +88,41 @@ def add_profiler_step(options_str=None):
     Enable the operator-level timing using PaddlePaddle's profiler.
     The profiler uses a independent variable to count the profiler steps.
     One call of this function is treated as a profiler step.
-    
     Args:
-      options_str - a string to initialize the ProfilerOptions.
+      options_str - a string to initialize the ProfilerOptions.
                          Default is None, and the profiler is disabled.
     '''
     if options_str is None:
         return
 
+    global _prof 
     global _profiler_step_id
     global _profiler_options
 
     if _profiler_options is None:
         _profiler_options = ProfilerOptions(options_str)
-
-    if _profiler_step_id == _profiler_options['batch_range'][0]:
-        paddle.utils.profiler.start_profiler(
-            _profiler_options['state'], _profiler_options['tracer_option'])
-    elif _profiler_step_id == _profiler_options['batch_range'][1]:
-        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
-                                            _profiler_options['profile_path'])
+    # profile : https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/performance_improving/profiling_model.html#chakanxingnengshujudetongjibiaodan
+    # timer_only = True:  only the model's throughput and time overhead are reported.
+    # timer_only = False: calling summary() prints statistical tables that present performance data from different perspectives,
+    #                     and the exported timeline information is written to the ./profiler_log directory.
+    if _prof is None:
+        _timer_only = str(_profiler_options['timer_only']) == str(True)
+        _prof = profiler.Profiler(
+                   scheduler = (_profiler_options['batch_range'][0], _profiler_options['batch_range'][1]),
+                   on_trace_ready = profiler.export_chrome_tracing('./profiler_log'),
+                   timer_only = _timer_only)
+        _prof.start()
+    else:
+        _prof.step()
+        
+    if _profiler_step_id == _profiler_options['batch_range'][1]:
+        _prof.stop()
+        _prof.summary(
+             op_detail=True,
+             thread_sep=False,
+             time_unit='ms')
+        _prof = None
         if _profiler_options['exit_on_finished']:
             sys.exit(0)
 
     _profiler_step_id += 1
-
diff --git a/test_tipc/benchmark_train.sh b/test_tipc/benchmark_train.sh
index 76e1413..fd25613 100644
--- a/test_tipc/benchmark_train.sh
+++ b/test_tipc/benchmark_train.sh
@@ -113,7 +113,8 @@ repo_name=$(get_repo_name )
 SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}   # */benchmark_log
 mkdir -p "${SAVE_LOG}/benchmark_log/"
 status_log="${SAVE_LOG}/benchmark_log/results.log"
-
+# Benchmark profiling switch: PROFILING_TIMER_ONLY=no|True|False ("no" skips the profiling run; the default is "True").
+PROFILING_TIMER_ONLY=${PROFILING_TIMER_ONLY:-"True"}
 # The number of lines in which train params can be replaced.
 line_python=3
 line_gpuid=4
@@ -175,19 +176,25 @@ for batch_size in ${batch_size_list[*]}; do
             gpu_id=$(set_gpu_id $device_num)
 
             if [ ${#gpu_id} -le 1 ];then
-                log_path="$SAVE_LOG/profiling_log"
-                mkdir -p $log_path
-                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
-                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
-                # set profile_option params
-                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
-
-                # run test_train_inference_python.sh
-                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
-                echo $cmd
-                eval $cmd
-                eval "cat ${log_path}/${log_name}"
-
+                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id 
+                if [[ ${PROFILING_TIMER_ONLY} != "no" ]];then
+                    echo "run profile"
+                    # The default value of profile_option's timer_only parameter is True
+                    if [[ ${PROFILING_TIMER_ONLY} = "False" ]];then
+                        profile_option="${profile_option};timer_only=False"
+                    fi
+                    log_path="$SAVE_LOG/profiling_log"
+                    mkdir -p $log_path
+                    log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_mode}_${device_num}_${to_static}profiling"
+                    # set profile_option params
+                    tmp=`sed -i "${line_profile}s/.*/\"${profile_option}\"/" "${FILENAME}"`
+                    # run test_train_inference_python.sh
+                    cmd="timeout 5m bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
+                    echo $cmd
+                    eval ${cmd}
+                    eval "cat ${log_path}/${log_name}"
+                fi
+                echo "run without profile"               
                 # without profile
                 log_path="$SAVE_LOG/train_log"
                 speed_log_path="$SAVE_LOG/index"
diff --git a/test_tipc/test_train_inference_python.sh b/test_tipc/test_train_inference_python.sh
index c5cdce8..40b9eb0 100644
--- a/test_tipc/test_train_inference_python.sh
+++ b/test_tipc/test_train_inference_python.sh
@@ -254,11 +254,11 @@ else
                 fi
                 set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                 if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
-                    cmd="${python} ${run_train} ${set_use_gpu}  ${set_save_model} ${set_train_params1} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level}"
+                    cmd="${python} ${run_train} ${set_use_gpu}  ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1} "
                 elif [ ${#ips} -le 26 ];then  # train with multi-gpu
-                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_train_params1} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level}"
+                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1}"
                 else     # train with multi-machine
-                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_train_params1} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_amp_config} ${set_amp_level}"
+                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_batchsize} ${set_amp_config} ${set_amp_level} ${set_train_params1}"
                 fi
                 # run train
                 export FLAGS_cudnn_deterministic=True
-- 
GitLab