未验证 提交 bceb6e4e 编写于 作者: S shangliang Xu 提交者: GitHub

[benchmark] add detection train benchmark scripts (#4250)

上级 16e3d740
# 通用检测benchmark测试脚本说明
```
├── benchmark
│ ├── prepare.sh
│ ├── README.md
│ ├── run_all.sh
│ ├── run_benchmark.sh
```
## 脚本说明
### prepare.sh
相关数据准备脚本,完成数据、模型的自动下载
### run_all.sh
主要运行脚本,可完成所有相关模型的测试方案
### run_benchmark.sh
单模型运行脚本,可完成指定模型的测试方案
## Docker 运行环境
* docker image: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7
* paddle = 2.1.2
* python = 3.7
## 运行benchmark测试
### 运行所有模型
```
git clone https://github.com/PaddlePaddle/PaddleDetection.git
cd PaddleDetection
bash benchmark/run_all.sh
```
### 运行指定模型
* Usage:bash run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
* model_name: faster_rcnn, fcos, deformable_detr, gfl
```
git clone https://github.com/PaddlePaddle/PaddleDetection.git
cd PaddleDetection
bash benchmark/prepare.sh
# 单卡
CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh sp 2 fp32 1 faster_rcnn
# 多卡
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh mp 2 fp32 1 faster_rcnn
```
#!/usr/bin/env bash
# prepare.sh — install Python dependencies and download the lite COCO
# benchmark dataset. Run from the PaddleDetection repo root:
#   bash benchmark/prepare.sh
#
# Abort on the first failure: without this, a failed pip install or a
# truncated download would be silently ignored and the benchmark would
# later fail (or run on bad data).
set -e

pip3.7 install -U pip Cython
pip3.7 install -r requirements.txt

# Reset dataset/coco to a clean state, keeping only the download helper script.
mv ./dataset/coco/download_coco.py . && rm -rf ./dataset/coco/* && mv ./download_coco.py ./dataset/coco/

# prepare lite train data (-nc: skip download if the tarball already exists)
wget -nc -P ./dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar
cd ./dataset/coco/ && tar -xvf coco_benchmark.tar && mv -u coco_benchmark/* .
rm -rf coco_benchmark/
# run_all.sh — run the full detection train benchmark sweep over all models.
# Use docker: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 python3.7
#
# Usage:
#   git clone https://github.com/PaddlePaddle/PaddleDetection.git
#   cd PaddleDetection
#   bash benchmark/run_all.sh

# Install dependencies and download the lite dataset first.
bash benchmark/prepare.sh

model_name_list=(faster_rcnn fcos deformable_detr gfl)
fp_item_list=(fp32)
max_epoch=1

for model_name in "${model_name_list[@]}"; do
  for fp_item in "${fp_item_list[@]}"; do
    # Per-model batch sizes to benchmark.
    case ${model_name} in
      faster_rcnn) bs_list=(1 8) ;;
      fcos) bs_list=(2 8) ;;
      deformable_detr) bs_list=(2) ;;
      gfl) bs_list=(2 8) ;;
      *) echo "wrong model_name"; exit 1 ;;
    esac
    for bs_item in "${bs_list[@]}"; do
      echo "index is speed, 1gpus, begin, ${model_name}"
      run_mode=sp
      CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} \
        ${fp_item} ${max_epoch} ${model_name} # (5min)
      # Pause between runs so leftover processes exit and GPU memory is freed.
      sleep 60
      echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
      run_mode=mp
      CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} \
        ${bs_item} ${fp_item} ${max_epoch} ${model_name}
      sleep 60
    done
  done
done
#!/usr/bin/env bash
# run_benchmark.sh — benchmark one detection model with one configuration.
# Usage:CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
set -xe

# Interpreter used for every training command.
python="python3.7"

# Parse the positional CLI arguments into the globals consumed by _train:
#   $1 run_mode   - sp (single process) | mp (multi process); default sp
#   $2 batch_size - per-card batch size; default 2
#   $3 fp_item    - fp32 | fp16; default fp32
#   $4 max_epoch  - number of epochs to train; default 1
#   $5 model_name - benchmark model identifier
# Also derives num_gpu_devices from CUDA_VISIBLE_DEVICES and builds log_file.
function _set_params(){
    run_mode=${1:-"sp"}
    batch_size=${2:-"2"}
    fp_item=${3:-"fp32"}
    max_epoch=${4:-"1"}
    model_name=${5:-"model_name"}
    # Logs go to $TRAIN_LOG_DIR when set, otherwise the current directory.
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}
    # Count visible GPUs: turn the comma list into words, collect into an array.
    device=${CUDA_VISIBLE_DEVICES//,/ }
    arr=(${device})
    num_gpu_devices=${#arr[@]}
    # Log file name encodes the full run configuration.
    printf -v log_file '%s/%s_%s_bs%s_%s_%s' \
        "${run_log_path}" "${model_name}" "${run_mode}" \
        "${batch_size}" "${fp_item}" "${num_gpu_devices}"
}
# Launch one training run using the globals set by _set_params, writing the
# training output to ${log_file} and setting job_fail_flag=0/1 on success/failure.
function _train(){
    echo "Train on ${num_gpu_devices} GPUs"
    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
    # Map model_name to its config file.
    case ${model_name} in
        faster_rcnn) model_yml="configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml" ;;
        fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml" ;;
        deformable_detr) model_yml="configs/deformable_detr/deformable_detr_r50_1x_coco.yml" ;;
        gfl) model_yml="configs/gfl/gfl_r50_fpn_1x_coco.yml" ;;
        *) echo "Undefined model_name"; exit 1 ;;
    esac
    # -o overrides applied on top of the model config.
    set_batch_size="TrainReader.batch_size=${batch_size}"
    set_max_epoch="epoch=${max_epoch}"
    set_log_iter="log_iter=10"
    if [ ${fp_item} = "fp16" ]; then
        set_fp_item="--fp16"
    else
        set_fp_item=" "
    fi
    case ${run_mode} in
    sp) train_cmd="${python} -u tools/train.py -c ${model_yml} ${set_fp_item} \
        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}" ;;
    mp) train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
        --gpus=${CUDA_VISIBLE_DEVICES} tools/train.py -c ${model_yml} ${set_fp_item} \
        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}"
        log_parse_file="mylog/workerlog.0" ;;
    *) echo "choose run_mode(sp or mp)"; exit 1 ;;
    esac
    # Run with a hard 15-minute cap. Capture the status explicitly: the file
    # runs under 'set -e', so a bare failing command would abort the whole
    # script here and the FAIL branch below would never execute.
    train_rc=0
    timeout 15m ${train_cmd} > ${log_file} 2>&1 || train_rc=$?
    if [ ${train_rc} -ne 0 ];then
        echo -e "${train_cmd}, FAIL"
        export job_fail_flag=1
    else
        echo -e "${train_cmd}, SUCCESS"
        export job_fail_flag=0
    fi
    # Clean up leftover trainer processes; '|| true' keeps 'set -e' from
    # aborting when the pid list is empty or already gone.
    # NOTE(review): this kills every python process on the host, not just
    # ours — acceptable only inside the dedicated benchmark container.
    kill -9 $(pgrep python) || true
    # In multi-process mode the real training log is worker 0's log.
    if [ $run_mode = "mp" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.0 ${log_file}
    fi
}
# Entry point: parse CLI arguments, then launch the benchmark run.
# Quote "$@" so arguments survive word-splitting intact.
_set_params "$@"
_train
...@@ -38,6 +38,7 @@ from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_in ...@@ -38,6 +38,7 @@ from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_in
from ppdet.metrics import RBoxMetric, JDEDetMetric from ppdet.metrics import RBoxMetric, JDEDetMetric
from ppdet.data.source.category import get_categories from ppdet.data.source.category import get_categories
import ppdet.utils.stats as stats import ppdet.utils.stats as stats
from ppdet.utils import profiler
from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter
from .export_utils import _dump_infer_config from .export_utils import _dump_infer_config
...@@ -340,6 +341,7 @@ class Trainer(object): ...@@ -340,6 +341,7 @@ class Trainer(object):
if self.cfg.get('print_flops', False): if self.cfg.get('print_flops', False):
self._flops(self.loader) self._flops(self.loader)
profiler_options = self.cfg.get('profiler_options', None)
for epoch_id in range(self.start_epoch, self.cfg.epoch): for epoch_id in range(self.start_epoch, self.cfg.epoch):
self.status['mode'] = 'train' self.status['mode'] = 'train'
...@@ -351,6 +353,7 @@ class Trainer(object): ...@@ -351,6 +353,7 @@ class Trainer(object):
for step_id, data in enumerate(self.loader): for step_id, data in enumerate(self.loader):
self.status['data_time'].update(time.time() - iter_tic) self.status['data_time'].update(time.time() - iter_tic)
self.status['step_id'] = step_id self.status['step_id'] = step_id
profiler.add_profiler_step(profiler_options)
self._compose_callback.on_step_begin(self.status) self._compose_callback.on_step_begin(self.status)
data['epoch_id'] = epoch_id data['epoch_id'] = epoch_id
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
# A global variable to record the number of calling times for profiler
# functions. It is used to specify the tracing range of training steps.
_profiler_step_id = 0
# A global variable to avoid parsing from string every time.
_profiler_options = None
class ProfilerOptions(object):
    '''
    Use a string to initialize a ProfilerOptions.
    The string should be in the format: "key1=value1;key2=value2;key3=value3".
    For example:
      "profile_path=model.profile"
      "batch_range=[50, 60]; profile_path=model.profile"
      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
    ProfilerOptions supports following key-value pair:
      batch_range      - a integer list, e.g. [100, 110].
      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
      sorted_key       - a string, the optional values are 'calls', 'total',
                         'max', 'min' or 'ave'.
      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
                         'AllOpDetail'.
      profile_path     - a string, the path to save the serialized profile data,
                         which can be used to generate a timeline.
      exit_on_finished - a boolean.
    '''

    def __init__(self, options_str):
        assert isinstance(options_str, str)
        # Defaults used for any key the option string does not override.
        self._options = {
            'batch_range': [10, 20],
            'state': 'All',
            'sorted_key': 'total',
            'tracer_option': 'Default',
            'profile_path': '/tmp/profile',
            'exit_on_finished': True
        }
        self._parse_from_string(options_str)

    def _parse_from_string(self, options_str):
        # Whitespace is insignificant; pairs are separated by ';'.
        for kv in options_str.replace(' ', '').split(';'):
            if '=' not in kv:
                # Tolerate empty/malformed segments (e.g. a trailing ';').
                continue
            # Split on the first '=' only, so values may themselves contain '='.
            key, value = kv.split('=', 1)
            if key == 'batch_range':
                value_list = value.replace('[', '').replace(']', '').split(',')
                value_list = list(map(int, value_list))
                # Accept only a non-negative, strictly increasing [start, stop].
                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
                        1] > value_list[0]:
                    self._options[key] = value_list
            elif key == 'exit_on_finished':
                self._options[key] = value.lower() in ("yes", "true", "t", "1")
            elif key in [
                    'state', 'sorted_key', 'tracer_option', 'profile_path'
            ]:
                self._options[key] = value

    def __getitem__(self, name):
        # Raise for unknown keys instead of silently returning None.
        if self._options.get(name, None) is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name)
        return self._options[name]
def add_profiler_step(options_str=None):
    '''
    Enable the operator-level timing using PaddlePaddle's profiler.
    The profiler uses a independent variable to count the profiler steps.
    One call of this function is treated as a profiler step.
    Args:
        profiler_options - a string to initialize the ProfilerOptions.
                           Default is None, and the profiler is disabled.
    '''
    # Profiling disabled: do nothing (the step counter is not advanced either).
    if options_str is None:
        return

    global _profiler_step_id
    global _profiler_options

    # Parse the option string only once; reuse the cached object afterwards.
    if _profiler_options is None:
        _profiler_options = ProfilerOptions(options_str)

    start_step = _profiler_options['batch_range'][0]
    stop_step = _profiler_options['batch_range'][1]
    if _profiler_step_id == start_step:
        paddle.utils.profiler.start_profiler(_profiler_options['state'],
                                             _profiler_options['tracer_option'])
    elif _profiler_step_id == stop_step:
        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
                                            _profiler_options['profile_path'])
        if _profiler_options['exit_on_finished']:
            sys.exit(0)

    _profiler_step_id += 1
...@@ -81,6 +81,13 @@ def parse_args(): ...@@ -81,6 +81,13 @@ def parse_args():
action='store_true', action='store_true',
default=False, default=False,
help='Whether to save the evaluation results only') help='Whether to save the evaluation results only')
parser.add_argument(
'--profiler_options',
type=str,
default=None,
help="The option of profiler, which should be in "
"format \"key1=value1;key2=value2;key3=value3\"."
"please see ppdet/utils/profiler.py for detail.")
args = parser.parse_args() args = parser.parse_args()
return args return args
...@@ -117,6 +124,7 @@ def main(): ...@@ -117,6 +124,7 @@ def main():
cfg['use_vdl'] = FLAGS.use_vdl cfg['use_vdl'] = FLAGS.use_vdl
cfg['vdl_log_dir'] = FLAGS.vdl_log_dir cfg['vdl_log_dir'] = FLAGS.vdl_log_dir
cfg['save_prediction_only'] = FLAGS.save_prediction_only cfg['save_prediction_only'] = FLAGS.save_prediction_only
cfg['profiler_options'] = FLAGS.profiler_options
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu') place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册