From bceb6e4ef3139e04558fac8f16f6bbfc6e6d3af0 Mon Sep 17 00:00:00 2001
From: shangliang Xu
Date: Tue, 12 Oct 2021 10:50:42 +0800
Subject: [PATCH] [benchmark] add detection train benchmark scripts (#4250)

---
 benchmark/README.md        |  46 ++++++++++++++++
 benchmark/prepare.sh       |   9 +++
 benchmark/run_all.sh       |  39 +++++++++++++
 benchmark/run_benchmark.sh |  68 +++++++++++++++++++++++
 ppdet/engine/trainer.py    |   3 +
 ppdet/utils/profiler.py    | 111 +++++++++++++++++++++++++++++++++++++
 tools/train.py             |   8 +++
 7 files changed, 284 insertions(+)
 create mode 100644 benchmark/README.md
 create mode 100644 benchmark/prepare.sh
 create mode 100644 benchmark/run_all.sh
 create mode 100644 benchmark/run_benchmark.sh
 create mode 100644 ppdet/utils/profiler.py

diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 000000000..a9c99557b
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,46 @@
+# General detection benchmark test scripts
+
+```
+├── benchmark
+│   ├── prepare.sh
+│   ├── README.md
+│   ├── run_all.sh
+│   ├── run_benchmark.sh
+```
+
+## Script description
+
+### prepare.sh
+Data preparation script: automatically downloads the required data and models.
+### run_all.sh
+Main entry script: runs the benchmark plan for all supported models.
+### run_benchmark.sh
+Per-model script: runs the benchmark plan for a single specified model.
+
+## Docker runtime environment
+* docker image: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7
+* paddle = 2.1.2
+* python = 3.7
+
+## Running the benchmark
+
+### Run all models
+```
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+bash benchmark/run_all.sh
+```
+
+### Run a specified model
+* Usage: bash benchmark/run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
+* model_name: faster_rcnn, fcos, deformable_detr, gfl
+```
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+bash benchmark/prepare.sh
+
+# single GPU
+CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh sp 2 fp32 1 faster_rcnn
+# multiple GPUs
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh mp 2 fp32 1 faster_rcnn
+```
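For reference, the Docker environment listed in the README above can be brought up roughly as follows. This is a minimal sketch: the mount path, working directory, and container flags are illustrative, and it assumes the NVIDIA container toolkit is installed on the host and that the image already ships the paddle 2.1.2 / python 3.7 combination noted in the README.

```
# Pull the image named in the README and start an interactive GPU container,
# mounting the PaddleDetection checkout at /paddle (illustrative path).
docker pull paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7
docker run --gpus all -it -v "$PWD":/paddle -w /paddle \
    paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 /bin/bash

# Inside the container, confirm the versions noted in the README.
python3.7 -c "import paddle; print(paddle.__version__)"   # expected: 2.1.2
```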
diff --git a/benchmark/prepare.sh b/benchmark/prepare.sh
new file mode 100644
index 000000000..ec088cd1d
--- /dev/null
+++ b/benchmark/prepare.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+pip3.7 install -U pip Cython
+pip3.7 install -r requirements.txt
+mv ./dataset/coco/download_coco.py . && rm -rf ./dataset/coco/* && mv ./download_coco.py ./dataset/coco/
+# prepare lite train data
+wget -nc -P ./dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar
+cd ./dataset/coco/ && tar -xvf coco_benchmark.tar && mv -u coco_benchmark/* .
+rm -rf coco_benchmark/
diff --git a/benchmark/run_all.sh b/benchmark/run_all.sh
new file mode 100644
index 000000000..50370afb0
--- /dev/null
+++ b/benchmark/run_all.sh
@@ -0,0 +1,39 @@
+# Use docker: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  python3.7
+#
+# Usage:
+#   git clone https://github.com/PaddlePaddle/PaddleDetection.git
+#   cd PaddleDetection
+#   bash benchmark/run_all.sh
+
+# run prepare.sh
+bash benchmark/prepare.sh
+
+model_name_list=(faster_rcnn fcos deformable_detr gfl)
+fp_item_list=(fp32)
+max_epoch=1
+
+for model_name in ${model_name_list[@]}; do
+    for fp_item in ${fp_item_list[@]}; do
+        case ${model_name} in
+            faster_rcnn) bs_list=(1 8) ;;
+            fcos) bs_list=(2 8) ;;
+            deformable_detr) bs_list=(2) ;;
+            gfl) bs_list=(2 8) ;;
+            *) echo "wrong model_name"; exit 1;
+        esac
+        for bs_item in ${bs_list[@]}
+        do
+            echo "index is speed, 1gpus, begin, ${model_name}"
+            run_mode=sp
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} \
+                ${fp_item} ${max_epoch} ${model_name}     # (5min)
+            sleep 60
+
+            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
+            run_mode=mp
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} \
+                ${bs_item} ${fp_item} ${max_epoch} ${model_name}
+            sleep 60
+        done
+    done
+done
diff --git a/benchmark/run_benchmark.sh b/benchmark/run_benchmark.sh
new file mode 100644
index 000000000..01e01157e
--- /dev/null
+++ b/benchmark/run_benchmark.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+set -xe
+# Usage: CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
+python="python3.7"
+# Parameter description
+function _set_params(){
+    run_mode=${1:-"sp"}                     # sp|mp
+    batch_size=${2:-"2"}                    #
+    fp_item=${3:-"fp32"}                    # fp32|fp16
+    max_epoch=${4:-"1"}                     #
+    model_name=${5:-"model_name"}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}   # TRAIN_LOG_DIR
+
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+}
+function _train(){
+    echo "Train on ${num_gpu_devices} GPUs"
+    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
+
+    # parse model_name
+    case ${model_name} in
+        faster_rcnn) model_yml="configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml" ;;
+        fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml" ;;
+        deformable_detr) model_yml="configs/deformable_detr/deformable_detr_r50_1x_coco.yml" ;;
+        gfl) model_yml="configs/gfl/gfl_r50_fpn_1x_coco.yml" ;;
+        *) echo "Undefined model_name"; exit 1;
+    esac
+
+    set_batch_size="TrainReader.batch_size=${batch_size}"
+    set_max_epoch="epoch=${max_epoch}"
+    set_log_iter="log_iter=10"
+    if [ ${fp_item} = "fp16" ]; then
+        set_fp_item="--fp16"
+    else
+        set_fp_item=" "
+    fi
+
+    case ${run_mode} in
+    sp) train_cmd="${python} -u tools/train.py -c ${model_yml} ${set_fp_item} \
+        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}" ;;
+    mp) train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
+        --gpus=${CUDA_VISIBLE_DEVICES} tools/train.py -c ${model_yml} ${set_fp_item} \
+        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}"
+        log_parse_file="mylog/workerlog.0" ;;
+    *) echo "choose run_mode(sp or mp)"; exit 1;
+    esac
+#
+    timeout 15m ${train_cmd} > ${log_file} 2>&1
+    if [ $? -ne 0 ];then
+        echo -e "${train_cmd}, FAIL"
+        export job_fail_flag=1
+    else
+        echo -e "${train_cmd}, SUCCESS"
+        export job_fail_flag=0
+    fi
+    kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+
+    if [ $run_mode = "mp" -a -d mylog ]; then
+        rm ${log_file}
+        cp mylog/workerlog.0 ${log_file}
+    fi
+}
+
+_set_params $@
+_train
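For clarity, the `sp` branch of `_train` above composes a single-GPU command along the following lines. This is an illustrative expansion of `CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh sp 2 fp32 1 faster_rcnn`, not literal script output:

```
# fp32 leaves ${set_fp_item} empty, so the composed command reduces to:
CUDA_VISIBLE_DEVICES=0 timeout 15m python3.7 -u tools/train.py \
    -c configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml \
    -o TrainReader.batch_size=2 epoch=1 log_iter=10 \
    > ${TRAIN_LOG_DIR:-$(pwd)}/faster_rcnn_sp_bs2_fp32_1 2>&1
```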
diff --git a/ppdet/engine/trainer.py b/ppdet/engine/trainer.py
index 2d73e845e..f537661b9 100644
--- a/ppdet/engine/trainer.py
+++ b/ppdet/engine/trainer.py
@@ -38,6 +38,7 @@ from ppdet.metrics import Metric, COCOMetric, VOCMetric, WiderFaceMetric, get_in
 from ppdet.metrics import RBoxMetric, JDEDetMetric
 from ppdet.data.source.category import get_categories
 import ppdet.utils.stats as stats
+from ppdet.utils import profiler
 
 from .callbacks import Callback, ComposeCallback, LogPrinter, Checkpointer, WiferFaceEval, VisualDLWriter
 from .export_utils import _dump_infer_config
@@ -340,6 +341,7 @@ class Trainer(object):
         if self.cfg.get('print_flops', False):
             self._flops(self.loader)
+        profiler_options = self.cfg.get('profiler_options', None)
 
         for epoch_id in range(self.start_epoch, self.cfg.epoch):
             self.status['mode'] = 'train'
@@ -351,6 +353,7 @@
             for step_id, data in enumerate(self.loader):
                 self.status['data_time'].update(time.time() - iter_tic)
                 self.status['step_id'] = step_id
+                profiler.add_profiler_step(profiler_options)
                 self._compose_callback.on_step_begin(self.status)
                 data['epoch_id'] = epoch_id
diff --git a/ppdet/utils/profiler.py b/ppdet/utils/profiler.py
new file mode 100644
index 000000000..cae3773fa
--- /dev/null
+++ b/ppdet/utils/profiler.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import paddle
+
+# A global variable to record the number of calling times for profiler
+# functions. It is used to specify the tracing range of training steps.
+_profiler_step_id = 0
+
+# A global variable to avoid parsing from string every time.
+_profiler_options = None
+
+
+class ProfilerOptions(object):
+    '''
+    Use a string to initialize a ProfilerOptions.
+    The string should be in the format: "key1=value1;key2=value2;key3=value3".
+    For example:
+      "profile_path=model.profile"
+      "batch_range=[50, 60]; profile_path=model.profile"
+      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
+
+    ProfilerOptions supports the following key-value pairs:
+      batch_range      - an integer list, e.g. [100, 110].
+      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
+      sorted_key       - a string, the optional values are 'calls', 'total',
+                         'max', 'min' or 'ave'.
+      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
+                         'AllOpDetail'.
+      profile_path     - a string, the path to save the serialized profile data,
+                         which can be used to generate a timeline.
+      exit_on_finished - a boolean.
+    '''
+
+    def __init__(self, options_str):
+        assert isinstance(options_str, str)
+
+        self._options = {
+            'batch_range': [10, 20],
+            'state': 'All',
+            'sorted_key': 'total',
+            'tracer_option': 'Default',
+            'profile_path': '/tmp/profile',
+            'exit_on_finished': True
+        }
+        self._parse_from_string(options_str)
+
+    def _parse_from_string(self, options_str):
+        for kv in options_str.replace(' ', '').split(';'):
+            key, value = kv.split('=')
+            if key == 'batch_range':
+                value_list = value.replace('[', '').replace(']', '').split(',')
+                value_list = list(map(int, value_list))
+                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
+                        1] > value_list[0]:
+                    self._options[key] = value_list
+            elif key == 'exit_on_finished':
+                self._options[key] = value.lower() in ("yes", "true", "t", "1")
+            elif key in [
+                    'state', 'sorted_key', 'tracer_option', 'profile_path'
+            ]:
+                self._options[key] = value
+
+    def __getitem__(self, name):
+        if self._options.get(name, None) is None:
+            raise ValueError(
+                "ProfilerOptions does not have an option named %s." % name)
+        return self._options[name]
+
+
+def add_profiler_step(options_str=None):
+    '''
+    Enable operator-level timing using PaddlePaddle's profiler.
+    The profiler uses an independent variable to count the profiler steps.
+    One call of this function is treated as a profiler step.
+
+    Args:
+      options_str - a string used to initialize the ProfilerOptions.
+                    Default is None, and the profiler is disabled.
+    '''
+    if options_str is None:
+        return
+
+    global _profiler_step_id
+    global _profiler_options
+
+    if _profiler_options is None:
+        _profiler_options = ProfilerOptions(options_str)
+
+    if _profiler_step_id == _profiler_options['batch_range'][0]:
+        paddle.utils.profiler.start_profiler(_profiler_options['state'],
+                                             _profiler_options['tracer_option'])
+    elif _profiler_step_id == _profiler_options['batch_range'][1]:
+        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
+                                            _profiler_options['profile_path'])
+        if _profiler_options['exit_on_finished']:
+            sys.exit(0)
+
+    _profiler_step_id += 1
diff --git a/tools/train.py b/tools/train.py
index 9dff47402..fea17f53f 100755
--- a/tools/train.py
+++ b/tools/train.py
@@ -81,6 +81,13 @@ def parse_args():
         action='store_true',
         default=False,
         help='Whether to save the evaluation results only')
+    parser.add_argument(
+        '--profiler_options',
+        type=str,
+        default=None,
+        help="The option of the profiler, which should be in "
+        "format \"key1=value1;key2=value2;key3=value3\". "
+        "Please see ppdet/utils/profiler.py for details.")
     args = parser.parse_args()
 
     return args
@@ -117,6 +124,7 @@ def main():
     cfg['use_vdl'] = FLAGS.use_vdl
     cfg['vdl_log_dir'] = FLAGS.vdl_log_dir
     cfg['save_prediction_only'] = FLAGS.save_prediction_only
+    cfg['profiler_options'] = FLAGS.profiler_options
     merge_config(FLAGS.opt)
 
     place = paddle.set_device('gpu' if cfg.use_gpu else 'cpu')
-- 
GitLab
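As a usage note for the profiler hook added above: with `--profiler_options` wired through `tools/train.py` and `Trainer.train`, operator-level profiling can be enabled from the command line. The invocation below is a sketch with example option values (the batch range, state, and profile path are illustrative, not recommended settings); since `exit_on_finished` defaults to `True`, training exits once the profiled range ends.

```
# Profile training steps 50-60 on GPU and save serialized profile data that
# can later be turned into a timeline (see ppdet/utils/profiler.py above).
python3.7 tools/train.py \
    -c configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml \
    --profiler_options="batch_range=[50, 60]; state=GPU; tracer_option=OpDetail; profile_path=model.profile"
```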