未验证 提交 9f7a79d1 编写于 作者: G gmm 提交者: GitHub

update benchmark case (#4599)

* update benchmark case

* update benchmark case

* update benchmark case,test=document_fix
上级 f4f5857d
from __future__ import print_function
import argparse
import json
import os
import sys
def parse_args():
    """Build and evaluate the CLI for the benchmark log analyzer.

    Returns:
        argparse.Namespace with the fields: filename, jsonname, keyword,
        model_name, mission_name, direction_id, run_mode, index, gpu_num,
        batch_size.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add = parser.add_argument

    # Input/output locations.
    add("--filename", type=str,
        help="The name of log which need to analysis.")
    add("--jsonname", type=str,
        help="The name of dumped json where to output.")

    # How to locate the metric inside the log.
    add("--keyword", type=str, default="ips:",
        help="Keyword to specify analysis data")

    # Metadata describing the training run being analyzed.
    add("--model_name", type=str, default="faster_rcnn",
        help="training model_name, transformer_base")
    add("--mission_name", type=str, default="目标检测",
        help="training mission name")
    add("--direction_id", type=int, default=0,
        help="training direction_id")
    add("--run_mode", type=str, default="sp",
        help="multi process or single process")
    add("--index", type=int, default=1,
        help="{1: speed, 2:mem, 3:profiler, 6:max_batch_size}")
    add("--gpu_num", type=int, default=1,
        help="nums of training gpus")
    add("--batch_size", type=int, default=1,
        help="batch size of training samples")

    return parser.parse_args()
def parse_text_from_file(file_path: str):
    """Read *file_path* and return its content as a list of lines.

    Line terminators are stripped (``str.splitlines`` semantics).
    """
    with open(file_path, "r") as log_file:
        content = log_file.read()
    return content.splitlines()
def parse_avg_from_text(text: list, keyword: str, skip_line=4):
    """Average the numeric value that follows *keyword* on each matching line.

    Args:
        text: Log lines to scan (e.g. from ``parse_text_from_file``).
        keyword: Token that immediately precedes the value of interest,
            e.g. ``"ips:"``.
        skip_line: Number of leading matches to discard before averaging
            (early training steps are typically slower and would skew the
            mean).

    Returns:
        The mean of the remaining values, or 0.0 when none are usable.
    """
    values = []
    for line in text:
        if keyword not in line:
            continue
        words = line.split(" ")
        for j, word in enumerate(words):
            if word != keyword:
                continue
            # Bug fix: the original indexed words[j + 1] unconditionally and
            # called float() without a guard, so a line ending in the keyword
            # raised IndexError and a non-numeric follower raised ValueError.
            # Malformed lines are now skipped instead of aborting the run.
            if j + 1 < len(words):
                try:
                    values.append(float(words[j + 1]))
                except ValueError:
                    pass
            break
    values = values[skip_line:]
    if values:
        return sum(values) / len(values)
    return 0.0
if __name__ == '__main__':
    args = parse_args()

    # Assemble the run metadata in the layout the benchmark platform expects.
    run_info = {
        "log_file": args.filename,
        "model_name": args.model_name,
        "mission_name": args.mission_name,
        "direction_id": args.direction_id,
        "run_mode": args.run_mode,
        "index": args.index,
        "gpu_num": args.gpu_num,
        "FINAL_RESULT": 0,
        "JOB_FAIL_FLAG": 0,
    }
    res_log_file = args.jsonname

    # Per-GPU throughput averaged from the log, scaled to the whole job.
    lines = parse_text_from_file(args.filename)
    avg_ips = parse_avg_from_text(lines, args.keyword)
    run_info["FINAL_RESULT"] = avg_ips * args.gpu_num

    # A zero average means the keyword was never found (or only during the
    # skipped warm-up steps): flag the job as failed and exit WITHOUT writing
    # the JSON result file, matching the original control flow.
    if avg_ips == 0.0:
        run_info["JOB_FAIL_FLAG"] = 1
        print("Failed at get info from training's output log, please check.")
        sys.exit()

    with open(res_log_file, "w") as of:
        of.write(json.dumps(run_info))
#!/usr/bin/env bash
# prepare.sh -- install Python dependencies and fetch the benchmark datasets
# (a COCO subset and a MOT subset) used by the detection benchmark scripts.

# Install build dependencies under both the pinned 3.7 interpreter and the
# environment default interpreter.
pip3.7 install -U pip Cython
pip3.7 install -r requirements.txt
pip install -U pip Cython
pip install -r requirements.txt

# Keep only the download helper while wiping any stale COCO data.
mv ./dataset/coco/download_coco.py . && rm -rf ./dataset/coco/* && mv ./download_coco.py ./dataset/coco/
# prepare lite train data
# NOTE(review): the "......@@" line below is diff-viewer residue fused with the
# original wget command that downloads coco_benchmark.tar — restore the plain
# wget line before running this script.
......@@ -9,6 +9,7 @@ wget -nc -P ./dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.
cd ./dataset/coco/ && tar -xvf coco_benchmark.tar && mv -u coco_benchmark/* .
rm -rf coco_benchmark/
cd ../../

# Wipe stale MOT data, then fetch the mini MOT benchmark archive.
rm -rf ./dataset/mot/*
# prepare mot mini train data
wget -nc -P ./dataset/mot/ https://paddledet.bj.bcebos.com/data/mot_benchmark.tar
......
......@@ -4,17 +4,18 @@
# run_all.sh -- drive the detection benchmark matrix (model x precision x
# batch size) in single-process (sp, 1 GPU) and multi-process (mp, 8 GPU)
# modes via run_benchmark.sh.
# NOTE(review): this chunk is a web diff view; several lines appear in BOTH
# the old and the new revision (model_name vs model_item, max_epoch=1 vs 2),
# so it is not directly runnable as pasted here.
# git clone https://github.com/PaddlePaddle/PaddleDetection.git
# cd PaddleDetection
# bash benchmark/run_all.sh
log_path=${LOG_PATH_INDEX_DIR:-$(pwd)} # set by the benchmark system; when no profiling run is needed, log_path points at the directory holding the speed logs
# run prepare.sh
bash benchmark/prepare.sh
model_name_list=(faster_rcnn fcos deformable_detr gfl hrnet higherhrnet solov2 jde fairmot)
fp_item_list=(fp32)
max_epoch=1
max_epoch=2
for model_name in ${model_name_list[@]}; do
for model_item in ${model_name_list[@]}; do
for fp_item in ${fp_item_list[@]}; do
# Pick the per-model batch-size list.
case ${model_name} in
case ${model_item} in
faster_rcnn) bs_list=(1 8) ;;
fcos) bs_list=(2 8) ;;
deformable_detr) bs_list=(2) ;;
......@@ -28,16 +29,18 @@ for model_name in ${model_name_list[@]}; do
esac
for bs_item in ${bs_list[@]}
do
echo "index is speed, 1gpus, begin, ${model_name}"
run_mode=sp
log_name=detection_${model_item}_bs${bs_item}_${fp_item} # e.g. clas_MobileNetv1_mp_bs32_fp32_8
echo "index is speed, 1gpus, begin, ${log_name}"
CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} \
${fp_item} ${max_epoch} ${model_name}
${fp_item} ${max_epoch} ${model_item} | tee ${log_path}/${log_name}_speed_8gpus8p 2>&1
sleep 60
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
run_mode=mp
log_name=detection_${model_item}_bs${bs_item}_${fp_item} # e.g. clas_MobileNetv1_mp_bs32_fp32_8
echo "index is speed, 8gpus, run_mode is multi_process, begin, ${log_name}"
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} \
${bs_item} ${fp_item} ${max_epoch} ${model_name}
${bs_item} ${fp_item} ${max_epoch} ${model_item}| tee ${log_path}/${log_name}_speed_8gpus8p 2>&1
sleep 60
done
done
......
......@@ -8,13 +8,22 @@ function _set_params(){
# run_benchmark.sh -- run one (model, batch size, precision, run mode)
# benchmark configuration and emit a log named by the platform's convention.
# NOTE(review): diff-view chunk; paired old/new revision lines (model_name vs
# model_item, two log_file assignments, two case labels) both appear below —
# not directly runnable as pasted here.
batch_size=${2:-"2"}
fp_item=${3:-"fp32"} # fp32|fp16
max_epoch=${4:-"1"}
model_name=${5:-"model_name"}
model_item=${5:-"model_item"}
run_log_path=${TRAIN_LOG_DIR:-$(pwd)}
# parameters required by the log-parsing step
base_batch_size=${batch_size}
mission_name="目标检测"
direction_id="0"
ips_unit="images/s"
skip_steps=10 # log parsing: some models have slow first steps that must be skipped (required)
keyword="ips:" # log parsing: keyword that selects the lines holding the data (required)
index="1"
model_name=${model_item}_bs${batch_size}_${fp_item}
device=${CUDA_VISIBLE_DEVICES//,/ }
arr=(${device})
num_gpu_devices=${#arr[*]}
log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
}
function _train(){
echo "Train on ${num_gpu_devices} GPUs"
......@@ -23,8 +32,8 @@ function _train(){
# set runtime params
set_optimizer_lr_sp=" "
set_optimizer_lr_mp=" "
# parse model_name
case ${model_name} in
# parse model_item
# Map the model id to its config YAML (plus any per-model LR override).
case ${model_item} in
faster_rcnn) model_yml="benchmark/configs/faster_rcnn_r50_fpn_1x_coco.yml"
set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml"
......@@ -37,7 +46,7 @@ function _train(){
solov2) model_yml="configs/solov2/solov2_r50_fpn_1x_coco.yml" ;;
jde) model_yml="configs/mot/jde/jde_darknet53_30e_1088x608.yml" ;;
fairmot) model_yml="configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml" ;;
*) echo "Undefined model_name"; exit 1;
*) echo "Undefined model_item"; exit 1;
esac
set_batch_size="TrainReader.batch_size=${batch_size}"
......@@ -52,7 +61,8 @@ function _train(){
# Build the training command for single-process (sp) or multi-process (mp).
case ${run_mode} in
sp) train_cmd="${python} -u tools/train.py -c ${model_yml} ${set_fp_item} \
-o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_sp}" ;;
mp) train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
mp) rm -rf mylog
train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
--gpus=${CUDA_VISIBLE_DEVICES} tools/train.py -c ${model_yml} ${set_fp_item} \
-o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_mp}"
log_parse_file="mylog/workerlog.0" ;;
......@@ -75,5 +85,8 @@ function _train(){
fi
}
source ${BENCHMARK_ROOT}/scripts/run_model.sh # parses benchmark-conformant logs with analysis.py; during joint debugging download it from https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh; comment this line out to only produce the raw training log, but re-enable it before submitting
_set_params $@
_train
# _train # uncomment to only produce the training log without parsing it
_run # defined in run_model.sh and invokes _train; comment out to only produce the raw training log, but re-enable before submitting
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册