Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
4f0aa7ef
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
697
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
4f0aa7ef
编写于
10月 21, 2021
作者:
S
shangliang Xu
提交者:
GitHub
10月 21, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[benchmark] fix nan in training (#4345)
上级
8474ac99
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
113 addition
and
6 deletion
+113
-6
benchmark/README.md
benchmark/README.md
+2
-1
benchmark/analysis_log.py
benchmark/analysis_log.py
+100
-0
benchmark/run_benchmark.sh
benchmark/run_benchmark.sh
+11
-5
未找到文件。
benchmark/README.md
浏览文件 @
4f0aa7ef
...
...
@@ -2,6 +2,7 @@
```
├── benchmark
│ ├── analysis_log.py
│ ├── prepare.sh
│ ├── README.md
│ ├── run_all.sh
...
...
@@ -18,7 +19,7 @@
单模型运行脚本,可完成指定模型的测试方案
## Docker 运行环境
*
docker image:
paddlepaddle/paddle:latest-gpu-cuda10.1
-cudnn7
*
docker image:
registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2
-cudnn7
*
paddle = 2.1.2
*
python = 3.7
...
...
benchmark/analysis_log.py
0 → 100644
浏览文件 @
4f0aa7ef
from
__future__
import
print_function
import
argparse
import
json
import
os
import
sys
def parse_args(argv=None):
    """Parse the command-line options of the benchmark log analyzer.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, so the script
            entry point can keep calling ``parse_args()`` with no arguments.

    Returns:
        argparse.Namespace: the parsed options (``filename``, ``jsonname``,
        ``keyword``, ``model_name``, ``mission_name``, ``direction_id``,
        ``run_mode``, ``index``, ``gpu_num``, ``batch_size``).

    Raises:
        SystemExit: raised by argparse when ``--filename`` or ``--jsonname``
            is missing or an option value cannot be converted.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # The input log and the output JSON paths are both opened by the script,
    # so reject a missing value here with a usage error instead of letting
    # ``open(None)`` fail later with an unrelated TypeError.
    parser.add_argument(
        "--filename",
        type=str,
        required=True,
        help="The name of log which need to analysis.")
    parser.add_argument(
        "--jsonname",
        type=str,
        required=True,
        help="The name of dumped json where to output.")
    parser.add_argument(
        "--keyword",
        type=str,
        default="ips:",
        help="Keyword to specify analysis data")
    parser.add_argument(
        '--model_name',
        type=str,
        default="faster_rcnn",
        help='training model_name, transformer_base')
    parser.add_argument(
        '--mission_name',
        type=str,
        default="目标检测",
        help='training mission name')
    parser.add_argument(
        '--direction_id', type=int, default=0, help='training direction_id')
    parser.add_argument(
        '--run_mode',
        type=str,
        default="sp",
        help='multi process or single process')
    parser.add_argument(
        '--index',
        type=int,
        default=1,
        help='{1: speed, 2:mem, 3:profiler, 6:max_batch_size}')
    parser.add_argument(
        '--gpu_num', type=int, default=1, help='nums of training gpus')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=1,
        help='batch size of training samples')
    return parser.parse_args(argv)
def parse_text_from_file(file_path: str):
    """Read a log file and return its content as a list of lines.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list: the lines of the file, without their line terminators.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # Decode explicitly as UTF-8 instead of the locale default so the result
    # does not depend on the machine's locale. Undecodable bytes are replaced
    # rather than aborting the whole analysis on one corrupt line.
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        lines = f.read().splitlines()
    return lines
def parse_avg_from_text(text: list, keyword: str, skip_line=4):
    """Average the numbers that follow ``keyword`` in the given log lines.

    For every line containing ``keyword``, the first whitespace-separated
    token equal to ``keyword`` that is followed by a parsable number
    contributes that number as one sample. The first ``skip_line`` samples
    are then discarded and the remaining ones are averaged.

    Args:
        text: Log lines to scan.
        keyword: Token that immediately precedes the value, e.g. ``"ips:"``.
        skip_line: Number of leading samples to drop before averaging.

    Returns:
        float: the mean of the remaining samples, or ``0.0`` when none are
        left.
    """
    samples = []
    for line in text:
        if keyword not in line:
            continue
        # split() without an argument collapses runs of whitespace, so
        # "ips:  12.3" no longer yields an empty token after the keyword.
        tokens = line.split()
        # Stop one token early: a keyword in the last position has no value
        # after it and used to raise IndexError.
        for pos, token in enumerate(tokens[:-1]):
            if token != keyword:
                continue
            try:
                samples.append(float(tokens[pos + 1]))
            except ValueError:
                # Not a number after the keyword; keep scanning this line
                # instead of aborting the whole analysis.
                continue
            break

    samples = samples[skip_line:]
    if not samples:
        return 0.0
    return sum(samples) / len(samples)
if __name__ == '__main__':
    # Script entry point: average the keyword values found in the training
    # log and dump the run description as JSON to the requested file.
    args = parse_args()
    res_log_file = args.jsonname

    run_info = {
        "log_file": args.filename,
        "model_name": args.model_name,
        "mission_name": args.mission_name,
        "direction_id": args.direction_id,
        "run_mode": args.run_mode,
        "index": args.index,
        "gpu_num": args.gpu_num,
        "FINAL_RESULT": 0,
        "JOB_FAIL_FLAG": 0,
    }

    text = parse_text_from_file(args.filename)
    avg_ips = parse_avg_from_text(text, args.keyword)
    # The per-log average is scaled by the number of GPUs.
    run_info["FINAL_RESULT"] = avg_ips * args.gpu_num

    # parse_avg_from_text returns 0.0 when no sample could be extracted.
    job_failed = avg_ips == 0.0
    if job_failed:
        run_info["JOB_FAIL_FLAG"] = 1
        print("Failed at get info from training's output log, please check.")

    # Always write the JSON so that JOB_FAIL_FLAG is actually persisted;
    # previously the script exited before the dump on failure.
    json_info = json.dumps(run_info)
    with open(res_log_file, "w") as of:
        of.write(json_info)

    if job_failed:
        # Non-zero status lets the calling shell script detect the failure
        # (a bare sys.exit() reported success).
        sys.exit(1)
benchmark/run_benchmark.sh
浏览文件 @
4f0aa7ef
...
...
@@ -20,12 +20,18 @@ function _train(){
echo
"Train on
${
num_gpu_devices
}
GPUs"
echo
"current CUDA_VISIBLE_DEVICES=
$CUDA_VISIBLE_DEVICES
, gpus=
$num_gpu_devices
, batch_size=
$batch_size
"
# set runtime params
set_optimizer_lr_sp
=
" "
set_optimizer_lr_mp
=
" "
# parse model_name
case
${
model_name
}
in
faster_rcnn
)
model_yml
=
"configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml"
;;
fcos
)
model_yml
=
"configs/fcos/fcos_r50_fpn_1x_coco.yml"
;;
faster_rcnn
)
model_yml
=
"configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml"
set_optimizer_lr_sp
=
"LearningRate.base_lr=0.001"
;;
fcos
)
model_yml
=
"configs/fcos/fcos_r50_fpn_1x_coco.yml"
set_optimizer_lr_sp
=
"LearningRate.base_lr=0.001"
;;
deformable_detr
)
model_yml
=
"configs/deformable_detr/deformable_detr_r50_1x_coco.yml"
;;
gfl
)
model_yml
=
"configs/gfl/gfl_r50_fpn_1x_coco.yml"
;;
gfl
)
model_yml
=
"configs/gfl/gfl_r50_fpn_1x_coco.yml"
set_optimizer_lr_sp
=
"LearningRate.base_lr=0.001"
;;
hrnet
)
model_yml
=
"configs/keypoint/hrnet/hrnet_w32_256x192.yml"
;;
higherhrnet
)
model_yml
=
"configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml"
;;
solov2
)
model_yml
=
"configs/solov2/solov2_r50_fpn_1x_coco.yml"
;;
...
...
@@ -45,10 +51,10 @@ function _train(){
case
${
run_mode
}
in
sp
)
train_cmd
=
"
${
python
}
-u tools/train.py -c
${
model_yml
}
${
set_fp_item
}
\
-o
${
set_batch_size
}
${
set_max_epoch
}
${
set_log_iter
}
"
;;
-o
${
set_batch_size
}
${
set_max_epoch
}
${
set_log_iter
}
${
set_optimizer_lr_sp
}
"
;;
mp
)
train_cmd
=
"
${
python
}
-m paddle.distributed.launch --log_dir=./mylog
\
--gpus=
${
CUDA_VISIBLE_DEVICES
}
tools/train.py -c
${
model_yml
}
${
set_fp_item
}
\
-o
${
set_batch_size
}
${
set_max_epoch
}
${
set_log_iter
}
"
-o
${
set_batch_size
}
${
set_max_epoch
}
${
set_log_iter
}
${
set_optimizer_lr_mp
}
"
log_parse_file
=
"mylog/workerlog.0"
;;
*
)
echo
"choose run_mode(sp or mp)"
;
exit
1
;
esac
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录