s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)

Commit 4f0aa7ef (unverified)
Authored by shangliang Xu on Oct 21, 2021; committed via GitHub on Oct 21, 2021

[benchmark] fix nan in training (#4345)
Parent: 8474ac99

Showing 3 changed files with 113 additions and 6 deletions (+113, −6):
* benchmark/README.md (+2, −1)
* benchmark/analysis_log.py (+100, −0)
* benchmark/run_benchmark.sh (+11, −5)

benchmark/README.md

````diff
@@ -2,6 +2,7 @@
 ```
 ├── benchmark
+│   ├── analysis_log.py
 │   ├── prepare.sh
 │   ├── README.md
 │   ├── run_all.sh
...
@@ -18,7 +19,7 @@
 单模型运行脚本,可完成指定模型的测试方案
 ## Docker 运行环境
-* docker image: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7
+* docker image: registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7
 * paddle = 2.1.2
 * python = 3.7
...
````

benchmark/analysis_log.py (new file, mode 100644)
```python
from __future__ import print_function
import argparse
import json
import os
import sys


def parse_args():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--filename", type=str, help="The name of log which need to analysis.")
    parser.add_argument(
        "--jsonname", type=str, help="The name of dumped json where to output.")
    parser.add_argument(
        "--keyword",
        type=str,
        default="ips:",
        help="Keyword to specify analysis data")
    parser.add_argument(
        '--model_name',
        type=str,
        default="faster_rcnn",
        help='training model_name, transformer_base')
    parser.add_argument(
        '--mission_name', type=str, default="目标检测", help='training mission name')
    parser.add_argument(
        '--direction_id', type=int, default=0, help='training direction_id')
    parser.add_argument(
        '--run_mode',
        type=str,
        default="sp",
        help='multi process or single process')
    parser.add_argument(
        '--index',
        type=int,
        default=1,
        help='{1: speed, 2:mem, 3:profiler, 6:max_batch_size}')
    parser.add_argument(
        '--gpu_num', type=int, default=1, help='nums of training gpus')
    parser.add_argument(
        '--batch_size', type=int, default=1, help='batch size of training samples')
    args = parser.parse_args()
    return args


def parse_text_from_file(file_path: str):
    with open(file_path, "r") as f:
        lines = f.read().splitlines()
    return lines


def parse_avg_from_text(text: list, keyword: str, skip_line=4):
    count_list = []
    for i, line in enumerate(text):
        if keyword in line:
            words = line.split(" ")
            for j, word in enumerate(words):
                if word == keyword:
                    count_list.append(float(words[j + 1]))
                    break
    count_list = count_list[skip_line:]
    if count_list:
        return sum(count_list) / len(count_list)
    else:
        return 0.0


if __name__ == '__main__':
    args = parse_args()
    run_info = dict()
    run_info["log_file"] = args.filename
    res_log_file = args.jsonname
    run_info["model_name"] = args.model_name
    run_info["mission_name"] = args.mission_name
    run_info["direction_id"] = args.direction_id
    run_info["run_mode"] = args.run_mode
    run_info["index"] = args.index
    run_info["gpu_num"] = args.gpu_num
    run_info["FINAL_RESULT"] = 0
    run_info["JOB_FAIL_FLAG"] = 0

    text = parse_text_from_file(args.filename)
    avg_ips = parse_avg_from_text(text, args.keyword)
    run_info["FINAL_RESULT"] = avg_ips * args.gpu_num
    if avg_ips == 0.0:
        run_info["JOB_FAIL_FLAG"] = 1
        print("Failed at get info from training's output log, please check.")
        sys.exit()

    json_info = json.dumps(run_info)
    with open(res_log_file, "w") as of:
        of.write(json_info)
```
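
For a sense of how the parser behaves, here is a minimal sketch that feeds `parse_avg_from_text` a fabricated log (the log lines and throughput numbers below are invented for illustration, the real format comes from PaddleDetection's training output; importing `analysis_log` assumes the script's directory is on the Python path):

```python
# Illustration only: fabricated log lines containing the default "ips:" keyword.
from analysis_log import parse_avg_from_text

fake_log = [
    "[10/1000] loss: 1.23 ips: 10.0 images/s",
    "[20/1000] loss: 1.10 ips: 12.0 images/s",
    "[30/1000] loss: 1.05 ips: 14.0 images/s",
    "[40/1000] loss: 1.01 ips: 16.0 images/s",
    "[50/1000] loss: 0.98 ips: 20.0 images/s",
    "[60/1000] loss: 0.97 ips: 22.0 images/s",
]

# The first skip_line (default 4) matches are skipped, so only 20.0 and 22.0
# contribute to the average here.
print(parse_avg_from_text(fake_log, "ips:"))  # 21.0
```

If no line matches the keyword, the function returns 0.0, which the `__main__` block treats as a failed job (`JOB_FAIL_FLAG = 1`) before dumping the result JSON.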

benchmark/run_benchmark.sh
```diff
@@ -20,12 +20,18 @@ function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
     echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

+    # set runtime params
+    set_optimizer_lr_sp=" "
+    set_optimizer_lr_mp=" "
     # parse model_name
     case ${model_name} in
-        faster_rcnn) model_yml="configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml" ;;
-        fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml" ;;
+        faster_rcnn) model_yml="configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
+        fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
         deformable_detr) model_yml="configs/deformable_detr/deformable_detr_r50_1x_coco.yml" ;;
-        gfl) model_yml="configs/gfl/gfl_r50_fpn_1x_coco.yml" ;;
+        gfl) model_yml="configs/gfl/gfl_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
         hrnet) model_yml="configs/keypoint/hrnet/hrnet_w32_256x192.yml" ;;
         higherhrnet) model_yml="configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml" ;;
         solov2) model_yml="configs/solov2/solov2_r50_fpn_1x_coco.yml" ;;
...
@@ -45,10 +51,10 @@ function _train(){
     case ${run_mode} in
     sp) train_cmd="${python} -u tools/train.py -c ${model_yml} ${set_fp_item} \
-        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}" ;;
+        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_sp}" ;;
     mp) train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
         --gpus=${CUDA_VISIBLE_DEVICES} tools/train.py -c ${model_yml} ${set_fp_item} \
-        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter}"
+        -o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_mp}"
         log_parse_file="mylog/workerlog.0" ;;
     *) echo "choose run_mode(sp or mp)"; exit 1;
     esac
...
```
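
The functional change is the extra `${set_optimizer_lr_sp}` / `${set_optimizer_lr_mp}` appended to the `-o` overrides of the train command, which for faster_rcnn, fcos and gfl expands to `LearningRate.base_lr=0.001`. As a rough sketch of what a dotted `key=value` override of this kind does to a nested config (illustration only; `apply_override` and the example config dict are invented here, this is not PaddleDetection's own `-o` handling in tools/train.py):

```python
# Hypothetical helper, for illustration only: apply a dotted "key.subkey=value"
# override (like LearningRate.base_lr=0.001 above) to a nested config dict.
def apply_override(cfg: dict, override: str) -> dict:
    key_path, raw_value = override.split("=", 1)
    keys = key_path.split(".")
    node = cfg
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    try:
        node[keys[-1]] = float(raw_value)   # numeric values such as 0.001
    except ValueError:
        node[keys[-1]] = raw_value          # keep non-numeric values as strings
    return cfg

config = {"LearningRate": {"base_lr": 0.01}}  # invented example config
apply_override(config, "LearningRate.base_lr=0.001")
print(config["LearningRate"]["base_lr"])  # 0.001
```

Presumably the stock base_lr in those three configs is what produced NaN losses under the benchmark settings, so pinning it to 0.001 on the command line keeps the benchmark runs stable without editing the config files themselves.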