Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
3cb63956
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
1 年多 前同步成功
通知
699
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3cb63956
编写于
5月 30, 2018
作者:
X
Xin Pan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
better profiler and benchmark
上级
38af7bca
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
66 addition
and
42 deletion
+66
-42
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+20
-12
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+8
-4
python/paddle/fluid/profiler.py
python/paddle/fluid/profiler.py
+36
-26
未找到文件。
benchmark/fluid/fluid_benchmark.py
浏览文件 @
3cb63956
...
@@ -98,6 +98,8 @@ def parse_args():
...
@@ -98,6 +98,8 @@ def parse_args():
'--use_fake_data'
,
'--use_fake_data'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'If set ommit the actual read data operators.'
)
help
=
'If set ommit the actual read data operators.'
)
parser
.
add_argument
(
'--profile'
,
action
=
'store_true'
,
help
=
'If set, profile a few steps.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--update_method'
,
'--update_method'
,
type
=
str
,
type
=
str
,
...
@@ -108,8 +110,8 @@ def parse_args():
...
@@ -108,8 +110,8 @@ def parse_args():
return
args
return
args
def
append_nccl2_prepare
():
def
append_nccl2_prepare
(
trainer_id
):
if
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
None
)
!=
None
:
if
trainer_id
>=
0
:
# append gen_nccl_id at the end of startup program
# append gen_nccl_id at the end of startup program
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
port
=
os
.
getenv
(
"PADDLE_PSERVER_PORT"
)
port
=
os
.
getenv
(
"PADDLE_PSERVER_PORT"
)
...
@@ -136,12 +138,12 @@ def append_nccl2_prepare():
...
@@ -136,12 +138,12 @@ def append_nccl2_prepare():
})
})
return
nccl_id_var
,
num_trainers
,
trainer_id
return
nccl_id_var
,
num_trainers
,
trainer_id
else
:
else
:
raise
Exception
(
raise
Exception
(
"must set positive PADDLE_TRAINER_ID env variables for "
"must set PADDLE_TRAINER_ID env variables for
dist train."
)
"nccl-based
dist train."
)
def
dist_transpile
():
def
dist_transpile
(
trainer_id
):
if
"PADDLE_TRAINING_ROLE"
not
in
os
.
environ
:
if
trainer_id
<
0
:
return
None
,
None
return
None
,
None
# the port of all pservers, needed by both trainer and pserver
# the port of all pservers, needed by both trainer and pserver
...
@@ -158,9 +160,6 @@ def dist_transpile():
...
@@ -158,9 +160,6 @@ def dist_transpile():
trainers
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS"
))
# the IP of the local machine, needed by pserver only
# the IP of the local machine, needed by pserver only
current_endpoint
=
os
.
getenv
(
"PADDLE_CURRENT_IP"
,
""
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"PADDLE_CURRENT_IP"
,
""
)
+
":"
+
port
# the unique trainer id, starting from 0, needed by trainer
# only
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"0"
))
# the role, should be either PSERVER or TRAINER
# the role, should be either PSERVER or TRAINER
training_role
=
os
.
getenv
(
"PADDLE_TRAINING_ROLE"
)
training_role
=
os
.
getenv
(
"PADDLE_TRAINING_ROLE"
)
...
@@ -295,6 +294,11 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
...
@@ -295,6 +294,11 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
iters
=
0
iters
=
0
start_time
=
time
.
time
()
start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
args
.
profile
and
pass_id
==
0
and
batch_id
==
5
:
profiler
.
start_profiler
(
"All"
)
elif
args
.
profile
and
pass_id
==
0
and
batch_id
==
10
:
profiler
.
stop_profiler
(
"total"
,
"/tmp/profile_%d"
%
trainer_id
)
if
iters
==
args
.
skip_batch_num
:
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
start_time
=
time
.
time
()
num_samples
=
0
num_samples
=
0
...
@@ -334,7 +338,11 @@ def print_arguments(args):
...
@@ -334,7 +338,11 @@ def print_arguments(args):
def
main
():
def
main
():
args
=
parse_args
()
args
=
parse_args
()
print_arguments
(
args
)
print_arguments
(
args
)
nccl_id_var
,
num_trainers
,
trainer_id
=
None
,
1
,
0
# the unique trainer id, starting from 0, needed by trainer
# only
nccl_id_var
,
num_trainers
,
trainer_id
=
(
None
,
1
,
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"-1"
)))
if
args
.
use_cprof
:
if
args
.
use_cprof
:
pr
=
cProfile
.
Profile
()
pr
=
cProfile
.
Profile
()
...
@@ -348,7 +356,7 @@ def main():
...
@@ -348,7 +356,7 @@ def main():
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
if
args
.
update_method
==
"pserver"
:
if
args
.
update_method
==
"pserver"
:
train_prog
,
startup_prog
=
dist_transpile
()
train_prog
,
startup_prog
=
dist_transpile
(
trainer_id
)
if
not
train_prog
:
if
not
train_prog
:
raise
Exception
(
raise
Exception
(
"Must configure correct environments to run dist train."
)
"Must configure correct environments to run dist train."
)
...
@@ -364,7 +372,7 @@ def main():
...
@@ -364,7 +372,7 @@ def main():
train_args
.
append
(
fluid
.
default_startup_program
())
train_args
.
append
(
fluid
.
default_startup_program
())
if
args
.
update_method
==
"nccl2"
:
if
args
.
update_method
==
"nccl2"
:
nccl_id_var
,
num_trainers
,
trainer_id
=
append_nccl2_prepare
()
nccl_id_var
,
num_trainers
,
trainer_id
=
append_nccl2_prepare
(
trainer_id
)
if
args
.
gpus
==
1
:
if
args
.
gpus
==
1
:
# NOTE: parallel executor use profiler interanlly
# NOTE: parallel executor use profiler interanlly
if
args
.
use_nvprof
and
args
.
device
==
'GPU'
:
if
args
.
use_nvprof
and
args
.
device
==
'GPU'
:
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
3cb63956
...
@@ -272,6 +272,8 @@ if(NOT WITH_MKLDNN)
...
@@ -272,6 +272,8 @@ if(NOT WITH_MKLDNN)
list
(
REMOVE_ITEM GENERAL_OPS fc_op
)
list
(
REMOVE_ITEM GENERAL_OPS fc_op
)
endif
(
NOT WITH_MKLDNN
)
endif
(
NOT WITH_MKLDNN
)
list
(
REMOVE_ITEM GENERAL_OPS reduce_op
)
foreach
(
src
${
GENERAL_OPS
}
)
foreach
(
src
${
GENERAL_OPS
}
)
op_library
(
${
src
}
)
op_library
(
${
src
}
)
endforeach
()
endforeach
()
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
3cb63956
...
@@ -38,6 +38,7 @@ struct EventList;
...
@@ -38,6 +38,7 @@ struct EventList;
static
int64_t
profiler_lister_id
=
0
;
static
int64_t
profiler_lister_id
=
0
;
static
bool
should_send_profile_state
=
false
;
static
bool
should_send_profile_state
=
false
;
std
::
mutex
profiler_mu
;
// The profiler state, the initial value is ProfilerState::kDisabled
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
...
@@ -228,11 +229,13 @@ void EnableProfiler(ProfilerState state) {
...
@@ -228,11 +229,13 @@ void EnableProfiler(ProfilerState state) {
PADDLE_ENFORCE
(
state
!=
ProfilerState
::
kDisabled
,
PADDLE_ENFORCE
(
state
!=
ProfilerState
::
kDisabled
,
"Can't enbale profling, since the input state is "
,
"Can't enbale profling, since the input state is "
,
"ProfilerState::kDisabled"
);
"ProfilerState::kDisabled"
);
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
if
(
state
==
g_state
)
{
if
(
state
==
g_state
)
{
return
;
return
;
}
}
g_state
=
state
;
g_state
=
state
;
should_send_profile_state
=
true
;
{
should_send_profile_state
=
true
;
}
GetDeviceTracer
()
->
Enable
();
GetDeviceTracer
()
->
Enable
();
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
...
@@ -295,7 +298,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
...
@@ -295,7 +298,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
}
else
if
(
g_state
==
ProfilerState
::
kAll
)
{
}
else
if
(
g_state
==
ProfilerState
::
kAll
)
{
place
=
"All"
;
place
=
"All"
;
}
else
{
}
else
{
PADDLE_THROW
(
"Invalid profiler state"
);
PADDLE_THROW
(
"Invalid profiler state"
,
g_state
);
}
}
std
::
cout
<<
"Place: "
<<
place
<<
std
::
endl
;
std
::
cout
<<
"Place: "
<<
place
<<
std
::
endl
;
...
@@ -443,6 +446,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
...
@@ -443,6 +446,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
void
DisableProfiler
(
EventSortingKey
sorted_key
,
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
)
{
const
std
::
string
&
profile_path
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
// Mark the profiling stop.
// Mark the profiling stop.
Mark
(
"_stop_profiler_"
,
nullptr
);
Mark
(
"_stop_profiler_"
,
nullptr
);
...
@@ -456,7 +460,7 @@ void DisableProfiler(EventSortingKey sorted_key,
...
@@ -456,7 +460,7 @@ void DisableProfiler(EventSortingKey sorted_key,
tracer
->
GenProfile
(
profile_path
);
tracer
->
GenProfile
(
profile_path
);
}
}
g_state
=
ProfilerState
::
kDisabled
;
g_state
=
ProfilerState
::
kDisabled
;
should_send_profile_state
=
true
;
{
should_send_profile_state
=
true
;
}
}
}
bool
IsProfileEnabled
()
{
return
g_state
!=
ProfilerState
::
kDisabled
;
}
bool
IsProfileEnabled
()
{
return
g_state
!=
ProfilerState
::
kDisabled
;
}
...
@@ -466,7 +470,7 @@ void SetProfileListener() {
...
@@ -466,7 +470,7 @@ void SetProfileListener() {
std
::
mt19937
rng
;
std
::
mt19937
rng
;
rng
.
seed
(
std
::
random_device
()());
rng
.
seed
(
std
::
random_device
()());
std
::
uniform_int_distribution
<
std
::
mt19937
::
result_type
>
dist6
(
std
::
uniform_int_distribution
<
std
::
mt19937
::
result_type
>
dist6
(
1
,
std
::
numeric_limits
<
std
::
mt19937
::
result_type
>::
max
());
1
,
std
::
numeric_limits
<
int
>::
max
());
profiler_lister_id
=
dist6
(
rng
);
profiler_lister_id
=
dist6
(
rng
);
}
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
...
...
python/paddle/fluid/profiler.py
浏览文件 @
3cb63956
...
@@ -16,7 +16,10 @@ import core
...
@@ -16,7 +16,10 @@ import core
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
import
os
import
os
__all__
=
[
'cuda_profiler'
,
'reset_profiler'
,
'profiler'
]
__all__
=
[
'cuda_profiler'
,
'reset_profiler'
,
'profiler'
,
'start_profiler'
,
'stop_profiler'
]
NVPROF_CONFIG
=
[
NVPROF_CONFIG
=
[
"gpustarttimestamp"
,
"gpustarttimestamp"
,
...
@@ -72,6 +75,36 @@ def reset_profiler():
...
@@ -72,6 +75,36 @@ def reset_profiler():
core
.
reset_profiler
()
core
.
reset_profiler
()
def
start_profiler
(
state
):
if
state
not
in
[
'CPU'
,
'GPU'
,
"All"
]:
raise
ValueError
(
"The state must be 'CPU' or 'GPU' or 'All'."
)
if
state
==
"GPU"
:
prof_state
=
core
.
ProfilerState
.
kCUDA
elif
state
==
"CPU"
:
prof_state
=
core
.
ProfilerState
.
kCPU
else
:
prof_state
=
core
.
ProfilerState
.
kAll
core
.
enable_profiler
(
prof_state
)
def
stop_profiler
(
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
sorted_key
=
'default'
if
sorted_key
is
None
else
sorted_key
if
sorted_key
not
in
[
'default'
,
'calls'
,
'total'
,
'max'
,
'min'
,
'ave'
]:
raise
ValueError
(
"The sorted_key must be None or in 'calls', 'total', "
"'max', 'min' and 'ave'"
)
key_map
=
{
'default'
:
core
.
EventSortingKey
.
kDefault
,
'calls'
:
core
.
EventSortingKey
.
kCalls
,
'total'
:
core
.
EventSortingKey
.
kTotal
,
'max'
:
core
.
EventSortingKey
.
kMax
,
'min'
:
core
.
EventSortingKey
.
kMin
,
'ave'
:
core
.
EventSortingKey
.
kAve
,
}
# TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True):
core
.
disable_profiler
(
key_map
[
sorted_key
],
profile_path
)
@
contextmanager
@
contextmanager
def
profiler
(
state
,
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
def
profiler
(
state
,
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
"""The profiler interface.
"""The profiler interface.
...
@@ -98,29 +131,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
...
@@ -98,29 +131,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
profile_path (string) : If state == 'All', it will write a profile
profile_path (string) : If state == 'All', it will write a profile
proto output file.
proto output file.
"""
"""
if
state
not
in
[
'CPU'
,
'GPU'
,
"All"
]:
start_profiler
(
state
)
raise
ValueError
(
"The state must be 'CPU' or 'GPU' or 'All'."
)
if
state
==
"GPU"
:
prof_state
=
core
.
ProfilerState
.
kCUDA
elif
state
==
"CPU"
:
prof_state
=
core
.
ProfilerState
.
kCPU
else
:
prof_state
=
core
.
ProfilerState
.
kAll
core
.
enable_profiler
(
prof_state
)
yield
yield
stop_profiler
(
sorted_key
,
profile_path
)
sorted_key
=
'default'
if
sorted_key
is
None
else
sorted_key
if
sorted_key
not
in
[
'default'
,
'calls'
,
'total'
,
'max'
,
'min'
,
'ave'
]:
raise
ValueError
(
"The sorted_key must be None or in 'calls', 'total', "
"'max', 'min' and 'ave'"
)
key_map
=
{
'default'
:
core
.
EventSortingKey
.
kDefault
,
'calls'
:
core
.
EventSortingKey
.
kCalls
,
'total'
:
core
.
EventSortingKey
.
kTotal
,
'max'
:
core
.
EventSortingKey
.
kMax
,
'min'
:
core
.
EventSortingKey
.
kMin
,
'ave'
:
core
.
EventSortingKey
.
kAve
,
}
# TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True):
core
.
disable_profiler
(
key_map
[
sorted_key
],
profile_path
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录