Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
3cb63956
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3cb63956
编写于
5月 30, 2018
作者:
X
Xin Pan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
better profiler and benchmark
上级
38af7bca
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
66 addition
and
42 deletion
+66
-42
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+20
-12
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+8
-4
python/paddle/fluid/profiler.py
python/paddle/fluid/profiler.py
+36
-26
未找到文件。
benchmark/fluid/fluid_benchmark.py
浏览文件 @
3cb63956
...
...
@@ -98,6 +98,8 @@ def parse_args():
'--use_fake_data'
,
action
=
'store_true'
,
help
=
'If set ommit the actual read data operators.'
)
parser
.
add_argument
(
'--profile'
,
action
=
'store_true'
,
help
=
'If set, profile a few steps.'
)
parser
.
add_argument
(
'--update_method'
,
type
=
str
,
...
...
@@ -108,8 +110,8 @@ def parse_args():
return
args
def
append_nccl2_prepare
():
if
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
None
)
!=
None
:
def
append_nccl2_prepare
(
trainer_id
):
if
trainer_id
>=
0
:
# append gen_nccl_id at the end of startup program
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
port
=
os
.
getenv
(
"PADDLE_PSERVER_PORT"
)
...
...
@@ -136,12 +138,12 @@ def append_nccl2_prepare():
})
return
nccl_id_var
,
num_trainers
,
trainer_id
else
:
raise
Exception
(
"must set PADDLE_TRAINER_ID env variables for
dist train."
)
raise
Exception
(
"must set positive PADDLE_TRAINER_ID env variables for "
"nccl-based
dist train."
)
def
dist_transpile
():
if
"PADDLE_TRAINING_ROLE"
not
in
os
.
environ
:
def
dist_transpile
(
trainer_id
):
if
trainer_id
<
0
:
return
None
,
None
# the port of all pservers, needed by both trainer and pserver
...
...
@@ -158,9 +160,6 @@ def dist_transpile():
trainers
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS"
))
# the IP of the local machine, needed by pserver only
current_endpoint
=
os
.
getenv
(
"PADDLE_CURRENT_IP"
,
""
)
+
":"
+
port
# the unique trainer id, starting from 0, needed by trainer
# only
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"0"
))
# the role, should be either PSERVER or TRAINER
training_role
=
os
.
getenv
(
"PADDLE_TRAINING_ROLE"
)
...
...
@@ -295,6 +294,11 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
iters
=
0
start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_reader
()):
if
args
.
profile
and
pass_id
==
0
and
batch_id
==
5
:
profiler
.
start_profiler
(
"All"
)
elif
args
.
profile
and
pass_id
==
0
and
batch_id
==
10
:
profiler
.
stop_profiler
(
"total"
,
"/tmp/profile_%d"
%
trainer_id
)
if
iters
==
args
.
skip_batch_num
:
start_time
=
time
.
time
()
num_samples
=
0
...
...
@@ -334,7 +338,11 @@ def print_arguments(args):
def
main
():
args
=
parse_args
()
print_arguments
(
args
)
nccl_id_var
,
num_trainers
,
trainer_id
=
None
,
1
,
0
# the unique trainer id, starting from 0, needed by trainer
# only
nccl_id_var
,
num_trainers
,
trainer_id
=
(
None
,
1
,
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"-1"
)))
if
args
.
use_cprof
:
pr
=
cProfile
.
Profile
()
...
...
@@ -348,7 +356,7 @@ def main():
fluid
.
memory_optimize
(
fluid
.
default_main_program
())
if
args
.
update_method
==
"pserver"
:
train_prog
,
startup_prog
=
dist_transpile
()
train_prog
,
startup_prog
=
dist_transpile
(
trainer_id
)
if
not
train_prog
:
raise
Exception
(
"Must configure correct environments to run dist train."
)
...
...
@@ -364,7 +372,7 @@ def main():
train_args
.
append
(
fluid
.
default_startup_program
())
if
args
.
update_method
==
"nccl2"
:
nccl_id_var
,
num_trainers
,
trainer_id
=
append_nccl2_prepare
()
nccl_id_var
,
num_trainers
,
trainer_id
=
append_nccl2_prepare
(
trainer_id
)
if
args
.
gpus
==
1
:
# NOTE: parallel executor use profiler interanlly
if
args
.
use_nvprof
and
args
.
device
==
'GPU'
:
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
3cb63956
...
...
@@ -272,6 +272,8 @@ if(NOT WITH_MKLDNN)
list
(
REMOVE_ITEM GENERAL_OPS fc_op
)
endif
(
NOT WITH_MKLDNN
)
list
(
REMOVE_ITEM GENERAL_OPS reduce_op
)
foreach
(
src
${
GENERAL_OPS
}
)
op_library
(
${
src
}
)
endforeach
()
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
3cb63956
...
...
@@ -38,6 +38,7 @@ struct EventList;
static
int64_t
profiler_lister_id
=
0
;
static
bool
should_send_profile_state
=
false
;
std
::
mutex
profiler_mu
;
// The profiler state, the initial value is ProfilerState::kDisabled
static
ProfilerState
g_state
=
ProfilerState
::
kDisabled
;
...
...
@@ -228,11 +229,13 @@ void EnableProfiler(ProfilerState state) {
PADDLE_ENFORCE
(
state
!=
ProfilerState
::
kDisabled
,
"Can't enbale profling, since the input state is "
,
"ProfilerState::kDisabled"
);
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
if
(
state
==
g_state
)
{
return
;
}
g_state
=
state
;
should_send_profile_state
=
true
;
{
should_send_profile_state
=
true
;
}
GetDeviceTracer
()
->
Enable
();
#ifdef PADDLE_WITH_CUDA
if
(
g_state
==
ProfilerState
::
kCUDA
)
{
...
...
@@ -295,7 +298,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>>& events_table,
}
else
if
(
g_state
==
ProfilerState
::
kAll
)
{
place
=
"All"
;
}
else
{
PADDLE_THROW
(
"Invalid profiler state"
);
PADDLE_THROW
(
"Invalid profiler state"
,
g_state
);
}
std
::
cout
<<
"Place: "
<<
place
<<
std
::
endl
;
...
...
@@ -443,6 +446,7 @@ void ParseEvents(const std::vector<std::vector<Event>>& events,
void
DisableProfiler
(
EventSortingKey
sorted_key
,
const
std
::
string
&
profile_path
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
profiler_mu
);
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
// Mark the profiling stop.
Mark
(
"_stop_profiler_"
,
nullptr
);
...
...
@@ -456,7 +460,7 @@ void DisableProfiler(EventSortingKey sorted_key,
tracer
->
GenProfile
(
profile_path
);
}
g_state
=
ProfilerState
::
kDisabled
;
should_send_profile_state
=
true
;
{
should_send_profile_state
=
true
;
}
}
bool
IsProfileEnabled
()
{
return
g_state
!=
ProfilerState
::
kDisabled
;
}
...
...
@@ -466,7 +470,7 @@ void SetProfileListener() {
std
::
mt19937
rng
;
rng
.
seed
(
std
::
random_device
()());
std
::
uniform_int_distribution
<
std
::
mt19937
::
result_type
>
dist6
(
1
,
std
::
numeric_limits
<
std
::
mt19937
::
result_type
>::
max
());
1
,
std
::
numeric_limits
<
int
>::
max
());
profiler_lister_id
=
dist6
(
rng
);
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
...
...
python/paddle/fluid/profiler.py
浏览文件 @
3cb63956
...
...
@@ -16,7 +16,10 @@ import core
from
contextlib
import
contextmanager
import
os
__all__
=
[
'cuda_profiler'
,
'reset_profiler'
,
'profiler'
]
__all__
=
[
'cuda_profiler'
,
'reset_profiler'
,
'profiler'
,
'start_profiler'
,
'stop_profiler'
]
NVPROF_CONFIG
=
[
"gpustarttimestamp"
,
...
...
@@ -72,6 +75,36 @@ def reset_profiler():
core
.
reset_profiler
()
def
start_profiler
(
state
):
if
state
not
in
[
'CPU'
,
'GPU'
,
"All"
]:
raise
ValueError
(
"The state must be 'CPU' or 'GPU' or 'All'."
)
if
state
==
"GPU"
:
prof_state
=
core
.
ProfilerState
.
kCUDA
elif
state
==
"CPU"
:
prof_state
=
core
.
ProfilerState
.
kCPU
else
:
prof_state
=
core
.
ProfilerState
.
kAll
core
.
enable_profiler
(
prof_state
)
def
stop_profiler
(
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
sorted_key
=
'default'
if
sorted_key
is
None
else
sorted_key
if
sorted_key
not
in
[
'default'
,
'calls'
,
'total'
,
'max'
,
'min'
,
'ave'
]:
raise
ValueError
(
"The sorted_key must be None or in 'calls', 'total', "
"'max', 'min' and 'ave'"
)
key_map
=
{
'default'
:
core
.
EventSortingKey
.
kDefault
,
'calls'
:
core
.
EventSortingKey
.
kCalls
,
'total'
:
core
.
EventSortingKey
.
kTotal
,
'max'
:
core
.
EventSortingKey
.
kMax
,
'min'
:
core
.
EventSortingKey
.
kMin
,
'ave'
:
core
.
EventSortingKey
.
kAve
,
}
# TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True):
core
.
disable_profiler
(
key_map
[
sorted_key
],
profile_path
)
@
contextmanager
def
profiler
(
state
,
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
"""The profiler interface.
...
...
@@ -98,29 +131,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
profile_path (string) : If state == 'All', it will write a profile
proto output file.
"""
if
state
not
in
[
'CPU'
,
'GPU'
,
"All"
]:
raise
ValueError
(
"The state must be 'CPU' or 'GPU' or 'All'."
)
if
state
==
"GPU"
:
prof_state
=
core
.
ProfilerState
.
kCUDA
elif
state
==
"CPU"
:
prof_state
=
core
.
ProfilerState
.
kCPU
else
:
prof_state
=
core
.
ProfilerState
.
kAll
core
.
enable_profiler
(
prof_state
)
start_profiler
(
state
)
yield
sorted_key
=
'default'
if
sorted_key
is
None
else
sorted_key
if
sorted_key
not
in
[
'default'
,
'calls'
,
'total'
,
'max'
,
'min'
,
'ave'
]:
raise
ValueError
(
"The sorted_key must be None or in 'calls', 'total', "
"'max', 'min' and 'ave'"
)
key_map
=
{
'default'
:
core
.
EventSortingKey
.
kDefault
,
'calls'
:
core
.
EventSortingKey
.
kCalls
,
'total'
:
core
.
EventSortingKey
.
kTotal
,
'max'
:
core
.
EventSortingKey
.
kMax
,
'min'
:
core
.
EventSortingKey
.
kMin
,
'ave'
:
core
.
EventSortingKey
.
kAve
,
}
# TODO(qingqing) : redirect C++ ostream to Python stream.
# with core.ostream_redirect(stdout=True, stderr=True):
core
.
disable_profiler
(
key_map
[
sorted_key
],
profile_path
)
stop_profiler
(
sorted_key
,
profile_path
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录