Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
0358fd01
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0358fd01
编写于
1月 23, 2018
作者:
D
dangqingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine profiler code.
上级
05a733b0
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
28 addition
and
26 deletion
+28
-26
paddle/framework/executor.cc
paddle/framework/executor.cc
+1
-2
paddle/platform/profiler.cc
paddle/platform/profiler.cc
+10
-8
paddle/platform/profiler.h
paddle/platform/profiler.h
+6
-6
python/paddle/v2/fluid/profiler.py
python/paddle/v2/fluid/profiler.py
+5
-4
python/paddle/v2/fluid/tests/test_profiler.py
python/paddle/v2/fluid/tests/test_profiler.py
+6
-6
未找到文件。
paddle/framework/executor.cc
浏览文件 @
0358fd01
...
...
@@ -120,8 +120,7 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
VLOG
(
3
)
<<
op
->
DebugStringEx
(
local_scope
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
dev_ctx
=
const_cast
<
platform
::
DeviceContext
*>
(
pool
.
Get
(
place_
));
platform
::
RecordEvent
record_event
(
op
->
Type
(),
dev_ctx
);
platform
::
RecordEvent
record_event
(
op
->
Type
(),
pool
.
Get
(
place_
));
op
->
Run
(
*
local_scope
,
place_
);
if
(
FLAGS_do_memory_benchmark
)
{
...
...
paddle/platform/profiler.cc
浏览文件 @
0358fd01
...
...
@@ -47,16 +47,16 @@ inline uint64_t GetTimeInNsec() {
}
Event
::
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
DeviceContext
*
dev_ctx
)
const
DeviceContext
*
dev_ctx
)
:
kind_
(
kind
),
name_
(
name
),
thread_id_
(
thread_id
),
has_cuda_
(
false
)
{
#ifdef PADDLE_WITH_CUDA
has_cuda_
=
dev_ctx
?
platform
::
is_gpu_place
(
dev_ctx
->
GetPlace
())
:
false
;
if
(
has_cuda_
)
{
auto
*
cuda_dev_ctx
=
static_cast
<
const
CUDADeviceContext
*>
(
dev_ctx
);
if
(
cuda_dev_ctx
)
{
PADDLE_ENFORCE
(
cudaGetDevice
(
&
device_
));
PADDLE_ENFORCE
(
cudaEventCreate
(
&
event_
));
auto
stream
=
cuda_dev_ctx
->
stream
();
PADDLE_ENFORCE
(
cudaEventRecord
(
event_
,
stream
));
has_cuda_
=
true
;
}
#endif
cpu_ns_
=
GetTimeInNsec
();
...
...
@@ -114,19 +114,20 @@ inline EventList& GetEventList() {
return
*
g_event_list
;
}
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
void
Mark
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kMark
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
void
PushEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPushRange
,
name
,
g_thread_id
,
dev_ctx
);
}
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
GetEventList
().
Record
(
EventKind
::
kPopRange
,
name
,
g_thread_id
,
dev_ctx
);
}
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
)
{
RecordEvent
::
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
)
{
if
(
g_state
==
ProfilerState
::
kDisabled
)
return
;
dev_ctx_
=
dev_ctx
;
name_
=
name
;
...
...
@@ -155,6 +156,7 @@ void EnableProfiler(ProfilerState state) {
DeviceContext
*
dev_ctx
=
new
CUDADeviceContext
(
CUDAPlace
(
d
));
Mark
(
"_cuda_startup_"
,
dev_ctx
);
dev_ctx
->
Wait
();
delete
dev_ctx
;
});
}
}
...
...
paddle/platform/profiler.h
浏览文件 @
0358fd01
...
...
@@ -29,7 +29,7 @@ class Event {
// The DeviceContext is used to get the cuda stream.
// If CPU profiling mode, can pass nullptr.
Event
(
EventKind
kind
,
std
::
string
name
,
uint32_t
thread_id
,
DeviceContext
*
dev_ctx
);
const
DeviceContext
*
dev_ctx
);
std
::
string
kind
()
const
;
std
::
string
name
()
const
{
return
name_
;
}
...
...
@@ -95,19 +95,19 @@ enum ProfilerState {
kCUDA
,
// GPU profiling state
};
void
Mark
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
void
Mark
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
void
PushEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
void
PushEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
void
PopEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
void
PopEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
struct
RecordEvent
{
explicit
RecordEvent
(
const
std
::
string
&
name
,
DeviceContext
*
dev_ctx
);
explicit
RecordEvent
(
const
std
::
string
&
name
,
const
DeviceContext
*
dev_ctx
);
~
RecordEvent
();
// The device context is used by Event to get the current cuda stream.
DeviceContext
*
dev_ctx_
;
const
DeviceContext
*
dev_ctx_
;
// Event name
std
::
string
name_
;
};
...
...
python/paddle/v2/fluid/profiler.py
浏览文件 @
0358fd01
...
...
@@ -81,10 +81,11 @@ def profiler(state, sorted_key=None):
to add more records.
Args:
state (string) : The profiling state, It should be 'CPU' or 'GPU'.
Although users may define CPUPlace or CUDAPlace when using Fluid,
the profiler doesn't get the state based on this Place. Since the
implementation is an independent part from the Fluid.
state (string) : The profiling state, which should be 'CPU' or 'GPU',
telling the profiler to use CPU timer or GPU timer for profiling.
Although users may have already specified the execution place
(CPUPlace/CUDAPlace) in the beginning, for flexibility the profiler
would not inherit this place.
sorted_key (string) : If None, the profiling results will be printed
in the order of first end time of events. Otherwise, the profiling
results will be sorted by this flag. This flag should be one
...
...
python/paddle/v2/fluid/tests/test_profiler.py
浏览文件 @
0358fd01
...
...
@@ -41,8 +41,8 @@ class TestProfiler(unittest.TestCase):
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
'data'
:
input
})
os
.
remove
(
output_file
)
def
profiler
(
self
,
state
):
if
state
==
'GPU'
and
core
.
is_compile_gpu
():
def
net_
profiler
(
self
,
state
):
if
state
==
'GPU'
and
not
core
.
is_compile_gpu
():
return
startup_program
=
fluid
.
Program
()
main_program
=
fluid
.
Program
()
...
...
@@ -79,11 +79,11 @@ class TestProfiler(unittest.TestCase):
acc
=
np
.
array
(
outs
[
1
])
pass_acc
=
accuracy
.
eval
(
exe
)
def
not_
test_cpu_profiler
(
self
):
self
.
profiler
(
'CPU'
)
def
test_cpu_profiler
(
self
):
self
.
net_
profiler
(
'CPU'
)
def
not_
test_cuda_profiler
(
self
):
self
.
profiler
(
'GPU'
)
def
test_cuda_profiler
(
self
):
self
.
net_
profiler
(
'GPU'
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录