Commit d2a70243, authored Jan 16, 2018 by dangqingqing
Refine profiler and expose to Python.
Parent: df9c13a8

Showing 12 changed files with 171 additions and 91 deletions (+171 -91)
cmake/external/pybind11.cmake                    +1  -1
paddle/framework/CMakeLists.txt                  +2  -1
paddle/framework/executor.cc                     +6  -0
paddle/platform/profiler.cc                      +26 -11
paddle/platform/profiler.h                       +15 -7
paddle/platform/profiler_test.cc                 +4  -6
paddle/pybind/CMakeLists.txt                     +1  -1
paddle/pybind/protobuf.cc                        +10 -60
paddle/pybind/protobuf.h                         +1  -0
paddle/pybind/pybind.cc                          +24 -3
python/paddle/v2/fluid/profiler.py               +45 -0
python/paddle/v2/fluid/tests/test_profiler.py    +36 -1
cmake/external/pybind11.cmake
@@ -26,7 +26,7 @@ ExternalProject_Add(
     extern_pybind
     ${EXTERNAL_PROJECT_LOG_ARGS}
     GIT_REPOSITORY  "https://github.com/pybind/pybind11.git"
-    GIT_TAG         "v2.1.1"
+    GIT_TAG         "v2.2.1"
     PREFIX          ${PYBIND_SOURCE_DIR}
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
paddle/framework/CMakeLists.txt
@@ -68,7 +68,8 @@ cc_library(backward SRCS backward.cc DEPS net_op)
 cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
 cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
-cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward glog lod_rank_table)
+cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
+framework_proto backward glog lod_rank_table profiler)
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
paddle/framework/executor.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "paddle/framework/lod_tensor_array.h"
 #include "paddle/framework/op_registry.h"
 #include "paddle/platform/place.h"
+#include "paddle/platform/profiler.h"
 
 DEFINE_bool(check_nan_inf, false,
             "Checking whether operator produce NAN/INF or not. It will be "
@@ -116,6 +117,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
     VLOG(3) << op->DebugStringEx(local_scope);
+
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto dev_ctx = const_cast<platform::DeviceContext*>(pool.Get(place_));
+    platform::RecordEvent record_event(op->Type(), dev_ctx);
+
     op->Run(*local_scope, place_);
     if (FLAGS_check_nan_inf) {
       for (auto& vname : op->OutputVars(true)) {
paddle/platform/profiler.cc
@@ -163,14 +163,17 @@ void EnableProfiler(ProfilerState state) {
   Mark("_start_profiler_", nullptr);
 }
 
-std::vector<std::vector<Event>> DisableProfiler() {
-  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
-                 "Can't disable profiling, since it's not starting.");
-  // Mark the profiling stop.
-  Mark("_stop_profiler_", nullptr);
-  g_state = ProfilerState::kDisabled;
-  std::vector<std::vector<Event>> result;
+void ResetProfiler() {
   std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
   for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
        ++it) {
+    (*it)->Clear();
+  }
+}
+
+std::vector<std::vector<Event>> GetAllEvents() {
+  std::lock_guard<std::mutex> guard(g_all_event_lists_mutex);
+  std::vector<std::vector<Event>> result;
+  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
+       ++it) {
     result.emplace_back((*it)->Reduce());
@@ -178,6 +181,18 @@ std::vector<std::vector<Event>> DisableProfiler() {
   return result;
 }
 
+void DisableProfiler(EventSortingKey sorted_key) {
+  PADDLE_ENFORCE(g_state != ProfilerState::kDisabled,
+                 "Can't disable profiling, since it's not starting.");
+  // Mark the profiling stop.
+  Mark("_stop_profiler_", nullptr);
+  g_state = ProfilerState::kDisabled;
+  std::vector<std::vector<Event>> all_events = GetAllEvents();
+  ParseEvents(all_events, sorted_key);
+  ResetProfiler();
+}
+
 void ParseEvents(std::vector<std::vector<Event>>& events,
                  EventSortingKey sorted_by) {
   if (g_profiler_place == "") return;
@@ -291,10 +306,10 @@ void ParseEvents(std::vector<std::vector<Event>>& events,
   }
 
   // Print report
-  PrintProfilingReport(events_table, sorted_domain, max_name_width + 4, 12);
+  PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12);
 }
 
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width) {
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width) {
   // Output header information
paddle/platform/profiler.h
@@ -84,6 +84,8 @@ struct EventList {
     return result;
   }
 
+  void Clear() { event_blocks.clear(); }
+
   std::forward_list<std::vector<Event>> event_blocks;
 };
@@ -110,12 +112,9 @@ struct RecordEvent {
   std::string name_;
 };
 
-// Enable the profiling function.
-void EnableProfiler(ProfilerState state);
-
 // Return the event list of all threads. Asummed the returned value calls
 // event_lists, event_lists[i][j] represents the j-th Event of i-th thread.
-std::vector<std::vector<Event>> DisableProfiler();
+std::vector<std::vector<Event>> GetAllEvents();
 
 // The information of each event given in the profiling report
 struct EventItem {
@@ -130,13 +129,22 @@ struct EventItem {
 // Candidate keys to sort the profiling report
 enum EventSortingKey { kDefault, kCalls, kTotal, kMin, kMax, kAve };
 
+// Enable the profiling function.
+void EnableProfiler(ProfilerState state);
+
+// Clear the g_all_event_lists, which is total event lists of all threads.
+void ResetProfiler();
+
+void DisableProfiler(EventSortingKey sorted_key);
+
 // Parse the event list and output the profiling report
 void ParseEvents(std::vector<std::vector<Event>>&,
                  EventSortingKey sorted_by = EventSortingKey::kDefault);
 
 // Print results
-void PrintProfilingReport(std::vector<std::vector<EventItem>>& events_table,
-                          std::string& sorted_domain, const size_t name_width,
-                          const size_t data_width);
+void PrintProfiler(std::vector<std::vector<EventItem>>& events_table,
+                   std::string& sorted_domain, const size_t name_width,
+                   const size_t data_width);
 
 }  // namespace platform
 }  // namespace paddle
paddle/platform/profiler_test.cc
@@ -103,18 +103,14 @@ TEST(RecordEvent, RecordEvent) {
   // Bad Usage:
   PushEvent("event_without_pop", dev_ctx);
   PopEvent("event_without_push", dev_ctx);
-  std::vector<std::vector<Event>> events = paddle::platform::DisableProfiler();
-  // Will remove parsing-related code from test later
-  ParseEvents(events, EventSortingKey::kTotal);
+  std::vector<std::vector<Event>> events = paddle::platform::GetAllEvents();
 
   int cuda_startup_count = 0;
   int start_profiler_count = 0;
-  int stop_profiler_count = 0;
   for (size_t i = 0; i < events.size(); ++i) {
     for (size_t j = 0; j < events[i].size(); ++j) {
       if (events[i][j].name() == "_cuda_startup_") ++cuda_startup_count;
       if (events[i][j].name() == "_start_profiler_") ++start_profiler_count;
-      if (events[i][j].name() == "_stop_profiler_") ++stop_profiler_count;
       if (events[i][j].name() == "push") {
         EXPECT_EQ(events[i][j + 1].name(), "pop");
 #ifdef PADDLE_WITH_CUDA
@@ -127,5 +123,7 @@ TEST(RecordEvent, RecordEvent) {
   }
   EXPECT_EQ(cuda_startup_count % 5, 0);
   EXPECT_EQ(start_profiler_count, 1);
-  EXPECT_EQ(stop_profiler_count, 1);
+
+  // Will remove parsing-related code from test later
+  DisableProfiler(EventSortingKey::kTotal);
 }
paddle/pybind/CMakeLists.txt
 if(WITH_PYTHON)
   cc_library(paddle_pybind SHARED
     SRCS pybind.cc exception.cc protobuf.cc const_value.cc
     DEPS pybind python backward proto_desc paddle_memory executor prune init
-    ${GLOB_OP_LIB})
+    profiler ${GLOB_OP_LIB})
   if(NOT APPLE AND NOT ANDROID)
     target_link_libraries(paddle_pybind rt)
paddle/pybind/protobuf.cc
@@ -21,74 +21,24 @@ limitations under the License. */
 #include "paddle/framework/program_desc.h"
 #include "paddle/framework/var_desc.h"
 
-// Cast boost::variant for PyBind.
-using boost::variant;
-
-// Copy from
-// https://github.com/pybind/pybind11/issues/576#issuecomment-269563199
 namespace pybind11 {
 namespace detail {
-// Can be replaced by a generic lambda in C++14
-struct variant_caster_visitor : public boost::static_visitor<handle> {
-  return_value_policy policy;
-  handle parent;
-
-  variant_caster_visitor(return_value_policy policy, handle parent)
-      : policy(policy), parent(parent) {}
-
-  template <class T>
-  handle operator()(T const& src) const {
-    return make_caster<T>::cast(src, policy, parent);
-  }
-};
-
-template <class Variant>
-struct variant_caster;
-
-template <template <class...> class V, class... Ts>
-struct variant_caster<V<Ts...>> {
-  using Type = V<Ts...>;
-
-  template <typename T>
-  typename std::enable_if<
-      !std::is_same<T, boost::detail::variant::void_>::value, bool>::type
-  try_load(handle src, bool convert) {
-    auto caster = make_caster<T>();
-    if (!load_success_ && caster.load(src, convert)) {
-      load_success_ = true;
-      value = cast_op<T>(caster);
-      return true;
-    }
-    return false;
-  }
-
-  template <typename T>
-  typename std::enable_if<
-      std::is_same<T, boost::detail::variant::void_>::value, bool>::type
-  try_load(handle src, bool convert) {
-    return false;
-  }
-
-  bool load(handle src, bool convert) {
-    auto unused = {false, try_load<Ts>(src, convert)...};
-    (void)(unused);
-    return load_success_;
-  }
-
-  static handle cast(Type const& src, return_value_policy policy,
-                     handle parent) {
-    variant_caster_visitor visitor(policy, parent);
-    return boost::apply_visitor(visitor, src);
-  }
-
-  PYBIND11_TYPE_CASTER(Type, _("Variant"));
-  bool load_success_{false};
-};
-
 // Add specialization for concrete variant type
 template <class... Args>
 struct type_caster<boost::variant<Args...>>
     : variant_caster<boost::variant<Args...>> {};
+
+template <>
+struct visit_helper<boost::variant> {
+  template <typename... Args>
+  static auto call(Args&&... args) -> decltype(boost::apply_visitor(args...)) {
+    return boost::apply_visitor(args...);
+  }
+};
 
 }  // namespace detail
 }  // namespace pybind11
paddle/pybind/protobuf.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <Python.h>
 #include <fstream>
 #include <vector>
+#include "paddle/platform/variant.h"
 #include "pybind11/numpy.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
paddle/pybind/pybind.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/pybind/protobuf.h"
+#include "pybind11/iostream.h"
 
 #include <mutex>  // for call_once
 #include <unordered_map>
@@ -30,6 +31,7 @@ limitations under the License. */
 #include "paddle/operators/net_op.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
+#include "paddle/platform/profiler.h"
 #include "paddle/pybind/const_value.h"
 #include "paddle/pybind/exception.h"
 #include "paddle/pybind/pybind.h"
@@ -60,8 +62,8 @@ bool IsCompileGPU() {
 #endif
 }
 
-PYBIND11_PLUGIN(core) {
-  py::module m("core", "C++ core of PaddlePaddle");
+PYBIND11_MODULE(core, m) {
+  m.doc() = "C++ core of PaddlePaddle";
 
   // using framework in this function. Since it is inside a function, it will
   // not cause namespace pollution.
@@ -481,7 +483,26 @@ All parameter, weight, gradient are variables in Paddle.
   m.def("nvprof_stop", platform::CudaProfilerStop);
 #endif
 
-  return m.ptr();
+  py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
+      .value("kDisabled", platform::ProfilerState::kDisabled)
+      .value("kCPU", platform::ProfilerState::kCPU)
+      .value("kCUDA", platform::ProfilerState::kCUDA)
+      .export_values();
+
+  py::enum_<platform::EventSortingKey>(m, "EventSortingKey", py::arithmetic())
+      .value("kDefault", platform::EventSortingKey::kDefault)
+      .value("kCalls", platform::EventSortingKey::kCalls)
+      .value("kTotal", platform::EventSortingKey::kTotal)
+      .value("kMin", platform::EventSortingKey::kMin)
+      .value("kMax", platform::EventSortingKey::kMax)
+      .value("kAve", platform::EventSortingKey::kAve)
+      .export_values();
+
+  m.def("enable_profiler", platform::EnableProfiler);
+  m.def("disable_profiler", platform::DisableProfiler);
+  m.def("reset_profiler", platform::ResetProfiler);
+
+  py::add_ostream_redirect(m, "ostream_redirect");
 }
 }  // namespace pybind
 }  // namespace paddle
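
The bindings above are the Python-facing surface this commit adds to the core module: the ProfilerState and EventSortingKey enums, enable_profiler/disable_profiler/reset_profiler, and an ostream_redirect helper so the C++ report reaches Python's stdout. A minimal sketch of driving these raw bindings directly, assuming a Paddle build that includes this commit (run_workload is a hypothetical stand-in for executor runs); the fluid.profiler wrapper added below is the intended entry point:

    import paddle.v2.fluid.core as core

    # Start collecting events; kCUDA would also time GPU kernels.
    core.enable_profiler(core.ProfilerState.kCPU)

    run_workload()         # hypothetical warm-up iterations
    core.reset_profiler()  # drop everything recorded so far
    run_workload()         # hypothetical measured iterations

    # Print the report (sorted by total time) through the redirected
    # C++ streams, then clear the event lists.
    with core.ostream_redirect(stdout=True, stderr=True):
        core.disable_profiler(core.EventSortingKey.kTotal)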
python/paddle/v2/fluid/profiler.py
@@ -49,3 +49,48 @@ def cuda_profiler(output_file, output_mode=None, config=None):
         # Disables profiler collection.
         core.nvprof_stop()
         os.remove(config_file)
+
+
+def reset_profiler():
+    core.reset_profiler()
+
+
+@contextmanager
+def profiler(state, sorted_key=None):
+    """The profiler interface.
+    Different from cuda_profiler, this fuction can be used to profile both CPU
+    and GPU program.
+
+    Args:
+        state (string) : The profiler state, It should be 'CPU' or 'GPU'.
+        sorted_key (string) : If None, the profiler results will be printed
+            without sorting. Otherwise, the profiler results will be sorted
+            by the this flag. This flag should be one of 'calls', 'total',
+            'max', 'min' or 'ave'.
+            The `calls` means sorting by the calling counter.
+            The `total` means sorting by the total execution time.
+            The `max` means sorting by the maximum execution time.
+            The `min` means sorting by the minimum execution time.
+            The `ave` means sorting by the average execution time.
+    """
+    if state not in ['CPU', 'GPU']:
+        raise ValueError("The state must be 'CPU' or 'GPU'.")
+    prof_state = core.ProfilerState.kCUDA if state == "GPU" else core.ProfilerState.kCPU
+    core.enable_profiler(prof_state)
+    yield
+
+    if sorted_key not in ['calls', 'total', 'max', 'min', 'ave']:
+        raise ValueError("The state must be in 'calls', 'total', "
+                         "'max', 'min', 'ave'")
+    sorted_key = 'default' if sorted_key is None else sorted_key
+    key_map = {
+        'default': core.EventSortingKey.kDefault,
+        'calls': core.EventSortingKey.kCalls,
+        'total': core.EventSortingKey.kTotal,
+        'max': core.EventSortingKey.kMax,
+        'min': core.EventSortingKey.kMin,
+        'ave': core.EventSortingKey.kAve,
+    }
+    with core.ostream_redirect(stdout=True, stderr=True):
+        core.disable_profiler(key_map[sorted_key])
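
Taken together with the core bindings above, the new profiler() context manager is the user-level API: enable on entry, parse, print, and reset on exit. A minimal usage sketch (train_one_step is a hypothetical stand-in for an executor run); note that, as written, the sorted_key check runs before the None default is applied, so passing one of 'calls'/'total'/'max'/'min'/'ave' explicitly is the safe choice:

    import paddle.v2.fluid.profiler as profiler

    # Profile ten iterations on CPU; the report is sorted by total time.
    with profiler.profiler('CPU', sorted_key='total'):
        for i in range(10):
            if i == 2:
                # Discard the warm-up iterations already recorded.
                profiler.reset_profiler()
            train_one_step()  # hypothetical training step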
python/paddle/v2/fluid/tests/test_profiler.py
 import unittest
+import os
 import numpy as np
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.profiler as profiler
 import paddle.v2.fluid.layers as layers
-import os
+import paddle.v2.fluid.core as core
 
 
 class TestProfiler(unittest.TestCase):
@@ -26,6 +27,40 @@ class TestProfiler(unittest.TestCase):
             exe.run(fluid.default_main_program(), feed={'data': input})
         os.remove(output_file)
 
+    def test_profiler(self):
+        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+        hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+        hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+        cost = fluid.layers.cross_entropy(input=predict, label=label)
+        avg_cost = fluid.layers.mean(x=cost)
+        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
+        opts = optimizer.minimize(avg_cost)
+        accuracy = fluid.evaluator.Accuracy(input=predict, label=label)
+
+        states = ['CPU', 'GPU'] if core.is_compile_gpu() else ['CPU']
+        for state in states:
+            place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            exe.run(fluid.default_startup_program())
+            accuracy.reset(exe)
+            with profiler.profiler(state, 'total') as prof:
+                for iter in range(10):
+                    if iter == 2:
+                        profiler.reset_profiler()
+                    x = np.random.random((32, 784)).astype("float32")
+                    y = np.random.randint(0, 10, (32, 1)).astype("int64")
+
+                    outs = exe.run(fluid.default_main_program(),
+                                   feed={'x': x,
+                                         'y': y},
+                                   fetch_list=[avg_cost] + accuracy.metrics)
+                    acc = np.array(outs[1])
+                    pass_acc = accuracy.eval(exe)
+
 
 if __name__ == '__main__':
     unittest.main()