Commit 52d43ca2 (unverified), authored Jun 30, 2022 by chenjian, committed via GitHub on Jun 30, 2022.
Add statistic code for memory (#43960)

* add code
* add unit test
Parent commit: 35ca3009
Showing 4 changed files with 472 additions and 37 deletions (+472 −37):
paddle/fluid/platform/profiler.cc                                  +269  -36
paddle/fluid/platform/profiler/mem_tracing.h                        +12   -0
python/paddle/fluid/tests/unittests/test_profiler_statistic.py      +39   -0
python/paddle/profiler/profiler_statistic.py                       +152   -1
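For context on how these numbers reach users, here is a minimal usage sketch (not part of this commit) that collects a profile and prints the report assembled by profiler_statistic._build_table. It assumes the public paddle.profiler API of this release and a toy Conv2D workload; after this change the report gains a "Memory Summary - <place>" section whenever memory events were recorded.

import paddle
import paddle.profiler as profiler

# Toy workload (illustrative); any model code would do.
conv = paddle.nn.Conv2D(3, 8, 3)
x = paddle.randn([4, 3, 32, 32])

prof = profiler.Profiler(targets=[profiler.ProfilerTarget.CPU])
prof.start()
for _ in range(4):
    y = conv(x)
    prof.step()
prof.stop()

# summary() renders the statistic tables built by profiler_statistic.
prof.summary()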
paddle/fluid/platform/profiler.cc
@@ -308,6 +308,10 @@ RecordOpInfoSupplement::RecordOpInfoSupplement(
      PosixInNsec(), type, input_shapes, dtypes, callstack);
}

std::map<const char *, std::map<uint64_t, std::vector<uint64_t>>>
    RecordMemEvent::size_cache;
std::map<const char *, std::map<uint64_t, bool>> RecordMemEvent::has_initialized;

RecordMemEvent::RecordMemEvent(const void *ptr,
                               const phi::Place &place,
                               size_t size,
@@ -323,17 +327,75 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
  uint64_t peak_reserved = 0;  // 0 means keep the same as before
  if (platform::is_cpu_place(place) ||
      platform::is_cuda_pinned_place(place)) {
    current_allocated =
        HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
    peak_allocated =
        HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] = true;
    } else {
      current_allocated =
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
      peak_allocated =
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0] =
          current_allocated;
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2] =
          peak_allocated;
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
    }
  } else {
    current_allocated =
        DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
    peak_allocated =
        DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] = true;
    } else {
      current_allocated =
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
      peak_allocated =
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0] =
          current_allocated;
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2] =
          peak_allocated;
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
    }
  }
  platform::MemEvenRecorder::Instance().PushMemRecord(ptr, place, size,
@@ -349,17 +411,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
  uint64_t peak_allocated = 0;  // 0 means keep the same as before
  if (platform::is_cpu_place(place) ||
      platform::is_cuda_pinned_place(place)) {
    current_reserved =
        HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
    peak_reserved =
        HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] = true;
    } else {
      current_reserved =
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
      peak_reserved =
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1] =
          current_reserved;
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3] =
          peak_reserved;
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
    }
  } else {
    current_reserved =
        DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
    peak_reserved =
        DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] = true;
    } else {
      current_reserved =
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
      peak_reserved =
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1] =
          current_reserved;
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3] =
          peak_reserved;
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
    }
  }
  platform::MemEvenRecorder::Instance().PushMemRecord(ptr, place, size,
@@ -375,17 +494,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
  uint64_t peak_reserved = 0;  // 0 means keep the same as before
  if (platform::is_cpu_place(place) ||
      platform::is_cuda_pinned_place(place)) {
    current_allocated =
        HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
    peak_allocated =
        HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] = true;
    } else {
      current_allocated =
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
      peak_allocated =
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0] =
          current_allocated;
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2] =
          peak_allocated;
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
    }
  } else {
    current_allocated =
        DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
    peak_allocated =
        DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] = true;
    } else {
      current_allocated =
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId());
      peak_allocated =
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId());
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0] =
          current_allocated;
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2] =
          peak_allocated;
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
    }
  }
  platform::MemEvenRecorder::Instance().PopMemRecord(ptr, place, size,
@@ -401,17 +577,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
  uint64_t peak_allocated = 0;  // 0 means keep the same as before
  if (platform::is_cpu_place(place) ||
      platform::is_cuda_pinned_place(place)) {
    current_reserved =
        HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
    peak_reserved =
        HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()].push_back(
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["cpu"][place.GetDeviceId()] = true;
    } else {
      current_reserved =
          HOST_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
      peak_reserved =
          HOST_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][1] =
          current_reserved;
      RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][3] =
          peak_reserved;
      current_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][0];
      peak_allocated =
          RecordMemEvent::size_cache["cpu"][place.GetDeviceId()][2];
    }
  } else {
    current_reserved =
        DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
    peak_reserved =
        DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
    if (RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] == false) {
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Allocated, place.GetDeviceId()));
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()].push_back(
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId()));
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      current_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
      peak_reserved =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3];
      RecordMemEvent::has_initialized["gpu"][place.GetDeviceId()] = true;
    } else {
      current_reserved =
          DEVICE_MEMORY_STAT_CURRENT_VALUE(Reserved, place.GetDeviceId());
      peak_reserved =
          DEVICE_MEMORY_STAT_PEAK_VALUE(Reserved, place.GetDeviceId());
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][1] =
          current_reserved;
      RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][3] =
          peak_reserved;
      current_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][0];
      peak_allocated =
          RecordMemEvent::size_cache["gpu"][place.GetDeviceId()][2];
    }
  }
  platform::MemEvenRecorder::Instance().PopMemRecord(ptr, place, size,
paddle/fluid/platform/profiler/mem_tracing.h
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/platform/place.h"
@@ -37,6 +38,17 @@ class RecordMemEvent {
                          const Place &place,
                          size_t size,
                          const TracerMemEventType type = TracerMemEventType::Allocate);

  // size_cache: in the outer map the key is the device type, 'cpu' or 'gpu';
  // in the inner map the key is the device id. The values record memory sizes
  // for current_allocated, current_reserved, peak_allocated and peak_reserved.
  // has_initialized: flags denoting whether the memory cache for a device has
  // been collected at least once.
  static std::map<const char *, std::map<uint64_t, std::vector<uint64_t>>>
      size_cache;
  static std::map<const char *, std::map<uint64_t, bool>> has_initialized;
};
}  // namespace platform
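The size_cache declared above keeps four counters per (device type, device id) pair, indexed in the fixed order that profiler.cc relies on. A minimal Python sketch of that bookkeeping, purely illustrative and not part of the commit:

from collections import defaultdict

# Mirrors RecordMemEvent::size_cache / has_initialized:
#   device type ('cpu' or 'gpu') -> device id -> [current_allocated,
#   current_reserved, peak_allocated, peak_reserved] at indices 0..3.
size_cache = defaultdict(lambda: defaultdict(lambda: [0, 0, 0, 0]))
has_initialized = defaultdict(lambda: defaultdict(bool))

def update(dev_type, dev_id, current_allocated=None, current_reserved=None,
           peak_allocated=None, peak_reserved=None):
    # None plays the role of "0 means keep the same as before" in the C++ code.
    cache = size_cache[dev_type][dev_id]
    for idx, value in enumerate((current_allocated, current_reserved,
                                 peak_allocated, peak_reserved)):
        if value is not None:
            cache[idx] = value
    has_initialized[dev_type][dev_id] = True
    return tuple(cache)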
python/paddle/fluid/tests/unittests/test_profiler_statistic.py
@@ -16,6 +16,7 @@ import unittest
import paddle
import paddle.profiler as profiler
import paddle.profiler.profiler_statistic as profiler_statistic


class HostPythonNode:
@@ -30,6 +31,7 @@ class HostPythonNode:
        self.children_node = []
        self.runtime_node = []
        self.device_node = []
        self.mem_node = []


class DevicePythonNode:
@@ -45,6 +47,22 @@ class DevicePythonNode:
        self.stream_id = stream_id


class MemPythonNode:
    def __init__(self, timestamp_ns, addr, type, process_id, thread_id,
                 increase_bytes, place, current_allocated, current_reserved,
                 peak_allocated, peak_reserved):
        self.timestamp_ns = timestamp_ns
        self.addr = addr
        self.type = type
        self.process_id = process_id
        self.thread_id = thread_id
        self.increase_bytes = increase_bytes
        self.place = place
        self.current_allocated = current_allocated
        self.current_reserved = current_reserved
        self.peak_allocated = peak_allocated
        self.peak_reserved = peak_reserved


class TestProfilerStatistic(unittest.TestCase):
    def test_statistic_case1(self):
@@ -89,6 +107,9 @@ class TestProfilerStatistic(unittest.TestCase):
        conv2d_compute = HostPythonNode('conv2d::compute',
                                        profiler.TracerEventType.OperatorInner,
                                        30, 40, 1000, 1001)
        conv2d_compute.mem_node.append(
            MemPythonNode(33, 0, profiler_statistic.TracerMemEventType.Allocate,
                          1000, 1001, 20, 'place(gpu:0)', 200, 200, 800, 800))
        conv2d_launchkernel = HostPythonNode('cudalaunchkernel',
                                             profiler.TracerEventType.CudaRuntime,
                                             30, 35, 1000, 1001)
@@ -211,6 +232,24 @@ class TestProfilerStatistic(unittest.TestCase):
        self.assertEqual(
            event_summary.memory_manipulation_items['AsyncMemcpy'].
            general_gpu_time, 60)
        self.assertEqual(
            statistic_data.memory_summary.allocated_items['place(gpu:0)']
            ['conv2d'].allocation_count, 1)
        self.assertEqual(
            statistic_data.memory_summary.allocated_items['place(gpu:0)']
            ['conv2d'].allocation_size, 20)
        self.assertEqual(
            statistic_data.memory_summary.allocated_items['place(gpu:0)']
            ['conv2d'].increase_size, 20)
        self.assertEqual(
            statistic_data.memory_summary.allocated_items['place(gpu:0)']
            ['conv2d'].increase_size, 20)
        self.assertEqual(
            statistic_data.memory_summary.peak_allocation_values['place(gpu:0)'],
            800)
        self.assertEqual(
            statistic_data.memory_summary.peak_reserved_values['place(gpu:0)'],
            800)
        print(
            profiler.profiler_statistic._build_table(
                statistic_data,
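Assuming a Paddle source checkout with paddle importable, the extended test module above can be run on its own with the standard unittest loader, for example:

import unittest

# Assumes the current working directory is python/paddle/fluid/tests/unittests
# inside a Paddle checkout.
import test_profiler_statistic

suite = unittest.defaultTestLoader.loadTestsFromModule(test_profiler_statistic)
unittest.TextTestRunner(verbosity=2).run(suite)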
python/paddle/profiler/profiler_statistic.py
@@ -15,7 +15,7 @@ import collections
from enum import Enum
import re

from paddle.fluid.core import TracerEventType
from paddle.fluid.core import TracerEventType, TracerMemEventType

from .statistic_helper import *
@@ -603,6 +603,83 @@ class EventSummary:
                self.kernel_items[name].add_item(device_node)


class MemorySummary:
    r"""
    Analyse memory events in profiling data.
    """

    class MemoryItem:
        def __init__(self, event_name, place, memory_type='Allocated'):
            self.event_name = event_name
            self.place = place
            self.allocation_count = 0
            self.free_count = 0
            self.allocation_size = 0
            self.free_size = 0
            self.increase_size = 0
            self.memory_type = memory_type

        def add_memory_record(self, size, allocation_type):
            if allocation_type == TracerMemEventType.Allocate or \
                    allocation_type == TracerMemEventType.ReservedAllocate:
                self.allocation_count += 1
                self.allocation_size += size
            elif allocation_type == TracerMemEventType.Free or \
                    allocation_type == TracerMemEventType.ReservedFree:
                self.free_count += 1
                self.free_size -= size  # size is sign(-) when free.
            else:
                print("No corresponding type.")
            self.increase_size = self.allocation_size - self.free_size
    def __init__(self):
        self.allocated_items = collections.defaultdict(
            dict)  # for memory summary, device type: event
        self.reserved_items = collections.defaultdict(
            dict)  # for memory summary, device type: event
        self.peak_allocation_values = collections.defaultdict(int)
        self.peak_reserved_values = collections.defaultdict(int)

    def _analyse_node_memory(self, event_name, node):
        for memnode in node.mem_node:  # self mem node
            if memnode.type == TracerMemEventType.Allocate or \
                    memnode.type == TracerMemEventType.Free:
                if event_name not in self.allocated_items[memnode.place]:
                    self.allocated_items[memnode.place][
                        event_name] = MemorySummary.MemoryItem(
                            event_name, memnode.place, 'Allocated')
                self.allocated_items[memnode.place][
                    event_name].add_memory_record(memnode.increase_bytes,
                                                  memnode.type)
            elif memnode.type == TracerMemEventType.ReservedAllocate or \
                    memnode.type == TracerMemEventType.ReservedFree:
                if event_name not in self.reserved_items[memnode.place]:
                    self.reserved_items[memnode.place][
                        event_name] = MemorySummary.MemoryItem(
                            event_name, memnode.place, 'Reserved')
                self.reserved_items[memnode.place][
                    event_name].add_memory_record(memnode.increase_bytes,
                                                  memnode.type)
            self.peak_allocation_values[memnode.place] = max(
                self.peak_allocation_values[memnode.place],
                memnode.peak_allocated)
            self.peak_reserved_values[memnode.place] = max(
                self.peak_reserved_values[memnode.place], memnode.peak_reserved)
    def parse(self, nodetrees):
        r"""
        Analyse memory events in the nodetrees.
        """
        thread2hostnodes = traverse_tree(nodetrees)
        for threadid, host_nodes in thread2hostnodes.items():
            for host_node in host_nodes[1:]:  # skip root node
                if host_node.type == TracerEventType.OperatorInner:
                    continue
                if host_node.type == TracerEventType.Operator:
                    for child in host_node.children_node:
                        self._analyse_node_memory(host_node.name, child)
                self._analyse_node_memory(host_node.name, host_node)


class StatisticData:
    r"""
    Hold all analysed results.
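As a quick check of the MemoryItem arithmetic above: free events carry a negative size, so free_size accumulates the freed magnitude and increase_size tracks the net growth (allocation_size - free_size). A self-contained sketch, using a simplified stand-in for TracerMemEventType rather than the real enum:

from enum import Enum

class MemEventType(Enum):  # stand-in for paddle.fluid.core.TracerMemEventType
    Allocate = 0
    Free = 1

class Item:  # condensed version of MemorySummary.MemoryItem
    def __init__(self):
        self.allocation_count = self.free_count = 0
        self.allocation_size = self.free_size = self.increase_size = 0

    def add_memory_record(self, size, event_type):
        if event_type == MemEventType.Allocate:
            self.allocation_count += 1
            self.allocation_size += size
        elif event_type == MemEventType.Free:
            self.free_count += 1
            self.free_size -= size  # size is negative for frees
        self.increase_size = self.allocation_size - self.free_size

item = Item()
item.add_memory_record(20, MemEventType.Allocate)   # allocate 20 bytes
item.add_memory_record(-20, MemEventType.Free)      # free the same 20 bytes
assert (item.allocation_size, item.free_size, item.increase_size) == (20, 20, 0)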
@@ -614,9 +691,11 @@ class StatisticData:
        self.time_range_summary = TimeRangeSummary()
        self.event_summary = EventSummary()
        self.distributed_summary = DistributedSummary()
        self.memory_summary = MemorySummary()
        self.time_range_summary.parse(node_trees)
        self.event_summary.parse(node_trees)
        self.distributed_summary.parse(node_trees)
        self.memory_summary.parse(node_trees)


def _build_table(statistic_data,
@@ -1498,4 +1577,76 @@ def _build_table(statistic_data,
    append('')
    append('')

    ###### Print Memory Summary Report ######
    if statistic_data.memory_summary.allocated_items or \
            statistic_data.memory_summary.reserved_items:
        for device_type, memory_events in statistic_data.memory_summary.allocated_items.items():
            all_row_values = []
            sorted_items = sorted(memory_events.items(),
                                  key=lambda x: x[1].increase_size,
                                  reverse=True)
            for event_name, item in sorted_items:
                row_values = [
                    event_name, item.memory_type, item.allocation_count,
                    item.free_count, item.allocation_size, item.free_size,
                    item.increase_size
                ]
                all_row_values.append(row_values)

            sorted_reserved_items = sorted(
                statistic_data.memory_summary.reserved_items[device_type].items(),
                key=lambda x: x[1].increase_size,
                reverse=True)
            for event_name, item in sorted_reserved_items:
                row_values = [
                    event_name, item.memory_type, item.allocation_count,
                    item.free_count, item.allocation_size, item.free_size,
                    item.increase_size
                ]
                all_row_values.append(row_values)

            # Calculate the column width
            headers = [
                'Name', 'Type', 'Allocation Count', 'Free Count',
                'Allocation Size', 'Free Size', 'Increased Size'
            ]
            row_format_list = [""]
            header_sep_list = [""]
            line_length_list = [-SPACING_SIZE]
            name_column_width = 50
            number_column_width = 15
            add_column(name_column_width)
            add_column(12)
            add_column(number_column_width)
            add_column(number_column_width)
            add_column(number_column_width)
            add_column(number_column_width)
            add_column(number_column_width)

            row_format = row_format_list[0]
            header_sep = header_sep_list[0]
            line_length = line_length_list[0]

            # construct table string
            append(
                add_title(line_length,
                          "Memory Summary - {}".format(device_type)))
            append('Peak Allocated Memory: {}'.format(
                statistic_data.memory_summary.
                peak_allocation_values[device_type]))
            append('Peak Reserved Memory: {}'.format(
                statistic_data.memory_summary.peak_reserved_values[device_type]))
            append(header_sep)
            append(row_format.format(*headers))
            append(header_sep)
            for row_values in all_row_values:
                if isinstance(row_values, str):
                    append(add_title(line_length, row_values))
                else:
                    append(row_format.format(*row_values))
            append('')
            append('')
    return ''.join(result)