Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
52d43ca2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2305
Star
20932
Fork
5423
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
52d43ca2
编写于
6月 30, 2022
作者:
C
chenjian
提交者:
GitHub
6月 30, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add statistic code for memory (#43960)
* add code * add unit test
上级
35ca3009
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
472 addition
and
37 deletion
+472
-37
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+269
-36
paddle/fluid/platform/profiler/mem_tracing.h
paddle/fluid/platform/profiler/mem_tracing.h
+12
-0
python/paddle/fluid/tests/unittests/test_profiler_statistic.py
...n/paddle/fluid/tests/unittests/test_profiler_statistic.py
+39
-0
python/paddle/profiler/profiler_statistic.py
python/paddle/profiler/profiler_statistic.py
+152
-1
未找到文件。
paddle/fluid/platform/profiler.cc
浏览文件 @
52d43ca2
...
...
@@ -308,6 +308,10 @@ RecordOpInfoSupplement::RecordOpInfoSupplement(
PosixInNsec
(),
type
,
input_shapes
,
dtypes
,
callstack
);
}
// Out-of-class definitions of RecordMemEvent's static caches.
//
// size_cache[device_type][device_id] holds the values appended by the
// RecordMemEvent constructor, in the order: current_allocated,
// current_reserved, peak_allocated, peak_reserved.
// NOTE(review): the outer key is a const char*, so std::map orders and matches
// keys by pointer value rather than by string contents -- confirm that every
// lookup ("cpu" / "gpu") resolves to the same literal object.
std::map<const char*, std::map<uint64_t, std::vector<uint64_t>>>
    RecordMemEvent::size_cache;

// has_initialized[device_type][device_id] is set to true by the constructor
// once the matching size_cache entry has been filled in.
std::map<const char*, std::map<uint64_t, bool>> RecordMemEvent::has_initialized;
RecordMemEvent
::
RecordMemEvent
(
const
void
*
ptr
,
const
phi
::
Place
&
place
,
size_t
size
,
...
...
@@ -323,17 +327,75 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
uint64_t
peak_reserved
=
0
;
// 0 means keep the same as before
if
(
platform
::
is_cpu_place
(
place
)
||
platform
::
is_cuda_pinned_place
(
place
))
{
current_allocated
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_allocated
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
]
=
current_allocated
;
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
]
=
peak_allocated
;
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
}
}
else
{
current_allocated
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_allocated
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
]
=
current_allocated
;
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
]
=
peak_allocated
;
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
}
}
platform
::
MemEvenRecorder
::
Instance
().
PushMemRecord
(
ptr
,
place
,
size
,
...
...
@@ -349,17 +411,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
uint64_t
peak_allocated
=
0
;
// 0 means keep the same as before
if
(
platform
::
is_cpu_place
(
place
)
||
platform
::
is_cuda_pinned_place
(
place
))
{
current_reserved
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_reserved
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
]
=
current_reserved
;
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
]
=
peak_reserved
;
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
}
}
else
{
current_reserved
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_reserved
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
]
=
current_reserved
;
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
]
=
peak_reserved
;
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
}
}
platform
::
MemEvenRecorder
::
Instance
().
PushMemRecord
(
ptr
,
place
,
size
,
...
...
@@ -375,17 +494,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
uint64_t
peak_reserved
=
0
;
// 0 means keep the same as before
if
(
platform
::
is_cpu_place
(
place
)
||
platform
::
is_cuda_pinned_place
(
place
))
{
current_allocated
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_allocated
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
]
=
current_allocated
;
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
]
=
peak_allocated
;
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
}
}
else
{
current_allocated
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_allocated
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
());
peak_allocated
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
]
=
current_allocated
;
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
]
=
peak_allocated
;
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
}
}
platform
::
MemEvenRecorder
::
Instance
().
PopMemRecord
(
ptr
,
place
,
size
,
...
...
@@ -401,17 +577,74 @@ RecordMemEvent::RecordMemEvent(const void *ptr,
uint64_t
peak_allocated
=
0
;
// 0 means keep the same as before
if
(
platform
::
is_cpu_place
(
place
)
||
platform
::
is_cuda_pinned_place
(
place
))
{
current_reserved
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()].
push_back
(
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"cpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_reserved
=
HOST_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
HOST_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
1
]
=
current_reserved
;
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
3
]
=
peak_reserved
;
current_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
0
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"cpu"
][
place
.
GetDeviceId
()][
2
];
}
}
else
{
current_reserved
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
if
(
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
==
false
)
{
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Allocated
,
place
.
GetDeviceId
()));
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()].
push_back
(
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
()));
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
current_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
peak_reserved
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
];
RecordMemEvent
::
has_initialized
[
"gpu"
][
place
.
GetDeviceId
()]
=
true
;
}
else
{
current_reserved
=
DEVICE_MEMORY_STAT_CURRENT_VALUE
(
Reserved
,
place
.
GetDeviceId
());
peak_reserved
=
DEVICE_MEMORY_STAT_PEAK_VALUE
(
Reserved
,
place
.
GetDeviceId
());
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
1
]
=
current_reserved
;
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
3
]
=
peak_reserved
;
current_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
0
];
peak_allocated
=
RecordMemEvent
::
size_cache
[
"gpu"
][
place
.
GetDeviceId
()][
2
];
}
}
platform
::
MemEvenRecorder
::
Instance
().
PopMemRecord
(
ptr
,
place
,
size
,
...
...
paddle/fluid/platform/profiler/mem_tracing.h
浏览文件 @
52d43ca2
...
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <map>
#include <string>
#include "paddle/fluid/platform/place.h"
...
...
@@ -37,6 +38,17 @@ class RecordMemEvent {
const
Place
&
place
,
size_t
size
,
const
TracerMemEventType
type
=
TracerMemEventType
::
Allocate
);
// size_cache: In the outer map, the key is the device type, 'cpu' or 'gpu',
// and in the inner map, the key is the device id.
// Values record memory sizes for current_allocated, current_reserved,
// peak_allocated and peak_reserved.
// has_initialized: Flags denoting whether the memory cache for a given device
// has been collected at least once.
static
std
::
map
<
const
char
*
,
std
::
map
<
uint64_t
,
std
::
vector
<
uint64_t
>>>
size_cache
;
static
std
::
map
<
const
char
*
,
std
::
map
<
uint64_t
,
bool
>>
has_initialized
;
};
}
// namespace platform
...
...
python/paddle/fluid/tests/unittests/test_profiler_statistic.py
浏览文件 @
52d43ca2
...
...
@@ -16,6 +16,7 @@ import unittest
import
paddle
import
paddle.profiler
as
profiler
import
paddle.profiler.profiler_statistic
as
profiler_statistic
class
HostPythonNode
:
...
...
@@ -30,6 +31,7 @@ class HostPythonNode:
self
.
children_node
=
[]
self
.
runtime_node
=
[]
self
.
device_node
=
[]
self
.
mem_node
=
[]
class
DevicePythonNode
:
...
...
@@ -45,6 +47,22 @@ class DevicePythonNode:
self
.
stream_id
=
stream_id
class MemPythonNode:
    """Plain Python stand-in for a profiler memory-event node used by the tests.

    Every constructor argument is stored unchanged on the instance under an
    attribute of the same name.
    """

    def __init__(self, timestamp_ns, addr, type, process_id, thread_id,
                 increase_bytes, place, current_allocated, current_reserved,
                 peak_allocated, peak_reserved):
        # `type` shadows the builtin, but it is part of the existing call
        # signature and attribute name, so it is kept as-is.
        self.timestamp_ns = timestamp_ns
        self.addr = addr
        self.type = type
        self.process_id = process_id
        self.thread_id = thread_id
        self.increase_bytes = increase_bytes
        self.place = place
        self.current_allocated = current_allocated
        self.current_reserved = current_reserved
        self.peak_allocated = peak_allocated
        self.peak_reserved = peak_reserved
class
TestProfilerStatistic
(
unittest
.
TestCase
):
def
test_statistic_case1
(
self
):
...
...
@@ -89,6 +107,9 @@ class TestProfilerStatistic(unittest.TestCase):
conv2d_compute
=
HostPythonNode
(
'conv2d::compute'
,
profiler
.
TracerEventType
.
OperatorInner
,
30
,
40
,
1000
,
1001
)
conv2d_compute
.
mem_node
.
append
(
MemPythonNode
(
33
,
0
,
profiler_statistic
.
TracerMemEventType
.
Allocate
,
1000
,
1001
,
20
,
'place(gpu:0)'
,
200
,
200
,
800
,
800
))
conv2d_launchkernel
=
HostPythonNode
(
'cudalaunchkernel'
,
profiler
.
TracerEventType
.
CudaRuntime
,
30
,
35
,
1000
,
1001
)
...
...
@@ -211,6 +232,24 @@ class TestProfilerStatistic(unittest.TestCase):
self
.
assertEqual
(
event_summary
.
memory_manipulation_items
[
'AsyncMemcpy'
].
general_gpu_time
,
60
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
allocated_items
[
'place(gpu:0)'
]
[
'conv2d'
].
allocation_count
,
1
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
allocated_items
[
'place(gpu:0)'
]
[
'conv2d'
].
allocation_size
,
20
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
allocated_items
[
'place(gpu:0)'
]
[
'conv2d'
].
increase_size
,
20
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
allocated_items
[
'place(gpu:0)'
]
[
'conv2d'
].
increase_size
,
20
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
peak_allocation_values
[
'place(gpu:0)'
],
800
)
self
.
assertEqual
(
statistic_data
.
memory_summary
.
peak_reserved_values
[
'place(gpu:0)'
],
800
)
print
(
profiler
.
profiler_statistic
.
_build_table
(
statistic_data
,
...
...
python/paddle/profiler/profiler_statistic.py
浏览文件 @
52d43ca2
...
...
@@ -15,7 +15,7 @@ import collections
from
enum
import
Enum
import
re
from
paddle.fluid.core
import
TracerEventType
from
paddle.fluid.core
import
TracerEventType
,
TracerMemEventType
from
.statistic_helper
import
*
...
...
@@ -603,6 +603,83 @@ class EventSummary:
self
.
kernel_items
[
name
].
add_item
(
device_node
)
class MemorySummary:
    r"""
    Analyse memory events in profiling data.

    Attributes:
        allocated_items: place -> {event name -> MemoryItem} for
            Allocate/Free events.
        reserved_items: place -> {event name -> MemoryItem} for
            ReservedAllocate/ReservedFree events.
        peak_allocation_values: place -> largest ``peak_allocated`` seen.
        peak_reserved_values: place -> largest ``peak_reserved`` seen.
    """

    class MemoryItem:
        r"""
        Running allocation/free totals for one event name on one place.
        """

        def __init__(self, event_name, place, memory_type='Allocated'):
            self.event_name = event_name
            self.place = place
            self.allocation_count = 0
            self.free_count = 0
            self.allocation_size = 0
            self.free_size = 0
            self.increase_size = 0
            self.memory_type = memory_type

        def add_memory_record(self, size, allocation_type):
            r"""
            Fold one memory event of ``size`` bytes into the totals.

            ``allocation_type`` is compared against the TracerMemEventType
            members; any other value only prints a notice.
            """
            if (allocation_type == TracerMemEventType.Allocate
                    or allocation_type == TracerMemEventType.ReservedAllocate):
                self.allocation_count += 1
                self.allocation_size += size
            elif (allocation_type == TracerMemEventType.Free
                  or allocation_type == TracerMemEventType.ReservedFree):
                self.free_count += 1
                self.free_size -= size  # size is sign(-) when free.
            else:
                print("No corresponding type.")
            self.increase_size = self.allocation_size - self.free_size

    def __init__(self):
        # for memory summary, device type: event
        self.allocated_items = collections.defaultdict(dict)
        # for memory summary, device type: event
        self.reserved_items = collections.defaultdict(dict)
        self.peak_allocation_values = collections.defaultdict(int)
        self.peak_reserved_values = collections.defaultdict(int)

    def _analyse_node_memory(self, event_name, node):
        r"""
        Attribute every memory node attached to ``node`` to ``event_name``.
        """
        for memnode in node.mem_node:  # self mem node
            if (memnode.type == TracerMemEventType.Allocate
                    or memnode.type == TracerMemEventType.Free):
                if event_name not in self.allocated_items[memnode.place]:
                    self.allocated_items[memnode.place][
                        event_name] = MemorySummary.MemoryItem(
                            event_name, memnode.place, 'Allocated')
                self.allocated_items[memnode.place][
                    event_name].add_memory_record(memnode.increase_bytes,
                                                  memnode.type)
            elif (memnode.type == TracerMemEventType.ReservedAllocate
                  or memnode.type == TracerMemEventType.ReservedFree):
                if event_name not in self.reserved_items[memnode.place]:
                    self.reserved_items[memnode.place][
                        event_name] = MemorySummary.MemoryItem(
                            event_name, memnode.place, 'Reserved')
                self.reserved_items[memnode.place][
                    event_name].add_memory_record(memnode.increase_bytes,
                                                  memnode.type)
            # Peaks are tracked per place for every memory node, whatever
            # its event type.
            self.peak_allocation_values[memnode.place] = max(
                self.peak_allocation_values[memnode.place],
                memnode.peak_allocated)
            self.peak_reserved_values[memnode.place] = max(
                self.peak_reserved_values[memnode.place],
                memnode.peak_reserved)

    def parse(self, nodetrees):
        r"""
        Analyse memory events in the nodetrees.
        """
        thread2hostnodes = traverse_tree(nodetrees)
        for host_nodes in thread2hostnodes.values():
            for host_node in host_nodes[1:]:  # skip root node
                # OperatorInner nodes are not analysed on their own; when they
                # are direct children of an Operator node they are covered by
                # the loop below.
                if host_node.type == TracerEventType.OperatorInner:
                    continue
                if host_node.type == TracerEventType.Operator:
                    # Memory events of direct children are charged to the
                    # operator's own name.
                    for child in host_node.children_node:
                        self._analyse_node_memory(host_node.name, child)
                self._analyse_node_memory(host_node.name, host_node)
class
StatisticData
:
r
"""
Hold all analysed results.
...
...
@@ -614,9 +691,11 @@ class StatisticData:
self
.
time_range_summary
=
TimeRangeSummary
()
self
.
event_summary
=
EventSummary
()
self
.
distributed_summary
=
DistributedSummary
()
self
.
memory_summary
=
MemorySummary
()
self
.
time_range_summary
.
parse
(
node_trees
)
self
.
event_summary
.
parse
(
node_trees
)
self
.
distributed_summary
.
parse
(
node_trees
)
self
.
memory_summary
.
parse
(
node_trees
)
def
_build_table
(
statistic_data
,
...
...
@@ -1498,4 +1577,76 @@ def _build_table(statistic_data,
append
(
''
)
append
(
''
)
###### Print Memory Summary Report ######
if
statistic_data
.
memory_summary
.
allocated_items
or
statistic_data
.
memory_summary
.
reserved_items
:
for
device_type
,
memory_events
in
statistic_data
.
memory_summary
.
allocated_items
.
items
(
):
all_row_values
=
[]
sorted_items
=
sorted
(
memory_events
.
items
(),
key
=
lambda
x
:
x
[
1
].
increase_size
,
reverse
=
True
)
for
event_name
,
item
in
sorted_items
:
row_values
=
[
event_name
,
item
.
memory_type
,
item
.
allocation_count
,
item
.
free_count
,
item
.
allocation_size
,
item
.
free_size
,
item
.
increase_size
]
all_row_values
.
append
(
row_values
)
sorted_reserved_items
=
sorted
(
statistic_data
.
memory_summary
.
reserved_items
[
device_type
].
items
(),
key
=
lambda
x
:
x
[
1
].
increase_size
,
reverse
=
True
)
for
event_name
,
item
in
sorted_reserved_items
:
row_values
=
[
event_name
,
item
.
memory_type
,
item
.
allocation_count
,
item
.
free_count
,
item
.
allocation_size
,
item
.
free_size
,
item
.
increase_size
]
all_row_values
.
append
(
row_values
)
# Calculate the column width
headers
=
[
'Name'
,
'Type'
,
'Allocation Count'
,
'Free Count'
,
'Allocation Size'
,
'Free Size'
,
'Increased Size'
]
row_format_list
=
[
""
]
header_sep_list
=
[
""
]
line_length_list
=
[
-
SPACING_SIZE
]
name_column_width
=
50
number_column_width
=
15
add_column
(
name_column_width
)
add_column
(
12
)
add_column
(
number_column_width
)
add_column
(
number_column_width
)
add_column
(
number_column_width
)
add_column
(
number_column_width
)
add_column
(
number_column_width
)
row_format
=
row_format_list
[
0
]
header_sep
=
header_sep_list
[
0
]
line_length
=
line_length_list
[
0
]
# construct table string
append
(
add_title
(
line_length
,
"Memory Summary - {}"
.
format
(
device_type
)))
append
(
'Peak Allocated Memory: {}'
.
format
(
statistic_data
.
memory_summary
.
peak_allocation_values
[
device_type
]))
append
(
'Peak Reserved Memory: {}'
.
format
(
statistic_data
.
memory_summary
.
peak_reserved_values
[
device_type
])
)
append
(
header_sep
)
append
(
row_format
.
format
(
*
headers
))
append
(
header_sep
)
for
row_values
in
all_row_values
:
if
isinstance
(
row_values
,
str
):
append
(
add_title
(
line_length
,
row_values
))
else
:
append
(
row_format
.
format
(
*
row_values
))
append
(
''
)
append
(
''
)
return
''
.
join
(
result
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录