Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
0a56a5b1
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
0a56a5b1
编写于
11月 03, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(cuda/comp_node): gets (maximum) GPU memory allocated/reserved
GitOrigin-RevId: da2cc22436022ac5187ce3d2a686cc258ac94150
上级
896a6fb0
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
173 addition
and
17 deletion
+173
-17
imperative/python/megengine/device.py
imperative/python/megengine/device.py
+60
-0
imperative/python/src/common.cpp
imperative/python/src/common.cpp
+20
-0
src/core/impl/comp_node/cuda/comp_node.cpp
src/core/impl/comp_node/cuda/comp_node.cpp
+58
-17
src/core/impl/comp_node/mem_alloc/impl.cpp
src/core/impl/comp_node/mem_alloc/impl.cpp
+9
-0
src/core/impl/comp_node/mem_alloc/impl.h
src/core/impl/comp_node/mem_alloc/impl.h
+5
-0
src/core/include/megbrain/comp_node.h
src/core/include/megbrain/comp_node.h
+17
-0
src/core/include/megbrain/comp_node/alloc.h
src/core/include/megbrain/comp_node/alloc.h
+4
-0
未找到文件。
imperative/python/megengine/device.py
浏览文件 @
0a56a5b1
...
...
@@ -25,6 +25,11 @@ __all__ = [
"set_default_device"
,
"get_mem_status_bytes"
,
"get_cuda_compute_capability"
,
"get_allocated_memory"
,
"get_reserved_memory"
,
"get_max_reserved_memory"
,
"get_max_allocated_memory"
,
"reset_max_memory_stats"
,
"set_prealloc_config"
,
"coalesce_free_memory"
,
"DeviceType"
,
...
...
@@ -157,6 +162,61 @@ def get_cuda_compute_capability(device: int, device_type=DeviceType.CUDA) -> int
return
_get_cuda_compute_capability
(
device
,
device_type
)
def get_allocated_memory(device: Optional[str] = None):
    r"""Reports how many bytes tensors currently occupy on a computing device.

    MegEngine executes asynchronously, so call ``megengine._full_sync`` before
    this function when an accurate value is required.

    Args:
        device: device to query; the default device is used when ``None``.
    """
    target = get_default_device() if device is None else device
    return CompNode(target).get_used_memory
def get_reserved_memory(device: Optional[str] = None):
    r"""Reports how many bytes the caching allocator currently manages on a computing device.

    MegEngine executes asynchronously, so call ``megengine._full_sync`` before
    this function when an accurate value is required.

    Args:
        device: device to query; the default device is used when ``None``.
    """
    target = get_default_device() if device is None else device
    return CompNode(target).get_reserved_memory
def get_max_reserved_memory(device: Optional[str] = None):
    r"""Reports the maximum number of bytes managed by the caching allocator on a computing device.

    MegEngine executes asynchronously, so call ``megengine._full_sync`` before
    this function when an accurate value is required.

    Args:
        device: device to query; the default device is used when ``None``.
    """
    target = get_default_device() if device is None else device
    return CompNode(target).get_max_reserved_memory
def get_max_allocated_memory(device: Optional[str] = None):
    r"""Reports the maximum number of bytes occupied by tensors on a computing device.

    MegEngine executes asynchronously, so call ``megengine._full_sync`` before
    this function when an accurate value is required.

    Args:
        device: device to query; the default device is used when ``None``.
    """
    target = get_default_device() if device is None else device
    return CompNode(target).get_max_used_memory
def reset_max_memory_stats(device: Optional[str] = None):
    r"""Clears the recorded maximum memory statistics of a computing device.

    MegEngine executes asynchronously, so call ``megengine._full_sync`` before
    this function for the statistics to be reset properly.

    Args:
        device: device whose statistics are reset; the default device is used
            when ``None``.
    """
    target = device if device is not None else get_default_device()
    CompNode.reset_max_memory_stats(target)
# Pick the default device at import time: the MGE_DEFAULT_DEVICE environment
# variable when it is set, otherwise "xpux".
set_default_device(os.environ.get("MGE_DEFAULT_DEVICE", "xpux"))
...
...
imperative/python/src/common.cpp
浏览文件 @
0a56a5b1
...
...
@@ -73,6 +73,26 @@ void init_common(py::module m) {
[](
const
CompNode
&
cn
)
{
return
cn
.
get_mem_status_bytes
();
})
.
def_property_readonly
(
"get_used_memory"
,
[](
const
CompNode
&
cn
)
{
return
cn
.
get_used_memory
();
})
.
def_property_readonly
(
"get_max_used_memory"
,
[](
const
CompNode
&
cn
)
{
return
cn
.
get_max_used_memory
();
})
.
def_property_readonly
(
"get_reserved_memory"
,
[](
const
CompNode
&
cn
)
{
return
cn
.
get_reserved_memory
();
})
.
def_property_readonly
(
"get_max_reserved_memory"
,
[](
const
CompNode
&
cn
)
{
return
cn
.
get_max_reserved_memory
();
})
.
def_static
(
"reset_max_memory_stats"
,
[](
const
CompNode
&
cn
)
{
cn
.
reset_max_used_memory
();
cn
.
reset_max_reserved_memory
();
})
.
def
(
"create_event"
,
&
CompNode
::
create_event
,
py
::
arg
(
"flags"
)
=
0ul
)
.
def_static
(
"_set_default_device"
,
&
set_default_device
)
...
...
src/core/impl/comp_node/cuda/comp_node.cpp
浏览文件 @
0a56a5b1
...
...
@@ -208,20 +208,7 @@ class CudaCompNode::CompNodeImpl final : public CompNode::Impl {
public:
CompNodeImpl
()
:
Impl
(
static_free_device
,
static_free_host
)
{}
void
*
alloc_device
(
size_t
size
)
override
{
activate
();
#if MGB_BUILD_SLIM_SERVING
return
m_mem_alloc
->
alloc
(
size
);
#else
void
*
ptr
=
m_mem_alloc
->
alloc
(
size
);
{
MGB_LOCK_GUARD
(
m_update_mem
);
ptr2size
[
ptr
]
=
size
;
m_used_mem
+=
size
;
}
return
ptr
;
#endif
}
void
*
alloc_device
(
size_t
size
)
override
;
void
free_device
(
void
*
ptr
);
...
...
@@ -311,20 +298,30 @@ public:
uint64_t
get_uid
()
override
{
return
m_uid
;
}
#if !MGB_BUILD_SLIM_SERVING
size_t
get_used_memory
()
override
{
return
m_used_mem
;
}
size_t
get_used_memory
()
override
;
size_t
get_max_used_memory
()
override
;
size_t
get_reserved_memory
()
override
;
size_t
get_max_reserved_memory
()
override
;
void
reset_max_used_memory
()
override
;
void
reset_max_reserved_memory
()
override
;
#endif
private:
uint64_t
m_uid
;
#if !MGB_BUILD_SLIM_SERVING
std
::
unordered_map
<
void
*
,
size_t
>
ptr2size
;
size_t
m_used_mem
=
0
;
#endif
};
MGB_DYN_TYPE_OBJ_FINAL_IMPL
(
CudaCompNode
::
CompNodeImpl
);
struct
CudaCompNodeImpl
::
DeviceInfo
{
int
dev_num
=
-
1
;
std
::
atomic_size_t
m_used_mem
{
0
};
std
::
atomic_size_t
m_max_used_mem
{
0
};
std
::
unique_ptr
<
mem_alloc
::
DevMemAlloc
>
mem_alloc
;
bool
init_done
()
const
{
return
mem_alloc
.
get
();
}
...
...
@@ -438,6 +435,24 @@ void CudaCompNodeImpl::fini() {
m_initialized
=
false
;
}
/*!
 * \brief allocate \p size bytes of device memory through m_mem_alloc
 *
 * The comp node is activated first. Unless MGB_BUILD_SLIM_SERVING is set, the
 * allocation is also recorded in ptr2size and added to the used-memory
 * counter of m_device_info, and the recorded peak is raised when the new
 * total exceeds it.
 *
 * \param size number of bytes to allocate
 * \return pointer returned by the underlying allocator
 */
void* CudaCompNodeImpl::alloc_device(size_t size) {
    activate();
#if MGB_BUILD_SLIM_SERVING
    return m_mem_alloc->alloc(size);
#else
    void* ptr = m_mem_alloc->alloc(size);
    {
        MGB_LOCK_GUARD(m_update_mem);
        ptr2size[ptr] = size;
        // fetch_add returns the previous value, so used_now is exactly the
        // total produced by this allocation rather than a later re-read
        const size_t used_now = m_device_info->m_used_mem.fetch_add(size) + size;
        // raise the peak as one atomic "max": a separate compare followed by
        // a store could overwrite a larger peak published in between
        size_t peak = m_device_info->m_max_used_mem.load();
        while (peak < used_now &&
               !m_device_info->m_max_used_mem.compare_exchange_weak(
                       peak, used_now)) {
            // on failure `peak` holds the current value; the condition is
            // re-evaluated against it
        }
    }
    return ptr;
#endif
}
void
CudaCompNodeImpl
::
free_device
(
void
*
ptr
)
{
if
(
check_global_finalized
())
return
;
...
...
@@ -447,13 +462,39 @@ void CudaCompNodeImpl::free_device(void* ptr) {
{
MGB_LOCK_GUARD
(
m_update_mem
);
mgb_assert
(
ptr2size
.
find
(
ptr
)
!=
ptr2size
.
end
(),
"ptr %p not found!"
,
ptr
);
m_used_mem
-=
ptr2size
.
at
(
ptr
);
m_
device_info
->
m_
used_mem
-=
ptr2size
.
at
(
ptr
);
ptr2size
.
erase
(
ptr
);
}
#endif
m_mem_alloc
->
free
(
ptr
);
}
#if !MGB_BUILD_SLIM_SERVING
//! current value of the used-memory counter kept in m_device_info
size_t CudaCompNodeImpl::get_used_memory() {
    const size_t used_bytes = m_device_info->m_used_mem.load();
    return used_bytes;
}
//! current value of the peak used-memory counter kept in m_device_info
size_t CudaCompNodeImpl::get_max_used_memory() {
    const size_t peak_bytes = m_device_info->m_max_used_mem.load();
    return peak_bytes;
}
//! set the peak used-memory counter kept in m_device_info back to zero
void CudaCompNodeImpl::reset_max_used_memory() {
    m_device_info->m_max_used_mem.store(0);
}
//! reserved memory: whatever the device allocator reports as used
size_t CudaCompNodeImpl::get_reserved_memory() {
    auto& allocator = *m_device_info->mem_alloc;
    return allocator.get_used_memory();
}
//! peak reserved memory: whatever the device allocator reports as its maximum
size_t CudaCompNodeImpl::get_max_reserved_memory() {
    auto& allocator = *m_device_info->mem_alloc;
    return allocator.get_max_used_memory();
}
//! ask the device allocator to reset its recorded maximum
void CudaCompNodeImpl::reset_max_reserved_memory() {
    auto& allocator = *m_device_info->mem_alloc;
    allocator.reset_max_used_memory();
}
#endif
void
*
CudaCompNodeImpl
::
alloc_host
(
size_t
size
)
{
// need to activate because it creates a cuda context on the current device
activate
();
...
...
src/core/impl/comp_node/mem_alloc/impl.cpp
浏览文件 @
0a56a5b1
...
...
@@ -226,6 +226,9 @@ StreamMemAlloc* DevMemAllocImpl::add_stream(StreamKey stream) {
/*!
 * \brief allocate \p size bytes and account for them in the usage statistics
 *
 * Delegates to do_alloc(size, true), adds \p size to m_used_size and raises
 * m_max_used_size when the new total exceeds it.
 *
 * \param size number of bytes to allocate
 * \return the address produced by do_alloc
 */
MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc(size_t size) {
    auto addr = do_alloc(size, true);
    // fetch_add returns the previous total, so used_now is the total produced
    // by this call rather than a value re-read after other updates
    const size_t used_now = m_used_size.fetch_add(size) + size;
    // raise the peak as one atomic "max": a separate compare followed by a
    // store could overwrite a larger peak published in between
    size_t peak = m_max_used_size.load();
    while (peak < used_now &&
           !m_max_used_size.compare_exchange_weak(peak, used_now)) {
        // on failure `peak` holds the current value; the condition is
        // re-evaluated against it
    }
    return addr;
}
...
...
@@ -291,6 +294,9 @@ MemAllocImplHelper::MemAddr DevMemAllocImpl::alloc_from_parent(size_t size) {
// exception would be thrown from here
auto
t
=
do_alloc
(
size
,
false
,
true
);
m_used_size
+=
size
;
if
(
m_used_size
>
m_max_used_size
)
{
m_max_used_size
=
m_used_size
.
load
();
}
return
t
;
}
}
...
...
@@ -419,6 +425,9 @@ void DevMemAllocImpl::insert_free_unsafe(const FreeBlock& block) {
child
->
insert_free_unsafe
(
block
);
}
m_used_size
+=
block
.
size
;
if
(
m_used_size
>
m_max_used_size
)
{
m_max_used_size
=
m_used_size
.
load
();
}
}
else
{
MemAllocImplHelper
::
insert_free_unsafe
(
block
);
}
...
...
src/core/impl/comp_node/mem_alloc/impl.h
浏览文件 @
0a56a5b1
...
...
@@ -171,6 +171,7 @@ class DevMemAllocImpl final : public DevMemAlloc, public MemAllocImplHelper {
size_t
m_tot_allocated_from_raw
=
0
;
std
::
atomic_size_t
m_used_size
{
0
};
std
::
atomic_size_t
m_max_used_size
{
0
};
/*!
* \brief gather all free blocks from child streams, and release full chunks
...
...
@@ -197,6 +198,10 @@ class DevMemAllocImpl final : public DevMemAlloc, public MemAllocImplHelper {
//! current value of m_used_size
size_t get_used_memory() override {
    const size_t used_bytes = m_used_size.load();
    return used_bytes;
}
//! current value of m_max_used_size
size_t get_max_used_memory() override {
    const size_t peak_bytes = m_max_used_size.load();
    return peak_bytes;
}
//! set m_max_used_size back to zero
void reset_max_used_memory() override {
    m_max_used_size.store(0);
}
void
insert_free_unsafe
(
const
FreeBlock
&
block
)
override
;
/*!
...
...
src/core/include/megbrain/comp_node.h
浏览文件 @
0a56a5b1
...
...
@@ -335,11 +335,23 @@ public:
//! forwards to get_used_memory() of the implementation object
size_t get_used_memory() const {
    const size_t bytes = m_impl->get_used_memory();
    return bytes;
}
//! forwards to get_reserved_memory() of the implementation object
size_t get_reserved_memory() const {
    const size_t bytes = m_impl->get_reserved_memory();
    return bytes;
}
//! forwards to get_max_reserved_memory() of the implementation object
size_t get_max_reserved_memory() const {
    const size_t bytes = m_impl->get_max_reserved_memory();
    return bytes;
}
//! forwards to get_max_used_memory() of the implementation object
size_t get_max_used_memory() const {
    const size_t bytes = m_impl->get_max_used_memory();
    return bytes;
}
//! forwards to get_max_block_size_available() of the implementation object
size_t get_max_block_size_available() const {
    const size_t bytes = m_impl->get_max_block_size_available();
    return bytes;
}
//! forwards to get_free_mem() of the implementation object
size_t get_free_mem() const {
    const size_t bytes = m_impl->get_free_mem();
    return bytes;
}
//! forwards to reset_max_reserved_memory() of the implementation object
void reset_max_reserved_memory() const {
    m_impl->reset_max_reserved_memory();
}
//! forwards to reset_max_used_memory() of the implementation object
void reset_max_used_memory() const {
    m_impl->reset_max_used_memory();
}
#endif
//! change to another stream on the same memory node
...
...
@@ -533,8 +545,13 @@ protected:
return
{
x
-
x
,
y
-
y
};
}
//! default implementation: reports 0
virtual size_t get_used_memory() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_reserved_memory() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_max_reserved_memory() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_max_used_memory() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_max_block_size_available() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_free_mem() {
    return size_t{0};
}
//! default implementation: does nothing
virtual void reset_max_reserved_memory() {
    return;
}
//! default implementation: does nothing
virtual void reset_max_used_memory() {
    return;
}
#endif
virtual
Locator
locator
()
=
0
;
...
...
src/core/include/megbrain/comp_node/alloc.h
浏览文件 @
0a56a5b1
...
...
@@ -275,6 +275,10 @@ public:
//! read-only access to the stored pre-allocation config
const PreAllocConfig& prealloc_config() {
    const PreAllocConfig& config = m_prealloc_config;
    return config;
}
//! default implementation: reports 0
virtual size_t get_used_memory() {
    return size_t{0};
}
//! default implementation: reports 0
virtual size_t get_max_used_memory() {
    return size_t{0};
}
//! default implementation: does nothing
virtual void reset_max_used_memory() {
    return;
}
private:
size_t
m_alignment
=
1
;
PreAllocConfig
m_prealloc_config
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录