Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
601a33a8
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
601a33a8
编写于
5月 07, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mge/dtr): update dtr api
GitOrigin-RevId: dc366c65be5a03cded9a547ea49e46d037223d9f
上级
c269a690
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
147 addition
and
58 deletion
+147
-58
imperative/python/megengine/__init__.py
imperative/python/megengine/__init__.py
+1
-0
imperative/python/megengine/dtr.py
imperative/python/megengine/dtr.py
+131
-0
imperative/python/megengine/utils/dtr.py
imperative/python/megengine/utils/dtr.py
+0
-44
imperative/python/requires.txt
imperative/python/requires.txt
+1
-0
imperative/src/impl/interpreter/interpreter_impl.cpp
imperative/src/impl/interpreter/interpreter_impl.cpp
+11
-11
imperative/src/impl/interpreter/option_manager.h
imperative/src/impl/interpreter/option_manager.h
+3
-3
未找到文件。
imperative/python/megengine/__init__.py
浏览文件 @
601a33a8
...
...
@@ -76,6 +76,7 @@ from .core._imperative_rt.core2 import full_sync as _full_sync
from
.core._imperative_rt.core2
import
sync
as
_sync
from
.core._imperative_rt.utils
import
_set_fork_exec_path_for_timed_func
from
.device
import
*
from
.dtr
import
*
from
.logger
import
enable_debug_log
,
get_logger
,
set_log_file
,
set_log_level
from
.serialization
import
load
,
save
from
.tensor
import
Parameter
,
Tensor
,
tensor
...
...
imperative/python/megengine/dtr.py
0 → 100644
浏览文件 @
601a33a8
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
re
from
typing
import
Union
from
mprop
import
mproperty
from
.core._imperative_rt.core2
import
set_option
from
.core._imperative_rt.utils
import
_set_defrag
_eviction_threshold
=
0
_evictee_minimum_size
=
1024
**
2
def str2bytes(text: str) -> int:
    r"""
    Convert a human-readable size string into a number of bytes.

    The string must consist of a single non-negative decimal number followed
    by one of the units ``B``, ``KB``, ``MB`` or ``GB`` (case-insensitive).
    Whitespace is allowed around the whole string and between the number and
    the unit. Units are powers of 1024 and the result is truncated to an int.

    :param text: the size string, e.g. ``"2GB"``, ``"1.5 mb"`` or ``"512B"``.
    :return: the size in bytes.
    :raises ValueError: if ``text`` is not exactly one quantity with a
        supported unit.
    """
    exponents = {"b": 0, "kb": 1, "mb": 2, "gb": 3}
    # Match the whole (stripped) string. A search-style match would silently
    # accept malformed inputs such as "1.5.5GB", "-2GB" or "2GBx" by picking
    # out a substring that happens to look like a quantity.
    matched = re.fullmatch(
        r"(\d+(?:\.\d+)?)\s*([kmg]?b)", text.strip(), re.IGNORECASE
    )
    if matched is None:
        raise ValueError(
            "Formatting of `value` only supports bytes(B), kilobyte(KB), megabyte(MB) and gigabyte(GB) units"
        )
    number, unit = matched.groups()
    return int(float(number) * 1024 ** exponents[unit.lower()])
@mproperty
def eviction_threshold(mod):
    r"""
    Get the current eviction threshold, expressed in bytes.

    .. note::

       Once GPU memory usage goes above this value, DTR heuristically picks
       resident tensors and evicts them until the amount of used memory
       drops back below the threshold.
    """
    return mod._eviction_threshold


@eviction_threshold.setter
def eviction_threshold(mod, value: Union[int, str]):
    r"""
    Set the eviction threshold.

    An int is taken as a number of bytes. A str is parsed with ``str2bytes``
    and may use the bytes(B), kilobyte(KB), megabyte(MB) and gigabyte(GB)
    units. Any other type raises ``TypeError``. The resulting value is stored
    on the module and forwarded to the ``dtr_eviction_threshold`` option.

    Examples:

    .. code-block::

       import megengine as mge
       mge.dtr.eviction_threshold = 2 * 1024 ** 3
       mge.dtr.eviction_threshold = "2GB"
       mge.dtr.eviction_threshold = "2048MB"
    """
    # Reject unsupported types up front; str and int are the only ones accepted.
    if not isinstance(value, (str, int)):
        raise TypeError("`value` should be a str or an int")
    mod._eviction_threshold = (
        mod.str2bytes(value) if isinstance(value, str) else value
    )
    set_option("dtr_eviction_threshold", mod._eviction_threshold)
@mproperty
def evictee_minimum_size(mod):
    r"""
    Get the minimum tensor size, in bytes, for eviction candidates.

    .. note::

       Only tensors whose size exceeds this threshold are added to the
       candidate set. A tensor that is not in the candidate set is never
       evicted during its lifetime.
    """
    return mod._evictee_minimum_size


@evictee_minimum_size.setter
def evictee_minimum_size(mod, value: Union[int, str]):
    r"""
    Set the minimum tensor size for eviction candidates.

    An int is taken as a number of bytes. A str is parsed with ``str2bytes``
    and may use the bytes(B), kilobyte(KB), megabyte(MB) and gigabyte(GB)
    units. Any other type raises ``TypeError``. The resulting value is stored
    on the module and forwarded to the ``dtr_evictee_minimum_size`` option.

    Examples:

    .. code-block::

       import megengine as mge
       mge.dtr.evictee_minimum_size = 2 * 1024 ** 2
       mge.dtr.evictee_minimum_size = "2MB"
       mge.dtr.evictee_minimum_size = "2048KB"
    """
    # Reject unsupported types up front; str and int are the only ones accepted.
    if not isinstance(value, (str, int)):
        raise TypeError("`value` should be a str or an int")
    mod._evictee_minimum_size = (
        mod.str2bytes(value) if isinstance(value, str) else value
    )
    set_option("dtr_evictee_minimum_size", mod._evictee_minimum_size)
def enable():
    r"""
    Start recording the computing path of tensors and applying the DTR policy.

    Calls ``_set_defrag(True)`` and then sets the interpreter options
    ``enable_dtr_auto_drop``, ``enable_drop`` and ``record_computing_path``
    to 1 and ``buffer_length`` to 0.
    """
    _set_defrag(True)
    # Applied in this exact order, one set_option call per entry.
    settings = (
        ("enable_dtr_auto_drop", 1),
        ("enable_drop", 1),
        ("buffer_length", 0),
        ("record_computing_path", 1),
    )
    for option_name, option_value in settings:
        set_option(option_name, option_value)
def disable():
    r"""
    Stop recording the computing path of tensors and applying the DTR policy.

    Sets the interpreter options ``enable_dtr_auto_drop``, ``enable_drop``
    and ``record_computing_path`` back to 0. ``buffer_length`` and the
    defrag setting are not touched here.
    """
    for option_name in (
        "enable_dtr_auto_drop",
        "enable_drop",
        "record_computing_path",
    ):
        set_option(option_name, 0)
imperative/python/megengine/utils/dtr.py
已删除
100644 → 0
浏览文件 @
c269a690
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from
..core._imperative_rt.core2
import
set_option
from
..core._imperative_rt.utils
import
_set_defrag
class DTR:
    r"""
    DTR implements `Dynamic Tensor Rematerialization <https://arxiv.org/abs/2006.09616>`_ in MegEngine.

    It is basically an online algorithm for checkpointing driven by certain eviction policies.

    Constructing an instance configures the interpreter through ``set_option``
    and ``_set_defrag``; the instance itself stores no state.

    .. code-block::

        from megengine.utils.dtr import DTR

        ds = DTR(memory_budget=5*1024**3)

        # your training code
    """

    def __init__(self, memory_budget=0, tensor_lowerbound=1048576):
        r"""
        :param memory_budget: int. The threshold of memory usage. When memory
            usage exceeds this value, auto evict will be triggered.
        :param tensor_lowerbound: int. The minimum memory limit of the tensor
            that can be evicted. Default: 1MB.
        """
        # NOTE(review): indentation was lost in the extracted source, so the
        # exact extent of this `if` body is a best guess — presumably the
        # budget-related options are only set when a positive budget is
        # given. Confirm against the original file before relying on it.
        if memory_budget > 0:
            set_option("enable_auto_drop", 1)
            set_option("enable_drop", 1)
            set_option("buffer_length", 0)
            set_option("memory_budget", memory_budget)
            set_option("tensor_lowerbound", tensor_lowerbound)
        set_option("record_computing_path", 1)
        _set_defrag(True)
imperative/python/requires.txt
浏览文件 @
601a33a8
...
...
@@ -6,3 +6,4 @@ tabulate
tqdm
redispy
deprecated
mprop
imperative/src/impl/interpreter/interpreter_impl.cpp
浏览文件 @
601a33a8
...
...
@@ -422,7 +422,7 @@ void ChannelImpl::do_drop(TensorInfo* ptr, bool user=false) {
}
void
ChannelImpl
::
free
(
TensorInfo
*
ptr
)
{
if
(
m_worker_state
.
options
.
enable_auto_drop
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_
auto_drop
)
{
// Evicting a tensor, rather than freeing it, can avoid pinning
// potentially exploding amounts of memory and allow us to save
// more memory.
...
...
@@ -459,7 +459,7 @@ void ChannelImpl::real_free(TensorInfo* ptr) {
if
(
m_channel_state
.
profiler
->
is_profiling
())
{
m_channel_state
.
profiler
->
record_host
<
TensorEraseEvent
>
(
ptr
->
id
);
}
if
(
ptr
->
size_exceeds_thd
(
m_worker_state
.
options
.
tensor_lowerbound
))
{
if
(
ptr
->
size_exceeds_thd
(
m_worker_state
.
options
.
dtr_evictee_minimum_size
))
{
m_dtr
.
erase_candidate
(
ptr
);
}
detach_users
(
ptr
);
...
...
@@ -487,7 +487,7 @@ void ChannelImpl::produce_tensor(TensorInfo* dest, TensorPtr ptr, bool notice=tr
dest
->
memory
=
ptr
->
blob
()
->
size
();
dest
->
ptr
=
std
::
move
(
ptr
);
dest
->
evict_type
=
EvictType
::
NONE
;
if
(
notice
&&
dest
->
size_exceeds_thd
(
m_worker_state
.
options
.
tensor_lowerbound
))
{
if
(
notice
&&
dest
->
size_exceeds_thd
(
m_worker_state
.
options
.
dtr_evictee_minimum_size
))
{
m_dtr
.
insert_candidate
(
dest
);
}
if
(
notice
&&
m_waitee
==
dest
)
{
...
...
@@ -519,7 +519,7 @@ void ChannelImpl::recompute(TensorInfo::ComputePath* path) {
inputs
.
push_back
(
i
->
ptr
);
m_dtr
.
update_used_time
(
i
);
}
if
(
m_worker_state
.
options
.
enable_
auto_drop
&&
m_worker_state
.
options
.
memory_budget
>
0
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_auto_drop
&&
m_worker_state
.
options
.
dtr_eviction_threshold
>
0
)
{
auto_evict
();
}
auto
outputs
=
OpDef
::
apply_on_physical_tensor
(
*
path
->
op
,
inputs
);
...
...
@@ -531,7 +531,7 @@ void ChannelImpl::recompute(TensorInfo::ComputePath* path) {
o
->
recompute_times
++
;
if
(
!
o
->
ptr
)
{
produce_tensor
(
o
,
std
::
move
(
outputs
[
i
]),
false
);
if
(
m_worker_state
.
options
.
enable_auto_drop
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_
auto_drop
)
{
m_dtr
.
update_dsu_after_recompute
(
o
);
}
}
...
...
@@ -544,7 +544,7 @@ void ChannelImpl::auto_evict() {
return
;
}
size_t
current_memory
=
m_dtr
.
comp_node
.
get_used_memory
();
while
(
current_memory
>
m_worker_state
.
options
.
memory_budget
)
{
while
(
current_memory
>
m_worker_state
.
options
.
dtr_eviction_threshold
)
{
auto
best
=
m_dtr
.
find_best_tensor
();
if
(
!
best
)
{
if
(
!
m_dtr
.
warn_printed
)
{
...
...
@@ -642,7 +642,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
uint64_t
apply_id
=
++
m_last_id
;
SmallVector
<
TensorPtr
>
tensor_inputs
;
SmallVector
<
CompNode
>
devices
;
if
(
m_worker_state
.
options
.
enable_auto_drop
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_
auto_drop
)
{
m_dtr
.
pin
(
cmd
.
inputs
);
}
for
(
auto
i
:
cmd
.
inputs
)
{
...
...
@@ -696,7 +696,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
m_worker_state
.
profiler
->
record_device
<
DeviceOpExecuteEvent
>
(
device
,
event_data
);
}
}
if
(
m_worker_state
.
options
.
enable_
auto_drop
&&
m_worker_state
.
options
.
memory_budget
>
0
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_auto_drop
&&
m_worker_state
.
options
.
dtr_eviction_threshold
>
0
)
{
auto_evict
();
}
// Apply op
...
...
@@ -712,7 +712,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
}
// End profiling operator
double
estimate_compute_time
=
0
;
if
(
m_worker_state
.
options
.
enable_auto_drop
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_
auto_drop
)
{
for
(
auto
i
:
cmd
.
inputs
)
{
estimate_compute_time
+=
i
->
memory
;
}
...
...
@@ -735,7 +735,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
continue
;
}
produce_tensor
(
cmd
.
outputs
[
i
],
std
::
move
(
tensor_outputs
[
i
]));
if
(
m_worker_state
.
options
.
enable_auto_drop
)
{
if
(
m_worker_state
.
options
.
enable_
dtr_
auto_drop
)
{
cmd
.
outputs
[
i
]
->
dsu_ptr
=
std
::
make_shared
<
DsuNode
>
(
estimate_compute_time
);
}
}
...
...
@@ -774,7 +774,7 @@ void ChannelImpl::process_one_task(IdentifiedCommand& icmd) {
TensorInfo
::
ComputePath
::
make
(
cmd
.
op
,
cmd
.
inputs
,
cmd
.
outputs
);
size_t
detach_cnt
=
0
;
for
(
auto
output
:
cmd
.
outputs
)
{
if
(
!
output
->
size_exceeds_thd
(
m_worker_state
.
options
.
tensor_lowerbound
))
{
if
(
!
output
->
size_exceeds_thd
(
m_worker_state
.
options
.
dtr_evictee_minimum_size
))
{
output
->
detach_producer
();
detach_cnt
++
;
}
...
...
imperative/src/impl/interpreter/option_manager.h
浏览文件 @
601a33a8
...
...
@@ -39,10 +39,10 @@ public:
"set command buffer length."
);
DEF_OPTION
(
enable_host_compute
,
"MEGENGINE_HOST_COMPUTE"
,
1
,
"enable host compute, thus computation may be done in host event if it's device is gpu."
);
DEF_OPTION
(
enable_
auto_drop
,
"MEGENGINE_AUTO_DROP"
,
0
,
""
);
DEF_OPTION
(
memory_budget
,
"MEGENGINE_MEMORY_BUDGET"
,
0
,
DEF_OPTION
(
enable_
dtr_auto_drop
,
"MEGENGINE_DTR_AUTO_DROP"
,
0
,
""
);
DEF_OPTION
(
dtr_eviction_threshold
,
"MEGENGINE_DTR_EVICTION_THRESHOLD"
,
0
,
"auto drop will start whenever gpu memory usage exceeds this value."
);
DEF_OPTION
(
tensor_lowerbound
,
"MEGENGINE_TENSOR_LOWERBOUND"
,
1048576
,
DEF_OPTION
(
dtr_evictee_minimum_size
,
"MEGENGINE_DTR_EVICTEE_MINIMUM_SIZE"
,
1048576
,
"the minimum memory value of a tensor added to the candidate set"
);
DEF_OPTION
(
record_computing_path
,
"MEGENGINE_RECORD_COMPUTING_PATH"
,
0
,
""
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录