Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
a92aea1f
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
a92aea1f
编写于
12月 13, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(imperative/dtr): fix dtr crash issue
GitOrigin-RevId: 3de477593690838ffc2b5281aba44ab2b9facb7f
上级
2809316e
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
36 additions
and
16 deletions
+36
-16
imperative/python/test/integration/test_dtr.py
imperative/python/test/integration/test_dtr.py
+14
-8
imperative/src/impl/interpreter/interpreter_impl.cpp
imperative/src/impl/interpreter/interpreter_impl.cpp
+20
-7
imperative/src/impl/interpreter/interpreter_impl.h
imperative/src/impl/interpreter/interpreter_impl.h
+2
-1
未找到文件。
imperative/python/test/integration/test_dtr.py
浏览文件 @
a92aea1f
...
...
@@ -149,11 +149,17 @@ def test_dtr_resnet1202():
assert
p
.
exitcode
==
0
# FIXME: fix dtr crash
# @pytest.mark.require_ngpu(1)
# @pytest.mark.isolated_distributed
# def test_dtr_drop_copy_dev_tensor():
# p = mp.Process(target=run_dtr_drop_copy_dev_tensor)
# p.start()
# p.join()
# assert p.exitcode == 0
@pytest.mark.require_ngpu(1)
@pytest.mark.isolated_distributed
def test_dtr_drop_copy_dev_tensor():
    """Run ``run_dtr_drop_copy_dev_tensor`` in a separate process.

    The test passes only if that process finishes with exit code 0.
    """
    # NOTE(review): ``mp`` is presumably the ``multiprocessing`` module
    # imported at the top of the file -- confirm.
    worker = mp.Process(target=run_dtr_drop_copy_dev_tensor)
    worker.start()
    worker.join()
    assert worker.exitcode == 0
@pytest.mark.require_ngpu(1)
@pytest.mark.isolated_distributed
def test_dtr_drop_tensor():
    """Invoke ``test_dtr_drop_copy_dev_tensor`` 50 times back to back."""
    repeat_count = 50
    for _ in range(repeat_count):
        test_dtr_drop_copy_dev_tensor()
imperative/src/impl/interpreter/interpreter_impl.cpp
浏览文件 @
a92aea1f
...
...
@@ -493,7 +493,18 @@ HostTensorND ChannelImpl::get_value(Handle handle) {
auto
info
=
reinterpret_cast
<
TensorInfo
*>
(
handle
);
// do not use info->value_fetched, it's unsafe
mgb_assert
(
!
info
->
invalid
,
"tensor is unusable due to previous error"
);
return
wait_tensor
(
info
,
TensorProp
::
HostValue
)
->
get_value
();
// pin
SmallVector
<
TensorInfo
*>
vec
({
info
});
m_dtr
.
pin
(
vec
);
auto
ret
=
wait_tensor
(
info
,
TensorProp
::
HostValue
)
->
get_value
();
// unpin
auto
&
state
=
get_channel_state
();
auto
dtr_evictee_minimum_size
=
state
.
options
.
dtr_evictee_minimum_size
;
m_dtr
.
unpin
(
vec
,
dtr_evictee_minimum_size
);
return
ret
;
}
TensorShape
ChannelImpl
::
get_shape
(
Handle
handle
)
{
...
...
@@ -916,7 +927,9 @@ void ChannelImpl::do_apply_op(const ApplyOp& cmd, std::string reason) {
i
->
compute_time
=
estimate_compute_time
;
}
}
m_dtr
.
unpin
(
cmd
.
inputs
,
state
);
auto
&
state
=
get_worker_state
();
auto
dtr_evictee_minimum_size
=
state
.
options
.
dtr_evictee_minimum_size
;
m_dtr
.
unpin
(
cmd
.
inputs
,
dtr_evictee_minimum_size
);
}
MGB_RECORD_EVENT
(
OpExecuteFinishEvent
,
apply_id
,
{},
reason
);
// End profiling operator
...
...
@@ -1098,11 +1111,12 @@ TensorPtr ChannelImpl::wait_tensor(TensorInfo* info, TensorProp prop) {
return
require_host
?
host_available
()
:
static_cast
<
bool
>
(
info
->
ptr
);
});
}
auto
ptr
=
info
->
ptr
;
MGB_RECORD_EVENT
(
TensorWaitPropFinishEvent
,
info
->
id
,
m_waitee_id
,
prop
,
backtrace_getter
);
m_waitee
=
nullptr
;
if
(
wait_host
)
{
auto
err
=
info
->
ptr
->
comp_node
().
check_async_error
();
auto
err
=
ptr
->
comp_node
().
check_async_error
();
mgb_assert
(
!
err
,
"%s"
,
err
->
what
());
}
if
(
wait_regen
)
{
...
...
@@ -1119,7 +1133,7 @@ TensorPtr ChannelImpl::wait_tensor(TensorInfo* info, TensorProp prop) {
}
lock
.
lock
();
}
return
info
->
ptr
;
return
ptr
;
}
void
ChannelImpl
::
notify_tensor_unsafe
(
TensorInfo
*
info
)
{
...
...
@@ -1556,11 +1570,10 @@ void ChannelImpl::DynamicSublinear::pin(const SmallVector<TensorInfo*>& vec) {
}
void
ChannelImpl
::
DynamicSublinear
::
unpin
(
const
SmallVector
<
TensorInfo
*>&
vec
,
WorkerState
&
stat
e
)
{
const
SmallVector
<
TensorInfo
*>&
vec
,
size_t
&
dtr_evictee_minimum_siz
e
)
{
for
(
auto
i
:
vec
)
{
i
->
unpin
();
if
(
i
->
pinned
==
0
&&
i
->
size_exceeds_thd
(
state
.
options
.
dtr_evictee_minimum_size
)
&&
if
(
i
->
pinned
==
0
&&
i
->
size_exceeds_thd
(
dtr_evictee_minimum_size
)
&&
i
->
cand_index
==
UINT_MAX
)
{
insert_candidate
(
i
);
}
...
...
imperative/src/impl/interpreter/interpreter_impl.h
浏览文件 @
a92aea1f
...
...
@@ -265,7 +265,8 @@ private:
/*!
* \brief unpin the tensors in vec
*/
void
unpin
(
const
SmallVector
<
TensorInfo
*>&
vec
,
WorkerState
&
state
);
void
unpin
(
const
SmallVector
<
TensorInfo
*>&
vec
,
size_t
&
dtr_evictee_minimum_size
);
/*!
* \brief add the tensor to the candidate set
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录