Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
c49427d1
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
c49427d1
编写于
12月 02, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(imperative): add inplace add_update option in optimizer
GitOrigin-RevId: b8feb493218e4a8e62cf5d8f14b13cfa12461dc0
上级
aeb7980b
变更
23
显示空白变更内容
内联
并排
Showing
23 changed file
with
337 addition
and
105 deletion
+337
-105
imperative/python/megengine/functional/inplace.py
imperative/python/megengine/functional/inplace.py
+15
-0
imperative/python/megengine/jit/tracing.py
imperative/python/megengine/jit/tracing.py
+4
-2
imperative/python/megengine/optimizer/adam.py
imperative/python/megengine/optimizer/adam.py
+46
-15
imperative/python/megengine/optimizer/optimizer.py
imperative/python/megengine/optimizer/optimizer.py
+2
-1
imperative/python/megengine/optimizer/sgd.py
imperative/python/megengine/optimizer/sgd.py
+21
-2
imperative/python/megengine/tensor.py
imperative/python/megengine/tensor.py
+2
-2
imperative/python/src/grad_override.cpp
imperative/python/src/grad_override.cpp
+2
-2
imperative/python/src/tensor.cpp
imperative/python/src/tensor.cpp
+6
-5
imperative/python/test/integration/test_sgd_momentum.py
imperative/python/test/integration/test_sgd_momentum.py
+14
-4
imperative/python/test/unit/test_tracing.py
imperative/python/test/unit/test_tracing.py
+0
-1
imperative/src/impl/dnn_op_helper.h
imperative/src/impl/dnn_op_helper.h
+39
-5
imperative/src/impl/interpreter_impl.cpp
imperative/src/impl/interpreter_impl.cpp
+4
-3
imperative/src/impl/interpreter_impl.h
imperative/src/impl/interpreter_impl.h
+3
-2
imperative/src/impl/ops/cond_take.cpp
imperative/src/impl/ops/cond_take.cpp
+1
-39
imperative/src/impl/ops/elemwise.cpp
imperative/src/impl/ops/elemwise.cpp
+133
-0
imperative/src/impl/proxy_graph_detail.cpp
imperative/src/impl/proxy_graph_detail.cpp
+14
-14
imperative/src/include/megbrain/imperative/interpreter.h
imperative/src/include/megbrain/imperative/interpreter.h
+1
-1
imperative/src/include/megbrain/imperative/physical_tensor.h
imperative/src/include/megbrain/imperative/physical_tensor.h
+4
-0
imperative/src/include/megbrain/imperative/proxy_graph_detail.h
...tive/src/include/megbrain/imperative/proxy_graph_detail.h
+4
-0
src/core/include/megbrain/ir/ops.td
src/core/include/megbrain/ir/ops.td
+2
-0
src/opr/impl/basic_arith.cpp
src/opr/impl/basic_arith.cpp
+2
-5
src/opr/impl/internal/identical_fwd.cpp
src/opr/impl/internal/identical_fwd.cpp
+16
-0
src/opr/include/megbrain/opr/internal/identical_fwd.h
src/opr/include/megbrain/opr/internal/identical_fwd.h
+2
-2
未找到文件。
imperative/python/megengine/functional/inplace.py
0 → 100644
浏览文件 @
c49427d1
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from
..core._imperative_rt.core2
import
apply
from
..core.ops
import
builtin
from
..core.ops.builtin
import
InplaceAdd
def
_inplace_add_
(
dest
,
delta
,
alpha
,
beta
):
return
dest
.
_reset
(
apply
(
InplaceAdd
(),
dest
,
delta
,
alpha
,
beta
)[
0
])
imperative/python/megengine/jit/tracing.py
浏览文件 @
c49427d1
...
...
@@ -502,6 +502,8 @@ class trace:
# profile
if
self
.
_profiling
:
self
.
_profiler
=
GraphProfiler
(
graph
)
if
int
(
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
,
"0"
)):
graph
.
options
.
var_sanity_check_first_run
=
False
def
_compile
(
self
):
graph
=
self
.
_graph
=
G
.
Graph
()
...
...
@@ -1073,7 +1075,7 @@ def apply_compiled_mode(op: OpDef, *args: RawTensor):
return
active_trace
.
_apply_op
(
op
,
args
)
def
apply_const_compiled_mode
(
value
,
dtype
,
device
,
is_const
):
def
apply_const_compiled_mode
(
value
,
dtype
,
device
,
is_const
,
no_cache
):
if
skip_tracing
:
args
=
[
RawTensor
(
x
.
_dev_tensor
())
if
x
.
__class__
is
CompiledTensorProxy
else
x
...
...
@@ -1099,7 +1101,7 @@ def apply_with_tracing(op: OpDef, *args: RawTensor):
return
list
(
outputs
)
def
apply_const_with_tracing
(
value
,
dtype
,
device
,
is_const
):
def
apply_const_with_tracing
(
value
,
dtype
,
device
,
is_const
,
no_cache
):
if
active_trace
.
_symbolic
:
outputs
=
apply_const_symbolic_mode
(
value
,
dtype
,
device
)
else
:
...
...
imperative/python/megengine/optimizer/adam.py
浏览文件 @
c49427d1
...
...
@@ -6,8 +6,10 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
os
from
typing
import
Iterable
,
Tuple
,
Union
from
..functional.inplace
import
_inplace_add_
from
..tensor
import
Parameter
,
tensor
from
.optimizer
import
Optimizer
...
...
@@ -58,15 +60,24 @@ class Adam(Optimizer):
eps
=
param_group
[
"eps"
]
beta0
,
beta1
=
param_group
[
"betas"
]
def
make_scalar
(
val
):
return
tensor
([
val
])
# since `conver_inputs` is disabled for param updates,
# scalar should be explicitly tansforred to tensor
_lr
=
tensor
([
lr
])
_weight_decay
=
tensor
([
weight_decay
])
_eps
=
tensor
([
eps
])
_beta0
,
_beta1
=
tensor
([
beta0
]),
tensor
([
beta1
])
c1
=
tensor
([
1.0
])
c05
=
tensor
([
0.5
])
_lr
,
_neg_lr
=
map
(
make_scalar
,
(
lr
,
-
lr
))
_weight_decay
=
make_scalar
(
weight_decay
)
_eps
=
make_scalar
(
eps
)
_beta0
,
_beta1
=
map
(
make_scalar
,
(
beta0
,
beta1
))
c1
,
c05
=
map
(
make_scalar
,
(
1.0
,
0.5
))
inplace_mode
=
int
(
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
,
"0"
))
if
inplace_mode
:
# reduce device sync
c1_sub_beta0
,
c1_sub_beta1
=
map
(
make_scalar
,
(
1
-
beta0
,
1
-
beta1
))
for
param
in
param_group
[
"params"
]:
if
param
.
grad
is
None
:
...
...
@@ -77,18 +88,38 @@ class Adam(Optimizer):
grad
+=
param
*
_weight_decay
states
=
self
.
_state
[
param
]
step
=
states
[
"step"
]
step
,
exp_avg
,
exp_avg_sq
=
(
states
[
"step"
],
states
[
"exp_avg"
],
states
[
"exp_avg_sq"
],
)
if
inplace_mode
:
_inplace_add_
(
step
,
c1
,
alpha
=
c1
,
beta
=
c1
)
_inplace_add_
(
exp_avg
,
grad
,
alpha
=
_beta0
,
beta
=
c1_sub_beta0
)
_inplace_add_
(
exp_avg_sq
,
grad
*
grad
,
alpha
=
_beta1
,
beta
=
c1_sub_beta1
,
)
delta
=
(
exp_avg
/
(
c1
-
_beta0
**
step
))
/
(
(
exp_avg_sq
/
(
c1
-
_beta1
**
step
))
**
c05
+
_eps
)
_inplace_add_
(
param
,
delta
,
alpha
=
c1
,
beta
=
_neg_lr
)
continue
# step = step + c1
step
+=
c1
exp_avg
=
states
[
"exp_avg"
]
exp_avg_sq
=
states
[
"exp_avg_sq"
]
exp_avg
=
_beta0
*
exp_avg
+
grad
*
(
c1
-
_beta0
)
exp_avg_sq
=
_beta1
*
exp_avg_sq
+
(
c1
-
_beta1
)
*
(
grad
*
grad
)
# exp_avg = _beta0 * exp_avg + grad * (c1 - _beta0)
exp_avg
*=
_beta0
exp_avg
+=
grad
*
(
c1
-
_beta0
)
# exp_avg_sq = _beta1 * exp_avg_sq + (c1 - _beta1) * (grad * grad)
exp_avg_sq
*=
_beta1
exp_avg_sq
+=
(
c1
-
_beta1
)
*
(
grad
*
grad
)
delta
=
(
exp_avg
/
(
c1
-
_beta0
**
step
))
/
(
(
exp_avg_sq
/
(
c1
-
_beta1
**
step
))
**
c05
+
_eps
)
param
-=
_lr
*
delta
# not inplace change, need to update underlying tensor handler in state
states
[
"exp_avg"
].
_reset
(
exp_avg
)
states
[
"exp_avg_sq"
].
_reset
(
exp_avg_sq
)
imperative/python/megengine/optimizer/optimizer.py
浏览文件 @
c49427d1
...
...
@@ -96,6 +96,7 @@ class Optimizer(metaclass=ABCMeta):
"optimizer can only optimize Parameters, but one of the params is "
+
str
(
type
(
param
))
)
param
.
_reset
(
Tensor
(
param
.
numpy
(),
no_cache
=
True
))
for
name
,
default
in
self
.
_defaults
.
items
():
if
default
is
required
and
name
not
in
param_group
:
...
...
@@ -121,7 +122,7 @@ class Optimizer(metaclass=ABCMeta):
initializer
=
np
.
zeros
(
param
.
shape
,
dtype
=
np
.
float32
)
state_dict
=
self
.
_state
.
setdefault
(
param
,
{})
assert
state_name
not
in
state_dict
state
=
Tensor
(
initializer
)
state
=
Tensor
(
initializer
,
no_cache
=
True
)
state_dict
[
state_name
]
=
state
@
abstractmethod
...
...
imperative/python/megengine/optimizer/sgd.py
浏览文件 @
c49427d1
...
...
@@ -6,8 +6,10 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
os
from
typing
import
Iterable
,
Union
from
..functional.inplace
import
_inplace_add_
from
..tensor
import
Parameter
,
tensor
from
.optimizer
import
Optimizer
...
...
@@ -54,10 +56,16 @@ class SGD(Optimizer):
# since `conver_inputs` is disabled for param updates,
# scalar should be explicitly tansforred to tensor
_lr
=
tensor
([
lr
])
_weight_decay
=
tensor
([
weight_decay
])
_momentum
=
tensor
([
momentum
])
inplace_mode
=
int
(
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
,
"0"
))
if
inplace_mode
:
_neg_lr
=
tensor
([
-
lr
])
c1
=
tensor
([
1.0
])
for
param
in
param_group
[
"params"
]:
if
param
.
grad
is
None
:
continue
...
...
@@ -66,10 +74,21 @@ class SGD(Optimizer):
if
weight_decay
!=
0.0
:
grad
+=
param
*
_weight_decay
if
inplace_mode
:
if
momentum
:
v
=
self
.
_state
[
param
][
"momentum_buffer"
]
_inplace_add_
(
v
,
grad
,
alpha
=
_momentum
,
beta
=
c1
)
_inplace_add_
(
param
,
v
,
alpha
=
c1
,
beta
=
_neg_lr
)
else
:
_inplace_add_
(
param
,
grad
,
alpha
=
c1
,
beta
=
_neg_lr
)
continue
if
momentum
:
v
=
self
.
_state
[
param
][
"momentum_buffer"
]
v
=
_momentum
*
v
+
grad
# v = v * _momentum + grad
v
*=
_momentum
v
+=
grad
param
-=
_lr
*
v
self
.
_state
[
param
][
"momentum_buffer"
].
_reset
(
v
)
else
:
param
-=
_lr
*
grad
imperative/python/megengine/tensor.py
浏览文件 @
c49427d1
...
...
@@ -28,7 +28,7 @@ class Tensor(_Tensor, ArrayMethodMixin):
dmap_callback
=
None
q_dict
=
{
"mode"
:
None
,
"scale"
:
None
,
"zero_point"
:
None
}
def
__new__
(
cls
,
data
,
dtype
=
None
,
device
=
None
,
is_const
=
False
):
def
__new__
(
cls
,
data
,
dtype
=
None
,
device
=
None
,
is_const
=
False
,
no_cache
=
False
):
if
device
is
None
:
cn
=
get_default_device
()
elif
isinstance
(
device
,
str
):
...
...
@@ -49,7 +49,7 @@ class Tensor(_Tensor, ArrayMethodMixin):
if
0
in
data
.
strides
:
data
=
data
.
squeeze
().
reshape
(
data
.
shape
)
obj
=
_Tensor
.
__new__
(
cls
,
data
,
dtype
,
cn
,
is_const
)
obj
=
_Tensor
.
__new__
(
cls
,
data
,
dtype
,
cn
,
is_const
,
no_cache
)
return
obj
@
property
...
...
imperative/python/src/grad_override.cpp
浏览文件 @
c49427d1
...
...
@@ -38,9 +38,9 @@ std::shared_ptr<Tensor> broadcast_to(Tensor* x, Tensor* s) {
std
::
shared_ptr
<
Tensor
>
make_tensor
(
CompNode
cn
,
Tensor
*
shape
,
float
v
=
0
)
{
HostTensorND
scalar
{
cn
,
{{
1
},
dtype
::
Float32
()}};
scalar
.
ptr
<
float
>
()[
0
]
=
v
;
interpreter
::
Interpreter
::
Handle
handle
=
interpreter_for_py
->
put
(
scalar
);
interpreter
::
Interpreter
::
Handle
handle
=
interpreter_for_py
->
put
(
scalar
,
false
);
auto
&&
t
=
std
::
make_shared
<
Tensor
>
(
handle
);
auto
&&
res
=
broadcast_to
(
t
.
get
(),
shape
);
auto
res
=
broadcast_to
(
t
.
get
(),
shape
);
return
res
;
}
...
...
imperative/python/src/tensor.cpp
浏览文件 @
c49427d1
...
...
@@ -231,13 +231,14 @@ TensorWrapper::TensorWrapper(PyObject* args, PyObject* kwargs) {
}
}
else
{
py
::
detail
::
loader_life_support
life_sup
;
// FIXME!!!required to cast DType
if
(
nargs
!=
4
&&
nargs
!=
5
)
{
throw
py
::
type_error
(
"expect 4 or 5 arguments"
);
}
auto
data
=
tup
[
0
].
cast
<
py
::
array
>
();
DType
dtype
=
tup
[
1
].
cast
<
DType
>
();
CompNode
cn
=
tup
[
2
].
cast
<
CompNode
>
();
bool
is_const
=
tup
[
3
].
cast
<
bool
>
();
if
(
nargs
!=
4
)
{
throw
py
::
type_error
(
"expect 3 arguments"
);
}
bool
no_cache
=
nargs
==
5
?
tup
[
4
].
cast
<
bool
>
()
:
false
;
// const op
if
(
is_const
&&
is_tracing
)
{
...
...
@@ -259,10 +260,10 @@ TensorWrapper::TensorWrapper(PyObject* args, PyObject* kwargs) {
interpreter
::
Interpreter
::
Handle
handle
;
constexpr
auto
size_threshhold
=
TensorShape
::
MAX_NDIM
;
if
(
data
.
size
()
>
size_threshhold
)
{
handle
=
interpreter_for_py
->
put
(
npy
::
np2tensor
(
data
.
ptr
(),
npy
::
Meth
::
borrow
(
cn
),
dtype
));
handle
=
interpreter_for_py
->
put
(
npy
::
np2tensor
(
data
.
ptr
(),
npy
::
Meth
::
borrow
(
cn
),
dtype
)
,
no_cache
);
}
else
{
HostTensorND
ret
(
cn
);
handle
=
interpreter_for_py
->
put
(
npy
::
np2tensor
(
data
.
ptr
(),
npy
::
Meth
::
copy_into
(
&
ret
),
dtype
));
handle
=
interpreter_for_py
->
put
(
npy
::
np2tensor
(
data
.
ptr
(),
npy
::
Meth
::
copy_into
(
&
ret
),
dtype
)
,
no_cache
);
}
m_tensor
=
std
::
make_shared
<
Tensor
>
(
handle
);
...
...
imperative/python/test/integration/test_sgd_momentum.py
浏览文件 @
c49427d1
...
...
@@ -6,6 +6,9 @@
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import
itertools
import
os
import
numpy
as
np
import
megengine
...
...
@@ -58,13 +61,16 @@ def test_sgd_momentum():
np
.
testing
.
assert_almost_equal
(
loss
.
numpy
(),
2.34
*
(
1.23
-
2.34
),
5
)
np
.
testing
.
assert_almost_equal
(
optim
.
_state
[
net
.
a
][
"momentum_buffer"
].
numpy
(),
0.9
*
2.34
+
2.34
optim
.
_state
[
net
.
a
][
"momentum_buffer"
].
numpy
(),
0.9
*
2.34
+
2.34
,
5
)
def
test_sgd_momentum_trace
():
for
symbolic
in
(
True
,
False
):
origin_inplace
=
os
.
getenv
(
"MEGENGINE_INPLACE_UPDATE"
)
symbolic
=
(
True
,
False
)
inplace
=
(
0
,
1
)
for
symbolic
,
inplace
in
itertools
.
product
(
symbolic
,
inplace
):
os
.
environ
[
"MEGENGINE_INPLACE_UPDATE"
]
=
str
(
inplace
)
@
trace
(
symbolic
=
symbolic
)
def
train_func
(
data
,
*
,
model
=
None
,
optim
=
None
,
gm
=
None
):
...
...
@@ -101,5 +107,9 @@ def test_sgd_momentum_trace():
train_func
(
data
,
model
=
net
,
optim
=
optim
,
gm
=
gm
)
np
.
testing
.
assert_almost_equal
(
loss
.
numpy
(),
2.34
*
(
1.23
-
2.34
),
5
)
np
.
testing
.
assert_almost_equal
(
optim
.
_state
[
net
.
a
][
"momentum_buffer"
].
numpy
(),
0.9
*
2.34
+
2.34
optim
.
_state
[
net
.
a
][
"momentum_buffer"
].
numpy
(),
0.9
*
2.34
+
2.34
,
5
)
if
origin_inplace
:
os
.
environ
[
"MEGENGINE_INPLACE_UPDATE"
]
=
origin_inplace
else
:
del
os
.
environ
[
"MEGENGINE_INPLACE_UPDATE"
]
imperative/python/test/unit/test_tracing.py
浏览文件 @
c49427d1
...
...
@@ -325,7 +325,6 @@ def test_raise_on_trace():
@
trace
def
add_abc
(
a
,
b
,
c
):
print
(
"Hello"
)
ps
=
a
+
b
result
=
ps
+
c
if
step_count
==
bad_step
:
...
...
imperative/src/impl/dnn_op_helper.h
浏览文件 @
c49427d1
...
...
@@ -11,6 +11,7 @@
#include "megbrain/comp_node_env.h"
#include "megbrain/comp_node.h"
#include "megbrain/imperative/physical_tensor.h"
using
namespace
megdnn
;
...
...
@@ -29,10 +30,12 @@ struct DnnOprCaller {
Workspace
workspace
;
std
::
unique_ptr
<
Opr
>
op
;
DnnOprCaller
(
CompNode
cn
)
:
cn
(
cn
)
{
DnnOprCaller
(
CompNode
cn
)
:
cn
(
cn
),
op
(
create_operator
(
cn
))
{}
static
std
::
unique_ptr
<
Opr
>
create_operator
(
CompNode
cn
)
{
auto
&&
handle
=
MegDNNHandle
::
get
(
CompNodeEnv
::
from_comp_node
(
cn
)).
handle
();
op
=
handle
->
create_operator
<
Opr
>
();
return
handle
->
create_operator
<
Opr
>
();
}
megdnn
::
Workspace
create_workspace
(
TensorLayout
layout
)
{
...
...
@@ -52,5 +55,36 @@ struct DnnOprCaller {
}
};
template
<
size_t
OSize
>
class
MegDNNDynOutMallocImpl
final
:
public
megdnn
::
DynOutMallocPolicy
{
using
Output
=
std
::
array
<
TensorPtr
,
OSize
>
;
CompNode
m_cn
;
Output
m_out
;
public:
MegDNNDynOutMallocImpl
(
CompNode
cn
)
:
m_cn
{
cn
}
{}
megdnn
::
TensorND
alloc_output
(
size_t
id
,
DType
dtype
,
const
TensorShape
&
shape
,
void
*
user_data
)
override
{
TensorLayout
m_layout
(
shape
,
dtype
);
m_out
[
id
]
=
Tensor
::
make
(
m_layout
,
m_cn
);
return
m_out
[
id
]
->
dev_tensor
().
as_megdnn
();
}
void
*
alloc_workspace
(
size_t
sz
,
void
*
user_data
)
override
{
return
m_cn
.
alloc_device
(
sz
);
}
void
free_workspace
(
void
*
ptr
,
void
*
user_data
)
override
{
m_cn
.
free_device
(
ptr
);
}
TensorPtr
at
(
size_t
id
)
{
return
m_out
[
id
];
}
};
}
// namespace imperative
}
// namespace mgb
imperative/src/impl/interpreter_impl.cpp
浏览文件 @
c49427d1
...
...
@@ -28,13 +28,13 @@ Interpreter& Interpreter::inst() {
return
inst_
;
}
void
*
ChannelImpl
::
put
(
const
HostTensorND
&
value
)
{
void
*
ChannelImpl
::
put
(
const
HostTensorND
&
value
,
bool
no_cache
)
{
auto
info
=
alloc
();
info
->
desc
.
layout
=
value
.
layout
();
info
->
desc
.
comp_node
=
value
.
comp_node
();
info
->
desc
.
value
=
value
.
proxy_to_default_cpu
();
m_valid_handle
.
insert
(
info
);
m_worker
.
add_task
(
Put
{
info
,
value
});
m_worker
.
add_task
(
Put
{
info
,
value
,
no_cache
});
return
info
;
}
...
...
@@ -395,7 +395,8 @@ void ChannelImpl::process_one_task(Command& cmd) {
using
T
=
std
::
remove_reference_t
<
decltype
(
cmd
)
>
;
try
{
if
constexpr
(
std
::
is_same_v
<
T
,
Put
>
)
{
produce_tensor
(
cmd
.
dest
,
Tensor
::
make
(
cmd
.
value
));
auto
value
=
cmd
.
no_cache
?
std
::
make_shared
<
Tensor
>
(
cmd
.
value
)
:
Tensor
::
make
(
cmd
.
value
);
produce_tensor
(
cmd
.
dest
,
std
::
move
(
value
));
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
ApplyOp
>
)
{
SmallVector
<
TensorPtr
>
tensor_inputs
;
tensor_inputs
.
reserve
(
cmd
.
inputs
.
size
());
...
...
imperative/src/impl/interpreter_impl.h
浏览文件 @
c49427d1
...
...
@@ -57,6 +57,7 @@ struct TensorInfo {
struct
Put
{
TensorInfo
*
dest
;
HostTensorND
value
;
bool
no_cache
=
false
;
};
struct
ApplyOp
{
std
::
shared_ptr
<
OpDef
>
op
;
...
...
@@ -92,7 +93,7 @@ struct ChannelImpl : Interpreter::Channel {
ChannelImpl
()
:
m_worker
(
this
)
{}
~
ChannelImpl
()
override
;
Handle
put
(
const
HostTensorND
&
value
)
override
;
Handle
put
(
const
HostTensorND
&
value
,
bool
no_cache
)
override
;
Handle
put
(
const
DeviceTensorND
&
value
)
override
;
void
del
(
Handle
)
override
;
...
...
imperative/src/impl/ops/cond_take.cpp
浏览文件 @
c49427d1
...
...
@@ -20,44 +20,6 @@ namespace mgb::imperative {
namespace
{
class
MegDNNDynOutMallocImpl
final
:
public
megdnn
::
DynOutMallocPolicy
{
using
Output
=
std
::
array
<
TensorPtr
,
2
>
;
CompNode
m_cn
;
Output
m_out
;
public:
MegDNNDynOutMallocImpl
(
CompNode
cn
)
:
m_cn
{
cn
}
{}
megdnn
::
TensorND
alloc_output
(
size_t
id
,
DType
dtype
,
const
TensorShape
&
shape
,
void
*
user_data
)
override
;
void
*
alloc_workspace
(
size_t
sz
,
void
*
user_data
)
override
;
void
free_workspace
(
void
*
ptr
,
void
*
user_data
)
override
;
TensorPtr
at
(
size_t
id
);
};
megdnn
::
TensorND
MegDNNDynOutMallocImpl
::
alloc_output
(
size_t
id
,
DType
dtype
,
const
TensorShape
&
shape
,
void
*
/*user_data*/
)
{
TensorLayout
m_layout
(
shape
,
dtype
);
m_out
[
id
]
=
Tensor
::
make
(
m_layout
,
m_cn
);
return
m_out
[
id
]
->
dev_tensor
().
as_megdnn
();
}
void
*
MegDNNDynOutMallocImpl
::
alloc_workspace
(
size_t
sz
,
void
*
/*user_data*/
)
{
return
m_cn
.
alloc_device
(
sz
);
}
void
MegDNNDynOutMallocImpl
::
free_workspace
(
void
*
ptr
,
void
*
/*user_data*/
)
{
m_cn
.
free_device
(
ptr
);
}
TensorPtr
MegDNNDynOutMallocImpl
::
at
(
size_t
id
)
{
return
m_out
[
id
];
}
cg
::
OperatorNodeBase
*
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
...
...
@@ -94,7 +56,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
dtype
::
Byte
());
auto
dnn_workspace
=
dnn_op
.
create_workspace
(
m_layout
);
MegDNNDynOutMallocImpl
policy
{
inp
->
comp_node
()};
MegDNNDynOutMallocImpl
<
2
>
policy
{
inp
->
comp_node
()};
dnn_op
.
op
->
exec
(
inp
->
dev_tensor
().
as_megdnn
(),
msk
->
dev_tensor
().
as_megdnn
(),
...
...
imperative/src/impl/ops/elemwise.cpp
浏览文件 @
c49427d1
...
...
@@ -11,8 +11,11 @@
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/opr/basic_arith.h"
#include "megbrain/imperative/opr_utility.h"
#include "megbrain/opr/utility.h"
#include "../op_trait.h"
#include "../dnn_op_helper.h"
namespace
mgb
{
namespace
imperative
{
...
...
@@ -84,12 +87,142 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
return
{
Tensor
::
make
(
out
)};
}
MGB_DEFINE_OPR_CLASS
(
ForceInplaceElemwise
,
cg
::
SingleCNOperatorNodeBaseT
<
opr
::
mixin
::
MegDNNOprHolder
>
)
//{
public:
struct
Param
{
using
Mode
=
megdnn
::
Elemwise
::
Param
::
Mode
;
Mode
mode
;
size_t
inplace_index
;
};
using
Mode
=
Param
::
Mode
;
ForceInplaceElemwise
(
const
VarNodeArray
&
inputs
,
Param
param
,
OperatorNodeConfig
config
=
{})
:
Super
(
inputs
[
0
]
->
owner_graph
(),
config
,
"device_add_update"
,
inputs
),
m_param
{
param
}
{
for
(
auto
*
input
:
inputs
)
{
add_input
({
input
});
}
add_output
(
None
)
->
set_fwd_in2out_writable_force
(
input
(
param
.
inplace_index
)).
add_flag
(
VarNode
::
Flag
::
NO_MEM_RECLAIM
);
}
static
SymbolVar
make
(
const
VarNodeArray
&
inputs
,
Param
param
)
{
return
SymbolVar
{
inputs
[
0
]}.
insert_single_output_opr
<
ForceInplaceElemwise
>
(
inputs
,
param
);
}
static
cg
::
OperatorNodeBase
*
shallow_copy
(
const
serialization
::
OprShallowCopyContext
&
ctx
,
const
cg
::
OperatorNodeBase
&
opr_
,
const
VarNodeArray
&
inputs
,
const
OperatorNodeConfig
&
config
);
protected:
NodeProp
*
do_make_node_prop
()
const
override
{
auto
ret
=
Super
::
do_make_node_prop
();
ret
->
add_flag
(
NodeProp
::
Flag
::
FORCE_UPDATE_INPUT_VAR
);
return
ret
;
}
void
create_megdnn_opr
()
override
{
auto
opr
=
DnnOprCaller
<
megdnn
::
Elemwise
>::
create_operator
(
comp_node
());
opr
->
param
().
mode
=
m_param
.
mode
;
set_megdnn_opr
(
std
::
move
(
opr
));
}
void
scn_do_execute
()
override
{
auto
to_dnnnd
=
[
&
](
auto
*
var
){
return
var
->
dev_tensor
().
as_megdnn
();
};
megdnn
::
TensorNDArray
inputs_dnnnd
;
for
(
auto
*
input
:
input
())
{
inputs_dnnnd
.
push_back
(
to_dnnnd
(
input
));
}
mgb_assert
(
input
(
m_param
.
inplace_index
)
->
contain_flag
(
VarNode
::
Flag
::
NO_SYS_MEM_ALLOC
),
"ForceInplaceElemwise cannot be applied in internal tensor"
);
auto
*
out_dest
=
output
(
0
);
auto
*
opr
=
static_cast
<
megdnn
::
Elemwise
*>
(
megdnn_opr
());
opr
->
exec
(
std
::
move
(
inputs_dnnnd
),
to_dnnnd
(
out_dest
));
}
void
init_output_static_infer_desc
()
override
{
using
namespace
cg
::
static_infer
;
owner_graph
()
->
static_infer_manager
().
register_shape_infer
(
output
(
0
),
ShapeInferDesc
::
make_identity
(
input
(
m_param
.
inplace_index
)));
}
private:
Param
m_param
;
void
record_execute_deps
(
ExecDependencyArray
&
deps
)
override
{
record_megdnn_opr
(
deps
);
}
};
MGB_DYN_TYPE_OBJ_FINAL_IMPL
(
ForceInplaceElemwise
);
cg
::
OperatorNodeBase
*
ForceInplaceElemwise
::
shallow_copy
(
const
serialization
::
OprShallowCopyContext
&
ctx
,
const
cg
::
OperatorNodeBase
&
opr_
,
const
VarNodeArray
&
inputs
,
const
OperatorNodeConfig
&
config
)
{
auto
&&
opr
=
opr_
.
cast_final_safe
<
ForceInplaceElemwise
>
();
auto
*
graph
=
ctx
.
owner_graph
(
opr
,
inputs
);
return
graph
->
insert_opr
(
std
::
make_unique
<
ForceInplaceElemwise
>
(
inputs
,
opr
.
m_param
,
config
));
}
MGB_REG_OPR_SHALLOW_COPY
(
ForceInplaceElemwise
,
ForceInplaceElemwise
::
shallow_copy
);
cg
::
OperatorNodeBase
*
apply_inplace_add_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
auto
dest
=
inputs
[
0
],
delta
=
inputs
[
1
],
alpha
=
inputs
[
2
],
beta
=
inputs
[
3
];
auto
mode
=
ForceInplaceElemwise
::
Param
::
Mode
::
FUSE_MUL_ADD4
;
return
ForceInplaceElemwise
::
make
({
alpha
,
dest
,
beta
,
delta
},
{
mode
,
1
}).
node
()
->
owner_opr
();
}
SmallVector
<
TensorPtr
>
apply_inplace_add_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
){
auto
dest
=
inputs
[
0
],
delta
=
inputs
[
1
],
alpha
=
inputs
[
2
],
beta
=
inputs
[
3
];
auto
tensor_to_scalar
=
[](
const
TensorPtr
&
tensor
)
->
float
{
return
*
tensor
->
get_value
().
ptr
<
float
>
();
};
DnnOprCaller
<
megdnn
::
AddUpdate
>
caller
{
dest
->
comp_node
()};
caller
.
op
->
param
()
=
{
tensor_to_scalar
(
alpha
),
tensor_to_scalar
(
beta
)
};
caller
.
op
->
exec
(
dest
->
dev_tensor
().
as_megdnn
(),
delta
->
dev_tensor
().
as_megdnn
());
return
{
std
::
make_shared
<
Tensor
>
(
dest
->
blob
(),
dest
->
offset
(),
dest
->
layout
())
};
}
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_inplace_add_output_attrs_fallible
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
)
{
mgb_assert
(
inputs
.
size
()
==
4
,
"invalid input number for inplace_add"
);
CompNode
cn
;
for
(
auto
&&
input
:
inputs
)
{
if
(
!
cn
.
valid
())
{
cn
=
input
.
comp_node
;
}
else
{
mgb_assert
(
input
.
comp_node
==
cn
,
"inputs should be in same comp_node"
);
}
}
auto
dest
=
inputs
[
0
],
delta
=
inputs
[
1
],
alpha
=
inputs
[
2
],
beta
=
inputs
[
3
];
bool
succeed
=
dest
.
layout
.
ndim
!=
0
;
if
(
succeed
)
{
mgb_assert
(
delta
.
layout
.
ndim
==
0
||
dest
.
layout
.
eq_shape
(
delta
.
layout
),
"dest and delta must have same shape"
);
mgb_assert
(
alpha
.
layout
.
ndim
==
0
||
alpha
.
layout
.
eq_shape
({
1
}),
"alpha should be scalar"
);
mgb_assert
(
beta
.
layout
.
ndim
==
0
||
beta
.
layout
.
eq_shape
({
1
}),
"beta should be scalar"
);
}
mgb_assert
(
alpha
.
layout
.
dtype
==
dtype
::
Float32
(),
"alpha should be float32"
);
mgb_assert
(
beta
.
layout
.
dtype
==
dtype
::
Float32
(),
"beta should be float32"
);
return
{{
dest
},
succeed
};
}
OP_TRAIT_REG
(
Elemwise
,
Elemwise
,
opr
::
Elemwise
)
.
make_from_op_node
(
make_from_op_node
)
.
apply_on_var_node
(
apply_on_var_node
)
.
infer_output_attrs_fallible
(
infer_output_attrs_fallible
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
fallback
();
OP_TRAIT_REG
(
InplaceAdd
,
InplaceAdd
,
opr
::
AddUpdate
)
.
apply_on_var_node
(
apply_inplace_add_on_var_node
)
.
apply_on_physical_tensor
(
apply_inplace_add_on_physical_tensor
)
.
infer_output_attrs_fallible
(
infer_inplace_add_output_attrs_fallible
)
.
fallback
();
}
// anonymous namespace
}
// namespace imperative
...
...
imperative/src/impl/proxy_graph_detail.cpp
浏览文件 @
c49427d1
...
...
@@ -32,14 +32,22 @@ SmallVector<Tensor*> to_raw_ptr_array(
return
ret
;
}
SmallVector
<
LogicalTensorDesc
>
infer_output_attrs
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
auto
&&
graph
=
ProxyGraph
::
get_default_graph
();
return
graph
->
infer_output_attrs
(
def
,
to_raw_ptr_array
(
inputs
));
}
}
// anonymous namespace
void
exec
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
_
,
const
SmallVector
<
TensorPtr
>&
outputs
_
)
{
const
SmallVector
<
TensorPtr
>&
inputs
,
const
SmallVector
<
TensorPtr
>&
outputs
)
{
auto
&&
graph
=
ProxyGraph
::
get_default_graph
();
auto
inputs
=
to_raw_ptr_array
(
inputs_
),
outputs
=
to_raw_ptr_array
(
outputs_
);
auto
raw_inputs
=
to_raw_ptr_array
(
inputs
),
raw_outputs
=
to_raw_ptr_array
(
outputs
);
CompNode
::
UnorderedSet
used_cns
;
for
(
auto
&&
out
:
outputs
)
{
for
(
auto
&&
out
:
raw_
outputs
)
{
auto
cn
=
out
->
comp_node
();
if
(
used_cns
.
insert
(
cn
).
second
)
{
for
(
auto
&&
in
:
inputs
)
{
...
...
@@ -50,7 +58,7 @@ void exec(const OpDef& def,
}
}
}
graph
->
invoke_op
(
def
,
inputs
,
outputs
);
graph
->
invoke_op
(
def
,
raw_inputs
,
raw_
outputs
);
for
(
auto
&&
cn
:
used_cns
)
{
for
(
auto
&&
in
:
inputs
)
{
if
(
in
->
comp_node
()
!=
cn
)
{
...
...
@@ -60,14 +68,6 @@ void exec(const OpDef& def,
}
}
SmallVector
<
LogicalTensorDesc
>
infer_output_attrs
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
auto
&&
graph
=
ProxyGraph
::
get_default_graph
();
return
graph
->
infer_output_attrs
(
def
,
to_raw_ptr_array
(
inputs
));
}
}
// anonymous namespace
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
)
{
...
...
imperative/src/include/megbrain/imperative/interpreter.h
浏览文件 @
c49427d1
...
...
@@ -21,7 +21,7 @@ struct Interpreter {
struct
Channel
{
virtual
~
Channel
()
=
default
;
virtual
Handle
put
(
const
HostTensorND
&
value
)
=
0
;
virtual
Handle
put
(
const
HostTensorND
&
value
,
bool
no_cache
)
=
0
;
virtual
Handle
put
(
const
DeviceTensorND
&
value
)
=
0
;
virtual
void
del
(
Handle
)
=
0
;
...
...
imperative/src/include/megbrain/imperative/physical_tensor.h
浏览文件 @
c49427d1
...
...
@@ -101,6 +101,10 @@ public:
return
m_layout
;
}
size_t
offset
()
const
{
return
m_offset
;
}
DeviceTensorND
dev_tensor
();
static
TensorPtr
make_scalar
(
DTypeScalar
value
,
CompNode
cn
);
...
...
imperative/src/include/megbrain/imperative/proxy_graph_detail.h
浏览文件 @
c49427d1
...
...
@@ -24,6 +24,10 @@ apply_on_physical_tensor(const OpDef& def,
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
);
void
exec
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
const
SmallVector
<
TensorPtr
>&
outputs
);
BackwardGraphResult
make_backward_graph
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
,
...
...
src/core/include/megbrain/ir/ops.td
浏览文件 @
c49427d1
...
...
@@ -239,4 +239,6 @@ def ElemwiseMultiType: MgbHashableOp<"ElemwiseMultiType", [ElemwiseMultiTypePara
);
}
def InplaceAdd: MgbHashableOp<"InplaceAdd", [EmptyParam]>;
#endif // MGB_OPS
src/opr/impl/basic_arith.cpp
浏览文件 @
c49427d1
...
...
@@ -886,12 +886,9 @@ AddUpdate::AddUpdate(VarNode *dest, VarNode *delta,
m_param
{
param
}
{
auto
dest_opr
=
dest
->
owner_opr
();
mgb_throw_if
(
!
(
dest_opr
->
same_type
<
SharedDeviceTensor
>
()
||
dest_opr
->
same_type
<
VolatileSharedDeviceTensor
>
()),
mgb_throw_if
(
dest_opr
->
same_type
<
ImmutableTensor
>
(),
GraphError
,
"AddUpdate must be applied on SharedDeviceTensor; "
"got %s{%s} actually"
,
dest_opr
->
cname
(),
dest_opr
->
dyn_typeinfo
()
->
name
);
"AddUpdate cannot be applied on ImmutableTensor; "
);
add_input
({
dest
,
delta
});
/*
...
...
src/opr/impl/internal/identical_fwd.cpp
浏览文件 @
c49427d1
...
...
@@ -80,6 +80,22 @@ public:
MGB_TYPEINFO_OBJ_IMPL
(
ForwardInputToOutput
::
MutableSrc
);
void
ForwardInputToOutput
::
mixin_init_rt_force_dynamic_mem_alloc_imply_chain
(
OperatorNodeBase
&
opr
)
{
VarNode
*
valid_out
=
nullptr
;
for
(
auto
i
:
opr
.
output
())
{
if
(
!
i
->
contain_flag
(
VarNode
::
Flag
::
VOLATILE_CONTENT
))
{
mgb_assert
(
!
valid_out
);
valid_out
=
i
;
}
}
mgb_assert
(
valid_out
);
// There may be many inputs such as in opr::VirtualDep, but we only forward first one
opr
.
input
(
0
)
->
add_rt_force_dynamic_mem_alloc_imply_chain
(
valid_out
);
valid_out
->
add_rt_force_dynamic_mem_alloc_imply_chain
(
opr
.
input
(
0
));
}
void
ForwardInputToOutput
::
mixin_mem_plan_fwd_in2out_readonly
(
OperatorNodeBase
&
opr
)
{
m_mem_fwd_success
=
opr
.
output
(
0
)
->
set_fwd_in2out_readonly
(
...
...
src/opr/include/megbrain/opr/internal/identical_fwd.h
浏览文件 @
c49427d1
...
...
@@ -67,6 +67,7 @@ class ForwardInputToOutput: public cg::OperatorNodeMixinBase {
virtual
void
mixin_scn_do_execute
(
OperatorNodeBase
&
opr
);
void
mixin_init_rt_force_dynamic_mem_alloc_imply_chain
(
OperatorNodeBase
&
opr
);
void
mixin_mem_plan_fwd_in2out_readonly
(
OperatorNodeBase
&
opr
);
void
mixin_init_output_static_infer_desc
(
OperatorNodeBase
&
opr
);
virtual
cg
::
static_infer
::
ValueInferDesc
mixin_get_static_infer_desc
(
OperatorNodeBase
&
opr
);
...
...
@@ -173,8 +174,7 @@ MGB_DEFINE_CLS_WITH_SUPER(ForwardInputToOutput,
protected:
using
Super
::
Super
;
void
init_rt_force_dynamic_mem_alloc_imply_chain
()
override
{
mixin
::
init_rt_force_dynamic_mem_alloc_imply_chain_for_dyn_pass_i2o
(
*
this
);
this
->
mixin_init_rt_force_dynamic_mem_alloc_imply_chain
(
*
this
);
}
void
mem_plan_fwd_in2out_readonly
()
override
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录