MegEngine 天元 / MegEngine
Commit
da91e650
Authored on May 24, 2022 by Megvii Engine Team
refactor(ops/layer_norm): speed up the host speed of layer_norm
GitOrigin-RevId: 6f359b5b295f3d340947e0f6ea948c0fc1c19886
Parent: 67cfce9f
Showing 6 changed files with 157 additions and 34 deletions (+157, -34)
dnn/include/megdnn/oprs/nn.h                              +5    -0
dnn/src/common/layer_norm.cpp                             +13   -6
imperative/python/megengine/functional/nn.py              +1    -3
imperative/src/impl/ops/layer_norm.cpp                    +115  -0
imperative/src/impl/ops/specializations.cpp               +0    -25
imperative/src/impl/transformations/dtype_promote.cpp     +23   -0
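This commit reduces the Python/host overhead of layer_norm in three ways: MegDNN gains a static deduce_layout_fwd_impl so output layouts can be deduced from the data layout and param alone; the imperative glue for LayerNorm moves out of the generic specializations.cpp into a dedicated imperative/src/impl/ops/layer_norm.cpp with its own shape inference (infer_output_attrs_fallible) and eager execution path (apply_on_physical_tensor); and the per-call AMP cast in functional/nn.py is replaced by a C++ dtype-promotion rule. As a rough illustration (not part of the commit), here is a minimal sketch of the kind of host-bound workload such a refactor targets, assuming MegEngine is installed:

    import time
    import numpy as np
    import megengine as mge
    from megengine.functional.nn import layer_norm

    # Many small layer_norm calls are dominated by host-side dispatch cost,
    # which is what this refactor aims to reduce.
    x = mge.tensor(np.random.randn(8, 16, 32).astype("float32"))

    start = time.perf_counter()
    for _ in range(1000):
        y = layer_norm(x, normalized_shape=(32,), affine=False)
    y.numpy()  # force computation before reading the clock
    print("1000 calls took %.3f s" % (time.perf_counter() - start))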
dnn/include/megdnn/oprs/nn.h
@@ -1939,6 +1939,11 @@ class LayerNormBase : public OperatorBase {
    DEF_OPR_IMPL_CTOR(LayerNormBase, OperatorBase);
    DEF_OPR_PARAM(LayerNorm);

public:
    MGE_WIN_DECLSPEC_FUC static void deduce_layout_fwd_impl(
            const TensorLayout& data, const Param& p, TensorLayout& dst,
            TensorLayout& mean, TensorLayout& rstd);

protected:
    void deduce_layout_fwd(
            const TensorLayout& data, const TensorLayout& weight,
...
dnn/src/common/layer_norm.cpp
@@ -4,12 +4,11 @@
namespace megdnn {

void LayerNormBase::deduce_layout_fwd(
        const TensorLayout& data, const TensorLayout& weight,
        const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean,
        TensorLayout& rstd) {
    MEGDNN_MARK_USED_VAR(weight);
    MEGDNN_MARK_USED_VAR(bias);
    auto p = param();
    using Param = LayerNormBase::Param;
void LayerNormBase::deduce_layout_fwd_impl(
        const TensorLayout& data, const Param& p, TensorLayout& dst,
        TensorLayout& mean, TensorLayout& rstd) {
    TensorShape unnormalized_shape;
    unnormalized_shape.ndim = data.ndim - p.normalized_dim;
    for (size_t i = 0; i < unnormalized_shape.ndim; ++i) {
...

@@ -22,6 +21,14 @@ void LayerNormBase::deduce_layout_fwd(
    rstd = unnormalized_layout;
}

void LayerNormBase::deduce_layout_fwd(
        const TensorLayout& data, const TensorLayout& weight,
        const TensorLayout& bias, TensorLayout& dst, TensorLayout& mean,
        TensorLayout& rstd) {
    MEGDNN_MARK_USED_VAR(weight);
    MEGDNN_MARK_USED_VAR(bias);
    deduce_layout_fwd_impl(data, param(), dst, mean, rstd);
}

void LayerNormBase::check_layout_fwd(
        const TensorLayout& data, const TensorLayout& weight,
        const TensorLayout& bias, const TensorLayout& dst,
        const TensorLayout& mean, const TensorLayout& rstd) {
...
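The new static deduce_layout_fwd_impl takes only the data layout and the param, so callers (such as the imperative shape inference added below) can deduce the dst/mean/rstd layouts without weight/bias layouts or an operator instance: dst matches the input, while mean and rstd keep the first data.ndim - normalized_dim dimensions. A hypothetical Python mirror of that shape rule (the helper name is illustrative, not MegEngine API):

    # Hypothetical mirror of deduce_layout_fwd_impl's shape rule: dst matches the
    # input, while mean/rstd keep only the leading (unnormalized) dimensions.
    def layer_norm_output_shapes(data_shape, normalized_dim):
        unnormalized = tuple(data_shape[: len(data_shape) - normalized_dim])
        return tuple(data_shape), unnormalized, unnormalized  # dst, mean, rstd

    print(layer_norm_output_shapes((2, 3, 16), 1))
    # ((2, 3, 16), (2, 3), (2, 3))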
imperative/python/megengine/functional/nn.py
@@ -63,6 +63,7 @@ __all__ = [
    "hsigmoid",
    "hswish",
    "indexing_one_hot",
    "layer_norm",
    "leaky_relu",
    "linear",
    "local_conv2d",
...

@@ -1135,9 +1136,6 @@ def layer_norm(
        bias: must not be None when the affine is true
        eps: a value added to the denominator for numerical stability. Default: 1e-5
    """
    if amp._enabled:
        inp, weight, bias = cast_tensors(inp, weight, bias, promote=True)

    if isinstance(normalized_shape, int):
        normalized_shape = [normalized_shape]
...
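The second hunk shows the Python-side AMP cast (the `if amp._enabled:` block calling cast_tensors) that this commit removes, while the first hunk adds layer_norm to __all__; dtype promotion now happens in the C++ dtype_promote transformation (last file below), so eager calls no longer pay for this check in Python. A minimal usage sketch, assuming MegEngine is installed and using the keyword names from the docstring above:

    import numpy as np
    import megengine as mge
    from megengine import amp
    from megengine.functional.nn import layer_norm

    x = mge.tensor(np.random.randn(4, 8, 16).astype("float16"))
    w = mge.tensor(np.ones(16, dtype="float16"))
    b = mge.tensor(np.zeros(16, dtype="float16"))

    # Under autocast, float16 inputs are expected to be promoted to the AMP
    # high-precision dtype (float32 by default) by the new C++ layer_norm_rule
    # instead of by a Python-side cast_tensors call.
    with amp.autocast():
        y = layer_norm(x, normalized_shape=(16,), affine=True, weight=w, bias=b)
    print(y.dtype)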
imperative/src/impl/ops/layer_norm.cpp
0 → 100644 (new file)
#include "megbrain/opr/dnn/layer_norm.h"
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/opr/internal/megdnn_opr_wrapper.h"
#include "../blob_manager_impl.h"
#include "../dnn_op_helper.h"
#include "../op_trait.h"
namespace
mgb
::
imperative
{
namespace
layer_norm
{
cg
::
OperatorNodeBase
*
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
auto
&&
op
=
static_cast
<
const
LayerNorm
&>
(
def
);
size_t
nr_inp
=
inputs
.
size
();
auto
p
=
op
.
param
();
mgb_assert
((
nr_inp
==
3
&&
p
.
affine
)
||
(
nr_inp
==
1
&&
!
p
.
affine
));
OperatorNodeConfig
config
{
op
.
make_name
()};
if
(
nr_inp
==
3
)
{
return
opr
::
LayerNorm
::
make
(
inputs
[
0
],
inputs
[
1
],
inputs
[
2
],
op
.
param
(),
config
)[
0
]
.
node
()
->
owner_opr
();
}
else
{
return
opr
::
LayerNorm
::
make
(
inputs
[
0
],
op
.
param
(),
config
)[
0
]
.
node
()
->
owner_opr
();
}
}
std
::
tuple
<
SmallVector
<
LogicalTensorDesc
>
,
bool
>
infer_output_attrs_fallible
(
const
OpDef
&
def
,
const
SmallVector
<
LogicalTensorDesc
>&
inputs
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
LayerNorm
>
();
size_t
nr_inp
=
inputs
.
size
();
auto
p
=
op_def
.
param
();
mgb_assert
(
(
nr_inp
==
3
&&
p
.
affine
)
||
(
nr_inp
==
1
&&
!
p
.
affine
),
"num of inputs of pooling should be 1 or 3 but you give %zu"
,
inputs
.
size
());
auto
&&
inp
=
inputs
[
0
];
auto
&
inp_cn
=
inp
.
comp_node
;
if
(
inp
.
layout
.
ndim
==
0
)
{
return
{{{
TensorLayout
{
inp
.
layout
.
dtype
},
inp_cn
,
{}},
{
TensorLayout
{
dtype
::
Float32
()},
inp_cn
,
{}},
{
TensorLayout
{
dtype
::
Float32
()},
inp_cn
,
{}}},
false
};
}
TensorLayout
oup_layout
,
mean_layout
,
rstd_layout
;
megdnn
::
LayerNorm
::
deduce_layout_fwd_impl
(
inp
.
layout
,
p
,
oup_layout
,
mean_layout
,
rstd_layout
);
return
{{{
oup_layout
,
inp_cn
,
{}},
{
mean_layout
,
inp_cn
,
{}},
{
rstd_layout
,
inp_cn
,
{}}},
true
};
}
SmallVector
<
TensorPtr
>
apply_on_physical_tensor
(
const
OpDef
&
def
,
const
SmallVector
<
TensorPtr
>&
inputs
,
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
LayerNorm
>
();
size_t
nr_inp
=
inputs
.
size
();
auto
p
=
op_def
.
param
();
mgb_assert
(
(
nr_inp
==
3
&&
p
.
affine
)
||
(
nr_inp
==
1
&&
!
p
.
affine
),
"num of inputs of pooling should be 1 or 3 but you give %zu"
,
inputs
.
size
());
auto
cn
=
inputs
[
0
]
->
comp_node
();
DnnOprCaller
<
megdnn
::
LayerNorm
>
caller
(
cn
);
auto
&&
dnn_opr
=
caller
.
op
;
dnn_opr
->
param
()
=
p
;
TensorLayout
oup_layout
,
mean_layout
,
rstd_layout
;
megdnn
::
LayerNorm
::
deduce_layout_fwd_impl
(
inputs
[
0
]
->
dnn_tensor
().
layout
,
p
,
oup_layout
,
mean_layout
,
rstd_layout
);
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
DeviceTensorND
mean_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
mean_layout
);
DeviceTensorND
rstd_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
rstd_layout
);
megdnn
::
Workspace
dnn_wk
;
auto
wk_size
=
caller
.
op
->
get_workspace_in_bytes
(
inputs
[
0
]
->
dnn_tensor
().
layout
,
p
.
affine
?
inputs
[
1
]
->
dnn_tensor
().
layout
:
TensorLayout
(),
p
.
affine
?
inputs
[
2
]
->
dnn_tensor
().
layout
:
TensorLayout
(),
oup_layout
,
mean_layout
,
rstd_layout
);
if
(
wk_size
!=
0
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
dnn_opr
->
exec
(
inputs
[
0
]
->
dnn_tensor
(),
p
.
affine
?
inputs
[
1
]
->
dnn_tensor
()
:
megdnn
::
TensorND
(),
p
.
affine
?
inputs
[
2
]
->
dnn_tensor
()
:
megdnn
::
TensorND
(),
out_devtensor
.
as_megdnn
(),
mean_devtensor
.
as_megdnn
(),
rstd_devtensor
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out_devtensor
),
Tensor
::
make
(
mean_devtensor
),
Tensor
::
make
(
rstd_devtensor
)};
}
OP_TRAIT_REG
(
LayerNorm
,
LayerNorm
)
.
apply_on_var_node
(
apply_on_var_node
)
.
infer_output_attrs_fallible
(
infer_output_attrs_fallible
)
.
apply_on_physical_tensor
(
apply_on_physical_tensor
)
.
fallback
();
}
// namespace layer_norm
}
// namespace mgb::imperative
\ No newline at end of file
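This new file registers three traits for LayerNorm: apply_on_var_node builds the graph operator, infer_output_attrs_fallible deduces output descriptors cheaply via the new deduce_layout_fwd_impl (returning an unknown layout when the input shape is not yet known), and apply_on_physical_tensor runs the kernel eagerly through DnnOprCaller, allocating dst/mean/rstd and a workspace only when one is required. The assertion enforces that the op receives three inputs (data, weight, bias) when affine is set and a single input otherwise, matching the two call forms at the Python level; a minimal sketch, assuming MegEngine is installed:

    import numpy as np
    import megengine as mge
    from megengine.functional.nn import layer_norm

    x = mge.tensor(np.random.randn(4, 8, 16).astype("float32"))

    # Non-affine form: the imperative op sees a single input tensor.
    y0 = layer_norm(x, normalized_shape=(16,), affine=False)

    # Affine form: weight and bias become the op's 2nd and 3rd inputs.
    w = mge.tensor(np.ones(16, dtype="float32"))
    b = mge.tensor(np.zeros(16, dtype="float32"))
    y1 = layer_norm(x, normalized_shape=(16,), affine=True, weight=w, bias=b)

    print(y0.shape, y1.shape)  # both (4, 8, 16)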
imperative/src/impl/ops/specializations.cpp
@@ -8,7 +8,6 @@
#include "megbrain/opr/dnn/correlation.h"
#include "megbrain/opr/dnn/fake_quant.h"
#include "megbrain/opr/dnn/images2neibs.h"
#include "megbrain/opr/dnn/layer_norm.h"
#include "megbrain/opr/dnn/local.h"
#include "megbrain/opr/dnn/lrn.h"
#include "megbrain/opr/dnn/lsq.h"
@@ -729,28 +728,4 @@ auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
OP_TRAIT_REG(LRN, LRN).apply_on_var_node(apply_on_var_node).fallback();
}  // namespace lrn

namespace layer_norm {

cg::OperatorNodeBase* apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
    auto&& op = static_cast<const LayerNorm&>(def);
    size_t nr_inp = inputs.size();
    auto p = op.param();
    mgb_assert((nr_inp == 3 && p.affine) || (nr_inp == 1 && !p.affine));
    OperatorNodeConfig config{op.make_name()};
    if (nr_inp == 3) {
        return opr::LayerNorm::make(
                       inputs[0], inputs[1], inputs[2], op.param(), config)[0]
                .node()
                ->owner_opr();
    } else {
        return opr::LayerNorm::make(inputs[0], op.param(), config)[0]
                .node()
                ->owner_opr();
    }
}

OP_TRAIT_REG(LayerNorm, LayerNorm).apply_on_var_node(apply_on_var_node).fallback();
}  // namespace layer_norm
}  // namespace mgb::imperative
imperative/src/impl/transformations/dtype_promote.cpp
@@ -289,6 +289,28 @@ ValueRefList batch_norm_rule(const OpDef& op, Span<ValueRef> inputs) {
    return imperative::apply(op, inputs);
}

ValueRefList layer_norm_rule(const OpDef& op, Span<ValueRef> inputs) {
    // avoid the amp_dtype_autocast
    if (DTypePromoteCfg::amp_dtype_autocast_enabled) {
        SmallVector<DType> dtypes = get_value_dtypes(inputs);
        ValueRefList converted(inputs.size());

        for (size_t i = 0; i < inputs.size(); ++i) {
            mgb::DType target_dtype = DTypePromoteCfg::amp_high_prec_dtype;
            if (dtypes[i] != target_dtype) {
                converted[i] = imperative::apply(
                        ApplyOp(*TypeCvt::make(target_dtype)), inputs[i])[0];
            } else {
                converted[i] = inputs[i];
            }
        }

        return imperative::apply(op, converted);
    }

    return imperative::apply(op, inputs);
}

ValueRefList naive_promote_rule(const OpDef& op, Span<ValueRef> inputs) {
    SmallVector<DType> dtypes = get_value_dtypes(inputs);
    mgb::DType target_dtype = get_promoted_dtype(dtypes);
...
@@ -319,6 +341,7 @@ struct DTypePromoteRuleRegistry {
        register_dtype_promote_rule<BatchNorm>(batch_norm_rule);
        register_dtype_promote_rule<Convolution3D>(naive_promote_rule);
        register_dtype_promote_rule<Convolution3DBackwardData>(naive_promote_rule);
        register_dtype_promote_rule<LayerNorm>(layer_norm_rule);
    }
} register_helper;
...
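layer_norm_rule runs only when AMP dtype autocast is enabled: every input whose dtype differs from DTypePromoteCfg::amp_high_prec_dtype is cast to that dtype before the op is applied; otherwise the inputs pass through unchanged. This is the C++ replacement for the cast_tensors call removed from functional/nn.py. A hypothetical Python mirror of the rule (illustrative names, not MegEngine API):

    # Hypothetical mirror of layer_norm_rule: under AMP autocast every input is
    # cast to the high-precision dtype; otherwise dtypes are left unchanged.
    def layer_norm_promote(input_dtypes, autocast_enabled, high_prec="float32"):
        if not autocast_enabled:
            return list(input_dtypes)
        return [high_prec if d != high_prec else d for d in input_dtypes]

    print(layer_norm_promote(["float16", "float16", "float32"], autocast_enabled=True))
    # ['float32', 'float32', 'float32']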