Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
97b1b777
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
97b1b777
编写于
9月 16, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb): add megbrain layer norm opr with subgraph
GitOrigin-RevId: 9b7fa821f8e456329723aef3cf0b2a8080de669b
上级
eca6e1d9
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
146 additions
and
12 deletions
+146
-12
imperative/python/megengine/functional/nn.py
imperative/python/megengine/functional/nn.py
+87
-0
imperative/python/megengine/module/normalization.py
imperative/python/megengine/module/normalization.py
+3
-12
imperative/python/test/unit/functional/test_functional.py
imperative/python/test/unit/functional/test_functional.py
+56
-0
未找到文件。
imperative/python/megengine/functional/nn.py
浏览文件 @
97b1b777
...
...
@@ -19,6 +19,7 @@ from ..core.ops.builtin import (
GetVarShape
,
Identity
,
Reduce
,
Reshape
,
TypeCvt
,
)
from
..core.ops.special
import
Const
...
...
@@ -1022,6 +1023,92 @@ def softmax(inp: Tensor, axis: Optional[int] = None) -> Tensor:
return
cached
/
down
@lru_cache(maxsize=None)
def _get_layerNorm(device, dtype, dim, gopt_level=2):
    """Build the two layer-norm subgraphs for one argument combination.

    The result is memoized by ``lru_cache`` on the argument tuple, so the
    subgraphs for a given combination are only built once.

    Args:
        device: forwarded to ``subgraph``.
        dtype: forwarded to ``subgraph``; also the target type of the
            ``TypeCvt`` applied to the per-reduction element count.
        dim: passed as ``axis`` to the ``Reshape`` that flattens the input.
        gopt_level: forwarded to ``subgraph``.

    Returns:
        The pair ``(layerNorm, layerNormAffine)``.
    """

    def _normalize(inp, eps, flat_shape, f, c):
        # Shared body of both subgraphs. Ops are emitted in the same order
        # for the affine and the plain variant.
        orig_shape = f(GetVarShape(), inp)

        flat = f(Reshape(axis=dim), inp, flat_shape)
        mean = f(Reduce(mode="mean", axis=-1), flat)
        sq_sum = f(Reduce(mode="sum_sqr", axis=-1), flat)
        reduced_shape = f(GetVarShape(), sq_sum)

        # Elements per reduction: element count of the original input
        # divided by the element count of the reduced result.
        total_elems = f(Reduce(mode="product", axis=0), orig_shape)
        reduced_elems = f(Reduce(mode="product", axis=0), reduced_shape)
        count = f("//", total_elems, reduced_elems)
        count_f = f(TypeCvt(dtype=dtype), count)

        # var = sum(x^2) / count - mean^2
        second_moment = f("/", sq_sum, count_f)
        mean_sq = f("**", mean, c(2))
        var = f("-", second_moment, mean_sq)

        # inv_std = (var + eps) ** -0.5
        shifted_var = f("+", var, eps)
        inv_std = f("**", shifted_var, c(-0.5))

        # normed = fma3(flat, inv_std, -mean * inv_std)
        neg_mean = f("-", mean)
        offset = f("*", neg_mean, inv_std)
        normed = f("fma3", flat, inv_std, offset)
        return orig_shape, normed, mean, sq_sum

    @subgraph("LayerNormAffine", dtype, device, 5, gopt_level=gopt_level)
    def layerNormAffine(inputs, f, c):
        inp, eps, _flatten_shape, weight, bias = inputs
        orig_shape, normed, mean, sq_sum = _normalize(inp, eps, _flatten_shape, f, c)
        restored = f(Reshape(), normed, orig_shape)
        scaled = f("fma3", restored, weight, bias)

        # NOTE: returning the normalized value makes backward faster but
        # takes more memory
        outputs = (scaled, normed, mean, sq_sum)
        has_grad = (True, False, False, False)
        return outputs, has_grad

    @subgraph("LayerNorm", dtype, device, 3, gopt_level=gopt_level)
    def layerNorm(inputs, f, c):
        inp, eps, _flatten_shape = inputs
        orig_shape, normed, _, _ = _normalize(inp, eps, _flatten_shape, f, c)
        restored = f(Reshape(), normed, orig_shape)
        return (restored,), (True,)

    return (layerNorm, layerNormAffine)
def layer_norm(
    inp: Tensor,
    normalized_shape: tuple,
    affine: bool,
    weight: Optional[Tensor] = None,
    bias: Optional[Tensor] = None,
    eps: float = 1e-5,
    eps_mode="additive",
):
    """Apply layer normalization to ``inp`` through the cached subgraph ops.

    The trailing ``len(normalized_shape)`` axes of ``inp`` are flattened into
    one axis before being handed to the subgraph returned by
    ``_get_layerNorm``.

    Args:
        inp: input tensor.
        normalized_shape: shape of the trailing axes to normalize over. Only
            its length is used here. A single ``int`` is accepted and treated
            as a one-element shape.
        affine: when true, the affine subgraph is applied with ``weight`` and
            ``bias``; otherwise both are ignored.
        weight: scale tensor, required when ``affine`` is true.
        bias: shift tensor, required when ``affine`` is true.
        eps: converted to a tensor of ``inp``'s dtype and device and passed
            to the subgraph.
        eps_mode: must be ``"max"`` or ``"additive"`` (case-insensitive).
            NOTE(review): the value is validated but not otherwise used in
            this function, so both modes currently behave the same.

    Returns:
        The first output of the applied subgraph.

    Raises:
        AssertionError: if ``eps_mode`` is not a known mode.
        ValueError: if ``affine`` is true and ``weight`` or ``bias`` is None.
    """
    assert eps_mode.lower() in {"max", "additive"}, "unknown eps_mode: {}".format(
        eps_mode
    )
    # Fail early with a clear message instead of passing None into ``apply``.
    if affine and (weight is None or bias is None):
        raise ValueError(
            "layer_norm with affine=True requires both weight and bias"
        )
    # Accept a bare int so ``len(normalized_shape)`` below does not fail.
    if isinstance(normalized_shape, int):
        normalized_shape = (normalized_shape,)

    _device = inp.device
    _dtype = inp.dtype
    # Number of leading axes that are kept as they are.
    _dim = len(inp.shape) - len(normalized_shape)

    # Target shape for the flattening reshape: leading axes followed by -1.
    _flatten_shape = concat(
        (
            convert_single_value(inp.shape[:_dim], dtype="int32", device=inp.device),
            convert_single_value(-1, dtype="int32", device=inp.device),
        )
    )
    (layerNorm, layerNormAffine) = _get_layerNorm(_device, _dtype, _dim)

    eps = convert_single_value(eps, dtype=inp.dtype, device=inp.device)
    if affine:
        outvar, *_ = apply(layerNormAffine(), inp, eps, _flatten_shape, weight, bias)
    else:
        outvar, *_ = apply(layerNorm(), inp, eps, _flatten_shape)

    return outvar
def
batch_norm
(
inp
:
Tensor
,
running_mean
:
Tensor
=
None
,
...
...
imperative/python/megengine/module/normalization.py
浏览文件 @
97b1b777
...
...
@@ -132,18 +132,9 @@ class LayerNorm(Module):
zeros_
(
self
.
bias
)
def forward(self, x):
    """Normalize ``x`` by delegating to ``F.nn.layer_norm``.

    The module's ``normalized_shape``, ``affine``, ``weight``, ``bias`` and
    ``eps`` attributes are passed through unchanged.
    """
    # The manual reshape / mean / variance computation is superseded by the
    # functional op. Keeping both would normalize the input twice and apply
    # the affine transform twice.
    return F.nn.layer_norm(
        x, self.normalized_shape, self.affine, self.weight, self.bias, self.eps
    )
def
_module_info_string
(
self
)
->
str
:
...
...
imperative/python/test/unit/functional/test_functional.py
浏览文件 @
97b1b777
...
...
@@ -24,6 +24,7 @@ from megengine.core._trace_option import use_symbolic_shape
from
megengine.core.autodiff.grad
import
Grad
from
megengine.core.tensor.utils
import
make_shape_tuple
from
megengine.device
import
get_device_count
from
megengine.module
import
LayerNorm
def
test_where
():
...
...
@@ -862,6 +863,61 @@ def test_conv1d():
)
def test_layer_norm():
    """Check ``F.nn.layer_norm`` against a reference and the ``LayerNorm`` module.

    Case 1 uses random input with ``affine=False``. Case 2 uses constant
    input with ``affine=True``.
    """

    def _layer_norm(x, normalized_shape, affine, weight=None, bias=None, eps=1e-5):
        # Module path: forward() reads weight, bias and eps from the instance.
        module = LayerNorm(normalized_shape=normalized_shape, affine=affine)
        module.weight = weight
        module.bias = bias
        module.eps = eps
        return module(x)

    def _layer_norm_reference(
        x, normalized_shape, affine, weight=None, bias=None, eps=1e-5
    ):
        # Step-by-step reference built from plain tensor ops.
        x_shape = x.shape
        dim_delta = len(x_shape) - len(normalized_shape)
        non_flatten_shape = x_shape[:dim_delta]
        x = x.reshape(*non_flatten_shape, -1)

        mean = x.mean(axis=-1, keepdims=True)
        var = (x ** 2).mean(axis=-1, keepdims=True) - mean * mean

        x = (x - mean) / F.sqrt(var + eps)
        x = x.reshape(x_shape)
        if affine:
            x = weight * x + bias

        return x

    # random input, affine False
    normalized_shape = (28, 28)
    inp_feat = Tensor(np.random.randn(32, 64, 28, 28), dtype="float32")
    weight = Tensor(np.random.randn(28, 28), dtype="float32")
    bias = Tensor(np.random.randn(28, 28), dtype="float32")

    inp_feat = inp_feat + 1
    weight = weight + 1

    affine = False

    outvar = F.nn.layer_norm(inp_feat, normalized_shape, affine, weight, bias)
    targetvar = _layer_norm_reference(
        inp_feat, normalized_shape, affine, weight, bias
    )

    assert abs(outvar - targetvar).mean() < 1e-7

    # no random, affine True
    normalized_shape = (28, 28)
    inp_feat = Tensor(np.ones((32, 64, 28, 28)), dtype="float32")
    weight = Tensor(np.ones((28, 28)), dtype="float32")
    bias = Tensor(np.zeros((28, 28)), dtype="float32")

    affine = True

    outvar = F.nn.layer_norm(inp_feat, normalized_shape, affine, weight, bias)
    modulevar = _layer_norm(inp_feat, normalized_shape, affine, weight, bias)
    targetvar = _layer_norm_reference(
        inp_feat, normalized_shape, affine, weight, bias
    )
    # Take abs() before mean() so errors of opposite sign cannot cancel.
    assert abs(outvar - modulevar).mean() < 1e-7
    # The module delegates to the functional op, so also compare against the
    # independent reference.
    assert abs(outvar - targetvar).mean() < 1e-7
    assert abs(outvar).mean() < 1e-7
def
test_batchnorm2d_io16c32
():
amp
.
enabled
=
True
inp
=
tensor
(
np
.
random
.
randn
(
1
,
3
,
224
,
224
),
dtype
=
np
.
float32
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录