Commit b9b056f4
Authored on Apr 18, 2020 by mindspore-ci-bot; committed via Gitee on Apr 18, 2020

!399 Add Global Batch Normalization

Merge pull request !399 from JichenZhao/syncbn

Parents: 6c59093f 97e250d4

Showing 2 changed files with 115 additions and 16 deletions:

mindspore/nn/layer/__init__.py         +2    -2
mindspore/nn/layer/normalization.py    +113  -14
mindspore/nn/layer/__init__.py

...
@@ -18,7 +18,7 @@ Layer.
 The high-level components(Cells) used to construct the neural network.
 """
 from .activation import Softmax, LogSoftmax, ReLU, ReLU6, Tanh, GELU, ELU, Sigmoid, PReLU, get_activation, LeakyReLU, HSigmoid, HSwish
-from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm, GroupNorm
+from .normalization import BatchNorm1d, BatchNorm2d, LayerNorm, GroupNorm, GlobalBatchNorm
 from .container import SequentialCell, CellList
 from .conv import Conv2d, Conv2dTranspose
 from .lstm import LSTM
...
...
@@ -29,7 +29,7 @@ from .image import ImageGradients, SSIM, PSNR
 __all__ = ['Softmax', 'LogSoftmax', 'ReLU', 'ReLU6', 'Tanh', 'GELU', 'Sigmoid', 'PReLU',
            'get_activation', 'LeakyReLU', 'HSigmoid', 'HSwish', 'ELU',
-           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'GroupNorm',
+           'BatchNorm1d', 'BatchNorm2d', 'LayerNorm', 'GroupNorm', 'GlobalBatchNorm',
            'SequentialCell', 'CellList',
            'Conv2d', 'Conv2dTranspose',
            'LSTM',
...
...
mindspore/nn/layer/normalization.py

...
@@ -20,8 +20,11 @@ from mindspore.common.initializer import initializer
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 import mindspore.context as context
-from mindspore._checkparam import check_int_positive, check_bool, check_typename
+from mindspore._checkparam import check_bool, check_typename
 from mindspore._extends import cell_attr_register
+from mindspore.communication.management import get_group_size, get_rank
+from mindspore.communication import management
+from mindspore._checkparam import check_int_positive
 from ..cell import Cell
...
...
@@ -30,6 +33,7 @@ class _BatchNorm(Cell):
     @cell_attr_register
     def __init__(self,
                  num_features,
+                 group=1,
                  eps=1e-5,
                  momentum=0.9,
                  affine=True,
...
...
@@ -56,6 +60,21 @@ class _BatchNorm(Cell):
                                      gamma_init, num_features), name="gamma", requires_grad=affine)
         self.beta = Parameter(initializer(
             beta_init, num_features), name="beta", requires_grad=affine)
+        self.group = check_int_positive(group)
+        if self.group != 1:
+            self.rank_id = get_rank()
+            self.rank_size = get_group_size()
+            self.device_list = [i for i in range(0, self.rank_size)]
+            self.rank_list = self.list_group(self.device_list, self.group)
+            self.rank_list_idx = len(self.rank_list)
+            for i in range(self.rank_list_idx):
+                if self.rank_id in self.rank_list[i] and self.group != 1:
+                    self.is_global = True
+                    management.create_group('group' + str(i), self.rank_list[i])
+                    self.all_reduce = P.AllReduce(P.ReduceOp.SUM, 'group' + str(i)).add_prim_attr('fusion', 1)
+        self.shape = P.Shape()
+        self.reduce_mean = P.ReduceMean()
+        self.square = P.Square()
         if context.get_context("enable_ge"):
             self.is_ge_backend = True
...
...
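For illustration, here is a minimal standalone sketch of the grouping logic used above (the `self.list_group` helper it calls is defined in the next hunk). Given 8 ranks and group=4, the device list is split into consecutive groups of 4, and each rank joins the communication group that contains it. The snippet is illustrative only and not part of the commit; the distributed-size check is omitted since there is no communication backend here.

def list_group(world_rank, group_size):
    """Partition a flat rank list into consecutive groups of group_size ranks."""
    if len(world_rank) % group_size != 0:
        raise ValueError("please make your group size correct.")
    # zip(*(iter(world_rank),) * group_size) chunks the list into tuples of group_size items
    world_rank_list = zip(*(iter(world_rank),) * group_size)
    return [list(i) for i in world_rank_list]

device_list = list(range(8))              # e.g. rank_size == 8 devices
rank_list = list_group(device_list, 4)    # group == 4
print(rank_list)                          # [[0, 1, 2, 3], [4, 5, 6, 7]]

rank_id = 5                               # this device's rank
for i, ranks in enumerate(rank_list):
    if rank_id in ranks:
        # Mirrors management.create_group('group' + str(i), ...) in the diff above
        print('joins communication group', 'group' + str(i), 'with ranks', ranks)
        # -> joins communication group group1 with ranks [4, 5, 6, 7]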
@@ -82,22 +101,53 @@ class _BatchNorm(Cell):
     def _check_data_dim(self, x):
         raise NotImplementedError
 
+    def list_group(self, world_rank, group_size):
+        if group_size > get_group_size():
+            raise ValueError("group size can not be greater than local rank size, group size is {}, "
+                             "local_rank_size is {}".format(group_size, get_group_size()))
+        if len(world_rank) % group_size != 0:
+            raise ValueError("please make your group size correct.")
+        world_rank_list = zip(*(iter(world_rank),) * group_size)
+        group_list = [list(i) for i in world_rank_list]
+        return group_list
+
     def construct(self, x):
         if self.training and self.use_batch_statistics:
             if self.is_ge_backend:
-                y, batch_mean, batch_var, _, _ = \
-                    self.bn_train(x,
-                                  self.gamma,
-                                  self.beta,
-                                  None,
-                                  None)
-                mean_sub = self.sub_mean(self.moving_mean, batch_mean)
-                temp_mean = self.mul_mean(mean_sub, self.momentum)
-                mean_sub2 = self.sub_var(self.moving_variance, batch_var)
-                temp_variance = self.mul_var(mean_sub2, self.momentum)
-                y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
-                y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
+                if self.is_global:
+                    x_mean = self.reduce_mean(x)
+                    x_mean_square = self.reduce_mean(self.square(x))
+                    global_batch_mean = self.all_reduce(x_mean) / self.group
+                    global_batch_mean_square = self.all_reduce(x_mean_square) / self.group
+                    global_mean = global_batch_mean
+                    global_var = global_batch_mean_square - self.square(global_batch_mean)
+                    y, batch_mean, batch_var, _, _ = \
+                        self.bn_train(x,
+                                      self.gamma,
+                                      self.beta,
+                                      None,
+                                      None)
+                    mean_sub = self.sub_mean(self.moving_mean, global_mean)
+                    temp_mean = self.mul_mean(mean_sub, self.momentum)
+                    mean_sub2 = self.sub_var(self.moving_variance, global_var)
+                    temp_variance = self.mul_var(mean_sub2, self.momentum)
+                    y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
+                    y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
+                else:
+                    y, batch_mean, batch_var, _, _ = \
+                        self.bn_train(x,
+                                      self.gamma,
+                                      self.beta,
+                                      None,
+                                      None)
+                    mean_sub = self.sub_mean(self.moving_mean, batch_mean)
+                    temp_mean = self.mul_mean(mean_sub, self.momentum)
+                    mean_sub2 = self.sub_var(self.moving_variance, batch_var)
+                    temp_variance = self.mul_var(mean_sub2, self.momentum)
+                    y = F.depend(y, self.assign_sub_mean(self.moving_mean, temp_mean))
+                    y = F.depend(y, self.assign_sub_var(self.moving_variance, temp_variance))
             else:
                 y = self.bn_train(x,
                                   self.gamma,
...
...
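The global statistics above rely on the identity Var[x] = E[x^2] - (E[x])^2: each device reduces its local mean and mean-of-squares, an AllReduce(SUM) adds them across the group, and dividing by the group size gives the group-wide moments (this is exact when every device holds the same batch size). A small NumPy sketch of that reasoning, purely illustrative and independent of MindSpore:

import numpy as np

# Illustrative check of the global-variance reconstruction used in construct():
# Var[x] = E[x^2] - (E[x])^2, with E taken over the whole device group.
np.random.seed(0)
group = 4                                                  # devices in the group
shards = [np.random.randn(8, 3) for _ in range(group)]     # equal-sized per-device batches

# Per-device moments (what each rank computes locally with ReduceMean / Square).
local_means = [s.mean() for s in shards]
local_mean_squares = [(s ** 2).mean() for s in shards]

# AllReduce(SUM) followed by division by the group size.
global_mean = sum(local_means) / group
global_mean_square = sum(local_mean_squares) / group
global_var = global_mean_square - global_mean ** 2

# Reference: moments of the concatenated global batch.
full = np.concatenate(shards)
assert np.isclose(global_mean, full.mean())
assert np.isclose(global_var, full.var())
print(global_mean, global_var)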
@@ -221,6 +271,55 @@ class BatchNorm2d(_BatchNorm):
         pass
 
 
+class GlobalBatchNorm(_BatchNorm):
+    r"""
+    Global normalization layer over an N-dimension input.
+
+    Global normalization is cross-device synchronized batch normalization. The standard Batch Normalization
+    implementation only normalizes the data within each device; global normalization normalizes the input
+    within the device group. It has been described in the paper `Batch Normalization: Accelerating Deep
+    Network Training by Reducing Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`_. It rescales
+    and recenters the feature using a mini-batch of data and the learned parameters, as described in the
+    following formula.
+
+    .. math::
+        y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    Args:
+        num_features (int): `C` from an expected input of size (N, C, H, W).
+        group (int): The number of devices in each group.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
+        momentum (float): A floating hyperparameter of the momentum for the
+            running_mean and running_var computation. Default: 0.9.
+        gamma_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the gamma weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'ones'.
+        beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'zeros'.
+        moving_mean_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving mean.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'zeros'.
+        moving_var_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the moving variance.
+            The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
+            'he_uniform', etc. Default: 'ones'.
+        use_batch_statistics (bool): If true, use the mean and variance of the current batch data; otherwise
+            use the specified moving mean and moving variance. Default: True.
+
+    Inputs:
+        - **input** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
+
+    Outputs:
+        Tensor, the normalized, scaled, offset tensor, of shape :math:`(N, C_{out}, H_{out}, W_{out})`.
+
+    Examples:
+        >>> global_bn_op = nn.GlobalBatchNorm(num_features=3, group=4)
+        >>> input = Tensor(np.random.randint(0, 255, [1, 3, 224, 224]), mindspore.float32)
+        >>> global_bn_op(input)
+    """
+
+    def _check_data_dim(self, x):
+        if x.dim == 0:
+            pass
+
+
 class LayerNorm(Cell):
     r"""
     Applies Layer Normalization over a mini-batch of inputs.
...
...
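As a usage note (not part of the diff): GlobalBatchNorm is only meaningful in a multi-device run where distributed communication has been initialized and `group` evenly divides the number of devices. Below is a minimal, purely illustrative sketch of where the layer might sit inside a network; the surrounding distributed setup (communication init, parallel context) depends on the backend and MindSpore version and is omitted here.

import mindspore.nn as nn

class ConvBlock(nn.Cell):
    """Conv2d followed by cross-device batch normalization over groups of 4 devices."""
    def __init__(self):
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(3, 16, 3)
        # Batch statistics are synchronized across the 4 devices in this layer's group.
        self.bn = nn.GlobalBatchNorm(num_features=16, group=4)
        self.relu = nn.ReLU()

    def construct(self, x):
        return self.relu(self.bn(self.conv(x)))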