Commit a510ecc8
Authored on May 23, 2020 by mindspore-ci-bot; committed via Gitee on May 23, 2020
!1339 add epsilon parameter for layernorm
Merge pull request !1339 from JichenZhao/layernorm_mean_var_shape
Parents: e82088d1, 7c9fb342
Showing 7 changed files with 23 additions and 13 deletions (+23, -13)
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc       +4  -0
mindspore/ccsrc/transform/op_declare.cc              +2  -1
mindspore/nn/layer/normalization.py                  +5  -1
mindspore/nn/loss/loss.py                            +6  -8
mindspore/ops/_op_impl/tbe/layer_norm.py             +1  -0
mindspore/ops/operations/nn_ops.py                   +3  -1
tests/st/networks/models/bert/bert_tdt_lossscale.py  +2  -2
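For context, the epsilon threaded through these files is the usual stabilizer in the layer-normalization denominator. A minimal NumPy sketch of that formula follows; it is illustrative only, and the actual TBE kernel implementation is not part of this diff.

import numpy as np

# Reference layer normalization over the last axis, showing where epsilon enters.
# Illustrative sketch only; the real computation happens inside the TBE LayerNorm kernel.
def layer_norm_reference(x, gamma, beta, epsilon=1e-7):
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    y = (x - mean) / np.sqrt(var + epsilon)   # epsilon keeps the denominator away from zero
    return gamma * y + beta, mean, var

x = np.random.randn(2, 4).astype(np.float32)
y, mean, var = layer_norm_reference(x, np.ones(4, np.float32), np.zeros(4, np.float32))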
mindspore/ccsrc/kernel/tbe/tbe_kernel_build.cc

@@ -368,6 +368,7 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
   MS_EXCEPTION_IF_NULL(op_info);
   MS_EXCEPTION_IF_NULL(attrs_json);
   auto attrs_ptr = op_info->attrs_ptr();
+  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
   if (TbeAdapter::RunAttrPass(anf_node, attrs_ptr, attrs_json)) {
     return true;
   }
@@ -377,6 +378,9 @@ bool TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_no
     std::string attr_name = attr_ptr->name();
     nlohmann::json attr_obj;
     attr_obj["name"] = attr_name;
+    if (op_name == "LayerNorm" && attr_obj["name"] == "epsilon" && creater_type_ == OP_SELECT_FORMAT) {
+      continue;
+    }
     if (primitive->GetAttr(attr_name) != nullptr) {
       auto value = primitive->GetAttr(attr_name);
       std::string type = attr_ptr->type();
mindspore/ccsrc/transform/op_declare.cc

@@ -1084,7 +1084,8 @@ OUTPUT_MAP(SGD) = {{0, OUTPUT_DESC(parameters)}};
 // LayerNorm
 INPUT_MAP(LayerNorm) = {{1, INPUT_DESC(x)}, {2, INPUT_DESC(gamma)}, {3, INPUT_DESC(beta)}};
 ATTR_MAP(LayerNorm) = {{"begin_norm_axis", ATTR_DESC(begin_norm_axis, AnyTraits<int>())},
-                       {"begin_params_axis", ATTR_DESC(begin_params_axis, AnyTraits<int>())}};
+                       {"begin_params_axis", ATTR_DESC(begin_params_axis, AnyTraits<int>())},
+                       {"epsilon", ATTR_DESC(epsilon, AnyTraits<float>())}};
 OUTPUT_MAP(LayerNorm) = {{0, OUTPUT_DESC(y)}, {1, OUTPUT_DESC(mean)}, {2, OUTPUT_DESC(variance)}};

 // LayerNormGrad
mindspore/nn/layer/normalization.py

@@ -449,6 +449,7 @@ class LayerNorm(Cell):
         beta_init (Union[Tensor, str, Initializer, numbers.Number]): Initializer for the beta weight.
             The values of str refer to the function `initializer` including 'zeros', 'ones', 'xavier_uniform',
             'he_uniform', etc. Default: 'zeros'.
+        epsilon (float): A value added to the denominator for numerical stability. Default: 1e-7.

     Inputs:
         - **input_x** (Tensor) - The shape of 'input_x' is :math:`(x_1, x_2, ..., x_R)`,
@@ -469,6 +470,7 @@ class LayerNorm(Cell):
                  begin_params_axis=-1,
                  gamma_init='ones',
                  beta_init='zeros',
+                 epsilon=1e-7
                  ):
         super(LayerNorm, self).__init__()
         if not isinstance(normalized_shape, (tuple, list)):
@@ -477,11 +479,13 @@ class LayerNorm(Cell):
         self.normalized_shape = normalized_shape
         self.begin_norm_axis = begin_norm_axis
         self.begin_params_axis = begin_params_axis
+        self.epsilon = epsilon
         self.gamma = Parameter(initializer(gamma_init, normalized_shape), name="gamma")
         self.beta = Parameter(initializer(beta_init, normalized_shape), name="beta")
-        self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis, begin_params_axis=self.begin_params_axis)
+        self.layer_norm = P.LayerNorm(begin_norm_axis=self.begin_norm_axis,
+                                      begin_params_axis=self.begin_params_axis, epsilon=self.epsilon)

     def construct(self, input_x):
         y, _, _ = self.layer_norm(input_x, self.gamma, self.beta)
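A minimal usage sketch of the updated nn.LayerNorm interface; the shapes and values below are purely illustrative and assume an executable (e.g. PyNative) context.

import numpy as np
import mindspore.nn as nn
from mindspore import Tensor

# Illustrative only: normalize over the last axis with an explicit epsilon.
x = Tensor(np.random.randn(2, 3, 4).astype(np.float32))
layer_norm = nn.LayerNorm(normalized_shape=(4,), begin_norm_axis=-1,
                          begin_params_axis=-1, epsilon=1e-7)
y = layer_norm(x)   # output has the same shape as x: (2, 3, 4)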
mindspore/nn/loss/loss.py

@@ -198,14 +198,12 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
     Scores Tensor :math:`x` is of shape :math:`(N, C)` and target Tensor :math:`t` is a
     Tensor of shape :math:`(N, C)` which contains one-hot labels of length :math:`C`.

-    For each batch :math:`N_i`, the loss is given as:
+    For each instance :math:`N_i`, the loss is given as:

     .. math::
-        \ell(x_i, t_i) = -w_{t_i} \log\left(\frac{\exp(x_{t_i})}{\sum_j \exp(x_j)}\right)
-        = w_{t_i} \left(-x_{t_i} + \log\left(\sum_j \exp(x_i)\right)\right),
-    where :math:`x_i` is a 1D score Tensor, :math:`t_i` is the target class and
-    :math:`w` is a weight Tensor to generate weighted loss for each class. When not specified,
-    weight Tensor is set to be None and weight is the same (:math:`1`) for all class.
+        \ell(x_i, t_i) = - \log\left(\frac{\exp(x_{t_i})}{\sum_j \exp(x_j)}\right)
+        = -x_{t_i} + \log\left(\sum_j \exp(x_i)\right),
+    where :math:`x_i` is a 1D score Tensor, :math:`t_i` is a scalar.

     Note:
         While the target classes are mutually exclusive, i.e., only one class is positive in the target, the predicted
@@ -221,8 +219,8 @@ class SoftmaxCrossEntropyWithLogits(_Loss):
         num_classes (int): The number of classes in the task. It is a optional input Default: 2.

     Inputs:
-        - **logits** (Tensor) - Tensor of shape :math:`(x_1, x_2, ..., x_R)`.
-        - **labels** (Tensor) - Tensor of shape :math:`(y_1, y_2, ..., y_S)`. If `sparse` is True, The type of
+        - **logits** (Tensor) - Tensor of shape (N, C).
+        - **labels** (Tensor) - Tensor of shape (N, ). If `sparse` is True, The type of
             `labels` is mindspore.int32. If `sparse` is False, the type of `labels` is same as the type of `logits`.

     Outputs:
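A quick numerical check of the simplified (unweighted) per-instance loss in the revised docstring, using plain NumPy with made-up scores:

import numpy as np

# loss = -x_t + log(sum_j exp(x_j)) for one score vector x and target class t.
x = np.array([2.0, 1.0, 0.1])   # illustrative scores for C = 3 classes
t = 0                           # target class index
loss = -x[t] + np.log(np.sum(np.exp(x)))
print(loss)                     # ~0.417, equal to -log(softmax(x)[t])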
mindspore/ops/_op_impl/tbe/layer_norm.py

@@ -25,6 +25,7 @@ layer_norm_op_info = TBERegOp("LayerNorm") \
     .partial_flag(True) \
     .attr("begin_norm_axis", "required", "int", "all") \
     .attr("begin_params_axis", "required", "int", "all") \
+    .attr("epsilon", "optional", "float", "all") \
     .input(0, "x", False, "required", "all") \
     .input(1, "gamma", False, "required", "all") \
     .input(2, "beta", False, "required", "all") \
mindspore/ops/operations/nn_ops.py

@@ -1853,6 +1853,7 @@ class LayerNorm(Primitive):
             the value should be in [-1, rank(input)). Default: 1.
         begin_params_axis (int): The begin axis of the parameter input (`gamma`, `beta`) to
             apply LayerNorm, the value should be in [-1, rank(input)). Default: 1.
+        epsilon (float): A value added to the denominator for numerical stability. Default: 1e-7.

     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
@@ -1881,9 +1882,10 @@ class LayerNorm(Primitive):
     """

     @prim_attr_register
-    def __init__(self, begin_norm_axis=1, begin_params_axis=1):
+    def __init__(self, begin_norm_axis=1, begin_params_axis=1, epsilon=1e-7):
         validator.check_value_type('begin_norm_axis', begin_norm_axis, [int], self.name)
         validator.check_value_type('begin_params_axis', begin_params_axis, [int], self.name)
+        validator.check_value_type('epsilon', epsilon, [float], self.name)


 class L2Normalize(PrimitiveWithInfer):
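A minimal sketch of calling the P.LayerNorm primitive with the new epsilon attribute; the inputs below are illustrative and the direct call assumes an executable (e.g. PyNative) context:

import numpy as np
import mindspore.ops.operations as P
from mindspore import Tensor

# Illustrative only: the primitive returns y together with the reduced mean and variance.
layer_norm = P.LayerNorm(begin_norm_axis=1, begin_params_axis=1, epsilon=1e-7)
x = Tensor(np.random.randn(4, 8).astype(np.float32))
gamma = Tensor(np.ones((8,), np.float32))
beta = Tensor(np.zeros((8,), np.float32))
y, mean, variance = layer_norm(x, gamma, beta)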
tests/st/networks/models/bert/bert_tdt_lossscale.py

@@ -171,8 +171,8 @@ def test_bert_tdt():
     # assertion occurs while the loss value, overflow state or loss_scale value is wrong
     loss_value = np.array(callback.loss_list)
-    expect_loss_value = [12.207201, 11.980862, 11.984737, 11.879344, 11.832838, 12.411388, 12.009449,
-                         12.621273, 12.223175, 12.427313]
+    expect_loss_value = [12.207198, 11.980881, 11.984844, 11.879381, 11.832978, 12.411333, 12.009284,
+                         12.621277, 12.223178, 12.427385]
     print("loss value: {}".format(loss_value))
     assert np.allclose(loss_value, expect_loss_value, 0, 0.0005)
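For reference, the assertion uses np.allclose with rtol=0 and atol=0.0005, so only the absolute per-element difference matters; a small illustration with made-up numbers:

import numpy as np

# With rtol=0, np.allclose(a, b, 0, 0.0005) passes iff |a - b| <= 0.0005 element-wise.
a = np.array([12.207198, 11.980881])
b = np.array([12.207201, 11.980862])   # max abs diff ~1.9e-5, well under 5e-4
assert np.allclose(a, b, 0, 0.0005)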