Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4ce39796
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4ce39796
编写于
1月 27, 2018
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix unit test and c++ code
上级
ae0ea541
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
30 addition
and
33 deletion
+30
-33
paddle/operators/layer_norm_op.cc
paddle/operators/layer_norm_op.cc
+21
-23
python/paddle/v2/fluid/tests/test_layer_norm_op.py
python/paddle/v2/fluid/tests/test_layer_norm_op.py
+9
-10
未找到文件。
paddle/operators/layer_norm_op.cc
浏览文件 @
4ce39796
...
...
@@ -233,39 +233,37 @@ class LayerNormGradKernel<platform::CPUDeviceContext, T>
if
(
d_x
)
{
d_x
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
d_x_map
=
EigenMatrixMapRowMajor
<
T
>
(
d_x
->
data
<
T
>
(),
left
,
right
);
auto
triple_product
=
[](
T
ele
)
{
return
ele
*
ele
;
};
auto
neg_inv_std
=
[](
T
ele
)
{
return
-
std
::
sqrt
(
1
/
ele
);
};
auto
triple_product_func
=
[](
T
ele
)
{
return
ele
*
ele
*
ele
;
};
auto
scale_func
=
[
scale_data
](
T
ele
)
{
return
ele
*
scale_data
;
};
auto
inv_std_func
=
[](
T
ele
)
{
return
std
::
sqrt
(
1
/
ele
);
};
auto
inv_std_scale_func
=
[
scale_data
](
T
ele
)
{
return
std
::
sqrt
(
1
/
ele
)
*
scale_data
;
};
auto
neg_inv_std_scale_func
=
[
scale_data
](
T
ele
)
{
return
-
std
::
sqrt
(
1
/
ele
)
*
scale_data
;
};
// dy_dx
auto
dx_end
=
var_map
.
unaryExpr
(
inv_std_scale_func
)
.
replicate
(
1
,
right
)
.
cwiseProduct
(
d_y_map
);
// dy_dmean_dx
auto
dmean_end
=
var_map
.
unaryExpr
(
neg_inv_std_scale_func
)
.
replicate
(
1
,
right
)
.
cwiseProduct
(
d_y_map
)
.
rowwise
()
.
sum
();
auto
dx_mean
=
(
T
(
1.0
)
/
right
)
*
dmean_end
.
replicate
(
1
,
right
);
auto
dx_mean
=
(
T
(
-
1.0
)
/
right
)
*
var_map
.
unaryExpr
(
inv_std_scale_func
)
.
replicate
(
1
,
right
)
.
cwiseProduct
(
d_y_map
)
.
rowwise
()
.
sum
()
.
replicate
(
1
,
right
);
// dy_var_dx
auto
dvar_end_0
=
(
x_map
-
mean_map
.
replicate
(
1
,
right
))
.
cwiseProduct
(
d_y_map
)
.
rowwise
()
.
sum
();
auto
dvar_end
=
var_map
.
unaryExpr
(
neg_inv_std
)
.
unaryExpr
(
triple_product
)
.
cwiseProduct
(
dvar_end_0
);
auto
dx_var
=
(
T
(
1.0
)
/
right
)
*
auto
dvar_end_part
=
(
x_map
-
mean_map
.
replicate
(
1
,
right
))
.
cwiseProduct
(
d_y_map
)
.
rowwise
()
.
sum
();
auto
dvar_end
=
var_map
.
unaryExpr
(
inv_std_func
)
.
unaryExpr
(
triple_product_func
)
.
cwiseProduct
(
dvar_end_part
)
.
replicate
(
1
,
right
);
auto
dx_var
=
(
T
(
-
1.0
)
/
right
)
*
(
x_map
-
mean_map
.
replicate
(
1
,
right
))
.
cwiseProduct
(
dvar_end
.
replicate
(
1
,
right
));
// d_x = (1. / N) * scale * inv_var * (N * d_y - np.sum(d_y, axis=0)
// - (X - mean) * inv_var * inv_var * np.sum(d_y * (X - mean), axis=0))
.
cwiseProduct
(
dvar_end
)
.
unaryExpr
(
scale_func
);
d_x_map
=
dx_end
+
dx_mean
+
dx_var
;
}
...
...
python/paddle/v2/fluid/tests/test_layer_norm_op.py
浏览文件 @
4ce39796
...
...
@@ -52,18 +52,19 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, epsilon):
D
=
reduce
(
mul
,
x_shape
,
1
)
/
N
grad_y
.
shape
=
[
N
,
D
]
x
.
shape
=
[
N
,
D
]
grad_offset
=
np
.
sum
(
grad_y
)
mean
.
shape
=
[
N
,
1
]
var
.
shape
=
[
N
,
1
]
grad_scale
=
np
.
sum
(((
x
-
mean
)
*
np
.
sqrt
(
1
/
var
))
*
grad_y
)
d_scale
=
np
.
sum
(
grad_y
).
reshape
([
1
,
])
d_bias
=
np
.
sum
(((
x
-
mean
)
*
np
.
sqrt
(
1
/
var
))
*
grad_y
).
reshape
([
1
,
])
dx_end
=
np
.
sqrt
(
1.0
/
var
)
*
grad_y
d_mean_0
=
np
.
sum
(
-
np
.
sqrt
(
1.0
/
var
)
*
grad_y
,
axis
=
1
).
reshape
([
N
,
1
])
d_mean_1
=
np
.
sum
(
-
1.0
/
var
*
(
x
-
mean
)
*
grad_y
,
axis
=
1
).
reshape
(
[
N
,
1
])
*
(
-
1.0
/
D
*
np
.
sqrt
(
1.0
/
var
)
*
np
.
sum
(
x
-
mean
,
axis
=
1
).
reshape
([
N
,
1
])).
reshape
([
N
,
1
])
d_mean
=
1.0
/
D
*
(
d_mean_0
+
d_mean_1
)
#
d_mean_1 = np.sum(-1.0 / var * (x - mean) * grad_y, axis=1).reshape(
#
[N, 1]) * (-1.0 / D * np.sqrt(1.0 / var) *
#
np.sum(x - mean, axis=1).reshape([N, 1])).reshape([N, 1])
d_mean
=
1.0
/
D
*
(
d_mean_0
)
d_std
=
np
.
sum
(
-
1.0
/
var
*
(
x
-
mean
)
*
grad_y
,
axis
=
1
).
reshape
([
N
,
1
])
*
(
1.0
/
D
*
np
.
sqrt
(
1.0
/
var
).
reshape
([
N
,
1
])
*
(
x
-
mean
))
...
...
@@ -73,7 +74,7 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, epsilon):
grad_y
.
shape
=
x_shape
x
.
shape
=
x_shape
return
grad_x
,
grad_scale
,
grad_offset
return
grad_x
,
d_bias
,
d_scale
def
create_or_get_tensor
(
scope
,
var_name
,
var
,
place
):
...
...
@@ -144,7 +145,7 @@ class TestLayerNormdOp(OpTest):
epsilon
=
0.00001
x_shape
=
shape
scale_shape
=
[
1
]
np
.
random
.
random
(
123
)
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
...
...
@@ -154,7 +155,6 @@ class TestLayerNormdOp(OpTest):
x_val
,
scale_val
,
bias_val
,
epsilon
)
# for gradient test
# y_grad = np.ones(x_shape).astype(np.float32) * 0.00277778
y_grad
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_layer_norm_grad
(
...
...
@@ -229,7 +229,6 @@ class TestLayerNormdOp(OpTest):
for
place
in
places
:
test_with_place
(
place
,
[
2
,
3
,
4
,
5
])
test_with_place
(
place
,
[
2
,
3
])
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录