机器未来 / Paddle (forked from PaddlePaddle / Paddle)
Commit acb90787: refine unit test
Authored Jan 30, 2018 by chengduoZH
Parent: 263e0197
Showing 1 changed file with 59 additions and 59 deletions (+59 / -59):

python/paddle/v2/fluid/tests/test_layer_norm_op.py
@@ -21,29 +21,19 @@ from paddle.v2.fluid.op import Operator
 from paddle.v2.fluid.framework import grad_var_name
 
 
-def get_backward_op(scope, op, no_grad_set):
-    backward_op = core.Operator.backward(op, no_grad_set)
-    for input in backward_op.input_vars():
-        var = scope.var(input)
-        var.get_tensor()
-    for output in backward_op.output_vars():
-        var = scope.var(output)
-        var.get_tensor()
-    return backward_op
-
-
 def _reference_layer_norm_naive(x, scale, beta, epsilon, begin_norm_axis=1):
-    old_shape = x.shape
-    N = reduce(mul, old_shape[0:begin_norm_axis], 1)
-    D = reduce(mul, old_shape[begin_norm_axis:len(old_shape)], 1)
+    x_shape = x.shape
+    N = reduce(mul, x_shape[0:begin_norm_axis], 1)
+    D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
     x.shape = [N, D]
     mean = np.mean(x, axis=1)
     var = np.var(x, axis=1) + epsilon
     output = scale.reshape([1, D]) * np.divide(
         (x - mean.reshape([N, 1])),
         (np.sqrt(var)).reshape([N, 1])) + beta.reshape([1, D])
-    output.shape = old_shape
-    x.shape = old_shape
+    x.shape, output.shape = x_shape, x_shape
     return output, mean, var
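
For reference, with the input flattened to an [N, D] matrix (N = prod(shape[:begin_norm_axis]), D = prod(shape[begin_norm_axis:])), this helper computes the standard layer-norm forward pass. Note that the code folds epsilon into the returned variance, so the sigma squared below already includes it (gamma = scale, beta = beta):

    \mu_i = \frac{1}{D}\sum_{j=1}^{D} x_{ij}, \qquad
    \sigma_i^2 = \frac{1}{D}\sum_{j=1}^{D} (x_{ij} - \mu_i)^2 + \epsilon, \qquad
    y_{ij} = \gamma_j \, \frac{x_{ij} - \mu_i}{\sqrt{\sigma_i^2}} + \beta_j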
@@ -52,27 +42,25 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1):
     scale_shape = scale.shape
     N = reduce(mul, x_shape[0:begin_norm_axis], 1)
     D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
-    grad_y.shape = [N, D]
-    x.shape = [N, D]
-    mean.shape = [N, 1]
-    var.shape = [N, 1]
+    x.shape, grad_y.shape = [N, D], [N, D]
+    var.shape, mean.shape = [N, 1], [N, 1]
     scale.shape = [1, D]
 
     # d_bias
     d_bias = np.sum(grad_y, axis=0).reshape([1, D])
     # d_scale
     d_scale = np.sum(((x - mean) * np.sqrt(1 / var)) * grad_y,
                      axis=0).reshape([1, D])
     # dx
     dx_end = scale * np.sqrt(1.0 / var) * grad_y
     d_mean_0 = np.sum(-np.sqrt(1.0 / var) * grad_y * scale, axis=1).reshape(
         [N, 1])
     # d_mean_1 = np.sum(-1.0 / var * (x - mean) * grad_y, axis=1).reshape(
    #     [N, 1]) * (-1.0 / D * np.sqrt(1.0 / var) *
    #                np.sum(x - mean, axis=1).reshape([N, 1])).reshape([N, 1])
     d_mean = 1.0 / D * d_mean_0
     d_std = np.sum(
-        -1.0 / var * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * (
+        -(1.0 / var) * (x - mean) * grad_y * scale, axis=1).reshape([N, 1]) * (
             1.0 / D * np.sqrt(1.0 / var).reshape([N, 1]) * (x - mean))
 
     grad_x = dx_end + d_mean + d_std
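
Writing \hat{x}_{ij} = (x_{ij} - \mu_i)/\sqrt{\sigma_i^2} for the normalized input and g for grad_y, the parameter gradients computed above are

    \frac{\partial L}{\partial \beta_j} = \sum_i g_{ij}, \qquad
    \frac{\partial L}{\partial \gamma_j} = \sum_i g_{ij}\,\hat{x}_{ij},

and the three pieces assembled into grad_x are the standard terms of the input gradient (again with epsilon already folded into \sigma_i^2):

    \frac{\partial L}{\partial x_{ij}}
      = \underbrace{\frac{\gamma_j g_{ij}}{\sqrt{\sigma_i^2}}}_{\texttt{dx\_end}}
      \underbrace{-\ \frac{1}{D}\sum_k \frac{\gamma_k g_{ik}}{\sqrt{\sigma_i^2}}}_{\texttt{d\_mean}}
      \underbrace{-\ \frac{\hat{x}_{ij}}{D}\sum_k \frac{\gamma_k g_{ik}\,\hat{x}_{ik}}{\sqrt{\sigma_i^2}}}_{\texttt{d\_std}}.

The commented-out d_mean_1 term vanishes identically because \sum_j (x_{ij} - \mu_i) = 0 for every row, which is presumably why it is omitted from d_mean.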
@@ -83,6 +71,17 @@ def _reference_layer_norm_grad(x, grad_y, scale, mean, var, begin_norm_axis=1):
     return grad_x, d_scale, d_bias
 
 
+def get_backward_op(scope, op, no_grad_set):
+    backward_op = core.Operator.backward(op, no_grad_set)
+    for input in backward_op.input_vars():
+        var = scope.var(input)
+        var.get_tensor()
+    for output in backward_op.output_vars():
+        var = scope.var(output)
+        var.get_tensor()
+    return backward_op
+
+
 def create_or_get_tensor(scope, var_name, var, place):
     tensor = scope.var(var_name).get_tensor()
     if var is not None:
@@ -145,8 +144,9 @@ class TestLayerNormdOp(OpTest):
         self.assertLessEqual(max_diff, max_relative_error, err_msg())
 
-    def test_forward_backward(self):
+    def check_forward_backward(self, shape, begin_norm_axis):
         def test_with_place(place, shape, begin_norm_axis=1):
+            # setUp
             assert begin_norm_axis > 0 and begin_norm_axis < len(
                 shape), 'begin_norm_axis must be between 0 and len(shape)-1.'
             # attr
@@ -158,30 +158,35 @@ class TestLayerNormdOp(OpTest):
             x_val = np.random.random_sample(x_shape).astype(np.float32)
             scale_val = np.random.random_sample(scale_shape).astype(np.float32)
             bias_val = np.random.random_sample(scale_shape).astype(np.float32)
-            y_grad = np.random.random_sample(x_shape).astype(np.float32)
 
             # run forward
             y_out, saved_mean, var_ref = _reference_layer_norm_naive(
                 x_val, scale_val, bias_val, epsilon, begin_norm_axis)
+            naive_fw = {"Y": y_out, "Mean": saved_mean, "Variance": var_ref}
 
-            # for gradient test
+            # get gradient
+            y_grad = np.random.random_sample(x_shape).astype(np.float32)
             x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_layer_norm_grad(
                 x_val, y_grad, scale_val, saved_mean, var_ref, begin_norm_axis)
+            naive_grad = {
+                "X": x_grad_ref,
+                "Scale": scale_grad_ref,
+                "Bias": bias_grad_ref
+            }
 
             scope = core.Scope()
 
             # create input
-            x_tensor = create_or_get_tensor(scope, "X", x_val, place)
-            scale_tensor = create_or_get_tensor(scope, "Scale", scale_val,
-                                                place)
-            bias_tensor = create_or_get_tensor(scope, "Bias", bias_val, place)
+            input_map = {"X": x_val, "Scale": scale_val, "Bias": bias_val}
+            for i_name in input_map:
+                create_or_get_tensor(scope, i_name, input_map[i_name], place)
 
             # create output
-            y_tensor = create_or_get_tensor(scope, "Y", None, place)
-            mean_tensor = create_or_get_tensor(scope, "Mean", None, place)
-            variance_tensor = create_or_get_tensor(scope, "Variance", None,
-                                                   place)
+            output_map = {"Y": None, "Mean": None, "Variance": None}
+            output_tensor = {}
+            for o_name in output_map:
+                output_tensor[o_name] = create_or_get_tensor(
+                    scope, o_name, output_map[o_name], place)
 
             layer_norm_op = Operator(
                 "layer_norm",
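
The refactor above trades six per-tensor statements for two name-driven loops over dicts, so adding a tensor later only touches the table. A minimal, framework-free sketch of the same pattern (plain Python stand-ins; `store` and `create_or_get` are illustrative, not Paddle API):

    import numpy as np


    def create_or_get(store, name, value):
        # Stand-in for create_or_get_tensor: set the named slot when an
        # initial value is given, otherwise just make sure the slot exists.
        if value is not None or name not in store:
            store[name] = value
        return store[name]


    store = {}

    # Inputs carry concrete arrays ...
    input_map = {"X": np.ones((2, 3)), "Scale": np.ones(3), "Bias": np.zeros(3)}
    for i_name in input_map:
        create_or_get(store, i_name, input_map[i_name])

    # ... outputs start empty (None) and would be filled in by the operator.
    output_map = {"Y": None, "Mean": None, "Variance": None}
    output_tensor = {o_name: create_or_get(store, o_name, output_map[o_name])
                     for o_name in output_map}

    print(sorted(store))  # ['Bias', 'Mean', 'Scale', 'Variance', 'X', 'Y']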
@@ -200,13 +205,10 @@ class TestLayerNormdOp(OpTest):
             layer_norm_op.run(scope, place)
 
             # check forward result
-            if isinstance(place, core.CUDAPlace):
-                atol = 5e-2
-            else:
-                atol = 1e-4
-            self.__assert_close(y_tensor, y_out, "Y", atol)
-            self.__assert_close(mean_tensor, saved_mean, "Mean", atol)
-            self.__assert_close(variance_tensor, var_ref, "Variance", atol)
+            atol = 5e-2 if isinstance(place, core.CUDAPlace) else 1e-4
+            for o_tensor in output_tensor:
+                self.__assert_close(output_tensor[o_tensor], naive_fw[o_tensor],
+                                    o_tensor, atol)
 
             # run backward
             layer_norm_op_grad = get_backward_op(scope, layer_norm_op, set())
@@ -216,30 +218,28 @@ class TestLayerNormdOp(OpTest):
                                              feed_dict={"Y": y_grad})
             layer_norm_op_grad.run(scope, place)
 
-            x_grad_tensor = create_or_get_tensor(scope,
-                                                 grad_var_name("X"), None,
-                                                 place)
-            scale_grad_tensor = create_or_get_tensor(scope,
-                                                     grad_var_name("Scale"),
-                                                     None, place)
-            bias_grad_tensor = create_or_get_tensor(scope,
-                                                    grad_var_name("Bias"),
-                                                    None, place)
+            # get output
+            grad_tensor = {}
+            for o_name in naive_grad:
+                grad_tensor[o_name] = x_ = create_or_get_tensor(
+                    scope, grad_var_name(o_name), None, place)
 
             # check gradient output
-            self.__assert_grad_close(x_grad_tensor, x_grad_ref, "x_grad", place)
-            self.__assert_grad_close(scale_grad_tensor, scale_grad_ref,
-                                     "scale_grad", place)
-            self.__assert_grad_close(bias_grad_tensor, bias_grad_ref,
-                                     "bias_grad", place)
+            for o_grad in naive_grad:
+                self.__assert_grad_close(grad_tensor[o_grad],
+                                         naive_grad[o_grad], o_grad + "@GRAD",
+                                         place)
 
         places = [core.CPUPlace()]
         if core.is_compile_gpu() and core.op_support_gpu("layer_norm"):
             places.append(core.CUDAPlace(0))
         for place in places:
-            test_with_place(place, [2, 3, 4, 5], begin_norm_axis=1)
-            test_with_place(place, [2, 3, 4, 5], begin_norm_axis=3)
+            test_with_place(place, shape, begin_norm_axis)
+
+    def test_check_forward_backward(self):
+        self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=1)
+        self.check_forward_backward(shape=[2, 3, 4, 5], begin_norm_axis=3)
 
 
 if __name__ == '__main__':
...
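
As a standalone sanity check of the reference math above (plain NumPy, no Paddle required), the analytic gradient can be compared against central finite differences. The sketch below re-derives the same dx_end + d_mean + d_std expression in a non-mutating form; names like layer_norm_fw and numeric_grad are illustrative and not part of the test file:

    import numpy as np


    def layer_norm_fw(x, scale, beta, epsilon, begin_norm_axis=1):
        # Same math as _reference_layer_norm_naive, without mutating x's shape.
        N = int(np.prod(x.shape[:begin_norm_axis]))
        D = int(np.prod(x.shape[begin_norm_axis:]))
        x2 = x.reshape(N, D)
        mean = x2.mean(axis=1, keepdims=True)
        var = x2.var(axis=1, keepdims=True) + epsilon  # epsilon folded in
        y = scale.reshape(1, D) * (x2 - mean) / np.sqrt(var) + beta.reshape(1, D)
        return y.reshape(x.shape)


    def numeric_grad(f, x, delta=1e-5):
        # Central finite differences of a scalar-valued f with respect to x.
        g = np.zeros_like(x)
        for idx in np.ndindex(*x.shape):
            orig = x[idx]
            x[idx] = orig + delta
            f_hi = f(x)
            x[idx] = orig - delta
            f_lo = f(x)
            x[idx] = orig
            g[idx] = (f_hi - f_lo) / (2 * delta)
        return g


    rng = np.random.RandomState(0)
    shape, axis, eps = (2, 3, 4), 1, 1e-5
    D = int(np.prod(shape[axis:]))
    x = rng.rand(*shape)
    scale, beta, gy = rng.rand(D), rng.rand(D), rng.rand(*shape)

    # Numeric gradient of sum(y * gy) with respect to x ...
    loss = lambda x_: float(np.sum(layer_norm_fw(x_, scale, beta, eps, axis) * gy))
    gx_num = numeric_grad(loss, x.copy())

    # ... versus the analytic dx_end + d_mean + d_std of the reference code.
    x2 = x.reshape(-1, D)
    mean = x2.mean(axis=1, keepdims=True)
    var = x2.var(axis=1, keepdims=True) + eps
    xhat = (x2 - mean) / np.sqrt(var)
    g2 = gy.reshape(-1, D) * scale.reshape(1, D) / np.sqrt(var)   # dx_end
    gx = (g2 - g2.mean(axis=1, keepdims=True)                     # + d_mean
          - xhat * (g2 * xhat).mean(axis=1, keepdims=True))       # + d_std

    assert np.allclose(gx.reshape(shape), gx_num, atol=1e-5)
    print("analytic gradient matches finite differences")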