Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2e417b60
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2e417b60
编写于
10月 25, 2017
作者:
Z
zchen0211
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
batch norm
上级
6c0b3836
变更
1
显示空白变更内容
内联
并排
Showing
1 changed file
with
121 addition
and
22 deletion
+121
-22
python/paddle/v2/framework/tests/test_batch_norm_op.py
python/paddle/v2/framework/tests/test_batch_norm_op.py
+121
-22
未找到文件。
python/paddle/v2/framework/tests/test_batch_norm_op.py
浏览文件 @
2e417b60
...
@@ -6,8 +6,26 @@ from paddle.v2.framework.op import Operator
...
@@ -6,8 +6,26 @@ from paddle.v2.framework.op import Operator
def
_reference_training
(
x
,
scale
,
offset
,
epsilon
,
data_format
):
def
_reference_training
(
x
,
scale
,
offset
,
epsilon
,
data_format
):
if
data_format
!=
"NHWC"
:
if
data_format
==
"NCHW"
:
raise
ValueError
(
"data_format must be NHWC, got %s."
%
data_format
)
n
,
c
,
h
,
w
=
x
.
shape
x_square
=
x
*
x
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
2
,
3
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
2
,
3
))
element_count
=
np
.
size
(
x
)
/
int
(
np
.
shape
(
x
)[
1
])
mean
=
x_sum
/
element_count
var
=
x_square_sum
/
element_count
-
mean
*
mean
mean_tile
=
np
.
reshape
(
mean
,
(
1
,
c
,
1
,
1
))
mean_tile
=
np
.
tile
(
mean_tile
,
(
n
,
1
,
h
,
w
))
var_tile
=
np
.
reshape
(
var
,
(
1
,
c
,
1
,
1
))
var_tile
=
np
.
tile
(
var_tile
,
(
n
,
1
,
h
,
w
))
normalized
=
(
x
-
mean_tile
)
/
np
.
sqrt
(
var_tile
+
epsilon
)
scale_tile
=
np
.
reshape
(
scale
,
(
1
,
c
,
1
,
1
))
scale_tile
=
np
.
tile
(
scale_tile
,
(
n
,
1
,
h
,
w
))
offset_tile
=
np
.
reshape
(
offset
,
(
1
,
c
,
1
,
1
))
offset_tile
=
np
.
reshape
(
offset_tile
,
(
1
,
c
,
1
,
1
))
y
=
normalized
*
scale_tile
+
offset_tile
return
y
,
mean
,
var
elif
data_format
==
"NHWC"
:
x_square
=
x
*
x
x_square
=
x
*
x
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
1
,
2
))
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
1
,
2
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
1
,
2
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
1
,
2
))
...
@@ -16,6 +34,8 @@ def _reference_training(x, scale, offset, epsilon, data_format):
...
@@ -16,6 +34,8 @@ def _reference_training(x, scale, offset, epsilon, data_format):
var
=
x_square_sum
/
element_count
-
mean
*
mean
var
=
x_square_sum
/
element_count
-
mean
*
mean
normalized
=
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
)
normalized
=
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
)
return
(
normalized
*
scale
+
offset
),
mean
,
var
return
(
normalized
*
scale
+
offset
),
mean
,
var
else
:
raise
ValueError
(
"Unknown data order."
)
def
_reference_grad
(
x
,
grad_y
,
scale
,
mean
,
var
,
epsilon
,
data_format
):
def
_reference_grad
(
x
,
grad_y
,
scale
,
mean
,
var
,
epsilon
,
data_format
):
...
@@ -28,8 +48,13 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
...
@@ -28,8 +48,13 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
# grad_x =
# grad_x =
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
if
data_format
!=
"NHWC"
:
raise
ValueError
(
"data_format must be NHWC, got %s."
%
data_format
)
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
if
data_format
==
"NCHW"
:
x
=
np
.
transpose
(
x
,
(
0
,
2
,
3
,
1
))
grad_y
=
np
.
transpose
(
grad_y
,
(
0
,
2
,
3
,
1
))
# raise ValueError("data_format must be NHWC, got %s." % data_format)
grad_x
=
scale
*
(
grad_y
-
np
.
mean
(
grad_x
=
scale
*
(
grad_y
-
np
.
mean
(
grad_y
,
axis
=
(
0
,
1
,
2
))
-
(
x
-
mean
)
*
np
.
mean
(
grad_y
,
axis
=
(
0
,
1
,
2
))
-
(
x
-
mean
)
*
np
.
mean
(
grad_y
*
(
x
-
mean
),
axis
=
(
0
,
1
,
2
))
/
grad_y
*
(
x
-
mean
),
axis
=
(
0
,
1
,
2
))
/
...
@@ -37,6 +62,12 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
...
@@ -37,6 +62,12 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
grad_scale
=
np
.
sum
(
grad_y
*
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
),
grad_scale
=
np
.
sum
(
grad_y
*
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
),
axis
=
(
0
,
1
,
2
))
axis
=
(
0
,
1
,
2
))
grad_offset
=
np
.
sum
(
grad_y
,
axis
=
(
0
,
1
,
2
))
grad_offset
=
np
.
sum
(
grad_y
,
axis
=
(
0
,
1
,
2
))
# transfer back to N, C, H, W
if
data_format
==
"NCHW"
:
grad_x
=
np
.
transpose
(
grad_x
,
(
0
,
3
,
1
,
2
))
x
=
np
.
transpose
(
x
,
(
0
,
3
,
1
,
2
))
grad_y
=
np
.
transpose
(
grad_y
,
(
0
,
3
,
1
,
2
))
return
grad_x
,
grad_scale
,
grad_offset
return
grad_x
,
grad_scale
,
grad_offset
...
@@ -72,39 +103,104 @@ class TestBatchNormOp(OpTest):
...
@@ -72,39 +103,104 @@ class TestBatchNormOp(OpTest):
def
__assert_close
(
self
,
tensor
,
np_array
,
msg
,
atol
=
1e-4
):
def
__assert_close
(
self
,
tensor
,
np_array
,
msg
,
atol
=
1e-4
):
self
.
assertTrue
(
np
.
allclose
(
np
.
array
(
tensor
),
np_array
,
atol
=
atol
),
msg
)
self
.
assertTrue
(
np
.
allclose
(
np
.
array
(
tensor
),
np_array
,
atol
=
atol
),
msg
)
def
test_forward_backward
(
self
):
def
test_python
(
self
):
# attr
data_format
=
"NHWC"
data_format
=
"NHWC"
epsilon
=
0.00001
epsilon
=
0.00001
momentum
=
0.9
momentum
=
0.9
# N, H, W, C: 2, 3, 4, 2
channel_num
=
2
channel_num
=
2
x_shape
=
[
2
,
3
,
4
,
channel_num
]
x_shape
=
[
2
,
3
,
4
,
channel_num
]
scale_shape
=
[
channel_num
]
scale_shape
=
[
channel_num
]
# input
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
mean
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
mean
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
ones
(
scale_shape
).
astype
(
np
.
float32
)
# run forward
y_out
,
saved_mean
,
var_ref
=
_reference_training
(
x_val
,
scale_val
,
bias_val
,
epsilon
,
"NHWC"
)
#
mean_out
=
saved_mean
*
(
1.
-
momentum
)
+
momentum
*
mean
variance_out
=
var_ref
*
(
1.
-
momentum
)
+
momentum
*
variance
saved_variance
=
1.
/
np
.
sqrt
(
var_ref
+
epsilon
)
# running N, C, H, W case
# should produce the same results
x_shape2
=
[
2
,
channel_num
,
3
,
4
]
x_val2
=
np
.
transpose
(
x_val
,
(
0
,
3
,
1
,
2
))
y_out2
,
saved_mean2
,
var_ref2
=
_reference_training
(
x_val2
,
scale_val
,
bias_val
,
epsilon
,
"NCHW"
)
self
.
__assert_close
(
saved_mean
,
saved_mean2
,
"batch mean"
)
self
.
__assert_close
(
var_ref
,
var_ref2
,
"batch variance"
)
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans
=
np
.
transpose
(
y_out2
,
(
0
,
2
,
3
,
1
))
self
.
__assert_close
(
y_out
,
y_out2_trans
,
"batch variance"
)
print
'python: NHWC, NCHW, forward checking passed'
# test backward now
# NHWC
y_grad
=
np
.
ones
(
x_shape
).
astype
(
np
.
float32
)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
"NHWC"
)
# NCHW
y_grad2
=
np
.
ones
(
x_shape2
).
astype
(
np
.
float32
)
x_grad_ref2
,
scale_grad_ref2
,
bias_grad_ref2
=
_reference_grad
(
x_val2
,
y_grad2
,
scale_val
,
saved_mean2
,
var_ref2
,
epsilon
,
"NCHW"
)
self
.
__assert_close
(
scale_grad_ref
,
scale_grad_ref2
,
"scale gradient"
)
self
.
__assert_close
(
bias_grad_ref
,
bias_grad_ref2
,
"bias gradient"
)
x_grad_transpose
=
np
.
transpose
(
x_grad_ref2
,
(
0
,
2
,
3
,
1
))
self
.
__assert_close
(
x_grad_ref
,
x_grad_transpose
,
"x gradient"
)
print
'python: NHWC, NCHW, backward checking passed'
def
test_forward_backward
(
self
):
# attr
data_format
=
"NCHW"
epsilon
=
0.00001
momentum
=
0.9
# N, H, W, C: 2, 3, 4, 2
n
,
h
,
w
,
c
=
2
,
3
,
4
,
2
if
data_format
==
"NHWC"
:
x_shape
=
[
n
,
h
,
w
,
c
]
elif
data_format
==
"NCHW"
:
x_shape
=
[
n
,
c
,
h
,
w
]
else
:
raise
ValueError
(
"Unknown data type."
)
scale_shape
=
[
c
]
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
mean
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
ones
(
scale_shape
).
astype
(
np
.
float32
)
# run forward
# run forward
y_out
,
saved_mean
,
var_ref
=
_reference_training
(
y_out
,
saved_mean
,
var_ref
=
_reference_training
(
x_val
,
scale_val
,
bias_val
,
epsilon
,
data_format
)
x_val
,
scale_val
,
bias_val
,
epsilon
,
data_format
)
#
run backward
#
update moving mean and variance
mean_out
=
saved_mean
*
(
1
-
momentum
)
mean_out
=
saved_mean
*
(
1
.
-
momentum
)
+
momentum
*
mean
variance_out
=
var_ref
*
(
1
-
momentum
)
variance_out
=
var_ref
*
(
1
.
-
momentum
)
+
momentum
*
variance
saved_variance
=
1
/
np
.
sqrt
(
var_ref
+
epsilon
)
saved_variance
=
1
.
/
np
.
sqrt
(
var_ref
+
epsilon
)
# for gradient test
# for gradient test
y_grad
=
np
.
ones
(
x_shape
).
astype
(
np
.
float32
)
y_grad
=
np
.
ones
(
x_shape
).
astype
(
np
.
float32
)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
data_format
)
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
data_format
)
def
test_with_place
(
place
):
def
test_with_place
(
place
,
tensor_format
=
data_format
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
# create input
# create input
...
@@ -142,7 +238,7 @@ class TestBatchNormOp(OpTest):
...
@@ -142,7 +238,7 @@ class TestBatchNormOp(OpTest):
SavedVariance
=
"saved_variance"
,
SavedVariance
=
"saved_variance"
,
# attrs
# attrs
is_test
=
False
,
is_test
=
False
,
tensor_format
=
data
_format
,
tensor_format
=
tensor
_format
,
momentum
=
momentum
,
momentum
=
momentum
,
epsilon
=
epsilon
)
epsilon
=
epsilon
)
...
@@ -162,6 +258,7 @@ class TestBatchNormOp(OpTest):
...
@@ -162,6 +258,7 @@ class TestBatchNormOp(OpTest):
atol
=
1e-4
atol
=
1e-4
self
.
__assert_close
(
variance_out_tensor
,
variance_out
,
self
.
__assert_close
(
variance_out_tensor
,
variance_out
,
"variance_out"
,
atol
)
"variance_out"
,
atol
)
print
"op test forward passed: "
,
tensor_format
# run backward
# run backward
batch_norm_op_grad
=
get_backward_op
(
scope
,
batch_norm_op
,
set
())
batch_norm_op_grad
=
get_backward_op
(
scope
,
batch_norm_op
,
set
())
...
@@ -185,12 +282,14 @@ class TestBatchNormOp(OpTest):
...
@@ -185,12 +282,14 @@ class TestBatchNormOp(OpTest):
self
.
__assert_close
(
x_grad_tensor
,
x_grad_ref
,
"x_grad"
)
self
.
__assert_close
(
x_grad_tensor
,
x_grad_ref
,
"x_grad"
)
self
.
__assert_close
(
scale_grad_tensor
,
scale_grad_ref
,
"scale_grad"
)
self
.
__assert_close
(
scale_grad_tensor
,
scale_grad_ref
,
"scale_grad"
)
self
.
__assert_close
(
bias_grad_tensor
,
bias_grad_ref
,
"bias_grad"
)
self
.
__assert_close
(
bias_grad_tensor
,
bias_grad_ref
,
"bias_grad"
)
print
"op test backward passed: "
,
tensor_format
places
=
[
core
.
CPUPlace
()]
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compile_gpu
()
and
core
.
op_support_gpu
(
"batch_norm"
):
if
core
.
is_compile_gpu
()
and
core
.
op_support_gpu
(
"batch_norm"
):
places
.
append
(
core
.
GPUPlace
(
0
))
places
.
append
(
core
.
GPUPlace
(
0
))
for
place
in
places
:
for
place
in
places
:
test_with_place
(
place
)
test_with_place
(
place
)
print
"test forward passed"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录