Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
eaa41857
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
eaa41857
编写于
10月 27, 2017
作者:
Z
Zhuoyuan
提交者:
GitHub
10月 27, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #5103 from zchen0211/batch-norm-latest
Batch norm latest
上级
2a5edec0
52eb42cf
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
160 addition
and
48 deletion
+160
-48
paddle/operators/batch_norm_op.cu
paddle/operators/batch_norm_op.cu
+9
-5
python/paddle/v2/framework/tests/test_batch_norm_op.py
python/paddle/v2/framework/tests/test_batch_norm_op.py
+151
-43
未找到文件。
paddle/operators/batch_norm_op.cu
浏览文件 @
eaa41857
...
...
@@ -117,9 +117,6 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
math
::
SetConstant
<
platform
::
GPUPlace
,
T
>
functor
;
functor
(
ctx
.
device_context
(),
saved_mean
,
0
);
functor
(
ctx
.
device_context
(),
saved_variance
,
0
);
// FIXME(qiao) should not set zero self
functor
(
ctx
.
device_context
(),
mean_out
,
0
);
functor
(
ctx
.
device_context
(),
variance_out
,
0
);
auto
handle
=
ctx
.
cuda_device_context
().
cudnn_handle
();
...
...
@@ -211,8 +208,15 @@ class BatchNormGradKernel<platform::GPUPlace, T>
mode_
=
CUDNN_BATCHNORM_SPATIAL
;
#endif
std
::
vector
<
int
>
dims
=
{
N
,
C
,
H
,
W
,
D
};
std
::
vector
<
int
>
strides
=
{
H
*
W
*
C
*
D
,
1
,
W
*
D
*
C
,
D
*
C
,
C
};
std
::
vector
<
int
>
dims
;
std
::
vector
<
int
>
strides
;
if
(
tensor_format
==
TensorFormat
::
NCHW
)
{
dims
=
{
N
,
C
,
H
,
W
,
D
};
strides
=
{
C
*
H
*
W
*
D
,
H
*
W
*
D
,
W
*
D
,
D
,
1
};
}
else
{
dims
=
{
N
,
C
,
H
,
W
,
D
};
strides
=
{
H
*
W
*
C
*
D
,
1
,
W
*
D
*
C
,
D
*
C
,
C
};
}
CUDNN_ENFORCE
(
platform
::
dynload
::
cudnnSetTensorNdDescriptor
(
data_desc_
,
CudnnDataType
<
T
>::
type
,
x_dims
.
size
()
>
3
?
x_dims
.
size
()
:
4
,
dims
.
data
(),
strides
.
data
()));
...
...
python/paddle/v2/framework/tests/test_batch_norm_op.py
浏览文件 @
eaa41857
...
...
@@ -21,16 +21,36 @@ def get_backward_op(scope, op, no_grad_set):
def
_reference_training
(
x
,
scale
,
offset
,
epsilon
,
data_format
):
if
data_format
!=
"NHWC"
:
raise
ValueError
(
"data_format must be NHWC, got %s."
%
data_format
)
x_square
=
x
*
x
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
1
,
2
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
1
,
2
))
element_count
=
np
.
size
(
x
)
/
int
(
np
.
shape
(
x
)[
-
1
])
mean
=
x_sum
/
element_count
var
=
x_square_sum
/
element_count
-
mean
*
mean
normalized
=
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
)
return
(
normalized
*
scale
+
offset
),
mean
,
var
if
data_format
==
"NCHW"
:
n
,
c
,
h
,
w
=
x
.
shape
x_square
=
x
*
x
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
2
,
3
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
2
,
3
))
element_count
=
np
.
size
(
x
)
/
int
(
np
.
shape
(
x
)[
1
])
mean
=
x_sum
/
element_count
var
=
x_square_sum
/
element_count
-
mean
*
mean
mean_tile
=
np
.
reshape
(
mean
,
(
1
,
c
,
1
,
1
))
mean_tile
=
np
.
tile
(
mean_tile
,
(
n
,
1
,
h
,
w
))
var_tile
=
np
.
reshape
(
var
,
(
1
,
c
,
1
,
1
))
var_tile
=
np
.
tile
(
var_tile
,
(
n
,
1
,
h
,
w
))
normalized
=
(
x
-
mean_tile
)
/
np
.
sqrt
(
var_tile
+
epsilon
)
scale_tile
=
np
.
reshape
(
scale
,
(
1
,
c
,
1
,
1
))
scale_tile
=
np
.
tile
(
scale_tile
,
(
n
,
1
,
h
,
w
))
offset_tile
=
np
.
reshape
(
offset
,
(
1
,
c
,
1
,
1
))
offset_tile
=
np
.
reshape
(
offset_tile
,
(
1
,
c
,
1
,
1
))
y
=
normalized
*
scale_tile
+
offset_tile
return
y
,
mean
,
var
elif
data_format
==
"NHWC"
:
x_square
=
x
*
x
x_square_sum
=
np
.
sum
(
x_square
,
(
0
,
1
,
2
))
x_sum
=
np
.
sum
(
x
,
axis
=
(
0
,
1
,
2
))
element_count
=
np
.
size
(
x
)
/
int
(
np
.
shape
(
x
)[
-
1
])
mean
=
x_sum
/
element_count
var
=
x_square_sum
/
element_count
-
mean
*
mean
normalized
=
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
)
return
(
normalized
*
scale
+
offset
),
mean
,
var
else
:
raise
ValueError
(
"Unknown data order."
)
def
_reference_grad
(
x
,
grad_y
,
scale
,
mean
,
var
,
epsilon
,
data_format
):
...
...
@@ -43,8 +63,13 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
# grad_x =
# 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
# (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))
if
data_format
!=
"NHWC"
:
raise
ValueError
(
"data_format must be NHWC, got %s."
%
data_format
)
# transfer from (N, C, H, W) to (N, H, W, C) to simplify computation
if
data_format
==
"NCHW"
:
x
=
np
.
transpose
(
x
,
(
0
,
2
,
3
,
1
))
grad_y
=
np
.
transpose
(
grad_y
,
(
0
,
2
,
3
,
1
))
# raise ValueError("data_format must be NHWC, got %s." % data_format)
grad_x
=
scale
*
(
grad_y
-
np
.
mean
(
grad_y
,
axis
=
(
0
,
1
,
2
))
-
(
x
-
mean
)
*
np
.
mean
(
grad_y
*
(
x
-
mean
),
axis
=
(
0
,
1
,
2
))
/
...
...
@@ -52,6 +77,12 @@ def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
grad_scale
=
np
.
sum
(
grad_y
*
(
x
-
mean
)
/
np
.
sqrt
(
var
+
epsilon
),
axis
=
(
0
,
1
,
2
))
grad_offset
=
np
.
sum
(
grad_y
,
axis
=
(
0
,
1
,
2
))
# transfer back to N, C, H, W
if
data_format
==
"NCHW"
:
grad_x
=
np
.
transpose
(
grad_x
,
(
0
,
3
,
1
,
2
))
x
=
np
.
transpose
(
x
,
(
0
,
3
,
1
,
2
))
grad_y
=
np
.
transpose
(
grad_y
,
(
0
,
3
,
1
,
2
))
return
grad_x
,
grad_scale
,
grad_offset
...
...
@@ -65,61 +96,135 @@ def create_or_get_tensor(scope, var_name, var, place):
return
tensor
def
set_output_grad
(
scope
,
outputs
,
place
):
def
__set_tensor__
(
name
):
def
set_output_grad
(
scope
,
outputs
,
place
,
feed_dict
=
None
):
def
__set_tensor__
(
name
,
data
=
None
):
out_tensor
=
scope
.
find_var
(
name
).
get_tensor
()
grad_tensor
=
scope
.
var
(
grad_var_name
(
name
)).
get_tensor
()
out_dtype
=
out_tensor
.
dtype
()
if
out_dtype
==
core
.
DataType
.
FP64
:
data
=
np
.
ones
(
out_tensor
.
shape
(),
dtype
=
np
.
float64
)
elif
out_dtype
==
core
.
DataType
.
FP32
:
data
=
np
.
ones
(
out_tensor
.
shape
(),
dtype
=
np
.
float32
)
else
:
raise
ValueError
(
"Not supported data type "
+
str
(
out_dtype
))
if
data
is
None
:
if
out_dtype
==
core
.
DataType
.
FP64
:
data
=
np
.
ones
(
out_tensor
.
shape
(),
dtype
=
np
.
float64
)
elif
out_dtype
==
core
.
DataType
.
FP32
:
data
=
np
.
ones
(
out_tensor
.
shape
(),
dtype
=
np
.
float32
)
else
:
raise
ValueError
(
"Not supported data type "
+
str
(
out_dtype
))
grad_tensor
.
set
(
data
,
place
)
for
output
in
outputs
:
__set_tensor__
(
output
)
data
=
None
if
output
in
feed_dict
:
data
=
feed_dict
[
output
]
__set_tensor__
(
output
,
data
)
class
TestBatchNormOp
(
OpTest
):
def
__assert_close
(
self
,
tensor
,
np_array
,
msg
,
atol
=
1e-4
):
self
.
assertTrue
(
np
.
allclose
(
np
.
array
(
tensor
),
np_array
,
atol
=
atol
),
msg
)
def
test_forward_backward
(
self
):
# attr
def
test_python
(
self
):
data_format
=
"NHWC"
epsilon
=
0.00001
momentum
=
0.9
channel_num
=
2
x_shape
=
[
2
,
3
,
4
,
channel_num
]
scale_shape
=
[
channel_num
]
# N, H, W, C: 2, 3, 4, 2
n
,
h
,
w
,
c
=
2
,
3
,
4
,
2
x_shape
=
[
n
,
h
,
w
,
c
]
scale_shape
=
[
c
]
# input
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
mean
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
zero
s
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
one
s
(
scale_shape
).
astype
(
np
.
float32
)
# run forward
y_out
,
saved_mean
,
var_ref
=
_reference_training
(
x_val
,
scale_val
,
bias_val
,
epsilon
,
data_format
)
x_val
,
scale_val
,
bias_val
,
epsilon
,
"NHWC"
)
#
mean_out
=
saved_mean
*
(
1.
-
momentum
)
+
momentum
*
mean
variance_out
=
var_ref
*
(
1.
-
momentum
)
+
momentum
*
variance
saved_variance
=
1.
/
np
.
sqrt
(
var_ref
+
epsilon
)
# running N, C, H, W case
# should produce the same results
x_shape2
=
[
n
,
c
,
h
,
w
]
x_val2
=
np
.
transpose
(
x_val
,
(
0
,
3
,
1
,
2
))
y_out2
,
saved_mean2
,
var_ref2
=
_reference_training
(
x_val2
,
scale_val
,
bias_val
,
epsilon
,
"NCHW"
)
self
.
__assert_close
(
saved_mean
,
saved_mean2
,
"batch mean"
)
self
.
__assert_close
(
var_ref
,
var_ref2
,
"batch variance"
)
# transfer (N, C, H, W) back to (N, H, W, C)
y_out2_trans
=
np
.
transpose
(
y_out2
,
(
0
,
2
,
3
,
1
))
self
.
__assert_close
(
y_out
,
y_out2_trans
,
"batch variance"
)
print
'python: NHWC, NCHW, forward checking passed'
# test backward now
# NHWC
self
.
y_grad
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
y_grad
=
self
.
y_grad
# y_grad = np.ones(x_shape).astype(np.float32)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
"NHWC"
)
# run backward
mean_out
=
saved_mean
*
(
1
-
momentum
)
variance_out
=
var_ref
*
(
1
-
momentum
)
saved_variance
=
1
/
np
.
sqrt
(
var_ref
+
epsilon
)
# NCHW
y_grad2
=
np
.
transpose
(
y_grad
,
(
0
,
3
,
1
,
2
))
# y_grad2 = np.ones(x_shape2).astype(np.float32)
x_grad_ref2
,
scale_grad_ref2
,
bias_grad_ref2
=
_reference_grad
(
x_val2
,
y_grad2
,
scale_val
,
saved_mean2
,
var_ref2
,
epsilon
,
"NCHW"
)
# for gradient test
y_grad
=
np
.
ones
(
x_shape
).
astype
(
np
.
float32
)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
data_format
)
self
.
__assert_close
(
scale_grad_ref
,
scale_grad_ref2
,
"scale gradient"
)
self
.
__assert_close
(
bias_grad_ref
,
bias_grad_ref2
,
"bias gradient"
)
x_grad_transpose
=
np
.
transpose
(
x_grad_ref2
,
(
0
,
2
,
3
,
1
))
self
.
__assert_close
(
x_grad_ref
,
x_grad_transpose
,
"x gradient"
)
print
'python: NHWC, NCHW, backward checking passed'
def
test_forward_backward
(
self
):
def
test_with_place
(
place
,
tensor_format
):
# attr
epsilon
=
0.00001
momentum
=
0.9
# N, H, W, C: 12, 3, 4, 2
n
,
h
,
w
,
c
=
2
,
3
,
4
,
2
if
data_format
==
"NHWC"
:
x_shape
=
[
n
,
h
,
w
,
c
]
elif
data_format
==
"NCHW"
:
x_shape
=
[
n
,
c
,
h
,
w
]
else
:
raise
ValueError
(
"Unknown data type."
)
scale_shape
=
[
c
]
x_val
=
np
.
random
.
random_sample
(
x_shape
).
astype
(
np
.
float32
)
scale_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
bias_val
=
np
.
random
.
random_sample
(
scale_shape
).
astype
(
np
.
float32
)
mean
=
np
.
zeros
(
scale_shape
).
astype
(
np
.
float32
)
variance
=
np
.
ones
(
scale_shape
).
astype
(
np
.
float32
)
# run forward
y_out
,
saved_mean
,
var_ref
=
_reference_training
(
x_val
,
scale_val
,
bias_val
,
epsilon
,
data_format
)
# update moving mean and variance
mean_out
=
saved_mean
*
(
1.
-
momentum
)
+
momentum
*
mean
variance_out
=
var_ref
*
(
1.
-
momentum
)
+
momentum
*
variance
saved_variance
=
1.
/
np
.
sqrt
(
var_ref
+
epsilon
)
# for gradient test
# y_grad = np.ones(x_shape).astype(np.float32)
y_grad
=
np
.
zeros
(
x_shape
).
astype
(
np
.
float32
)
y_grad
[
0
,
0
,
0
,
0
]
=
1.
# y_grad = np.random.random_sample(x_shape).astype(np.float32)
x_grad_ref
,
scale_grad_ref
,
bias_grad_ref
=
_reference_grad
(
x_val
,
y_grad
,
scale_val
,
saved_mean
,
var_ref
,
epsilon
,
data_format
)
def
test_with_place
(
place
):
scope
=
core
.
Scope
()
# create input
...
...
@@ -157,7 +262,7 @@ class TestBatchNormOp(OpTest):
SavedVariance
=
"saved_variance"
,
# attrs
is_test
=
False
,
tensor_format
=
data
_format
,
tensor_format
=
tensor
_format
,
momentum
=
momentum
,
epsilon
=
epsilon
)
...
...
@@ -170,20 +275,21 @@ class TestBatchNormOp(OpTest):
self
.
__assert_close
(
saved_variance_tensor
,
saved_variance
,
"saved_variance"
)
self
.
__assert_close
(
mean_out_tensor
,
mean_out
,
"mean_out"
)
# FIXME(qiao) figure out why with cuDNN variance_out have a higher error rate
if
isinstance
(
place
,
core
.
GPUPlace
):
atol
=
5e-2
else
:
atol
=
1e-4
self
.
__assert_close
(
variance_out_tensor
,
variance_out
,
"variance_out"
,
atol
)
print
"op test forward passed: "
,
str
(
place
),
tensor_format
# run backward
batch_norm_op_grad
=
get_backward_op
(
scope
,
batch_norm_op
,
set
())
set_output_grad
(
scope
,
[
"y_out"
,
"mean"
,
"variance"
,
"saved_mean"
,
"saved_variance"
],
place
)
place
,
feed_dict
=
{
"y_out"
:
y_grad
})
batch_norm_op_grad
.
run
(
scope
,
ctx
)
x_grad_tensor
=
create_or_get_tensor
(
scope
,
...
...
@@ -200,12 +306,14 @@ class TestBatchNormOp(OpTest):
self
.
__assert_close
(
x_grad_tensor
,
x_grad_ref
,
"x_grad"
)
self
.
__assert_close
(
scale_grad_tensor
,
scale_grad_ref
,
"scale_grad"
)
self
.
__assert_close
(
bias_grad_tensor
,
bias_grad_ref
,
"bias_grad"
)
print
"op test backward passed: "
,
str
(
place
),
tensor_format
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compile_gpu
()
and
core
.
op_support_gpu
(
"batch_norm"
):
places
.
append
(
core
.
GPUPlace
(
0
))
for
place
in
places
:
test_with_place
(
place
)
for
data_format
in
[
"NCHW"
,
"NHWC"
]:
test_with_place
(
place
,
data_format
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录