PaddlePaddle / Paddle · commit 666efc23 (unverified)
Authored Feb 03, 2021 by AshburnLee · Committed by GitHub, Feb 03, 2021
Call new cudnn batch norm API regardless of data type and data layout (#30157)
Parent: 5c8455d6
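
For orientation before the diff: cuDNN ships two forward-training batch-norm entry points, the legacy cudnnBatchNormalizationForwardTraining and the fused cudnnBatchNormalizationForwardTrainingEx added in cuDNN 7.4.1, which additionally consumes a workspace and produces a reserve space that the backward pass reuses. Previously the kernel took the Ex path only for certain data types and layouts (the NHWC branch visible below); per the commit title it is now called regardless. Below is a minimal standalone sketch of the dispatch between the two entry points — an illustration under stated assumptions, not Paddle's actual kernel; descriptors and device buffers are assumed to be prepared by the caller, and float tensors are assumed so the blend scalars are floats.

#include <cudnn.h>

// Hedged sketch: pick the fused ("Ex") or legacy cuDNN batch-norm
// forward-training call at compile time. *_desc parameters are
// cudnnTensorDescriptor_t handles the caller has already configured.
cudnnStatus_t BatchNormForwardTraining(
    cudnnHandle_t handle, cudnnBatchNormMode_t mode,
    cudnnTensorDescriptor_t x_desc, const void *x,
    cudnnTensorDescriptor_t y_desc, void *y,
    cudnnTensorDescriptor_t bn_desc, const void *scale, const void *bias,
    double factor, void *running_mean, void *running_var, double epsilon,
    void *saved_mean, void *saved_inv_var, void *workspace,
    size_t workspace_size, void *reserve_space, size_t reserve_space_size) {
  const float one = 1.0f, zero = 0.0f;  // blend scalars for float data
#if CUDNN_VERSION >= 7401
  // Fused path (cuDNN >= 7.4.1): takes a workspace plus a reserve space
  // that the backward pass reads again. zDesc/zData and activationDesc are
  // only used by fused add+activation variants, so plain BN passes null.
  return cudnnBatchNormalizationForwardTrainingEx(
      handle, mode, CUDNN_BATCHNORM_OPS_BN, &one, &zero, x_desc, x,
      /*zDesc=*/nullptr, /*zData=*/nullptr, y_desc, y, bn_desc, scale, bias,
      factor, running_mean, running_var, epsilon, saved_mean, saved_inv_var,
      /*activationDesc=*/nullptr, workspace, workspace_size, reserve_space,
      reserve_space_size);
#else
  // Legacy path: no workspace or reserve-space arguments.
  (void)workspace; (void)workspace_size;
  (void)reserve_space; (void)reserve_space_size;
  return cudnnBatchNormalizationForwardTraining(
      handle, mode, &one, &zero, x_desc, x, y_desc, y, bn_desc, scale, bias,
      factor, running_mean, running_var, epsilon, saved_mean, saved_inv_var);
#endif
}
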
Showing 7 changed files with 156 additions and 187 deletions.
paddle/fluid/operators/batch_norm_op.cu  (+137, -136)
paddle/fluid/operators/inplace_abn_op.cc  (+3, -0)
python/paddle/fluid/dygraph/nn.py  (+3, -13)
python/paddle/fluid/layers/nn.py  (+5, -23)
python/paddle/fluid/tests/unittests/test_batch_norm_op.py  (+2, -10)
python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py  (+2, -2)
python/paddle/nn/functional/norm.py  (+4, -3)

paddle/fluid/operators/batch_norm_op.cu
@@ -114,7 +114,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
                << "CUDNN_BN_MIN_EPSILON instead.";
     }
     epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
-#if CUDNN_VERSION_MIN(7, 0, 0)
+#if CUDNN_VERSION_MIN(7, 0, 1)
     if (FLAGS_cudnn_batchnorm_spatial_persistent) {
       mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
     } else {
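
The guard now requires cuDNN 7.0.1 instead of 7.0.0 before enabling CUDNN_BATCHNORM_SPATIAL_PERSISTENT, presumably because the persistent mode only became available at that patch level. For reference, a sketch of how such a version macro is conventionally defined — an assumption based on cuDNN 7.x's packed version number, not code copied from this commit:

// Assumed definition: cuDNN 7.x packs its version as
// MAJOR * 1000 + MINOR * 100 + PATCHLEVEL, so cuDNN 7.0.1 reports
// CUDNN_VERSION == 7001. Paddle defines an equivalent macro in its
// cuDNN helper header.
#define CUDNN_VERSION_MIN(major, minor, patch) \
  (CUDNN_VERSION >= ((major) * 1000 + (minor) * 100 + (patch)))
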
@@ -122,7 +122,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     }
 #else
     mode_ = CUDNN_BATCHNORM_SPATIAL;
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 0, 1)
     VLOG(3) << "Setting descriptors.";
     std::vector<int> dims;
@@ -151,7 +151,10 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     auto handle = dev_ctx.cudnn_handle();

     // Now, depending on whether we are running test or not, we have two paths.
-    if (test_mode || use_global_stats) {
+    // It is training mode when it's not reference AND not using pre-trained
+    // model.
+    bool training = !test_mode && !use_global_stats;
+    if (!training) {
       // only when test we use input to do computation.
       const auto *est_mean = ctx.Input<Tensor>("Mean");
       const auto *est_var = ctx.Input<Tensor>("Variance");
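
The new training flag is just the negation of the old branch test; a tiny standalone check of that equivalence (illustrative only, not part of the commit):

#include <cassert>
#include <initializer_list>

int main() {
  // Exhaustively check that the new guard (!training) matches the old one
  // (test_mode || use_global_stats). This is De Morgan's law,
  // !(a || b) == (!a && !b), so the refactored kernel takes the same branch.
  for (bool test_mode : {false, true}) {
    for (bool use_global_stats : {false, true}) {
      const bool training = !test_mode && !use_global_stats;
      assert(!training == (test_mode || use_global_stats));
    }
  }
  return 0;
}
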
@@ -234,7 +237,6 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
       bool called = false;
 #if CUDNN_VERSION_MIN(7, 4, 1)
       if (compute_format == DataLayout::kNHWC) {
         called = true;
         size_t workspace_size = 0;
         size_t reserve_space_size = 0;
@@ -281,11 +283,11 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
             ctx.GetPlace(), transformed_x.type(), workspace_size);
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
                 handle, mode_, CUDNN_BATCHNORM_OPS_BN,
                 CudnnDataType<T>::kOne(), CudnnDataType<T>::kZero(),
                 data_desc_, transformed_x.template data<T>(), nullptr, nullptr,
                 data_desc_, transformed_y.template data<T>(), bn_param_desc_,
                 scale->template data<BatchNormParamType<T>>(),
                 bias->template data<BatchNormParamType<T>>(), this_factor,
                 mean_out->template mutable_data<BatchNormParamType<T>>(
                     ctx.GetPlace()),
@@ -298,8 +300,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
                     ctx.GetPlace()),
                 nullptr, workspace_ptr, workspace_size, reserve_space_ptr,
                 reserve_space_size));
       }
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 4, 1)
       if (!called) {
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::cudnnBatchNormalizationForwardTraining(
@@ -640,7 +641,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
                << "CUDNN_BN_MIN_EPSILON instead.";
     }
     epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
-#if CUDNN_VERSION_MIN(7, 0, 0)
+#if CUDNN_VERSION_MIN(7, 0, 1)
     if (FLAGS_cudnn_batchnorm_spatial_persistent) {
       mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
     } else {
@@ -648,7 +649,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
     }
 #else
     mode_ = CUDNN_BATCHNORM_SPATIAL;
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 0, 1)
     PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor(
         data_desc_, CudnnDataType<T>::type,
@@ -672,10 +673,10 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
             num, transformed_x.data<T>(), grid2, block, stream);
     }
     // This branch calls CUDNN APIs
     if (d_scale && d_bias) {
       bool called = false;
 #if CUDNN_VERSION_MIN(7, 4, 1)
       if (compute_format == DataLayout::kNHWC) {
         called = true;
         size_t workspace_size = 0;
         void *workspace_ptr = nullptr;
@@ -738,8 +739,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
             /*reserveSpace=*/const_cast<T *>(
                 reserve_space->template data<T>()),
             /*reserveSpaceSizeInBytes=*/reserve_space_size));
       }
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 4, 1)
       if (!called) {
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::cudnnBatchNormalizationBackward(
@@ -764,6 +764,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
               ctx, &transformed_d_x, d_x);
         }
       } else {
         // This branch call CUDA kernels
         if (compute_format == DataLayout::kNCHW) {
           if (d_x) {
             BNBackwardData<T, block, framework::DataLayout::kNCHW><<<
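
The workspace_size and reserve_space_size values threaded through the hunks above are obtained from cuDNN's size-query helpers. A hedged sketch of that query pattern (a standalone illustration; Paddle invokes these through its platform::dynload wrappers, and the actual call sites are collapsed out of this diff view):

#include <cudnn.h>

#if CUDNN_VERSION >= 7401  // the Ex size queries exist from cuDNN 7.4.1 on
// Hedged sketch: query how large the workspace and reserve space must be
// before calling cudnnBatchNormalizationForwardTrainingEx. The reserve
// space is the buffer surfaced to Python as the "ReserveSpace" output.
cudnnStatus_t QueryBatchNormSizes(cudnnHandle_t handle,
                                  cudnnBatchNormMode_t mode,
                                  cudnnTensorDescriptor_t x_desc,
                                  cudnnTensorDescriptor_t y_desc,
                                  cudnnTensorDescriptor_t bn_desc,
                                  size_t *workspace_size,
                                  size_t *reserve_space_size) {
  // Workspace for the fused forward-training kernel; zDesc and
  // activationDesc are null for plain CUDNN_BATCHNORM_OPS_BN.
  cudnnStatus_t s = cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
      handle, mode, CUDNN_BATCHNORM_OPS_BN, x_desc, /*zDesc=*/nullptr, y_desc,
      bn_desc, /*activationDesc=*/nullptr, workspace_size);
  if (s != CUDNN_STATUS_SUCCESS) return s;
  // Scratch written by forward training and read back by backward.
  return cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
      handle, mode, CUDNN_BATCHNORM_OPS_BN, /*activationDesc=*/nullptr, x_desc,
      reserve_space_size);
}
#endif
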

paddle/fluid/operators/inplace_abn_op.cc
@@ -178,6 +178,9 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker<T> {
     op->SetInput("Bias", this->Input("Bias"));
     op->SetInput("SavedMean", this->Output("SavedMean"));
     op->SetInput("SavedVariance", this->Output("SavedVariance"));
+    if (this->HasOutput("ReserveSpace")) {
+      op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
+    }
     // used when setting use_global_stats True during training
     if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) {

python/paddle/fluid/dygraph/nn.py
@@ -1309,12 +1309,6 @@ class BatchNorm(layers.Layer):
             dtype=self._dtype)
         self._variance.stop_gradient = True

-        self._has_reserve_space = False
-        if data_layout == 'NHWC':
-            flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-            if flag is not None and flag.lower() in ['true', '1']:
-                self._has_reserve_space = True
-
         self._in_place = in_place
         self._data_layout = data_layout
         self._momentum = momentum
@@ -1341,7 +1335,6 @@ class BatchNorm(layers.Layer):
             batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
                 input, self.weight, self.bias, self._mean, self._variance,
                 mean_out, variance_out, *attrs)

             return dygraph_utils._append_activation_in_dygraph(
                 batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn)
@@ -1371,11 +1364,8 @@ class BatchNorm(layers.Layer):
             dtype=self._dtype, stop_gradient=True)
         saved_variance = self._helper.create_variable_for_type_inference(
             dtype=self._dtype, stop_gradient=True)
-
-        reserve_space = None
-        if self._has_reserve_space:
-            reserve_space = self._helper.create_variable_for_type_inference(
-                dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+        reserve_space = self._helper.create_variable_for_type_inference(
+            dtype=self._helper.input_dtype(input), stop_gradient=True)

         batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference(
             self._dtype)
@@ -1388,7 +1378,7 @@ class BatchNorm(layers.Layer):
             "SavedVariance": [saved_variance]
         }

-        if reserve_space is not None:
-            outputs["ReserveSpace"] = reserve_space
+        outputs["ReserveSpace"] = [reserve_space]

         self._helper.append_op(
             type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)

python/paddle/fluid/layers/nn.py
@@ -2792,12 +2792,6 @@ def batch_norm(input,
                 'batch_norm')
     dtype = helper.input_dtype()

-    has_reserve_space = False
-    if data_layout == 'NHWC':
-        flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-        if flag is not None and flag.lower() in ['true', '1']:
-            has_reserve_space = True
-
     # use fp32 for bn parameter
     if dtype == core.VarDesc.VarType.FP16:
         dtype = core.VarDesc.VarType.FP32
@@ -2845,17 +2839,16 @@ def batch_norm(input,
     # create output
     # mean and mean_out share the same memory
     mean_out = mean
-    # variance and variance out share the same memory
+    # variance and variance_out share the same memory
     variance_out = variance
     saved_mean = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     reserve_space = None
-    if has_reserve_space:
+    if not is_test:
         reserve_space = helper.create_variable_for_type_inference(
-            dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+            dtype=helper.input_dtype(), stop_gradient=True)

     batch_norm_out = input if in_place else \
         helper.create_variable_for_type_inference(dtype)
@@ -2998,12 +2991,6 @@ def inplace_abn(input,
                 'inplace_abn')
     dtype = helper.input_dtype()

-    has_reserve_space = False
-    if data_layout == 'NHWC':
-        flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-        if flag is not None and flag.lower() in ['true', '1']:
-            has_reserve_space = True
-
     input_shape = input.shape
     if data_layout == 'NCHW':
         channel_num = input_shape[1]
@@ -3053,12 +3040,8 @@ def inplace_abn(input,
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
-    reserve_space = None
-    if has_reserve_space:
-        reserve_space = helper.create_variable_for_type_inference(
-            dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+    reserve_space = helper.create_variable_for_type_inference(
+        dtype=dtype, stop_gradient=True)

     batch_norm_out = input
     inputs = {
@@ -3082,7 +3065,6 @@ def inplace_abn(input,
         inputs['MomemtumTensor'] = momentum
     else:
         attrs['momentum'] = momentum
     outputs = {
         "Y": batch_norm_out,
         "MeanOut": mean_out,

python/paddle/fluid/tests/unittests/test_batch_norm_op.py
@@ -440,16 +440,8 @@ class TestBatchNormOpTraining(unittest.TestCase):
                 "SavedMean": block.var('saved_mean'),
                 "SavedVariance": block.var('saved_variance')
             }
-            has_reserve_space = False
-            if data_format == 'NHWC':
-                flag = os.environ.get(
-                    'FLAGS_cudnn_batchnorm_spatial_persistent')
-                if flag is not None and flag.lower() in ['true', '1']:
-                    has_reserve_space = True
-            if has_reserve_space:
-                block.create_var(name="reserve_space", dtype='float16')
-                outputs["ReserveSpace"] = block.var('reserve_space')
-                del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent']
+            block.create_var(name="reserve_space", dtype='float32')
+            outputs["ReserveSpace"] = block.var('reserve_space')
             bn_op = block.append_op(
                 type="batch_norm",
                 inputs=inputs,

python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py
@@ -122,7 +122,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
         if not only_forward:
             others = [
                 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
-                'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
+                'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
             ]
             fetch_names += others
         bn_fetches = exe.run(program=main,
@@ -142,7 +142,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
         if not only_forward:
             others = [
                 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
-                'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
+                'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
             ]
             fetch_names += others
         for nm in fetch_names:

python/paddle/nn/functional/norm.py
@@ -166,7 +166,6 @@ def batch_norm(x,
           batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b)
           print(batch_norm_out)
     """

     assert len(x.shape) >= 2, "input dim must be larger than 1"
     # input ad out must share the memory
@@ -196,7 +195,6 @@ def batch_norm(x,
         batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
             x, weight, bias, running_mean, running_var, mean_out, variance_out,
             *attrs)

         return dygraph_utils._append_activation_in_dygraph(
             batch_norm_out, act=None)
@@ -230,13 +228,16 @@ def batch_norm(x,
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     batch_norm_out = helper.create_variable_for_type_inference(dtype)
+    reserve_space = helper.create_variable_for_type_inference(
+        dtype=x.dtype, stop_gradient=True)

     outputs = {
         "Y": [batch_norm_out],
         "MeanOut": [running_mean],
         "VarianceOut": [running_var],
         "SavedMean": [saved_mean],
-        "SavedVariance": [saved_variance]
+        "SavedVariance": [saved_variance],
+        "ReserveSpace": [reserve_space]
     }

     helper.append_op(