Crayon鑫 / Paddle (forked from PaddlePaddle/Paddle)
Unverified commit 666efc23
Authored by AshburnLee on Feb 03, 2021; committed by GitHub on Feb 03, 2021.
Call new cudnn batch norm API regardless of data type and data layout (#30157)
Parent: 5c8455d6
Showing 7 changed files with 156 additions and 187 deletions (+156, -187).
paddle/fluid/operators/batch_norm_op.cu                          +137  -136
paddle/fluid/operators/inplace_abn_op.cc                           +3    -0
python/paddle/fluid/dygraph/nn.py                                  +3   -13
python/paddle/fluid/layers/nn.py                                   +5   -23
python/paddle/fluid/tests/unittests/test_batch_norm_op.py          +2   -10
python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py     +2    -2
python/paddle/nn/functional/norm.py                                +4    -3
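In short: these kernels previously took cuDNN's *Ex batch-norm entry points (and produced the ReserveSpace output their backward pass needs) only for NHWC data with FLAGS_cudnn_batchnorm_spatial_persistent enabled. After this commit, any data type and data layout takes that path whenever cuDNN >= 7.4.1 is available, and ReserveSpace is always created. A minimal sketch of exercising the changed op through the public 2.0 API; the shapes and values here are illustrative, not part of the patch:

import numpy as np
import paddle

# NCHW float32 training-mode batch norm: before this commit such a call
# used the legacy cudnnBatchNormalizationForwardTraining path; after it,
# cudnnBatchNormalizationForwardTrainingEx is used here as well.
x = paddle.to_tensor(np.random.rand(4, 8, 16, 16).astype('float32'))
running_mean = paddle.zeros([8])
running_var = paddle.ones([8])
weight = paddle.ones([8])
bias = paddle.zeros([8])
y = paddle.nn.functional.batch_norm(
    x, running_mean, running_var, weight, bias, training=True)
print(y.shape)  # [4, 8, 16, 16]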
paddle/fluid/operators/batch_norm_op.cu
@@ -114,7 +114,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
           << "CUDNN_BN_MIN_EPSILON instead.";
     }
     epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
-#if CUDNN_VERSION_MIN(7, 0, 0)
+#if CUDNN_VERSION_MIN(7, 0, 1)
     if (FLAGS_cudnn_batchnorm_spatial_persistent) {
       mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
     } else {
@@ -122,7 +122,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     }
 #else
     mode_ = CUDNN_BATCHNORM_SPATIAL;
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 0, 1)
     VLOG(3) << "Setting descriptors.";
     std::vector<int> dims;
@@ -151,7 +151,10 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     auto handle = dev_ctx.cudnn_handle();

     // Now, depending on whether we are running test or not, we have two paths.
-    if (test_mode || use_global_stats) {
+    // It is training mode when it's not reference AND not using pre-trained
+    // model.
+    bool training = !test_mode && !use_global_stats;
+    if (!training) {
       // only when test we use input to do computation.
       const auto *est_mean = ctx.Input<Tensor>("Mean");
       const auto *est_var = ctx.Input<Tensor>("Variance");
@@ -234,72 +237,70 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
       bool called = false;
 #if CUDNN_VERSION_MIN(7, 4, 1)
-      if (compute_format == DataLayout::kNHWC) {
-        called = true;
+      called = true;
       size_t workspace_size = 0;
       size_t reserve_space_size = 0;
       void *reserve_space_ptr = nullptr;
       void *workspace_ptr = nullptr;
       Tensor workspace_tensor;
       // Create reserve space and workspace for batch norm.
       // Create tensor for each batchnorm op, it will be used in the
       // backward. Thus this tensor shouldn't be temp.
       auto *reserve_space = ctx.Output<Tensor>("ReserveSpace");
       PADDLE_ENFORCE_NOT_NULL(
           reserve_space,
           platform::errors::NotFound(
               "The argument ReserveSpace of batch_norm op is not found."));
       // --------------- cudnn batchnorm workspace ---------------
       PADDLE_ENFORCE_CUDA_SUCCESS(
           platform::dynload::
               cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                   /*handle=*/handle,
                   /*mode=*/mode_,
                   /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
                   /*xDesc=*/data_desc_,
                   /*zDesc=*/nullptr,
                   /*yDesc=*/data_desc_,
                   /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
                   /*activationDesc=*/nullptr,
                   /*sizeInBytes=*/&workspace_size));
       // -------------- cudnn batchnorm reserve space --------------
       PADDLE_ENFORCE_CUDA_SUCCESS(
           platform::dynload::
               cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
                   /*handle=*/handle,
                   /*mode=*/mode_,
                   /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
                   /*activationDesc=*/nullptr,
                   /*xDesc=*/data_desc_,
                   /*sizeInBytes=*/&reserve_space_size));
       reserve_space_ptr = reserve_space->mutable_data(
           ctx.GetPlace(), transformed_x.type(), reserve_space_size);
       workspace_ptr = workspace_tensor.mutable_data(
           ctx.GetPlace(), transformed_x.type(), workspace_size);
       PADDLE_ENFORCE_CUDA_SUCCESS(
           platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
               handle, mode_, CUDNN_BATCHNORM_OPS_BN,
               CudnnDataType<T>::kOne(), CudnnDataType<T>::kZero(),
               data_desc_, transformed_x.template data<T>(), nullptr, nullptr,
               data_desc_, transformed_y.template data<T>(), bn_param_desc_,
               scale->template data<BatchNormParamType<T>>(),
               bias->template data<BatchNormParamType<T>>(), this_factor,
               mean_out->template mutable_data<BatchNormParamType<T>>(
                   ctx.GetPlace()),
               variance_out->template mutable_data<BatchNormParamType<T>>(
                   ctx.GetPlace()),
               epsilon,
               saved_mean->template mutable_data<BatchNormParamType<T>>(
                   ctx.GetPlace()),
               saved_variance->template mutable_data<BatchNormParamType<T>>(
                   ctx.GetPlace()),
               nullptr, workspace_ptr, workspace_size, reserve_space_ptr,
               reserve_space_size));
-      }
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 4, 1)
       if (!called) {
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::cudnnBatchNormalizationForwardTraining(
@@ -640,7 +641,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
           << "CUDNN_BN_MIN_EPSILON instead.";
     }
     epsilon = std::max(epsilon, CUDNN_BN_MIN_EPSILON);
-#if CUDNN_VERSION_MIN(7, 0, 0)
+#if CUDNN_VERSION_MIN(7, 0, 1)
     if (FLAGS_cudnn_batchnorm_spatial_persistent) {
       mode_ = CUDNN_BATCHNORM_SPATIAL_PERSISTENT;
     } else {
@@ -648,7 +649,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
     }
 #else
     mode_ = CUDNN_BATCHNORM_SPATIAL;
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 0, 1)
     PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::cudnnSetTensorNdDescriptor(
         data_desc_, CudnnDataType<T>::type,
@@ -672,74 +673,73 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
             num, transformed_x.data<T>(), grid2, block, stream);
       }
+      // This branch calls CUDNN APIs
       if (d_scale && d_bias) {
         bool called = false;
 #if CUDNN_VERSION_MIN(7, 4, 1)
-        if (compute_format == DataLayout::kNHWC) {
-          called = true;
+        called = true;
         size_t workspace_size = 0;
         void *workspace_ptr = nullptr;
         Tensor workspace_tensor;
         auto reserve_space_size = reserve_space->memory_size();
         // --------------- cudnn batchnorm workspace ---------------
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::
                 cudnnGetBatchNormalizationBackwardExWorkspaceSize(
                     /*handle=*/dev_ctx.cudnn_handle(),
                     /*mode=*/mode_,
                     /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
                     /*xDesc=*/data_desc_,
                     /*yDesc=*/data_desc_,
                     /*dyDesc=*/data_desc_,
                     /*dzDesc=*/nullptr,
                     /*dxDesc=*/data_desc_,
                     /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
                     /*activationDesc=*/nullptr,
                     /*sizeInBytes=*/&workspace_size));
         workspace_ptr = workspace_tensor.mutable_data(
             ctx.GetPlace(), transformed_x.type(), workspace_size);
         PADDLE_ENFORCE_CUDA_SUCCESS(
             platform::dynload::cudnnBatchNormalizationBackwardEx(
                 /*handle=*/dev_ctx.cudnn_handle(),
                 /*mode=*/mode_,
                 /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
                 /*alphaDataDiff=*/CudnnDataType<T>::kOne(),
                 /*betaDataDiff=*/CudnnDataType<T>::kZero(),
                 /*alphaParamDiff=*/CudnnDataType<T>::kOne(),
                 /*betaParamDiff=*/CudnnDataType<T>::kZero(),
                 /*xDesc=*/data_desc_,
                 /*xData=*/transformed_x.template data<T>(),
                 /*yDesc=*/nullptr,
                 /*yData=*/nullptr,
                 /*dyDesc=*/data_desc_,
                 /*dyData=*/transformed_d_y.template data<T>(),
                 /*dzDesc=*/nullptr,
                 /*dzData=*/nullptr,
                 /*dxDesc=*/data_desc_,
                 /*dxData=*/transformed_d_x.template mutable_data<T>(
                     ctx.GetPlace()),
                 /*dBnScaleBiasDesc=*/bn_param_desc_,
                 /*bnScaleData=*/scale->template data<BatchNormParamType<T>>(),
                 /*bnBiasData=*/nullptr,
                 /*dBnScaleData=*/d_scale->template mutable_data<
                     BatchNormParamType<T>>(ctx.GetPlace()),
                 /*dBnBiasData=*/d_bias->template mutable_data<
                     BatchNormParamType<T>>(ctx.GetPlace()),
                 /*epsilon=*/epsilon,
                 /*savedMean=*/saved_mean_data,
                 /*savedInvVariance=*/saved_var_data,
                 /*activationDesc=*/nullptr,
                 /*workspace=*/workspace_ptr,
                 /*workSpaceSizeInBytes=*/workspace_size,
                 /*reserveSpace=*/const_cast<T *>(
                     reserve_space->template data<T>()),
                 /*reserveSpaceSizeInBytes=*/reserve_space_size));
-        }
-#endif
+#endif  // CUDNN_VERSION_MIN(7, 4, 1)
         if (!called) {
           PADDLE_ENFORCE_CUDA_SUCCESS(
               platform::dynload::cudnnBatchNormalizationBackward(
@@ -764,6 +764,7 @@ class BatchNormGradKernel<platform::CUDADeviceContext, T>
               ctx, &transformed_d_x, d_x);
         }
       } else {
+        // This branch call CUDA kernels
         if (compute_format == DataLayout::kNCHW) {
           if (d_x) {
             BNBackwardData<T, block, framework::DataLayout::kNCHW><<<
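Note that FLAGS_cudnn_batchnorm_spatial_persistent still selects between CUDNN_BATCHNORM_SPATIAL and CUDNN_BATCHNORM_SPATIAL_PERSISTENT in both kernels above; what no longer depends on it is whether the *Ex entry points are used. A sketch of toggling the flag, assuming (as the tests below do) that it is read from the environment when Paddle initializes:

import os

# Set before importing paddle so the FLAGS_* gflag is picked up at startup.
os.environ['FLAGS_cudnn_batchnorm_spatial_persistent'] = '1'

import paddle  # imported after setting the flag on purpose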
paddle/fluid/operators/inplace_abn_op.cc

@@ -178,6 +178,9 @@ class InplaceABNOpGradMaker : public framework::SingleGradOpMaker<T> {
     op->SetInput("Bias", this->Input("Bias"));
     op->SetInput("SavedMean", this->Output("SavedMean"));
     op->SetInput("SavedVariance", this->Output("SavedVariance"));
+    if (this->HasOutput("ReserveSpace")) {
+      op->SetInput("ReserveSpace", this->Output("ReserveSpace"));
+    }
     // used when setting use_global_stats True during training
     if (BOOST_GET_CONST(bool, this->GetAttr("use_global_stats"))) {
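This makes the backward op receive ReserveSpace whenever the forward op produced one. For orientation, inplace_abn is the in-place activated batch norm exposed in fluid.layers (see the Python changes below); a usage sketch, with the activation parameters chosen purely for illustration:

import paddle
import paddle.fluid as fluid

paddle.enable_static()
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.data(name='x', shape=[-1, 8, 16, 16], dtype='float32')
    # In-place activated batch norm; act_alpha is the leaky_relu slope.
    y = fluid.layers.inplace_abn(x, act='leaky_relu', act_alpha=0.2)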
python/paddle/fluid/dygraph/nn.py
@@ -1309,12 +1309,6 @@ class BatchNorm(layers.Layer):
                 dtype=self._dtype)
             self._variance.stop_gradient = True

-        self._has_reserve_space = False
-        if data_layout == 'NHWC':
-            flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-            if flag is not None and flag.lower() in ['true', '1']:
-                self._has_reserve_space = True
-
         self._in_place = in_place
         self._data_layout = data_layout
         self._momentum = momentum
@@ -1341,7 +1335,6 @@ class BatchNorm(layers.Layer):
             batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
                 input, self.weight, self.bias, self._mean, self._variance,
                 mean_out, variance_out, *attrs)
-
             return dygraph_utils._append_activation_in_dygraph(
                 batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn)
@@ -1371,11 +1364,8 @@ class BatchNorm(layers.Layer):
                 dtype=self._dtype, stop_gradient=True)
             saved_variance = self._helper.create_variable_for_type_inference(
                 dtype=self._dtype, stop_gradient=True)
-
-            reserve_space = None
-            if self._has_reserve_space:
-                reserve_space = self._helper.create_variable_for_type_inference(
-                    dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+            reserve_space = self._helper.create_variable_for_type_inference(
+                dtype=self._helper.input_dtype(input), stop_gradient=True)
             batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference(
                 self._dtype)
@@ -1388,7 +1378,7 @@ class BatchNorm(layers.Layer):
             "SavedVariance": [saved_variance]
         }
-        if reserve_space is not None:
-            outputs["ReserveSpace"] = reserve_space
+        outputs["ReserveSpace"] = [reserve_space]
         self._helper.append_op(
             type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
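With _has_reserve_space gone, the dygraph BatchNorm layer above now always creates a ReserveSpace variable typed after its input. A minimal usage sketch of the layer; channel count and shapes are illustrative:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(
        np.random.rand(4, 8, 16, 16).astype('float32'))
    bn = fluid.dygraph.BatchNorm(8)  # 8 = number of input channels
    y = bn(x)
    print(y.shape)  # [4, 8, 16, 16]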
python/paddle/fluid/layers/nn.py
@@ -2792,12 +2792,6 @@ def batch_norm(input,
                 'batch_norm')
     dtype = helper.input_dtype()

-    has_reserve_space = False
-    if data_layout == 'NHWC':
-        flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-        if flag is not None and flag.lower() in ['true', '1']:
-            has_reserve_space = True
-
     # use fp32 for bn parameter
     if dtype == core.VarDesc.VarType.FP16:
         dtype = core.VarDesc.VarType.FP32
@@ -2845,17 +2839,16 @@ def batch_norm(input,
     # create output
     # mean and mean_out share the same memory
     mean_out = mean
-    # variance and variance out share the same memory
+    # variance and variance_out share the same memory
     variance_out = variance
     saved_mean = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     reserve_space = None
-    if has_reserve_space:
+    if not is_test:
         reserve_space = helper.create_variable_for_type_inference(
-            dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+            dtype=helper.input_dtype(), stop_gradient=True)
     batch_norm_out = input if in_place else \
         helper.create_variable_for_type_inference(dtype)
@@ -2998,12 +2991,6 @@ def inplace_abn(input,
                 'inplace_abn')
     dtype = helper.input_dtype()

-    has_reserve_space = False
-    if data_layout == 'NHWC':
-        flag = os.environ.get('FLAGS_cudnn_batchnorm_spatial_persistent')
-        if flag is not None and flag.lower() in ['true', '1']:
-            has_reserve_space = True
-
     input_shape = input.shape
     if data_layout == 'NCHW':
         channel_num = input_shape[1]
@@ -3053,12 +3040,8 @@ def inplace_abn(input,
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
-    reserve_space = None
-    if has_reserve_space:
-        reserve_space = helper.create_variable_for_type_inference(
-            dtype=core.VarDesc.VarType.FP16, stop_gradient=True)
+    reserve_space = helper.create_variable_for_type_inference(
+        dtype=dtype, stop_gradient=True)
     batch_norm_out = input
     inputs = {
@@ -3082,7 +3065,6 @@ def inplace_abn(input,
         inputs['MomemtumTensor'] = momentum
     else:
         attrs['momentum'] = momentum
-
     outputs = {
         "Y": batch_norm_out,
         "MeanOut": mean_out,
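After these edits, the static-graph fluid.layers.batch_norm creates reserve_space whenever is_test is false, independent of data layout, and inplace_abn creates it unconditionally. A small sketch of building such a program; names and shapes are illustrative:

import paddle
import paddle.fluid as fluid

paddle.enable_static()  # build a static Program, as these layers require
main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    # -1 marks the batch dimension; the default layout is NCHW.
    x = fluid.data(name='x', shape=[-1, 8, 16, 16], dtype='float32')
    out = fluid.layers.batch_norm(input=x, is_test=False)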
python/paddle/fluid/tests/unittests/test_batch_norm_op.py

@@ -440,16 +440,8 @@ class TestBatchNormOpTraining(unittest.TestCase):
                 "SavedMean": block.var('saved_mean'),
                 "SavedVariance": block.var('saved_variance')
             }
-            has_reserve_space = False
-            if data_format == 'NHWC':
-                flag = os.environ.get(
-                    'FLAGS_cudnn_batchnorm_spatial_persistent')
-                if flag is not None and flag.lower() in ['true', '1']:
-                    has_reserve_space = True
-            if has_reserve_space:
-                block.create_var(name="reserve_space", dtype='float16')
-                outputs["ReserveSpace"] = block.var('reserve_space')
-                del os.environ['FLAGS_cudnn_batchnorm_spatial_persistent']
+            block.create_var(name="reserve_space", dtype='float32')
+            outputs["ReserveSpace"] = block.var('reserve_space')
             bn_op = block.append_op(
                 type="batch_norm",
                 inputs=inputs,
python/paddle/fluid/tests/unittests/test_sync_batch_norm_op.py

@@ -122,7 +122,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
         if not only_forward:
             others = [
                 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
-                'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
+                'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
             ]
             fetch_names += others
         bn_fetches = exe.run(program=main,

@@ -142,7 +142,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
         if not only_forward:
             others = [
                 'batch_norm_0.tmp_0', 'batch_norm_0.tmp_1', 'bn_scale@GRAD',
-                'bn_bias@GRAD', 'batch_norm_0.tmp_2@GRAD', 'conv2d_0.tmp_0@GRAD'
+                'bn_bias@GRAD', 'batch_norm_0.tmp_3@GRAD', 'conv2d_0.tmp_0@GRAD'
             ]
             fetch_names += others
         for nm in fetch_names:
python/paddle/nn/functional/norm.py

@@ -166,7 +166,6 @@ def batch_norm(x,
             batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b)
             print(batch_norm_out)
     """
     assert len(x.shape) >= 2, "input dim must be larger than 1"
-
     # input ad out must share the memory

@@ -196,7 +195,6 @@ def batch_norm(x,
         batch_norm_out, _, _, _, _, _ = core.ops.batch_norm(
             x, weight, bias, running_mean, running_var, mean_out, variance_out,
             *attrs)
-
         return dygraph_utils._append_activation_in_dygraph(
             batch_norm_out, act=None)

@@ -230,13 +228,16 @@ def batch_norm(x,
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     batch_norm_out = helper.create_variable_for_type_inference(dtype)
+    reserve_space = helper.create_variable_for_type_inference(
+        dtype=x.dtype, stop_gradient=True)

     outputs = {
         "Y": [batch_norm_out],
         "MeanOut": [running_mean],
         "VarianceOut": [running_var],
         "SavedMean": [saved_mean],
-        "SavedVariance": [saved_variance]
+        "SavedVariance": [saved_variance],
+        "ReserveSpace": [reserve_space]
     }

     helper.append_op(
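The functional path above now always wires a ReserveSpace output into the batch_norm op it appends, typed after x rather than hard-coded to FP16. For completeness, a sketch of the NHWC case, previously the only layout that took the *Ex path; values are illustrative:

import numpy as np
import paddle

# Channels-last input: normalization parameters follow the last axis (4).
x = paddle.to_tensor(np.random.rand(2, 16, 16, 4).astype('float32'))
rm = paddle.zeros([4])
rv = paddle.ones([4])
w = paddle.ones([4])
b = paddle.zeros([4])
y = paddle.nn.functional.batch_norm(
    x, rm, rv, w, b, training=True, data_format='NHWC')
print(y.shape)  # [2, 16, 16, 4]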