BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle)
Commit 90650534 (unverified)
Authored by Huang Jiyi on Feb 09, 2023; committed via GitHub on Feb 09, 2023.

remove layout_utils in phi (#50355)

Parent: 10654c77
Showing 2 changed files with 51 additions and 63 deletions (+51, -63):

  paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu   +27  -33
  paddle/phi/kernels/gpu/batch_norm_kernel.cu        +24  -30
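Both files make the same two substitutions: cuDNN entry points previously reached through paddle::platform::dynload are now reached through phi::dynload, and paddle::framework::TensorCopy is replaced by phi::Copy. A minimal before/after sketch of the pattern, using only names that appear in the hunks below (d_y, d_x, ctx, data_desc_; the surrounding kernel code is elided):

    // Before: a phi kernel reaching back into fluid for the tensor
    // copy helper and for the lazily-loaded cuDNN symbols.
    paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
    PADDLE_ENFORCE_GPU_SUCCESS(
        paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));

    // After: the phi-native equivalents. phi::Copy takes the device
    // context explicitly plus a blocking flag (false = asynchronous copy).
    phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
    PADDLE_ENFORCE_GPU_SUCCESS(
        phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));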
paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"

@@ -630,7 +629,7 @@ void BatchNormGradRawKernel(const Context &ctx,
   if (!use_global_stats) {
     if ((N * H * W * D) == 1) {
       if (d_x) {
-        paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+        phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
       }
       phi::funcs::SetConstant<Context, BatchNormParamType<T>> functor;
       functor(ctx, d_scale, static_cast<BatchNormParamType<T>>(0));

@@ -655,10 +654,9 @@ void BatchNormGradRawKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(
-          &bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
     LOG(ERROR) << "Provided epsilon is smaller than "

@@ -695,15 +693,13 @@ void BatchNormGradRawKernel(const Context &ctx,
   // platform::dynload::miopenDeriveBNTensorDescriptor(bn_param_desc_,
   // data_desc_, mode_));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnSetTensorNdDescriptor(
+      phi::dynload::cudnnSetTensorNdDescriptor(
           data_desc_,
           CudnnDataType<T>::type,
           x_dims.size() > 3 ? x_dims.size() : 4,
           dims.data(),
           strides.data()));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
+      phi::dynload::cudnnDeriveBNTensorDescriptor(
           bn_param_desc_, data_desc_, mode_));
 #endif

@@ -934,8 +930,7 @@ void BatchNormGradRawKernel(const Context &ctx,
     auto reserve_space_size = reserve_space->memory_size();
     // --------------- cudnn batchnorm workspace ---------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::
-            cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+        phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
             /*handle=*/ctx.cudnn_handle(),
             /*mode=*/mode_,
             /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,

@@ -953,7 +948,7 @@ void BatchNormGradRawKernel(const Context &ctx,
         static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
+        phi::dynload::cudnnBatchNormalizationBackwardEx(
            /*handle=*/ctx.cudnn_handle(),
            /*mode=*/mode_,
            /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,

@@ -989,7 +984,7 @@ void BatchNormGradRawKernel(const Context &ctx,
             /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #else
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationBackward(
+        phi::dynload::cudnnBatchNormalizationBackward(
            ctx.cudnn_handle(),
            mode_,
            CudnnDataType<T>::kOne(),

@@ -1089,10 +1084,9 @@ void BatchNormGradRawKernel(const Context &ctx,
 #else
     // clean when exit.
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(
-            bn_param_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
   } else {
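Every rewritten call site above keeps the same shape: a cuDNN entry point resolved through the dynload namespace, with the returned status checked by PADDLE_ENFORCE_GPU_SUCCESS. A sketch of the descriptor lifecycle as it reads after this commit (data_desc_ is a cudnnTensorDescriptor_t per cuDNN's API; the describe/compute steps are elided):

    // phi::dynload::<symbol> resolves the cuDNN function from the
    // dynamically loaded library; PADDLE_ENFORCE_GPU_SUCCESS raises a
    // Paddle error with context if the call returns a failure status.
    cudnnTensorDescriptor_t data_desc_;
    PADDLE_ENFORCE_GPU_SUCCESS(
        phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
    // ... set the tensor layout and run the batch-norm calls ...
    PADDLE_ENFORCE_GPU_SUCCESS(
        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));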
paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -20,7 +20,6 @@
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"

@@ -598,9 +597,9 @@ void BatchNormKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {

@@ -651,19 +650,15 @@ void BatchNormKernel(const Context &ctx,
   // platform::dynload::miopenDeriveBNTensorDescriptor(
   // bn_param_desc_, data_desc_, test_mode ? miopenBNSpatial : mode_));
 #else
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnSetTensorNdDescriptor(
+      phi::dynload::cudnnSetTensorNdDescriptor(
           data_desc_,
           CudnnDataType<T>::type,
           x_dims.size() > 3 ? x_dims.size() : 4,
           dims.data(),
           strides.data()));
   // Note: PERSISTENT not implemented for inference
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
+      phi::dynload::cudnnDeriveBNTensorDescriptor(
           bn_param_desc_, data_desc_, test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
 #endif
   auto handle = ctx.cudnn_handle();

@@ -830,7 +825,7 @@ void BatchNormKernel(const Context &ctx,
     }
   } else {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationForwardInference(
+        phi::dynload::cudnnBatchNormalizationForwardInference(
            handle,
            // Note: PERSISTENT not implemented for inference
            CUDNN_BATCHNORM_SPATIAL,

@@ -873,7 +868,7 @@ void BatchNormKernel(const Context &ctx,
     if ((N * H * W * D) == 1) {
       // Only 1 element in normalization dimension,
       // skip the batch norm calculation, let y = x.
-      paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+      phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     } else {
       double this_factor = 1. - momentum;
 #ifdef PADDLE_WITH_HIP

@@ -1114,7 +1109,7 @@ void BatchNormKernel(const Context &ctx,
             "The argument ReserveSpace of batch_norm op is not found."));
     // --------------- cudnn batchnorm workspace ---------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::
+        phi::dynload::
             cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                 /*handle=*/handle,
                 /*mode=*/mode_,

@@ -1128,8 +1123,7 @@ void BatchNormKernel(const Context &ctx,
     // -------------- cudnn batchnorm reserve space --------------
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::
-            cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+        phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
             /*handle=*/handle,
             /*mode=*/mode_,
             /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,

@@ -1144,7 +1138,7 @@ void BatchNormKernel(const Context &ctx,
     workspace_ptr =
         static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
+        phi::dynload::cudnnBatchNormalizationForwardTrainingEx(
            handle,
            mode_,
            CUDNN_BATCHNORM_OPS_BN,

@@ -1172,7 +1166,7 @@ void BatchNormKernel(const Context &ctx,
         reserve_space_size));
 #else
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationForwardTraining(
+        phi::dynload::cudnnBatchNormalizationForwardTraining(
            handle,
            mode_,
            CudnnDataType<T>::kOne(),

@@ -1211,9 +1205,9 @@ void BatchNormKernel(const Context &ctx,
 #else
   // clean when exit.
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 }
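A behavioral detail preserved by the rewrite: when the normalization dimension holds a single element (N * H * W * D == 1), the batch statistics degenerate (the mean equals the input and the variance is zero), so both kernels skip cuDNN entirely. The forward pass copies x to y and the backward pass copies d_y to d_x (both now via phi::Copy), zeroing the parameter gradients with SetConstant.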