Unverified · Commit 90650534 authored by Huang Jiyi, committed by GitHub

remove layout_utils in phi (#50355)

Parent 10654c77
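
The change is mechanical across both batch-norm kernels: every cuDNN entry point that was resolved through the fluid-side paddle::platform::dynload:: wrapper is now resolved through phi::dynload::, tensor copies move from paddle::framework::TensorCopy to phi::Copy, and the now-unneeded paddle/fluid/operators/layout_utils.h include is dropped. A minimal sketch of the pattern, taken directly from the hunks below (PADDLE_ENFORCE_GPU_SUCCESS and the descriptor variables are as in the kernels):

    // Before: cuDNN symbol loaded via the fluid dynload wrapper.
    PADDLE_ENFORCE_GPU_SUCCESS(
        paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));

    // After: the same symbol via phi's own dynload, removing the
    // phi -> fluid dependency.
    PADDLE_ENFORCE_GPU_SUCCESS(
        phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));

    // Tensor copies likewise switch to phi::Copy, which takes the device
    // context and a blocking flag explicitly:
    //   paddle::framework::TensorCopy(x, ctx.GetPlace(), y);        // before
    //   phi::Copy(ctx, x, ctx.GetPlace(), /*blocking=*/false, y);   // after
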
@@ -12,7 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -630,7 +629,7 @@ void BatchNormGradRawKernel(const Context &ctx,
   if (!use_global_stats) {
     if ((N * H * W * D) == 1) {
       if (d_x) {
-        paddle::framework::TensorCopy(*d_y, ctx.GetPlace(), d_x);
+        phi::Copy(ctx, *d_y, ctx.GetPlace(), false, d_x);
       }
       phi::funcs::SetConstant<Context, BatchNormParamType<T>> functor;
       functor(ctx, d_scale, static_cast<BatchNormParamType<T>>(0));
@@ -655,10 +654,9 @@ void BatchNormGradRawKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(
-          &bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
     LOG(ERROR) << "Provided epsilon is smaller than "
@@ -695,16 +693,14 @@ void BatchNormGradRawKernel(const Context &ctx,
 //   platform::dynload::miopenDeriveBNTensorDescriptor(bn_param_desc_,
 //   data_desc_, mode_));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-            data_desc_,
-            CudnnDataType<T>::type,
-            x_dims.size() > 3 ? x_dims.size() : 4,
-            dims.data(),
-            strides.data()));
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-            bn_param_desc_, data_desc_, mode_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+        data_desc_,
+        CudnnDataType<T>::type,
+        x_dims.size() > 3 ? x_dims.size() : 4,
+        dims.data(),
+        strides.data()));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+        bn_param_desc_, data_desc_, mode_));
 #endif
   const auto *saved_mean_data =
@@ -934,26 +930,25 @@ void BatchNormGradRawKernel(const Context &ctx,
         auto reserve_space_size = reserve_space->memory_size();
         // --------------- cudnn batchnorm workspace ---------------
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::
-                cudnnGetBatchNormalizationBackwardExWorkspaceSize(
-                    /*handle=*/ctx.cudnn_handle(),
-                    /*mode=*/mode_,
-                    /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
-                    /*xDesc=*/data_desc_,
-                    /*yDesc=*/data_desc_,
-                    /*dyDesc=*/data_desc_,
-                    /*dzDesc=*/nullptr,
-                    /*dxDesc=*/data_desc_,
-                    /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
-                    /*activationDesc=*/nullptr,
-                    /*sizeInBytes=*/&workspace_size));
+            phi::dynload::cudnnGetBatchNormalizationBackwardExWorkspaceSize(
+                /*handle=*/ctx.cudnn_handle(),
+                /*mode=*/mode_,
+                /*bnIps=*/CUDNN_BATCHNORM_OPS_BN,
+                /*xDesc=*/data_desc_,
+                /*yDesc=*/data_desc_,
+                /*dyDesc=*/data_desc_,
+                /*dzDesc=*/nullptr,
+                /*dxDesc=*/data_desc_,
+                /*bnScaleBiasMeanVarDesc=*/bn_param_desc_,
+                /*activationDesc=*/nullptr,
+                /*sizeInBytes=*/&workspace_size));
         workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
         workspace_ptr =
             static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
+            phi::dynload::cudnnBatchNormalizationBackwardEx(
                 /*handle=*/ctx.cudnn_handle(),
                 /*mode=*/mode_,
                 /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
@@ -989,7 +984,7 @@ void BatchNormGradRawKernel(const Context &ctx,
                 /*reserveSpaceSizeInBytes=*/reserve_space_size));
 #else
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationBackward(
+            phi::dynload::cudnnBatchNormalizationBackward(
                 ctx.cudnn_handle(),
                 mode_,
                 CudnnDataType<T>::kOne(),
@@ -1089,10 +1084,9 @@ void BatchNormGradRawKernel(const Context &ctx,
 #else
     // clean when exit.
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDestroyTensorDescriptor(
-            bn_param_desc_));
+        phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
   } else {
......
@@ -20,7 +20,6 @@
 namespace cub = hipcub;
 #endif
-#include "paddle/fluid/operators/layout_utils.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_dnn.h"
 #include "paddle/phi/common/layout.h"
@@ -598,9 +597,9 @@ void BatchNormKernel(const Context &ctx,
   cudnnBatchNormMode_t mode_;
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&data_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
+      phi::dynload::cudnnCreateTensorDescriptor(&bn_param_desc_));
 #endif
   if (epsilon <= CUDNN_BN_MIN_EPSILON - FLT_EPSILON) {
@@ -651,19 +650,15 @@ void BatchNormKernel(const Context &ctx,
 //   platform::dynload::miopenDeriveBNTensorDescriptor(
 //   bn_param_desc_, data_desc_, test_mode ? miopenBNSpatial : mode_));
 #else
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnSetTensorNdDescriptor(
-            data_desc_,
-            CudnnDataType<T>::type,
-            x_dims.size() > 3 ? x_dims.size() : 4,
-            dims.data(),
-            strides.data()));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnSetTensorNdDescriptor(
+        data_desc_,
+        CudnnDataType<T>::type,
+        x_dims.size() > 3 ? x_dims.size() : 4,
+        dims.data(),
+        strides.data()));
     // Note: PERSISTENT not implemented for inference
-    PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnDeriveBNTensorDescriptor(
-            bn_param_desc_,
-            data_desc_,
-            test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
+    PADDLE_ENFORCE_GPU_SUCCESS(phi::dynload::cudnnDeriveBNTensorDescriptor(
+        bn_param_desc_, data_desc_, test_mode ? CUDNN_BATCHNORM_SPATIAL : mode_));
 #endif
   auto handle = ctx.cudnn_handle();
@@ -830,7 +825,7 @@ void BatchNormKernel(const Context &ctx,
     }
   } else {
     PADDLE_ENFORCE_GPU_SUCCESS(
-        paddle::platform::dynload::cudnnBatchNormalizationForwardInference(
+        phi::dynload::cudnnBatchNormalizationForwardInference(
             handle,
             // Note: PERSISTENT not implemented for inference
            CUDNN_BATCHNORM_SPATIAL,
@@ -873,7 +868,7 @@ void BatchNormKernel(const Context &ctx,
     if ((N * H * W * D) == 1) {
       // Only 1 element in normalization dimension,
       // skip the batch norm calculation, let y = x.
-      paddle::framework::TensorCopy(x, ctx.GetPlace(), y);
+      phi::Copy(ctx, x, ctx.GetPlace(), false, y);
     } else {
       double this_factor = 1. - momentum;
 #ifdef PADDLE_WITH_HIP
@@ -1114,7 +1109,7 @@ void BatchNormKernel(const Context &ctx,
                 "The argument ReserveSpace of batch_norm op is not found."));
         // --------------- cudnn batchnorm workspace ---------------
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::
+            phi::dynload::
                 cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(
                     /*handle=*/handle,
                     /*mode=*/mode_,
@@ -1128,14 +1123,13 @@ void BatchNormKernel(const Context &ctx,
         // -------------- cudnn batchnorm reserve space --------------
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::
-                cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
-                    /*handle=*/handle,
-                    /*mode=*/mode_,
-                    /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
-                    /*activationDesc=*/nullptr,
-                    /*xDesc=*/data_desc_,
-                    /*sizeInBytes=*/&reserve_space_size));
+            phi::dynload::cudnnGetBatchNormalizationTrainingExReserveSpaceSize(
+                /*handle=*/handle,
+                /*mode=*/mode_,
+                /*bnOps=*/CUDNN_BATCHNORM_OPS_BN,
+                /*activationDesc=*/nullptr,
+                /*xDesc=*/data_desc_,
+                /*sizeInBytes=*/&reserve_space_size));
         reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
         reserve_space_ptr =
@@ -1144,7 +1138,7 @@ void BatchNormKernel(const Context &ctx,
         workspace_ptr =
             static_cast<void *>(ctx.template Alloc<uint8_t>(&workspace_tensor));
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
+            phi::dynload::cudnnBatchNormalizationForwardTrainingEx(
                 handle,
                 mode_,
                 CUDNN_BATCHNORM_OPS_BN,
@@ -1172,7 +1166,7 @@ void BatchNormKernel(const Context &ctx,
                 reserve_space_size));
 #else
         PADDLE_ENFORCE_GPU_SUCCESS(
-            paddle::platform::dynload::cudnnBatchNormalizationForwardTraining(
+            phi::dynload::cudnnBatchNormalizationForwardTraining(
                 handle,
                 mode_,
                 CudnnDataType<T>::kOne(),
@@ -1211,9 +1205,9 @@ void BatchNormKernel(const Context &ctx,
 #else
   // clean when exit.
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(data_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(data_desc_));
   PADDLE_ENFORCE_GPU_SUCCESS(
-      paddle::platform::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
+      phi::dynload::cudnnDestroyTensorDescriptor(bn_param_desc_));
 #endif
 }
......
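
For readers unfamiliar with the dynload layer: both paddle::platform::dynload and phi::dynload expose cuDNN entry points through lazily resolved function pointers, so a kernel binds to cuDNN at runtime rather than at link time. A generic sketch of that pattern under stated assumptions (the function name GetCudnnSymbol and the library name "libcudnn.so" are illustrative, not Paddle's actual macro code):

    #include <dlfcn.h>
    #include <cstdio>

    // Open the cuDNN shared library once, cache the handle, and resolve
    // each requested symbol by name on demand.
    static void* GetCudnnSymbol(const char* name) {
      static void* handle = dlopen("libcudnn.so", RTLD_LAZY | RTLD_GLOBAL);
      return handle != nullptr ? dlsym(handle, name) : nullptr;
    }

    int main() {
      // Look up cudnnCreateTensorDescriptor the way a dynload wrapper
      // would before forwarding a call to it.
      void* fn = GetCudnnSymbol("cudnnCreateTensorDescriptor");
      std::printf("symbol %s\n", fn != nullptr ? "found" : "not found");
      return 0;
    }
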