diff --git a/paddle/phi/kernels/funcs/batch_norm_utils.h b/paddle/phi/kernels/funcs/batch_norm_utils.h
index 21ebae8487ffc3588034a8ea5feeab8ac1c47fa8..a7ed7d36eb1c41f688e875912c0a8648cb42cb03 100644
--- a/paddle/phi/kernels/funcs/batch_norm_utils.h
+++ b/paddle/phi/kernels/funcs/batch_norm_utils.h
@@ -36,8 +36,7 @@ inline void ResizeToChannelFirst(const DeviceContext& context,
     in_dims_vec[3] = input->dims()[2];
     in_dims_vec[4] = input->dims()[3];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
-
+    context.template Alloc<T>(transformed_input);
   } else if (dim == 2) {
     // input
     transformed_input->Resize(input->dims());
@@ -47,7 +46,7 @@ inline void ResizeToChannelFirst(const DeviceContext& context,
     in_dims_vec[2] = input->dims()[1];
     in_dims_vec[3] = input->dims()[2];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
+    context.template Alloc<T>(transformed_input);
   } else if (dim == 1) {
     transformed_input->Resize(input->dims());
@@ -55,7 +54,7 @@
     in_dims_vec[1] = input->dims()[2];
     in_dims_vec[2] = input->dims()[1];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
+    context.template Alloc<T>(transformed_input);
   }
 }
@@ -74,7 +73,7 @@ inline void ResizeToChannelLast(const DeviceContext& context,
     in_dims_vec[3] = input->dims()[4];
     in_dims_vec[4] = input->dims()[1];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
+    context.template Alloc<T>(transformed_input);
   } else if (dim == 2) {
     // input
@@ -85,7 +84,7 @@
     in_dims_vec[2] = input->dims()[3];
     in_dims_vec[3] = input->dims()[1];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
+    context.template Alloc<T>(transformed_input);
   } else if (dim == 1) {
     transformed_input->Resize(input->dims());
@@ -93,7 +92,7 @@
     in_dims_vec[1] = input->dims()[2];
     in_dims_vec[2] = input->dims()[1];
     transformed_input->Resize(make_ddim(in_dims_vec));
-    transformed_input->mutable_data<T>(context.GetPlace());
+    context.template Alloc<T>(transformed_input);
   }
 }
diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
index 2c9ee5ede010367697bb9477a536f807625fd02b..339c3536d7a7f476df0c2c46bf34ba48b73c07c3 100644
--- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu
@@ -359,8 +359,8 @@ void BatchNormGradRawKernel(const Context &ctx,
   }
 
   if (d_scale && d_bias) {
-    d_scale->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
-    d_bias->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
+    ctx.template Alloc<BatchNormParamType<T>>(d_scale);
+    ctx.template Alloc<BatchNormParamType<T>>(d_bias);
   }
 
   PADDLE_ENFORCE_EQ(
@@ -569,8 +569,8 @@ void BatchNormGradRawKernel(const Context &ctx,
                 /*activationDesc=*/nullptr,
                 /*sizeInBytes=*/&workspace_size));
 
-        workspace_ptr = workspace_tensor.mutable_data(
-            ctx.GetPlace(), transformed_x.type(), workspace_size);
+        workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
+        workspace_ptr = ctx.template Alloc<T>(&workspace_tensor);
 
         PADDLE_ENFORCE_GPU_SUCCESS(
             paddle::platform::dynload::cudnnBatchNormalizationBackwardEx(
@@ -594,12 +594,9 @@ void BatchNormGradRawKernel(const Context &ctx,
                 /*dBnScaleBiasDesc=*/bn_param_desc_,
                 /*bnScaleData=*/scale.template data<BatchNormParamType<T>>(),
                 /*bnBiasData=*/nullptr,
-                /*dBnScaleData=*/d_scale
-                    ->template mutable_data<BatchNormParamType<T>>(
-                        ctx.GetPlace()),
-                /*dBnBiasData=*/d_bias
-                    ->template mutable_data<BatchNormParamType<T>>(
-                        ctx.GetPlace()),
+                /*dBnScaleData=*/ctx.template Alloc<BatchNormParamType<T>>(
+                    d_scale),
+                /*dBnBiasData=*/ctx.template Alloc<BatchNormParamType<T>>(d_bias),
                 /*epsilon=*/epsilon,
                 /*savedMean=*/saved_mean_data,
                 /*savedInvVariance=*/saved_var_data,
@@ -626,10 +623,8 @@ void BatchNormGradRawKernel(const Context &ctx,
              H * W * D,
              epsilon,
              transformed_d_x.template data<T>(),
-             d_scale->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()),
-             d_bias->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()));
+             ctx.template Alloc<BatchNormParamType<T>>(d_scale),
+             ctx.template Alloc<BatchNormParamType<T>>(d_bias));
       } else {
         BNBackward(),
-             d_scale->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()),
-             d_bias->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()));
+             ctx.template Alloc<BatchNormParamType<T>>(d_scale),
+             ctx.template Alloc<BatchNormParamType<T>>(d_bias));
       }
 
     // TODO(wangran16): wait for MIOpen to improve the performance of BN
@@ -682,10 +675,8 @@ void BatchNormGradRawKernel(const Context &ctx,
             ctx.template Alloc<T>(&transformed_d_x),
             bn_param_desc_,
             scale.template data<BatchNormParamType<T>>(),
-            d_scale->template mutable_data<BatchNormParamType<T>>(
-                ctx.GetPlace()),
-            d_bias->template mutable_data<BatchNormParamType<T>>(
-                ctx.GetPlace()),
+            ctx.template Alloc<BatchNormParamType<T>>(d_scale),
+            ctx.template Alloc<BatchNormParamType<T>>(d_bias),
             epsilon,
             saved_mean_data,
             saved_var_data));
diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
index 49b550f51e60e1cf31658f0d50afebf929a54079..74a523f4ecf942422a1f6c5ca9f710dc0e9d4cbf 100644
--- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu
+++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu
@@ -439,11 +439,11 @@ void BatchNormKernel(const Context &ctx,
       // Run training mode.
       // obtain running mean and running inv var, and there is no need
       // to initialize them.
-      mean_out->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
-      variance_out->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
+      ctx.template Alloc<BatchNormParamType<T>>(mean_out);
+      ctx.template Alloc<BatchNormParamType<T>>(variance_out);
 
-      saved_mean->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
-      saved_variance->mutable_data<BatchNormParamType<T>>(ctx.GetPlace());
+      ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
+      ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
 
       if ((N * H * W * D) == 1) {
         // Only 1 element in normalization dimension,
@@ -497,10 +497,10 @@ void BatchNormKernel(const Context &ctx,
                 /*xDesc=*/data_desc_,
                 /*sizeInBytes=*/&reserve_space_size));
 
-        reserve_space_ptr = reserve_space->mutable_data(
-            ctx.GetPlace(), transformed_x.type(), reserve_space_size);
-        workspace_ptr = workspace_tensor.mutable_data(
-            ctx.GetPlace(), transformed_x.type(), workspace_size);
+        reserve_space->Resize({static_cast<int64_t>(reserve_space_size)});
+        reserve_space_ptr = ctx.template Alloc<T>(reserve_space);
+        workspace_tensor.Resize({static_cast<int64_t>(workspace_size)});
+        workspace_ptr = ctx.template Alloc<T>(&workspace_tensor);
         PADDLE_ENFORCE_GPU_SUCCESS(
             paddle::platform::dynload::cudnnBatchNormalizationForwardTrainingEx(
                 handle,
@@ -518,15 +518,11 @@ void BatchNormKernel(const Context &ctx,
                 scale.template data<BatchNormParamType<T>>(),
                 bias.template data<BatchNormParamType<T>>(),
                 this_factor,
-                mean_out->template mutable_data<BatchNormParamType<T>>(
-                    ctx.GetPlace()),
-                variance_out->template mutable_data<BatchNormParamType<T>>(
-                    ctx.GetPlace()),
+                ctx.template Alloc<BatchNormParamType<T>>(mean_out),
+                ctx.template Alloc<BatchNormParamType<T>>(variance_out),
                 epsilon,
-                saved_mean->template mutable_data<BatchNormParamType<T>>(
-                    ctx.GetPlace()),
-                saved_variance->template mutable_data<BatchNormParamType<T>>(
-                    ctx.GetPlace()),
+                ctx.template Alloc<BatchNormParamType<T>>(saved_mean),
+                ctx.template Alloc<BatchNormParamType<T>>(saved_variance),
                 nullptr,
                 workspace_ptr,
                 workspace_size,
@@ -621,15 +617,11 @@ void BatchNormKernel(const Context &ctx,
              scale.template data<BatchNormParamType<T>>(),
              bias.template data<BatchNormParamType<T>>(),
              this_factor,
-             mean_out->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()),
-             variance_out->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()),
+             ctx.template Alloc<BatchNormParamType<T>>(mean_out),
+             ctx.template Alloc<BatchNormParamType<T>>(variance_out),
              epsilon,
-             saved_mean->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace()),
-             saved_variance->template mutable_data<BatchNormParamType<T>>(
-                 ctx.GetPlace())));
+             ctx.template Alloc<BatchNormParamType<T>>(saved_mean),
+             ctx.template Alloc<BatchNormParamType<T>>(saved_variance)));
 #endif
       }
     }
diff --git a/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
index b4a6fe337c8d21e37beb0d6e5219e1a5edf1f9e8..9c5e77d5fd84661cdcc53dffc8f92a954df81041 100644
--- a/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_grad_grad_kernel.cu
@@ -71,15 +71,15 @@ void ConvCudnnGradGradKernel(
   auto dW = filter_grad;
   auto dX = input_grad;
   if (ddO) {
-    ddO->mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(ddO);
     phi::funcs::SetConstant<Context, T> set_zero;
     set_zero(ctx, ddO, static_cast<T>(0));
   }
   if (dW) {
-    dW->mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(dW);
   }
   if (dX) {
-    dX->mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(dX);
   }
 
   // const T* x = X->data<T>();
@@ -131,7 +131,7 @@ void ConvCudnnGradGradKernel(
     }
     if (dX) {
       ResizeToChannelFirst<Context, T>(ctx, dX, &transformed_dX_channel);
-      transformed_dX_channel.mutable_data<T>(ctx.GetPlace());
+      ctx.template Alloc<T>(&transformed_dX_channel);
     }
 
   } else {
@@ -186,13 +186,13 @@ void ConvCudnnGradGradKernel(
     transformed_ddX.Resize(new_input_shape);
     transformed_dX.Resize(new_input_shape);
 
-    transformed_X.mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(&transformed_X);
 
     if (ddX) {
-      transformed_ddX.mutable_data<T>(ctx.GetPlace());
+      ctx.template Alloc<T>(&transformed_ddX);
     }
     if (dX) {
-      transformed_dX.mutable_data<T>(ctx.GetPlace());
+      ctx.template Alloc<T>(&transformed_dX);
     }
 
     // pad for input
diff --git a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
index 64148e902fdb2123aa3f81846999b5d90f356cd6..a99a1e5f9471ed8cf2513c4690630ff24b00c284 100644
--- a/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
@@ -58,10 +58,10 @@ void ConvCudnnGradKernel(const Context& ctx,
                          DenseTensor* input_grad,
                          DenseTensor* filter_grad) {
   if (input_grad) {
-    input_grad->mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(input_grad);
   }
   if (filter_grad) {
-    filter_grad->mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(filter_grad);
   }
 
   std::vector<int> dilations = dilations_t;
@@ -204,12 +204,12 @@ void ConvCudnnGradKernel(const Context& ctx,
     }
     DDim new_input_shape(make_ddim(new_input_shape_vec));
     transformed_input.Resize(new_input_shape);
-    transformed_input.mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(&transformed_input);
 
     transformed_input_grad.Resize(new_input_shape);
 
     if (input_grad) {
-      transformed_input_grad.mutable_data<T>(ctx.GetPlace());
+      ctx.template Alloc<T>(&transformed_input_grad);
     }
     // pad for input
     const int rank = transformed_input_channel.dims().size();
@@ -427,7 +427,7 @@ void ConvCudnnGradKernel(const Context& ctx,
       if (use_addto) {
         DenseTensor temp_tensor(transformed_input_grad.type());
         temp_tensor.Resize(transformed_input_grad.dims());
-        T* temp_tensor_data = temp_tensor.mutable_data<T>(ctx.GetPlace());
+        T* temp_tensor_data = ctx.template Alloc<T>(&temp_tensor);
         workspace_handle.RunFunc(
             [&](void* cudnn_workspace_ptr) {
               PADDLE_ENFORCE_GPU_SUCCESS(
@@ -513,7 +513,7 @@ void ConvCudnnGradKernel(const Context& ctx,
       axes[i] = i;
     }
 
-    transformed_input_grad_channel.mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(&transformed_input_grad_channel);
     if (transformed_input_channel.dims().size() == 4) {
       paddle::operators::RemovePaddingSlice<Context, T, 4>(
           ctx,
diff --git a/paddle/phi/kernels/gpudnn/conv_kernel.cu b/paddle/phi/kernels/gpudnn/conv_kernel.cu
index 931b6d68845e27297784603c2427178eae6b6f7d..c2970cc8cde75169602de5eec9f0e1424b71a701 100644
--- a/paddle/phi/kernels/gpudnn/conv_kernel.cu
+++ b/paddle/phi/kernels/gpudnn/conv_kernel.cu
@@ -54,7 +54,7 @@ void ConvCudnnKernel(const Context& ctx,
                      int workspace_size_MB,
                      bool exhaustive_search_t,
                      DenseTensor* output) {
-  output->mutable_data<T>(ctx.GetPlace());
+  ctx.template Alloc<T>(output);
   std::vector<int> paddings = paddings_t;
   std::vector<int> dilations = dilations_t;
@@ -170,7 +170,7 @@ void ConvCudnnKernel(const Context& ctx,
     }
     DDim new_input_shape(make_ddim(new_input_shape_vec));
     transformed_input.Resize(new_input_shape);
-    transformed_input.mutable_data<T>(ctx.GetPlace());
+    ctx.template Alloc<T>(&transformed_input);
 
     const int rank = transformed_input_channel.dims().size();
     T pad_value(0.0);
diff --git a/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
index fbcebf371a61bd3d652888b5eaad56185499726b..bc0ed44e17a3346db42f2f858caceabb9d5351b7 100644
--- a/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h
@@ -129,7 +129,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
   DenseTensor col_matrix;
   if (is_expand) {
     col.Resize(col_shape);
-    col.mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(&col);
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
@@ -143,7 +143,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
   if (dX && ddW_in) {
     Tensor ddW;
     ddW.ShareDataWith(*ddW_in).Resize(filter_matrix_shape);
-    dX->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(dX);
 
     DenseTensor transformed_dX(dX->type());
 
@@ -201,7 +201,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
   // oH, oW)
   // dw convolution double grad: im2col(vol2col) + gemm
   if (dW && ddX) {
-    dW->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(dW);
     set_zero(dev_ctx, dW, static_cast<T>(0));
     DenseTensor dW_arr = *dW;
     dW_arr.Resize(filter_matrix_shape);
@@ -244,7 +244,7 @@ void ConvGradGradKernel(const Context& dev_ctx,
   // w/ddw(Cout, Cin, kh, kw)
   // ddy convolution double grad: im2col(vol2col) + gemm
   if (ddY) {
-    ddY->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(ddY);
     DenseTensor transformed_ddY(ddY->type());
     if (channel_last) {
diff --git a/paddle/phi/kernels/impl/conv_grad_kernel_impl.h b/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
index f1971aca800b59171a2e741dbebce6d8adaf7899..2deebb996a057a84bd5343be76969ce3a12e1aa1 100644
--- a/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_grad_kernel_impl.h
@@ -128,7 +128,7 @@ void ConvGradKernel(const Context& dev_ctx,
   DenseTensor col_matrix;
   if (is_expand) {
     col.Resize(col_shape);
-    col.mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(&col);
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
@@ -137,7 +137,7 @@ void ConvGradKernel(const Context& dev_ctx,
   auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
   if (input_grad) {
-    input_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(input_grad);
     DenseTensor transformed_input_grad(input_grad->type());
     if (channel_last) {
       ResizeToChannelFirst<Context, T>(
           dev_ctx, input_grad, &transformed_input_grad);
@@ -203,7 +203,7 @@ void ConvGradKernel(const Context& dev_ctx,
   }
 
   if (filter_grad) {
-    filter_grad->mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(filter_grad);
     Tensor filter_grad_ = *filter_grad;
     filter_grad_.Resize(filter_matrix_shape);
     set_zero(dev_ctx, filter_grad, static_cast<T>(0));
diff --git a/paddle/phi/kernels/impl/conv_kernel_impl.h b/paddle/phi/kernels/impl/conv_kernel_impl.h
index 1945468f02551b8e348687ae578c9f23a038b8ca..2ef2ed8af2809c453db4f5a8c20ed4e004bf64be 100644
--- a/paddle/phi/kernels/impl/conv_kernel_impl.h
+++ b/paddle/phi/kernels/impl/conv_kernel_impl.h
@@ -44,7 +44,7 @@ void ConvKernel(const Context& dev_ctx,
   // The filter will be reshaped in the calculations,
   // so here use an assignment operation,
   // that avoids modifying the variable in the Scope.
-  output->mutable_data<T>(dev_ctx.GetPlace());
+  dev_ctx.template Alloc<T>(output);
   const bool channel_last = (data_format == "NHWC" || data_format == "NDHWC");
@@ -115,7 +115,7 @@ void ConvKernel(const Context& dev_ctx,
   if (is_expand) {
     // col = context.AllocateTmpTensor<T, DeviceContext>(col_shape, dev_ctx);
     col.Resize(col_shape);
-    col.mutable_data<T>(dev_ctx.GetPlace());
+    dev_ctx.template Alloc<T>(&col);
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
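
Note: every hunk above applies the same mechanical change: the legacy DenseTensor::mutable_data<T>(place) call is replaced by the phi device-context allocation API, dev_ctx.template Alloc<T>(&tensor), which performs the allocation through the context and returns the typed data pointer. A minimal sketch of the two styles inside a phi kernel is shown below; the kernel and tensor names are illustrative only and are not part of this diff.

    #include "paddle/phi/core/dense_tensor.h"

    namespace phi {

    // Illustrative kernel, not part of the patch above.
    template <typename T, typename Context>
    void ExampleAllocKernel(const Context& dev_ctx,
                            const DenseTensor& x,
                            DenseTensor* out) {
      out->Resize(x.dims());
      // Old style: the tensor allocates its own memory on a given place.
      //   T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
      // New style: the device context allocates the tensor's memory and
      // returns the typed pointer, so the call can also be used inline as
      // a function argument, exactly as the hunks above do.
      T* out_data = dev_ctx.template Alloc<T>(out);
      (void)out_data;  // the pointer would normally feed the computation
    }

    }  // namespace phi

The `template` keyword in dev_ctx.template Alloc<T>(...) is required because the context type is a dependent template parameter; without it the `<` would be parsed as a less-than comparison.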