// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/phi/kernels/conv_grad_kernel.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" namespace phi { template void ConvGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings_t, const std::string& padding_algorithm, const std::vector& dilations_t, int groups, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { using XPUT = typename XPUTypeTrait::Type; std::vector paddings = paddings_t; std::vector dilations = dilations_t; // The filter and filter_grad will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. if (!input_grad && !filter_grad) return; PADDLE_ENFORCE_EQ( data_format == "NDHWC", false, phi::errors::InvalidArgument( ("XPU doesn't support data_format is NDHWC in conv grad op."))); phi::DDim in_data_dims = phi::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = phi::slice_ddim(filter.dims(), 2, filter.dims().size()); std::vector ksize = phi::vectorize(filter_data_dims); std::vector filter_shape = phi::vectorize(filter.dims()); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); int batch_size = static_cast(input.dims()[0]); int img_c = static_cast(input.dims()[1]); int img_h = static_cast(input.dims()[2]); int img_w = static_cast(input.dims()[3]); int f = static_cast(filter.dims()[0]); bool is_nchw = true; if (data_format == "NHWC") { img_c = static_cast(input.dims()[3]); img_h = static_cast(input.dims()[1]); img_w = static_cast(input.dims()[2]); is_nchw = false; } const XPUT* input_data = reinterpret_cast(input.data()); const XPUT* filter_data = reinterpret_cast(filter.data()); const XPUT* output_grad_data = reinterpret_cast(out_grad.data()); XPUT* input_grad_data = nullptr; if (input_grad) { dev_ctx.template Alloc(input_grad); input_grad_data = reinterpret_cast(input_grad->data()); } XPUT* filter_grad_data = nullptr; if (filter_grad) { dev_ctx.template Alloc(filter_grad); filter_grad_data = reinterpret_cast(filter_grad->data()); } xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); XPUT* filter_data_tmp; XPUT* filter_grad_data_tmp; const XPUT* filter_data_ptr = filter_data; XPUT* filter_grad_data_ptr = filter_grad_data; if (data_format == "NHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, filter_shape, {0, 2, 3, 1}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); filter_data_ptr = reinterpret_cast(filter_data_tmp); if (filter_grad_data != nullptr) { filter_grad_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp); filter_grad_data_ptr = filter_grad_data_tmp; } } int r = xpu::conv2d_grad(dev_ctx.x_context(), input_data, filter_data_ptr, output_grad_data, input_grad_data, filter_grad_data_ptr, batch_size, img_c, img_h, img_w, f, ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr, nullptr, nullptr, is_nchw); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d_grad"); if ((filter_grad_data_ptr != nullptr) && (data_format == "NHWC")) { std::vector filter_shape_fhwc = { filter_shape[0], filter_shape[2], filter_shape[3], filter_shape[1]}; int r = xpu::transpose(dev_ctx.x_context(), filter_grad_data_ptr, filter_grad_data, filter_shape_fhwc, {0, 3, 1, 2}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); } } template void DepthwiseConvGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, int groups, const std::vector& dilations, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { ConvGradKernel(dev_ctx, input, filter, out_grad, strides, paddings, padding_algorithm, dilations, groups, data_format, input_grad, filter_grad); } template void Conv3DGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings_t, const std::string& padding_algorithm, int groups, const std::vector& dilations_t, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { using XPUT = typename XPUTypeTrait::Type; std::vector paddings = paddings_t; std::vector dilations = dilations_t; // The filter and filter_grad will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. if (!input_grad && !filter_grad) return; phi::DDim in_data_dims = phi::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = phi::slice_ddim(filter.dims(), 2, filter.dims().size()); std::vector ksize = phi::vectorize(filter_data_dims); std::vector filter_shape = phi::vectorize(filter.dims()); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); int batch_size = static_cast(input.dims()[0]); int img_c = static_cast(input.dims()[1]); int img_d = static_cast(input.dims()[2]); int img_h = static_cast(input.dims()[3]); int img_w = static_cast(input.dims()[4]); int f = static_cast(filter.dims()[0]); bool is_ncdhw = true; if (data_format == "NDHWC") { img_c = static_cast(input.dims()[4]); img_d = static_cast(input.dims()[1]); img_h = static_cast(input.dims()[2]); img_w = static_cast(input.dims()[3]); is_ncdhw = false; } const XPUT* input_data = reinterpret_cast(input.data()); const XPUT* filter_data = reinterpret_cast(filter.data()); const XPUT* output_grad_data = reinterpret_cast(out_grad.data()); XPUT* input_grad_data = nullptr; if (input_grad) { dev_ctx.template Alloc(input_grad); input_grad_data = reinterpret_cast(input_grad->data()); } XPUT* filter_grad_data = nullptr; if (filter_grad) { dev_ctx.template Alloc(filter_grad); filter_grad_data = reinterpret_cast(filter_grad->data()); } xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); XPUT* filter_data_tmp; XPUT* filter_grad_data_tmp; const XPUT* filter_data_ptr = filter_data; XPUT* filter_grad_data_ptr = filter_grad_data; if (data_format == "NDHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, filter_shape, {0, 2, 3, 4, 1}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); filter_data_ptr = reinterpret_cast(filter_data_tmp); if (filter_grad_data != nullptr) { filter_grad_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_grad_data_tmp); filter_grad_data_ptr = filter_grad_data_tmp; } } int r = xpu::conv3d_grad(dev_ctx.x_context(), input_data, filter_data_ptr, output_grad_data, input_grad_data, filter_grad_data_ptr, batch_size, img_c, img_d, img_h, img_w, f, ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr, nullptr, nullptr, is_ncdhw); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv3d_grad"); if ((filter_grad_data_ptr != nullptr) && (data_format == "NDHWC")) { std::vector filter_shape_fhwc = {filter_shape[0], filter_shape[2], filter_shape[3], filter_shape[4], filter_shape[1]}; int r = xpu::transpose(dev_ctx.x_context(), filter_grad_data_ptr, filter_grad_data, filter_shape_fhwc, {0, 4, 1, 2, 3}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); } } } // namespace phi PD_REGISTER_KERNEL(conv2d_grad, XPU, ALL_LAYOUT, phi::ConvGradKernel, float, phi::dtype::float16) {} PD_REGISTER_KERNEL(depthwise_conv2d_grad, XPU, ALL_LAYOUT, phi::DepthwiseConvGradKernel, float) {} PD_REGISTER_KERNEL(conv3d_grad, XPU, ALL_LAYOUT, phi::Conv3DGradKernel, float, phi::dtype::float16) {}