// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/phi/kernels/conv_kernel.h" #include "paddle/phi/backends/xpu/enforce_xpu.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/conv_util.h" namespace phi { template void ConvKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const std::vector& strides, const std::vector& paddings_t, const std::string& padding_algorithm, const std::vector& dilations_t, int groups, const std::string& data_format, DenseTensor* out) { using XPUT = typename XPUTypeTrait::Type; std::vector paddings = paddings_t; std::vector dilations = dilations_t; // The filter will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. dev_ctx.template Alloc(out); PADDLE_ENFORCE_EQ( data_format == "NDHWC", false, phi::errors::InvalidArgument( ("XPU does not support data_format is NDHWC in conv op."))); phi::DDim in_data_dims = phi::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = phi::slice_ddim(filter.dims(), 2, filter.dims().size()); std::vector ksize = phi::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); int batch_size = static_cast(input.dims()[0]); int img_c = static_cast(input.dims()[1]); int img_h = static_cast(input.dims()[2]); int img_w = static_cast(input.dims()[3]); int f = static_cast(filter.dims()[0]); bool is_nchw = true; if (data_format == "NHWC") { img_c = static_cast(input.dims()[3]); img_h = static_cast(input.dims()[1]); img_w = static_cast(input.dims()[2]); is_nchw = false; } const XPUT* input_data = reinterpret_cast(input.data()); const XPUT* filter_data = reinterpret_cast(filter.data()); XPUT* output_data = reinterpret_cast(out->data()); xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); XPUT* filter_data_tmp; const XPUT* filter_data_ptr = filter_data; if (data_format == "NHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); std::vector filter_shape = phi::vectorize(filter.dims()); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, filter_shape, {0, 2, 3, 1}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); filter_data_ptr = reinterpret_cast(filter_data_tmp); } int r = xpu::conv2d(dev_ctx.x_context(), input_data, filter_data_ptr, output_data, batch_size, img_c, img_h, img_w, f, ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr, is_nchw); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv2d"); } template void DepthwiseConvKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, int groups, const std::vector& dilations, const std::string& data_format, DenseTensor* out) { ConvKernel(dev_ctx, input, filter, strides, paddings, padding_algorithm, dilations, groups, data_format, out); } template void Conv3DKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const std::vector& strides, const std::vector& paddings_t, const std::string& padding_algorithm, int groups, const std::vector& dilations_t, const std::string& data_format, DenseTensor* out) { using XPUT = typename XPUTypeTrait::Type; std::vector paddings = paddings_t; std::vector dilations = dilations_t; // The filter will be reshaped in the calculations, // so here use an assignment operation, // that avoids modifying the variable in the Scope. dev_ctx.template Alloc(out); phi::DDim in_data_dims = phi::slice_ddim(input.dims(), 2, input.dims().size()); phi::DDim filter_data_dims = phi::slice_ddim(filter.dims(), 2, filter.dims().size()); std::vector ksize = phi::vectorize(filter_data_dims); UpdatePaddingAndDilation( &paddings, &dilations, padding_algorithm, in_data_dims, strides, ksize); int batch_size = static_cast(input.dims()[0]); int img_c = static_cast(input.dims()[1]); int img_d = static_cast(input.dims()[2]); int img_h = static_cast(input.dims()[3]); int img_w = static_cast(input.dims()[4]); int f = static_cast(filter.dims()[0]); bool is_ncdhw = true; if (data_format == "NDHWC") { img_c = static_cast(input.dims()[4]); img_d = static_cast(input.dims()[1]); img_h = static_cast(input.dims()[2]); img_w = static_cast(input.dims()[3]); is_ncdhw = false; } XPUT* output_data = reinterpret_cast(out->data()); const XPUT* filter_data = reinterpret_cast(filter.data()); const XPUT* input_data = reinterpret_cast(input.data()); xpu::ctx_guard RAII_GUARD(dev_ctx.x_context()); XPUT* filter_data_tmp; const XPUT* filter_data_ptr = filter_data; if (data_format == "NDHWC") { filter_data_tmp = RAII_GUARD.alloc(filter.numel()); PADDLE_ENFORCE_XDNN_NOT_NULL(filter_data_tmp); std::vector filter_shape = phi::vectorize(filter.dims()); int r = xpu::transpose(dev_ctx.x_context(), filter_data, filter_data_tmp, filter_shape, {0, 2, 3, 4, 1}); PADDLE_ENFORCE_XDNN_SUCCESS(r, "transpose"); filter_data_ptr = reinterpret_cast(filter_data_tmp); } int r = xpu::conv3d(dev_ctx.x_context(), input_data, filter_data_ptr, output_data, batch_size, img_c, img_d, img_h, img_w, f, ksize, strides, paddings, dilations, groups, nullptr, nullptr, nullptr, is_ncdhw); PADDLE_ENFORCE_XDNN_SUCCESS(r, "conv3d"); } } // namespace phi PD_REGISTER_KERNEL( conv2d, XPU, ALL_LAYOUT, phi::ConvKernel, float, phi::dtype::float16) {} PD_REGISTER_KERNEL( depthwise_conv2d, XPU, ALL_LAYOUT, phi::DepthwiseConvKernel, float) {} PD_REGISTER_KERNEL( conv3d, XPU, ALL_LAYOUT, phi::Conv3DKernel, float, phi::dtype::float16) {}