// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/phi/kernels/conv_grad_kernel.h" #include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/funcs/data_layout_transform.h" #include "paddle/phi/kernels/onednn/conv_handler.h" namespace phi { #define PD_VISIT_FLOAT_AND_BF16_TYPES(TYPE, NAME, ...) \ [&] { \ const auto& __dtype__ = TYPE; \ switch (__dtype__) { \ PD_PRIVATE_CASE_TYPE( \ NAME, ::paddle::DataType::FLOAT32, float, __VA_ARGS__) \ PD_PRIVATE_CASE_TYPE(NAME, \ ::paddle::DataType::BFLOAT16, \ ::phi::dtype::bfloat16, \ __VA_ARGS__) \ default: \ PD_THROW("function " #NAME " is not implemented for data type `", \ __dtype__, \ "`"); \ } \ }() template void ConvGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, const std::vector& dilations, int groups, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType(), AllocationType::CPU, phi::errors::PreconditionNotMet( "Operator oneDNN ConvGrad must use CPUPlace")); const auto& onednn_engine = dev_ctx.GetEngine(); const auto* bias = dev_ctx.HasDnnInput("Bias") ? dev_ctx.GetDnnInput("Bias") : nullptr; bool is_test = dev_ctx.HasDnnAttr("is_test") ? PADDLE_GET_CONST(bool, dev_ctx.GetDnnAttr("is_test")) : false; if (!input_grad && !filter_grad) return; const std::string& unique_name = dev_ctx.GetInputsName("Input")[0] + dev_ctx.GetInputsName("Filter")[0]; PD_VISIT_FLOAT_AND_BF16_TYPES( filter.dtype(), "ConvOneDNNHandlerT", ([&] { // TODO(jczaja): Are all tensors really needed? onednn::ConvOneDNNHandlerT handler(dev_ctx, dev_ctx.GetPlace(), &input, &filter, bias, &out_grad, strides, paddings, padding_algorithm, dilations, groups, data_format, is_test, filter_grad, input_grad, unique_name); // create mkldnn memory from input tensors (data/weights) auto& astream = OneDNNContext::tls().get_stream(); if (filter_grad) { auto src_memory_p = handler.AcquireSrcMemoryWithReorderFromWeightsPrimitive(&input); auto diff_dst_memory_p = handler.AcquireDiffDstMemoryWithReorderFromWeightsPrimitive( &out_grad); // For convoluition with groups write filter grad into // oneDNN buffer and then we reorder it into filter_grad tensor int g = std::max(groups, 1); auto diff_weights_memory_p = g > 1 ? handler.AcquireDiffWeightsMemory() : handler.AcquireDiffWeightsMemory(filter_grad); auto conv_bwd_weights_p = handler.AcquireBackwardWeightsPrimitive(); conv_bwd_weights_p->execute( astream, {{DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, {DNNL_ARG_DIFF_WEIGHTS, *diff_weights_memory_p}}); astream.wait(); // For convolution with groups convert from blocked to NCHW // otherwise there will be problems in next operators working on // this data if (g > 1) { // in OneDNN groups in convolution are treated as separate // dimension which is not the case in paddlepaddle dnnl::memory::data_type in_type = funcs::ToOneDNNDataType(filter.dtype()); // for 3d conv with groups (six dimensional data reorder to // goidhw) for 2d conv with groups (five dimensional data reorder // to goihw) auto weights_tz = phi::vectorize(filter->dims()); auto weights_tz = diff_weights_memory_p->get_desc().dims(); dnnl::memory::format_tag out_format = weights_tz.size() == 6 ? dnnl::memory::format_tag::goidhw : dnnl::memory::format_tag::goihw; funcs::ReorderOneDNNHandler handler( weights_tz, filter.dtype(), in_type, onednn_engine); auto reorder_dst_memory_p = handler.AcquireDstMemory( filter_grad, out_format, dev_ctx.GetPlace()); auto reorder_p = handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p); { paddle::platform::RecordEvent record_reorder( "int_reorder", paddle::platform::TracerEventType::UserDefined, 2, paddle::platform::EventRole::kUniqueOp); reorder_p->execute( astream, *diff_weights_memory_p, *reorder_dst_memory_p); astream.wait(); } // So here we have a data in goihw , which can be interpreted as // OIHW (OIDHW for conv3d) because filter_grad shape is set for // OIHW (OIDHW for conv3d) dnnl::memory::format_tag target_format = weights_tz.size() == 6 ? dnnl::memory::format_tag::oidhw : dnnl::memory::format_tag::oihw; filter_grad->set_mem_desc( dnnl::memory::desc(phi::vectorize(filter_grad->dims()), in_type, target_format)); } else { filter_grad->set_mem_desc(diff_weights_memory_p->get_desc()); } } if (input_grad) { auto weights_memory_p = handler.AcquireWeightsMemoryWithReorderFromDataPrimitive( &filter, groups, strides.size() == 3U); auto diff_dst_memory_p = handler.AcquireDiffDstMemoryWithReorderMemoryFromDataPrimitive( &out_grad); auto diff_src_memory_p = handler.AcquireDiffSrcMemory(input_grad); auto conv_bwd_data_p = handler.AcquireBackwardPrimitive(); conv_bwd_data_p->execute(astream, {{DNNL_ARG_WEIGHTS, *weights_memory_p}, {DNNL_ARG_DIFF_DST, *diff_dst_memory_p}, {DNNL_ARG_DIFF_SRC, *diff_src_memory_p}}); astream.wait(); input_grad->set_mem_desc(diff_src_memory_p->get_desc()); } })); } template void DepthwiseConvGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, int groups, const std::vector& dilations, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { ConvGradKernel(dev_ctx, input, filter, out_grad, strides, paddings, padding_algorithm, dilations, groups, data_format, input_grad, filter_grad); } template void Conv3DGradKernel(const Context& dev_ctx, const DenseTensor& input, const DenseTensor& filter, const DenseTensor& out_grad, const std::vector& strides, const std::vector& paddings, const std::string& padding_algorithm, int groups, const std::vector& dilations, const std::string& data_format, DenseTensor* input_grad, DenseTensor* filter_grad) { ConvGradKernel(dev_ctx, input, filter, out_grad, strides, paddings, padding_algorithm, dilations, groups, data_format, input_grad, filter_grad); } } // namespace phi PD_REGISTER_KERNEL(conv2d_grad, OneDNN, ONEDNN, phi::ConvGradKernel, float, phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(depthwise_conv2d_grad, OneDNN, ONEDNN, phi::DepthwiseConvGradKernel, float, phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(conv3d_grad, OneDNN, ONEDNN, phi::Conv3DGradKernel, float) {}