diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
index a01901950bc41753fc5d5a92e47571fc24f898bf..611c54d74c9cbb6535fdda785d10b846e5baba2a 100644
--- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc
@@ -224,8 +224,3 @@ REGISTER_OP_KERNEL(transpose_grad,
                     MKLDNN,
                     ::paddle::platform::CPUPlace,
                     ops::TransposeMKLDNNGradOpKernel<float>);
-
-REGISTER_OP_KERNEL(transpose2_grad,
-                   MKLDNN,
-                   ::paddle::platform::CPUPlace,
-                   ops::TransposeMKLDNNGradOpKernel<float>);
diff --git a/paddle/phi/backends/onednn/onednn_reuse.h b/paddle/phi/backends/onednn/onednn_reuse.h
index d52e0bd5db8e44979bc0c9fec6d18f630dfd085c..4775a694e1f1af6857de3b82446eecccc0c3e2e2 100644
--- a/paddle/phi/backends/onednn/onednn_reuse.h
+++ b/paddle/phi/backends/onednn/onednn_reuse.h
@@ -1046,5 +1046,69 @@ class ClipOneDNNHandler
                                             to_void_cast<T>(input_data));
   }
 };
+template <typename T>
+class TransposeOneDNNHandler {
+ public:
+  TransposeOneDNNHandler(const OneDNNContext& dev_ctx,
+                         std::vector<int64_t>& dims,  // NOLINT
+                         std::vector<int>& axis,      // NOLINT
+                         dnnl::engine engine)
+      : dev_ctx_(dev_ctx),
+        dims_(dims),
+        axis_(axis),
+        logical_axis_(dims.size(), 0),
+        engine_(engine) {}
+
+  std::shared_ptr<dnnl::memory> AcquireSrcMemory(const OneDNNMemoryFormat& fmt,
+                                                 void* ptr) {
+    // Make memory descriptor using input format, unless it
+    // cannot be trusted (nchw) then make up memory fmt manually
+    for (size_t i = 0; i < this->logical_axis_.size(); ++i) {
+      this->logical_axis_[i] = i;
+    }
+
+    auto src_md = fmt != OneDNNMemoryFormat::nchw
+                      ? OneDNNMemDesc(dims_, OneDNNGetDataType<T>(), fmt)
+                      : Axis2MemoryDesc(dims_, logical_axis_);
+    return std::make_shared<dnnl::memory>(src_md, engine_, ptr);
+  }
+
+  std::shared_ptr<dnnl::memory> AcquireDstMemory(DenseTensor* output,
+                                                 Place place) {
+    auto dst_md = Axis2MemoryDesc(dims_, axis_);
+    auto dst_data = dev_ctx_.Alloc<T>(output);
+    return std::make_shared<dnnl::memory>(dst_md, engine_, dst_data);
+  }
+
+  std::shared_ptr<dnnl::reorder> AcquireTranspose(
+      std::shared_ptr<dnnl::memory> dst_memory_p,
+      std::shared_ptr<dnnl::memory> src_memory_p) {
+    return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p));
+  }
+
+ protected:
+  dnnl::memory::desc Axis2MemoryDesc(std::vector<int64_t>& nchw_tz,  // NOLINT
+                                     std::vector<int>& axis          // NOLINT
+  ) {
+    size_t ndims = axis.size();
+
+    std::vector<int64_t> strides(ndims);
+    unsigned int total_stride = 1;
+    for (int i = ndims - 1; i >= 0; --i) {
+      strides[axis[i]] = total_stride;
+      total_stride *= nchw_tz[axis[i]];
+    }
+    dnnl::memory::desc mem_d(nchw_tz, OneDNNGetDataType<T>(), strides);
+
+    return mem_d;
+  }
+
+ private:
+  const OneDNNContext& dev_ctx_;
+  std::vector<int64_t> dims_;
+  std::vector<int> axis_;
+  std::vector<int> logical_axis_;
+  dnnl::engine engine_;
+};
 }  // namespace funcs
 }  // namespace phi
diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..09f410c61c90fdef329996522ce880ed29353d89
--- /dev/null
+++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc
@@ -0,0 +1,68 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/transpose_grad_kernel.h"
+
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+template <typename T, typename Context>
+void TransposeGradKernel(const Context& dev_ctx,
+                         const DenseTensor& out_grad,
+                         const std::vector<int>& axis,
+                         DenseTensor* x_grad) {
+  PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU,
+                    true,
+                    errors::PreconditionNotMet(
+                        "Operator DNNL TransposeGrad must use CPUPlace"));
+  if (!x_grad) return;
+
+  const auto& onednn_engine = dev_ctx.GetEngine();
+  std::vector<int> reversed_axis(axis);
+  if (axis.size() == 1) {
+    paddle::framework::TensorCopy(out_grad, out_grad.place(), x_grad);
+    x_grad->set_format(out_grad.format());
+    return;
+  }
+
+  for (size_t i = 0; i < axis.size(); i++) {
+    reversed_axis[axis[i]] = i;
+  }
+
+  const T* out_grad_data = out_grad.data<T>();
+  dev_ctx.template Alloc<T>(x_grad);
+  auto nchw_tz = vectorize(out_grad.dims());
+
+  funcs::TransposeOneDNNHandler<T> handler(
+      dev_ctx, nchw_tz, reversed_axis, onednn_engine);
+
+  auto transpose_src_memory_p = handler.AcquireSrcMemory(
+      out_grad.format(), funcs::to_void_cast<T>(out_grad_data));
+  auto transpose_dst_memory_p =
+      handler.AcquireDstMemory(x_grad, dev_ctx.GetPlace());
+  auto transpose_p =
+      handler.AcquireTranspose(transpose_dst_memory_p, transpose_src_memory_p);
+
+  auto& astream = OneDNNContext::tls().get_stream();
+  transpose_p->execute(
+      astream, *transpose_src_memory_p, *transpose_dst_memory_p);
+  astream.wait();
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    transpose_grad, OneDNN, ALL_LAYOUT, phi::TransposeGradKernel, float) {}
diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py
index adef1b25aaa9755cbe173bc6ebb1fa4dca28fa9c..9ae50dfd1701139d1819e5d516c359424feea787 100644
--- a/python/paddle/fluid/tests/unittests/test_transpose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py
@@ -16,7 +16,7 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
-from op_test import OpTest, convert_float_to_uint16
+from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
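Reviewer note (not part of the diff): a minimal NumPy sketch of the permutation identity the new phi kernel relies on. The loop `reversed_axis[axis[i]] = i` builds the inverse permutation, so reordering `out_grad` by `reversed_axis` restores the layout of the forward input; the oneDNN reorder acquired from `TransposeOneDNNHandler` realizes the same mapping through the strided destination descriptor built in `Axis2MemoryDesc`. The shape and permutation below are arbitrary, assumed only for illustration.

```python
import numpy as np

# Arbitrary example permutation and input, chosen only for illustration.
axis = [0, 2, 3, 1]             # forward transpose permutation (NCHW -> NHWC)
x = np.random.rand(2, 3, 4, 5)  # stands in for the forward input

# Inverse permutation, mirroring `reversed_axis[axis[i]] = i` in the kernel.
reversed_axis = [0] * len(axis)
for i, a in enumerate(axis):
    reversed_axis[a] = i

out = x.transpose(axis)                      # forward transpose
x_roundtrip = out.transpose(reversed_axis)   # what the grad kernel applies to out_grad

# Transposing by the inverse permutation recovers the original layout,
# so x_grad ends up with the same shape and element order as x.
assert x_roundtrip.shape == x.shape
assert np.array_equal(x_roundtrip, x)
print("inverse permutation:", reversed_axis)
```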