From d666c7df39f78e5a0aa4a5ec18de47406940c56b Mon Sep 17 00:00:00 2001 From: Paulina Gacek Date: Mon, 12 Dec 2022 04:04:53 +0100 Subject: [PATCH] [PHI] OneDNN version of Copy (#48539) * OneDNN version of Copy, transpose kernels adjusted * style fixes in transpose_grad * redundant headers deleted --- paddle/phi/core/tensor_utils.cc | 43 ++++++++++++------- .../kernels/onednn/transpose_grad_kernel.cc | 8 ++-- paddle/phi/kernels/onednn/transpose_kernel.cc | 3 +- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/paddle/phi/core/tensor_utils.cc b/paddle/phi/core/tensor_utils.cc index 6e87f40ed0a..467552032f0 100644 --- a/paddle/phi/core/tensor_utils.cc +++ b/paddle/phi/core/tensor_utils.cc @@ -56,6 +56,9 @@ void Copy(const Context& dev_ctx, void* dst_ptr = nullptr; if (paddle::platform::is_cpu_place(dst_place)) { dst_ptr = dev_ctx.HostAlloc(dst, src.dtype()); +#ifdef PADDLE_WITH_MKLDNN + dst->set_layout(src.layout()); +#endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) } else if (paddle::platform::is_gpu_place(dst_place) || paddle::platform::is_cuda_pinned_place(dst_place)) { @@ -81,7 +84,7 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( dst->place(), dst_place, - phi::errors::Unavailable( + errors::Unavailable( "The Dst Tensor's place and dst_place do not match, Tensor's place " "place is %s, dst_place is %s.", dst->place(), @@ -112,13 +115,13 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, - phi::errors::Unavailable( + errors::Unavailable( "Source place and context place do not match, source " "place is %s, context place is %s.", src_gpu_place, @@ -137,17 +140,17 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), 
true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; - PADDLE_ENFORCE_EQ(dst_gpu_place, - ctx_gpu_place, - phi::errors::Unavailable( - "Destination place and context place do not match, " - "destination place is %s, context place is %s.", - dst_gpu_place, - ctx_gpu_place)); + PADDLE_ENFORCE_EQ( + dst_gpu_place, + ctx_gpu_place, + errors::Unavailable("Destination place and context place do not match, " + "destination place is %s, context place is %s.", + dst_gpu_place, + ctx_gpu_place)); auto stream = blocking ? nullptr : reinterpret_cast(dev_ctx).stream(); @@ -161,7 +164,7 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto stream = @@ -184,7 +187,7 @@ void Copy(const Context& dev_ctx, paddle::memory::Copy( dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); } else { - PADDLE_THROW(phi::errors::Unavailable( + PADDLE_THROW(errors::Unavailable( "Context place dose not match the source and destination place.")); } } @@ -196,13 +199,13 @@ void Copy(const Context& dev_ctx, PADDLE_ENFORCE_EQ( paddle::platform::is_gpu_place(ctx_place), true, - phi::errors::PreconditionNotMet( + errors::PreconditionNotMet( "Context place error, excepted GPUPlace, but actually %s.", ctx_place)); auto ctx_gpu_place = ctx_place; PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place, - phi::errors::Unavailable( + errors::Unavailable( "Source place and context place do not match, source " "place is %s, context place is %s.", src_gpu_place, @@ -259,7 +262,7 @@ void Copy(const Context& dev_ctx, paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream); #endif } else { - PADDLE_THROW(phi::errors::Unimplemented( + PADDLE_THROW(errors::Unimplemented( "Copy from %s 
to %s is not supported.", src_place, dst_place)); } } @@ -411,4 +414,12 @@ template void Copy(const CustomContext& dev_ctx, bool blocking, DenseTensor* dst); #endif + +#ifdef PADDLE_WITH_MKLDNN +template void Copy(const OneDNNContext& dev_ctx, + const DenseTensor& src, + Place dst_place, + bool blocking, + DenseTensor* dst); +#endif } // namespace phi diff --git a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc index 64f1f9f6108..dafbb75dc07 100644 --- a/paddle/phi/kernels/onednn/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_grad_kernel.cc @@ -13,8 +13,6 @@ // limitations under the License. #include "paddle/phi/kernels/transpose_grad_kernel.h" - -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h" @@ -24,16 +22,16 @@ void TransposeGradKernel(const Context& dev_ctx, const DenseTensor& out_grad, const std::vector& axis, DenseTensor* x_grad) { - PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == phi::AllocationType::CPU, + PADDLE_ENFORCE_EQ(dev_ctx.GetPlace().GetType() == AllocationType::CPU, true, errors::PreconditionNotMet( - "Operator DNNL TransposeGrad must use CPUPlace")); + "oneDNN TransposeGrad kernel must use CPUPlace")); if (!x_grad) return; const auto& onednn_engine = dev_ctx.GetEngine(); if (axis.size() == 1) { - paddle::framework::TensorCopy(out_grad, out_grad.place(), x_grad); + Copy(dev_ctx, out_grad, out_grad.place(), false, x_grad); x_grad->set_mem_desc(out_grad.mem_desc()); return; } diff --git a/paddle/phi/kernels/onednn/transpose_kernel.cc b/paddle/phi/kernels/onednn/transpose_kernel.cc index 26c89197e0d..a36d5e4493a 100644 --- a/paddle/phi/kernels/onednn/transpose_kernel.cc +++ b/paddle/phi/kernels/onednn/transpose_kernel.cc @@ -13,7 +13,6 @@ // limitations under the License. 
#include "paddle/phi/kernels/transpose_kernel.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/onednn/onednn_reuse.h" #include "paddle/phi/core/kernel_registry.h" @@ -80,7 +79,7 @@ void TransposeKernel(const Context& dev_ctx, dev_ctx, const_cast(&x), x.mem_desc()); if (axis.size() == 1) { - paddle::framework::TensorCopy(x, x.place(), out); + Copy(dev_ctx, x, x.place(), false, out); out->set_mem_desc(x.mem_desc()); return; } -- GitLab