From 22255528807c49e45e6bd0a7dbc9581d6293ea59 Mon Sep 17 00:00:00 2001 From: piotrekobi <48731682+piotrekobi@users.noreply.github.com> Date: Wed, 7 Sep 2022 10:09:19 +0200 Subject: [PATCH] [PHI] Migrate reduce sum+grad, mean+grad, min and max oneDNN kernels (#45536) * gaussian random * mkldnn to onednn renaming * fix merge conflicts * Migrate reduce_op oneDNN kernels to phi * Remove unnecessary header * remove fluid code * onednn renaming * Change std::vector to IntArray * Fix code style * Move classes from mkldnn_reuse.h to onednn_reuse.h * Move more functions from mkldnn_helper.h to onednn_helper.h * Change MKLDNN to OneDNN in VLOG message * Implement reviewer suggestions Co-authored-by: Silv3S --- .../reduce_ops/mkldnn/reduce_max_mkldnn_op.cc | 36 ---- .../mkldnn/reduce_mean_mkldnn_op.cc | 70 ------- .../reduce_ops/mkldnn/reduce_min_mkldnn_op.cc | 36 ---- .../reduce_ops/mkldnn/reduce_mkldnn_op.h | 173 ------------------ .../reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc | 54 ------ .../phi/kernels/onednn/reduce_kernel_impl.h | 150 +++++++++++++++ .../phi/kernels/onednn/reduce_max_kernel.cc | 42 +++++ .../kernels/onednn/reduce_mean_grad_kernel.cc | 61 ++++++ .../phi/kernels/onednn/reduce_mean_kernel.cc | 42 +++++ .../phi/kernels/onednn/reduce_min_kernel.cc | 43 +++++ .../kernels/onednn/reduce_sum_grad_kernel.cc | 47 +++++ .../phi/kernels/onednn/reduce_sum_kernel.cc | 43 +++++ paddle/phi/kernels/reduce_max_kernel.cc | 5 + paddle/phi/kernels/reduce_mean_kernel.cc | 5 + paddle/phi/kernels/reduce_min_kernel.cc | 5 + paddle/phi/kernels/reduce_sum_kernel.cc | 5 + 16 files changed, 448 insertions(+), 369 deletions(-) delete mode 100644 paddle/fluid/operators/reduce_ops/mkldnn/reduce_max_mkldnn_op.cc delete mode 100644 paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc delete mode 100644 paddle/fluid/operators/reduce_ops/mkldnn/reduce_min_mkldnn_op.cc delete mode 100644 paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h delete mode 100644 
paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc create mode 100644 paddle/phi/kernels/onednn/reduce_kernel_impl.h create mode 100644 paddle/phi/kernels/onednn/reduce_max_kernel.cc create mode 100644 paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc create mode 100644 paddle/phi/kernels/onednn/reduce_mean_kernel.cc create mode 100644 paddle/phi/kernels/onednn/reduce_min_kernel.cc create mode 100644 paddle/phi/kernels/onednn/reduce_sum_grad_kernel.cc create mode 100644 paddle/phi/kernels/onednn/reduce_sum_kernel.cc diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_max_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_max_mkldnn_op.cc deleted file mode 100644 index 99103d10b1..0000000000 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_max_mkldnn_op.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h" - -namespace paddle { -namespace operators { - -template -class ReduceMaxMKLDNNKernel : public ReduceMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, dnnl::algorithm::reduction_max); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_KERNEL(reduce_max, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceMaxMKLDNNKernel, - ops::ReduceMaxMKLDNNKernel); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc deleted file mode 100644 index 8dd3c0780d..0000000000 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mean_mkldnn_op.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h" - -namespace paddle { -namespace operators { - -template -class ReduceMeanMKLDNNKernel : public ReduceMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, dnnl::algorithm::reduction_mean); - } -}; - -template -class ReduceMeanGradMKLDNNKernel : public ReduceGradMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto* input_x = ctx.Input("X"); - auto input_dims = phi::vectorize(input_x->dims()); - auto reduce_dims = ctx.Attr>("dim"); - - int number_of_elements = 1; - if (!ctx.Attr("reduce_all")) { - for (size_t i = 0; i < reduce_dims.size(); ++i) { - reduce_dims[i] = (reduce_dims[i] >= 0) - ? reduce_dims[i] - : input_dims.size() + reduce_dims[i]; - number_of_elements *= input_dims[reduce_dims[i]]; - } - } else { - number_of_elements = input_x->numel(); - } - - this->RunKernel(ctx, - dnnl::algorithm::binary_add, - dnnl::algorithm::reduction_mean, - 0.0f, - 1.0L / number_of_elements); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_KERNEL(reduce_mean, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceMeanMKLDNNKernel, - ops::ReduceMeanMKLDNNKernel); - -REGISTER_OP_KERNEL(reduce_mean_grad, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceMeanGradMKLDNNKernel, - ops::ReduceMeanGradMKLDNNKernel); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_min_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_min_mkldnn_op.cc deleted file mode 100644 index 586bfe0537..0000000000 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_min_mkldnn_op.cc +++ /dev/null @@ -1,36 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h" - -namespace paddle { -namespace operators { - -template -class ReduceMinMKLDNNKernel : public ReduceMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, dnnl::algorithm::reduction_min); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_KERNEL(reduce_min, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceMinMKLDNNKernel, - ops::ReduceMinMKLDNNKernel); diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h deleted file mode 100644 index 8cfe540bae..0000000000 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h +++ /dev/null @@ -1,173 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/platform/mkldnn_reuse.h" - -namespace paddle { -namespace operators { - -using paddle::framework::LoDTensor; -using paddle::framework::Tensor; -using platform::to_void_cast; - -inline std::vector CalculateReducedDims( - const Tensor* input, - const Tensor* output, - std::vector& reduce_dims, // NOLINT - bool reduce_all, - bool keep_dim) { - if (keep_dim) return phi::vectorize(output->dims()); - - if (reduce_all) return std::vector(input->dims().size(), 1); - - std::vector output_dims(phi::vectorize(input->dims())); - for (size_t i = 0; i < reduce_dims.size(); ++i) { - // handle negative dims, f.e. "-1" means rightmost dimension - reduce_dims[i] = (reduce_dims[i] >= 0) - ? reduce_dims[i] - : input->dims().size() + reduce_dims[i]; - output_dims[reduce_dims[i]] = 1; - } - - return output_dims; -} - -template -class ReduceMKLDNNKernel : public framework::OpKernel { - public: - void RunKernel(const framework::ExecutionContext& ctx, - dnnl::algorithm reduction_type) const { - auto& dev_ctx = - ctx.template device_context(); - const auto& onednn_engine = dev_ctx.GetEngine(); - - const auto* x = ctx.Input("X"); - auto* out = ctx.Output("Out"); - - auto reduce_dims = ctx.Attr>("dim"); - bool reduce_all = ctx.Attr("reduce_all"); - bool keep_dim = ctx.Attr("keep_dim"); - - auto x_tz = phi::vectorize(x->dims()); - auto out_tz = - CalculateReducedDims(x, out, reduce_dims, reduce_all, keep_dim); - - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - - // oneDNN reduce op does not support edge case in which memory is being - // copied without actual reduction. 
- // In that case reorder must be executed to maintain compatibility with - // PaddlePaddle reduce op - if (x_tz == out_tz) { - dnnl::memory::data_type x_type = framework::ToMKLDNNDataType( - framework::TransToProtoVarType(x->dtype())); - platform::ReorderMKLDNNHandler reorder_handler( - x_tz, - framework::TransToProtoVarType(x->dtype()), - x_type, - onednn_engine); - - auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( - x->mem_desc(), platform::to_void_cast(x->data())); - - // reuse mem desc since it is a simple copy - auto reorder_dst_memory_p = - reorder_handler.AcquireDstMemory(out, x->mem_desc(), ctx.GetPlace()); - - auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p, - reorder_dst_memory_p); - - reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); - astream.wait(); - - out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape( - phi::vectorize(out->dims()))); - } else { - platform::ReductionMKLDNNHandler handler(reduction_type, - 0.0f, - 0.0f, - onednn_engine, - ctx.GetPlace(), - x, - out, - out_tz); - - auto src_memory_p = handler.AcquireSrcMemory(x); - auto dst_memory_p = handler.AcquireDstMemory(out); - - std::unordered_map reduction_args = { - {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; - - auto reduction_p = handler.AcquireForwardPrimitive(); - - reduction_p->execute(astream, reduction_args); - astream.wait(); - - out->set_mem_desc(dst_memory_p->get_desc().reshape( - phi::vectorize(out->dims()))); - } - } -}; - -template -class ReduceGradMKLDNNKernel : public framework::OpKernel { - public: - void RunKernel(const framework::ExecutionContext& ctx, - dnnl::algorithm binary_type, - dnnl::algorithm reduction_type, - float scale_x, - float scale_y) const { - const auto& dev_ctx = - ctx.template device_context(); - const auto& onednn_engine = dev_ctx.GetEngine(); - - bool keep_dim = ctx.Attr("keep_dim"); - bool reduce_all = ctx.Attr("reduce_all"); - auto dims = ctx.Attr>("dim"); - const 
auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - - auto dout_tz = CalculateReducedDims(dx, dout, dims, reduce_all, keep_dim); - auto dx_tz = phi::vectorize(dx->dims()); - - platform::BroadcastDataMKLDNNHandler handler(binary_type, - onednn_engine, - ctx.GetPlace(), - dout, - dx, - scale_x, - scale_y, - dout_tz); - - const auto src_memory_p = handler.AcquireSrcMemory(dout); - const auto dst_memory_p = handler.AcquireZeroedDstMemory(dx); - const auto binary_prim = handler.AcquireForwardPrimitive(); - - const std::unordered_map args = { - {DNNL_ARG_SRC_0, *dst_memory_p}, - {DNNL_ARG_SRC_1, *src_memory_p}, - {DNNL_ARG_DST, *dst_memory_p}}; - - auto& astream = platform::MKLDNNDeviceContext::tls().get_stream(); - binary_prim->execute(astream, args); - astream.wait(); - - dx->set_mem_desc(dst_memory_p->get_desc()); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc b/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc deleted file mode 100644 index f708b3ddd5..0000000000 --- a/paddle/fluid/operators/reduce_ops/mkldnn/reduce_sum_mkldnn_op.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/reduce_ops/mkldnn/reduce_mkldnn_op.h" - -namespace paddle { -namespace operators { - -template -class ReduceSumMKLDNNKernel : public ReduceMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, dnnl::algorithm::reduction_sum); - } -}; - -template -class ReduceSumGradMKLDNNKernel : public ReduceGradMKLDNNKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - this->RunKernel(ctx, - dnnl::algorithm::binary_add, - dnnl::algorithm::reduction_sum, - 0.0f, - 1.0f); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_KERNEL(reduce_sum, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceSumMKLDNNKernel, - ops::ReduceSumMKLDNNKernel); - -REGISTER_OP_KERNEL(reduce_sum_grad, - MKLDNN, - paddle::platform::CPUPlace, - ops::ReduceSumGradMKLDNNKernel, - ops::ReduceSumGradMKLDNNKernel); diff --git a/paddle/phi/kernels/onednn/reduce_kernel_impl.h b/paddle/phi/kernels/onednn/reduce_kernel_impl.h new file mode 100644 index 0000000000..a3c2f149a9 --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_kernel_impl.h @@ -0,0 +1,150 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include "paddle/phi/backends/onednn/onednn_reuse.h" + +namespace phi { + +inline std::vector CalculateReducedDims( + const DenseTensor* input, + const DenseTensor* output, + const std::vector& reduce_dims, // NOLINT + bool reduce_all, + bool keep_dim) { + if (keep_dim) return vectorize(output->dims()); + + if (reduce_all && reduce_dims.size() > 0) + return std::vector(input->dims().size(), 1); + + std::vector output_dims(vectorize(input->dims())); + for (size_t i = 0; i < reduce_dims.size(); ++i) { + // handle negative dims, f.e. "-1" means rightmost dimension + int index = (reduce_dims[i] >= 0) ? reduce_dims[i] + : input->dims().size() + reduce_dims[i]; + output_dims[index] = 1; + } + + return output_dims; +} + +template +void ReduceKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out, + dnnl::algorithm reduction_type) { + const auto& onednn_engine = dev_ctx.GetEngine(); + auto x_tz = vectorize(x.dims()); + auto out_tz = + CalculateReducedDims(&x, out, dims.GetData(), reduce_all, keep_dim); + + auto& astream = OneDNNContext::tls().get_stream(); + + // oneDNN reduce op does not support edge case in which memory is being + // copied without actual reduction. 
+ // In that case reorder must be executed to maintain compatibility with + // PaddlePaddle reduce op + if (x_tz == out_tz) { + dnnl::memory::data_type x_type = funcs::ToOneDNNDataType((x.dtype())); + + funcs::ReorderOneDNNHandler reorder_handler( + x_tz, x.dtype(), x_type, onednn_engine); + + auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory( + x.mem_desc(), funcs::to_void_cast(x.data())); + + // reuse mem desc since it is a simple copy + auto reorder_dst_memory_p = + reorder_handler.AcquireDstMemory(out, x.mem_desc(), dev_ctx.GetPlace()); + + auto reorder_p = reorder_handler.AcquireReorder(reorder_src_memory_p, + reorder_dst_memory_p); + + reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); + astream.wait(); + + out->set_mem_desc(reorder_dst_memory_p->get_desc().reshape( + vectorize(out->dims()))); + } else { + funcs::ReductionOneDNNHandler handler(reduction_type, + 0.0f, + 0.0f, + onednn_engine, + dev_ctx.GetPlace(), + &x, + out, + out_tz); + + auto src_memory_p = handler.AcquireSrcMemory(&x); + auto dst_memory_p = handler.AcquireDstMemory(out); + + std::unordered_map reduction_args = { + {DNNL_ARG_SRC, *src_memory_p}, {DNNL_ARG_DST, *dst_memory_p}}; + + auto reduction_p = handler.AcquireForwardPrimitive(); + + reduction_p->execute(astream, reduction_args); + astream.wait(); + + out->set_mem_desc( + dst_memory_p->get_desc().reshape(vectorize(out->dims()))); + } +} + +template +void ReduceGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad, + dnnl::algorithm binary_type, + dnnl::algorithm reduction_type, + float scale_x, + float scale_y) { + const auto& onednn_engine = dev_ctx.GetEngine(); + auto out_grad_tz = CalculateReducedDims( + x_grad, &out_grad, dims.GetData(), reduce_all, keep_dim); + auto x_grad_tz = vectorize(x_grad->dims()); + + funcs::BroadcastDataOneDNNHandler handler(binary_type, + onednn_engine, + 
dev_ctx.GetPlace(), + &out_grad, + x_grad, + scale_x, + scale_y, + out_grad_tz); + + const auto src_memory_p = handler.AcquireSrcMemory(&out_grad); + const auto dst_memory_p = handler.AcquireZeroedDstMemory(x_grad); + const auto binary_prim = handler.AcquireForwardPrimitive(); + + const std::unordered_map args = { + {DNNL_ARG_SRC_0, *dst_memory_p}, + {DNNL_ARG_SRC_1, *src_memory_p}, + {DNNL_ARG_DST, *dst_memory_p}}; + + auto& astream = OneDNNContext::tls().get_stream(); + binary_prim->execute(astream, args); + astream.wait(); + + x_grad->set_mem_desc(dst_memory_p->get_desc()); +} + +} // namespace phi diff --git a/paddle/phi/kernels/onednn/reduce_max_kernel.cc b/paddle/phi/kernels/onednn/reduce_max_kernel.cc new file mode 100644 index 0000000000..55fe45554e --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_max_kernel.cc @@ -0,0 +1,42 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/reduce_max_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void MaxRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + ReduceKernel(dev_ctx, + x, + dims, + keep_dim, + reduce_all, + out, + dnnl::algorithm::reduction_max); +} +} // namespace phi + +PD_REGISTER_KERNEL(max_raw, + OneDNN, + ALL_LAYOUT, + phi::MaxRawKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc b/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc new file mode 100644 index 0000000000..bccfce5864 --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_mean_grad_kernel.cc @@ -0,0 +1,61 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/reduce_mean_grad_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void MeanGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad) { + auto input_dims = phi::vectorize(x.dims()); + std::vector reduce_dims = dims.GetData(); + int number_of_elements = 1; + if (reduce_all == false) { + for (size_t i = 0; i < dims.size(); ++i) { + reduce_dims[i] = (reduce_dims[i] >= 0) + ? reduce_dims[i] + : input_dims.size() + reduce_dims[i]; + number_of_elements *= input_dims[reduce_dims[i]]; + } + } else { + number_of_elements = x.numel(); + } + const IntArray new_dims = IntArray(reduce_dims); + ReduceGradKernel(dev_ctx, + x, + out_grad, + new_dims, + keep_dim, + reduce_all, + x_grad, + dnnl::algorithm::binary_add, + dnnl::algorithm::reduction_mean, + 0.0f, + 1.0L / number_of_elements); +} +} // namespace phi + +PD_REGISTER_KERNEL(mean_grad, + OneDNN, + ALL_LAYOUT, + phi::MeanGradKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/reduce_mean_kernel.cc b/paddle/phi/kernels/onednn/reduce_mean_kernel.cc new file mode 100644 index 0000000000..cd5e1781db --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_mean_kernel.cc @@ -0,0 +1,42 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/reduce_mean_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void MeanRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + ReduceKernel(dev_ctx, + x, + dims, + keep_dim, + reduce_all, + out, + dnnl::algorithm::reduction_mean); +} +} // namespace phi + +PD_REGISTER_KERNEL(mean_raw, + OneDNN, + ALL_LAYOUT, + phi::MeanRawKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/reduce_min_kernel.cc b/paddle/phi/kernels/onednn/reduce_min_kernel.cc new file mode 100644 index 0000000000..118a5cc642 --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_min_kernel.cc @@ -0,0 +1,43 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/reduce_min_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void MinRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + ReduceKernel(dev_ctx, + x, + dims, + keep_dim, + reduce_all, + out, + dnnl::algorithm::reduction_min); +} + +} // namespace phi + +PD_REGISTER_KERNEL(min_raw, + OneDNN, + ALL_LAYOUT, + phi::MinRawKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/reduce_sum_grad_kernel.cc b/paddle/phi/kernels/onednn/reduce_sum_grad_kernel.cc new file mode 100644 index 0000000000..26d5aea1a9 --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_sum_grad_kernel.cc @@ -0,0 +1,47 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/reduce_sum_grad_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void SumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* x_grad) { + ReduceGradKernel(dev_ctx, + x, + out_grad, + dims, + keep_dim, + reduce_all, + x_grad, + dnnl::algorithm::binary_add, + dnnl::algorithm::reduction_sum, + 0.0f, + 1.0f); +} +} // namespace phi + +PD_REGISTER_KERNEL(sum_grad, + OneDNN, + ALL_LAYOUT, + phi::SumGradKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/onednn/reduce_sum_kernel.cc b/paddle/phi/kernels/onednn/reduce_sum_kernel.cc new file mode 100644 index 0000000000..18c714e7d9 --- /dev/null +++ b/paddle/phi/kernels/onednn/reduce_sum_kernel.cc @@ -0,0 +1,43 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/phi/kernels/reduce_sum_kernel.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/onednn/reduce_kernel_impl.h" + +namespace phi { +template +void SumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + ReduceKernel(dev_ctx, + x, + dims, + keep_dim, + reduce_all, + out, + dnnl::algorithm::reduction_sum); +} +} // namespace phi + +PD_REGISTER_KERNEL(sum_raw, + OneDNN, + ALL_LAYOUT, + phi::SumRawKernel, + float, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/reduce_max_kernel.cc b/paddle/phi/kernels/reduce_max_kernel.cc index cd38f7cbce..7a6e53d765 100644 --- a/paddle/phi/kernels/reduce_max_kernel.cc +++ b/paddle/phi/kernels/reduce_max_kernel.cc @@ -45,3 +45,8 @@ PD_REGISTER_KERNEL( #if defined(PADDLE_WITH_XPU_KP) PD_REGISTER_KERNEL(max, KPS, ALL_LAYOUT, phi::MaxKernel, float) {} #endif + +#if defined(PADDLE_WITH_MKLDNN) +PD_REGISTER_KERNEL( + max, OneDNN, ALL_LAYOUT, phi::MaxKernel, float, phi::dtype::bfloat16) {} +#endif diff --git a/paddle/phi/kernels/reduce_mean_kernel.cc b/paddle/phi/kernels/reduce_mean_kernel.cc index 4bb77ac974..df3ec97592 100644 --- a/paddle/phi/kernels/reduce_mean_kernel.cc +++ b/paddle/phi/kernels/reduce_mean_kernel.cc @@ -50,3 +50,8 @@ PD_REGISTER_KERNEL(mean, #if defined(PADDLE_WITH_XPU_KP) PD_REGISTER_KERNEL(mean, KPS, ALL_LAYOUT, phi::MeanKernel, float) {} #endif + +#if defined(PADDLE_WITH_MKLDNN) +PD_REGISTER_KERNEL( + mean, OneDNN, ALL_LAYOUT, phi::MeanKernel, float, phi::dtype::bfloat16) {} +#endif diff --git a/paddle/phi/kernels/reduce_min_kernel.cc b/paddle/phi/kernels/reduce_min_kernel.cc index 4d3041adf4..981c7afa62 100644 --- a/paddle/phi/kernels/reduce_min_kernel.cc +++ b/paddle/phi/kernels/reduce_min_kernel.cc @@ -45,3 +45,8 @@ PD_REGISTER_KERNEL( #if defined(PADDLE_WITH_XPU_KP) PD_REGISTER_KERNEL(min, KPS, ALL_LAYOUT, phi::MinKernel, float) {} #endif + +#if 
defined(PADDLE_WITH_MKLDNN) +PD_REGISTER_KERNEL( + min, OneDNN, ALL_LAYOUT, phi::MinKernel, float, phi::dtype::bfloat16) {} +#endif diff --git a/paddle/phi/kernels/reduce_sum_kernel.cc b/paddle/phi/kernels/reduce_sum_kernel.cc index 5fed4dbc44..83db2d854b 100644 --- a/paddle/phi/kernels/reduce_sum_kernel.cc +++ b/paddle/phi/kernels/reduce_sum_kernel.cc @@ -78,3 +78,8 @@ PD_REGISTER_KERNEL(sum, KPS, ALL_LAYOUT, phi::SumKernel, float) { kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); } #endif + +#if defined(PADDLE_WITH_MKLDNN) +PD_REGISTER_KERNEL( + sum, OneDNN, ALL_LAYOUT, phi::SumKernel, float, phi::dtype::bfloat16) {} +#endif -- GitLab