From ee0034575ca022d212acc9aa1d2b6f64153e8d39 Mon Sep 17 00:00:00 2001
From: wz1qqx <55830058+wz1qqx@users.noreply.github.com>
Date: Thu, 10 Aug 2023 22:57:25 -0700
Subject: [PATCH] [XPU]Add flip kernel (#55932)

---
 .../ir/xpu/add_layernorm_xpu_fuse_pass.cc  |  3 +-
 paddle/phi/backends/xpu/xpu2_op_list.cc    |  4 +-
 .../fusion/xpu/add_layernorm_xpu_kernel.cc |  3 +-
 paddle/phi/kernels/xpu/flip_kernel.cc      | 61 +++++++++++++++++++
 4 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 paddle/phi/kernels/xpu/flip_kernel.cc

diff --git a/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
index 5e50b762e8c..7a3a826fc71 100644
--- a/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
@@ -91,7 +91,8 @@ AddLayernormXPUPattern::AddLayernormXPUPattern(PDPattern* pattern,
                      ->AsInput();
   auto ele_out = pattern->NewNode(ele_out_repr())
                      ->assert_is_op_output("elementwise_add", "Out")
-                     ->assert_is_op_input("layer_norm", "X");
+                     ->assert_is_op_input("layer_norm", "X")
+                     ->assert_has_n_outputs(1);
   ele_add->LinksFrom({add_x, add_y}).LinksTo({ele_out});
   auto l_norm = pattern->NewNode(l_norm_repr())->assert_is_op("layer_norm");
   auto norm_bias = pattern->NewNode(norm_bias_repr())
diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index bb22e15d43c..f74b5d1edc9 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -24,7 +24,8 @@ XPUOpMap& get_kl2_ops() {
   static XPUOpMap s_xpu2_kernels{
       {"add_act_xpu",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
-      {"add_layernorm_xpu", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"add_layernorm_xpu",
+       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"abs", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"abs_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
@@ -371,6 +372,7 @@ XPUOpMap& get_kl2_ops() {
                      phi::DataType::INT32,
                      phi::DataType::INT8,
                      phi::DataType::FLOAT32})},
+      {"flip", XPUKernelSet({phi::DataType::FLOAT32})},
       {"full_batch_size_like",
        XPUKernelSet({phi::DataType::INT64,
                      phi::DataType::INT32,
diff --git a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
index 616e81c138c..a3a524d3e88 100644
--- a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
+++ b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
@@ -119,4 +119,5 @@ PD_REGISTER_KERNEL(add_layernorm_xpu,
                    XPU,
                    ALL_LAYOUT,
                    phi::fusion::AddLayernormXPUKernel,
-                   float) {}
+                   float,
+                   phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/xpu/flip_kernel.cc b/paddle/phi/kernels/xpu/flip_kernel.cc
new file mode 100644
index 00000000000..3311fce88bc
--- /dev/null
+++ b/paddle/phi/kernels/xpu/flip_kernel.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/flip_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void FlipKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const std::vector<int>& axis,
+                DenseTensor* out) {
+  using XPUInTDType = typename XPUTypeTrait<T>::Type;
+  int x_rank = x.dims().size();
+  std::vector<int> formated_axis(std::begin(axis), std::end(axis));
+  for (size_t i = 0; i < axis.size(); i++) {
+    if (axis[i] < 0) {
+      formated_axis[i] = static_cast<int>(axis[i] + x_rank);
+    }
+  }
+  dev_ctx.template Alloc<T>(out);
+  if (out->numel() == 0) {
+    return;
+  }
+  if (formated_axis.size() == 0) {
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+    return;
+  }
+  std::vector<int> x_shape = phi::vectorize<int>(x.dims());
+  auto x_data = reinterpret_cast<const XPUInTDType*>(x.data<T>());
+  auto out_data = reinterpret_cast<XPUInTDType*>(out->data<T>());
+  auto numel = x.numel();
+  if (numel <= 0) {
+    return;
+  }
+  int r = xpu::flip<XPUInTDType>(
+      /* Context* ctx */ dev_ctx.x_context(),
+      /* const T* x */ x_data,
+      /* T* y */ out_data,
+      /* const std::vector<int>& xshape */ x_shape,
+      /* const std::vector<int>& axis */ formated_axis);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "flip");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(flip, XPU, ALL_LAYOUT, phi::FlipKernel, float) {}
--
GitLab
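
For reference, below is a standalone host-side sketch (not part of the patch) of the
semantics the new kernel delegates to xpu::flip: negative axes are normalized into
[0, rank), exactly as FlipKernel builds formated_axis, and each flipped axis d mirrors
input coordinate i to shape[d] - 1 - i. NormalizeAxes and ReferenceFlip are illustrative
helper names, not Paddle or XDNN APIs.

#include <cstddef>
#include <iostream>
#include <vector>

// Map negative axes into [0, rank), mirroring formated_axis in FlipKernel.
std::vector<int> NormalizeAxes(const std::vector<int>& axis, int rank) {
  std::vector<int> out(axis.begin(), axis.end());
  for (size_t i = 0; i < axis.size(); i++) {
    if (axis[i] < 0) out[i] = axis[i] + rank;
  }
  return out;
}

// Reference flip over a row-major flat buffer: for each flipped axis d,
// input coordinate i_d is written to shape[d] - 1 - i_d in the output.
std::vector<float> ReferenceFlip(const std::vector<float>& x,
                                 const std::vector<int>& shape,
                                 const std::vector<int>& axis) {
  int rank = static_cast<int>(shape.size());
  std::vector<int> flipped(rank, 0);
  for (int a : NormalizeAxes(axis, rank)) flipped[a] = 1;
  std::vector<float> y(x.size());
  std::vector<int> coord(rank, 0);
  for (size_t lin = 0; lin < x.size(); lin++) {
    // Decompose the linear index into per-axis coordinates.
    size_t rem = lin;
    for (int d = rank - 1; d >= 0; d--) {
      coord[d] = static_cast<int>(rem % shape[d]);
      rem /= shape[d];
    }
    // Mirror the coordinate on each flipped axis, then re-linearize.
    size_t dst = 0, stride = 1;
    for (int d = rank - 1; d >= 0; d--) {
      int c = flipped[d] ? shape[d] - 1 - coord[d] : coord[d];
      dst += c * stride;
      stride *= shape[d];
    }
    y[dst] = x[lin];
  }
  return y;
}

int main() {
  // Flip a 2x3 matrix along its last axis; axis -1 normalizes to 1.
  std::vector<float> y = ReferenceFlip({1, 2, 3, 4, 5, 6}, {2, 3}, {-1});
  for (float v : y) std::cout << v << " ";  // prints: 3 2 1 6 5 4
  std::cout << "\n";
}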