From ee0034575ca022d212acc9aa1d2b6f64153e8d39 Mon Sep 17 00:00:00 2001
From: wz1qqx <55830058+wz1qqx@users.noreply.github.com>
Date: Thu, 10 Aug 2023 22:57:25 -0700
Subject: [PATCH] [XPU]Add flip kernel (#55932)

---
 .../ir/xpu/add_layernorm_xpu_fuse_pass.cc  |  3 +-
 paddle/phi/backends/xpu/xpu2_op_list.cc    |  4 +-
 .../fusion/xpu/add_layernorm_xpu_kernel.cc |  3 +-
 paddle/phi/kernels/xpu/flip_kernel.cc      | 61 +++++++++++++++++++
 4 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 paddle/phi/kernels/xpu/flip_kernel.cc

diff --git a/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc b/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
index 5e50b762e8c..7a3a826fc71 100644
--- a/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/xpu/add_layernorm_xpu_fuse_pass.cc
@@ -91,7 +91,8 @@ AddLayernormXPUPattern::AddLayernormXPUPattern(PDPattern* pattern,
                      ->AsInput();
   auto ele_out = pattern->NewNode(ele_out_repr())
                      ->assert_is_op_output("elementwise_add", "Out")
-                     ->assert_is_op_input("layer_norm", "X");
+                     ->assert_is_op_input("layer_norm", "X")
+                     ->assert_has_n_outputs(1);
   ele_add->LinksFrom({add_x, add_y}).LinksTo({ele_out});
   auto l_norm = pattern->NewNode(l_norm_repr())->assert_is_op("layer_norm");
   auto norm_bias = pattern->NewNode(norm_bias_repr())
diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index bb22e15d43c..f74b5d1edc9 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -24,7 +24,8 @@ XPUOpMap& get_kl2_ops() {
   static XPUOpMap s_xpu2_kernels{
       {"add_act_xpu",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
-      {"add_layernorm_xpu", XPUKernelSet({phi::DataType::FLOAT32})},
+      {"add_layernorm_xpu",
+       XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"abs", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
       {"abs_grad",
        XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})},
@@ -371,6 +372,7 @@ XPUOpMap& get_kl2_ops() {
                      phi::DataType::INT32,
                      phi::DataType::INT8,
                      phi::DataType::FLOAT32})},
+      {"flip", XPUKernelSet({phi::DataType::FLOAT32})},
       {"full_batch_size_like",
        XPUKernelSet({phi::DataType::INT64,
                      phi::DataType::INT32,
diff --git a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
index 616e81c138c..a3a524d3e88 100644
--- a/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
+++ b/paddle/phi/kernels/fusion/xpu/add_layernorm_xpu_kernel.cc
@@ -119,4 +119,5 @@ PD_REGISTER_KERNEL(add_layernorm_xpu,
                    XPU,
                    ALL_LAYOUT,
                    phi::fusion::AddLayernormXPUKernel,
-                   float) {}
+                   float,
+                   phi::dtype::float16) {}
diff --git a/paddle/phi/kernels/xpu/flip_kernel.cc b/paddle/phi/kernels/xpu/flip_kernel.cc
new file mode 100644
index 00000000000..3311fce88bc
--- /dev/null
+++ b/paddle/phi/kernels/xpu/flip_kernel.cc
@@ -0,0 +1,61 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/kernels/flip_kernel.h"
+
+#include "paddle/phi/backends/xpu/enforce_xpu.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void FlipKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                const std::vector<int>& axis,
+                DenseTensor* out) {
+  using XPUInTDType = typename XPUTypeTrait<T>::Type;
+  int x_rank = x.dims().size();
+  std::vector<int> formated_axis(std::begin(axis), std::end(axis));
+  for (size_t i = 0; i < axis.size(); i++) {
+    if (axis[i] < 0) {
+      formated_axis[i] = static_cast<int>(axis[i] + x_rank);
+    }
+  }
+  dev_ctx.template Alloc<T>(out);
+  if (out->numel() == 0) {
+    return;
+  }
+  if (formated_axis.size() == 0) {
+    phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
+    return;
+  }
+  std::vector<int> x_shape = phi::vectorize<int>(x.dims());
+  auto x_data = reinterpret_cast<const XPUInTDType*>(x.data<T>());
+  auto out_data = reinterpret_cast<XPUInTDType*>(out->data<T>());
+  auto numel = x.numel();
+  if (numel <= 0) {
+    return;
+  }
+  int r = xpu::flip<XPUInTDType>(
+      /* Context* ctx */ dev_ctx.x_context(),
+      /* const T* x */ x_data,
+      /* T* y */ out_data,
+      /* const std::vector<int>& xshape */ x_shape,
+      /* const std::vector<int>& axis */ formated_axis);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "flip");
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(flip, XPU, ALL_LAYOUT, phi::FlipKernel, float) {}
--
GitLab
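
For reference, below is a standalone host-side sketch (not part of the patch) of the
semantics the new kernel delegates to xpu::flip: negative axes are normalized into
[0, rank), exactly as FlipKernel builds formated_axis, and each flipped axis d mirrors
input coordinate i to shape[d] - 1 - i. NormalizeAxes and ReferenceFlip are illustrative
helper names, not Paddle or XDNN APIs.

#include <cstddef>
#include <iostream>
#include <vector>

// Map negative axes into [0, rank), mirroring formated_axis in FlipKernel.
std::vector<int> NormalizeAxes(const std::vector<int>& axis, int rank) {
  std::vector<int> out(axis.begin(), axis.end());
  for (size_t i = 0; i < axis.size(); i++) {
    if (axis[i] < 0) out[i] = axis[i] + rank;
  }
  return out;
}

// Reference flip over a row-major flat buffer: for each flipped axis d,
// input coordinate i_d is written to shape[d] - 1 - i_d in the output.
std::vector<float> ReferenceFlip(const std::vector<float>& x,
                                 const std::vector<int>& shape,
                                 const std::vector<int>& axis) {
  int rank = static_cast<int>(shape.size());
  std::vector<int> flipped(rank, 0);
  for (int a : NormalizeAxes(axis, rank)) flipped[a] = 1;
  std::vector<float> y(x.size());
  std::vector<int> coord(rank, 0);
  for (size_t lin = 0; lin < x.size(); lin++) {
    // Decompose the linear index into per-axis coordinates.
    size_t rem = lin;
    for (int d = rank - 1; d >= 0; d--) {
      coord[d] = static_cast<int>(rem % shape[d]);
      rem /= shape[d];
    }
    // Mirror the coordinate on each flipped axis, then re-linearize.
    size_t dst = 0, stride = 1;
    for (int d = rank - 1; d >= 0; d--) {
      int c = flipped[d] ? shape[d] - 1 - coord[d] : coord[d];
      dst += c * stride;
      stride *= shape[d];
    }
    y[dst] = x[lin];
  }
  return y;
}

int main() {
  // Flip a 2x3 matrix along its last axis; axis -1 normalizes to 1.
  std::vector<float> y = ReferenceFlip({1, 2, 3, 4, 5, 6}, {2, 3}, {-1});
  for (float v : y) std::cout << v << " ";  // prints: 3 2 1 6 5 4
  std::cout << "\n";
}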