Unverified commit de436f07, authored by zyfncg, committed by GitHub

Move matmul_v2 kernel of xpu from fluid to phi (#45446)

* move matmul_v2 kernel of xpu from fluid to phi, test=kunlun

* fix compile bug, test=kunlun

* fix compile bug, test=kunlun

* fix compile bug, test=kunlun
Parent d3ec3fe3
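The diff below follows the usual fluid-to-phi migration pattern: the name-driven fluid OpKernel is deleted, and a functional phi kernel takes its place, with inputs, attributes, and outputs passed as explicit parameters and registration done through PD_REGISTER_KERNEL. A condensed sketch of the two shapes, abridged from the full diff that follows:

    // Before (fluid): an OpKernel pulls inputs and attributes out of the
    // ExecutionContext by name at runtime.
    template <typename T>
    class MatMulV2XPUKernel : public framework::OpKernel<T> {
     public:
      void Compute(const framework::ExecutionContext& ctx) const override {
        auto* x = ctx.Input<Tensor>("X");
        bool trans_x = ctx.Attr<bool>("trans_x");
        // ... compute ...
      }
    };

    // After (phi): a free function receives the device context, tensors,
    // and attributes explicitly, and is registered declaratively.
    template <typename T, typename Context>
    void MatmulKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y,
                      bool transpose_x,
                      bool transpose_y,
                      DenseTensor* out);

    PD_REGISTER_KERNEL(
        matmul, XPU, ALL_LAYOUT, phi::MatmulKernel, float, phi::dtype::float16) {}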
@@ -44,13 +44,14 @@ class MatMulXPUKernel : public framework::OpKernel<T> {
     auto x_dims = x->dims();
     auto y_dims = y->dims();
-    XpuFcInfo fc_info;
-    GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
+    phi::XpuFcInfo fc_info;
+    phi::GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
     auto& dev_ctx =
         context.template device_context<paddle::platform::XPUDeviceContext>();
     xpu::Context* xpu_ctx = dev_ctx.x_context();
-    MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, alpha);
+    phi::MatMulXPUFunction<XPUType>(
+        xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, alpha);
   }
 };
@@ -109,8 +110,8 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
     xpu::Context* xpu_ctx = dev_ctx.x_context();
-    XpuFcInfo info_forward;
-    GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
+    phi::XpuFcInfo info_forward;
+    phi::GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
     xpu::ctx_guard RAII_GUARD(xpu_ctx);
     // begin calculate
     const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
@@ -121,28 +122,28 @@ class MatMulGradXPUKernel : public framework::OpKernel<T> {
                                 : reinterpret_cast<XPUType*>(dx->data<T>());
     XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
                                 : reinterpret_cast<XPUType*>(dy->data<T>());
-    XpuFcInfo info_dx;
-    XpuFcInfo info_dy;
-    std::tuple<XpuFcInfo,
-               XpuFcInfo,
+    phi::XpuFcInfo info_dx;
+    phi::XpuFcInfo info_dy;
+    std::tuple<phi::XpuFcInfo,
+               phi::XpuFcInfo,
                const XPUType*,
                const XPUType*,
                const XPUType*,
                const XPUType*>
-        fc_info = MatmulGradFcInfo(xpu_ctx,
-                                   &RAII_GUARD,
-                                   info_forward,
-                                   transpose_x,
-                                   transpose_y,
-                                   x_ptr,
-                                   y_ptr,
-                                   dout_ptr);
+        fc_info = phi::MatmulGradFcInfo(xpu_ctx,
+                                        &RAII_GUARD,
+                                        info_forward,
+                                        transpose_x,
+                                        transpose_y,
+                                        x_ptr,
+                                        y_ptr,
+                                        dout_ptr);
     std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
     if (dx) {
-      MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, alpha);
+      phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, alpha);
     }
     if (dy) {
-      MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, alpha);
+      phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, alpha);
     }
   }
 };
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifdef PADDLE_WITH_XPU

#include <string>
#include <vector>

#include "paddle/fluid/operators/matmul_v2_op.h"
#include "paddle/fluid/operators/xpu_api_wrapper.h"

namespace paddle {
namespace operators {

template <typename T>
class MatMulV2XPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    auto* x = ctx.Input<Tensor>("X");
    auto* y = ctx.Input<Tensor>("Y");
    auto* out = ctx.Output<Tensor>("Out");
    bool trans_x = ctx.Attr<bool>("trans_x");
    bool trans_y = ctx.Attr<bool>("trans_y");
    out->mutable_data<T>(ctx.GetPlace());
    const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
    const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y->data<T>());
    XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
    auto x_dims = x->dims();
    auto y_dims = y->dims();
    XpuFcInfo fc_info;
    GetFCInfo(x_dims, y_dims, trans_x, trans_y, &fc_info);
    auto& dev_ctx =
        ctx.template device_context<paddle::platform::XPUDeviceContext>();
    xpu::Context* xpu_ctx = dev_ctx.x_context();
    MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
  }
};

template <typename T>
class MatMulV2XPUGradKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    bool transpose_x = context.Attr<bool>("trans_x");
    bool transpose_y = context.Attr<bool>("trans_y");
    auto x = *context.Input<framework::Tensor>("X");
    auto y = *context.Input<framework::Tensor>("Y");
    auto dout =
        *context.Input<framework::Tensor>(framework::GradVarName("Out"));
    auto* dx = context.Output<framework::Tensor>(framework::GradVarName("X"));
    auto* dy = context.Output<framework::Tensor>(framework::GradVarName("Y"));
    if (dx) {
      dx->mutable_data<T>(context.GetPlace());
    }
    if (dy) {
      dy->mutable_data<T>(context.GetPlace());
    }
    auto& dev_ctx =
        context.template device_context<paddle::platform::XPUDeviceContext>();
    const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout.data<T>());
    const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
    const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
    xpu::Context* xpu_ctx = dev_ctx.x_context();
    XpuFcInfo info_forward;
    GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
    xpu::ctx_guard RAII_GUARD(xpu_ctx);
    // begin calculate
    const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
    const XPUType* b_1 = reinterpret_cast<const XPUType*>(NULL);
    const XPUType* a_2 = reinterpret_cast<const XPUType*>(NULL);
    const XPUType* b_2 = reinterpret_cast<const XPUType*>(NULL);
    XPUType* c_1 = (dx == NULL) ? reinterpret_cast<XPUType*>(NULL)
                                : reinterpret_cast<XPUType*>(dx->data<T>());
    XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
                                : reinterpret_cast<XPUType*>(dy->data<T>());
    XpuFcInfo info_dx;
    XpuFcInfo info_dy;
    std::tuple<XpuFcInfo,
               XpuFcInfo,
               const XPUType*,
               const XPUType*,
               const XPUType*,
               const XPUType*>
        fc_info = MatmulGradFcInfo(xpu_ctx,
                                   &RAII_GUARD,
                                   info_forward,
                                   transpose_x,
                                   transpose_y,
                                   x_ptr,
                                   y_ptr,
                                   dout_ptr);
    std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
    if (dx) {
      MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
    }
    if (dy) {
      MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
    }
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_XPU_KERNEL(matmul_v2,
                       ops::MatMulV2XPUKernel<float>,
                       ops::MatMulV2XPUKernel<plat::float16>);
REGISTER_OP_XPU_KERNEL(matmul_v2_grad,
                       ops::MatMulV2XPUGradKernel<float>,
                       ops::MatMulV2XPUGradKernel<plat::float16>);

#endif
@@ -59,13 +59,14 @@ class MulXPUKernel : public framework::OpKernel<T> {
     auto x_dims = x_matrix.dims();
     auto y_dims = y_matrix.dims();
-    XpuFcInfo fc_info;
-    GetFCInfo(x_dims, y_dims, trans_a, trans_b, &fc_info);
+    phi::XpuFcInfo fc_info;
+    phi::GetFCInfo(x_dims, y_dims, trans_a, trans_b, &fc_info);
     auto& dev_ctx =
         context.template device_context<paddle::platform::XPUDeviceContext>();
     xpu::Context* xpu_ctx = dev_ctx.x_context();
-    MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
+    phi::MatMulXPUFunction<XPUType>(
+        xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
   }
 };
@@ -99,8 +100,9 @@ class MulGradXPUKernel : public framework::OpKernel<T> {
     }
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
-    XpuFcInfo info_forward;
-    GetFCInfo(x_matrix.dims(), y_matrix.dims(), false, false, &info_forward);
+    phi::XpuFcInfo info_forward;
+    phi::GetFCInfo(
+        x_matrix.dims(), y_matrix.dims(), false, false, &info_forward);
     const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout->data<T>());
     const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x->data<T>());
@@ -121,28 +123,28 @@ class MulGradXPUKernel : public framework::OpKernel<T> {
         (dy == NULL)
             ? reinterpret_cast<XPUType*>(NULL)
             : reinterpret_cast<XPUType*>(dy->mutable_data<T>(ctx.GetPlace()));
-    XpuFcInfo info_dx;
-    XpuFcInfo info_dy;
-    std::tuple<XpuFcInfo,
-               XpuFcInfo,
+    phi::XpuFcInfo info_dx;
+    phi::XpuFcInfo info_dy;
+    std::tuple<phi::XpuFcInfo,
+               phi::XpuFcInfo,
                const XPUType*,
                const XPUType*,
                const XPUType*,
                const XPUType*>
-        fc_info = MatmulGradFcInfo(xpu_ctx,
-                                   &RAII_GUARD,
-                                   info_forward,
-                                   false,
-                                   false,
-                                   x_ptr,
-                                   y_ptr,
-                                   dout_ptr);
+        fc_info = phi::MatmulGradFcInfo(xpu_ctx,
+                                        &RAII_GUARD,
+                                        info_forward,
+                                        false,
+                                        false,
+                                        x_ptr,
+                                        y_ptr,
+                                        dout_ptr);
     std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
     if (dx) {
-      MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
+      phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
     }
     if (dy) {
-      MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
+      phi::MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
     }
   }
 };
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/matmul_grad_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h"

namespace phi {

template <typename T, typename Context>
void MatmulGradKernel(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y,
                      const DenseTensor& dout,
                      bool transpose_x,
                      bool transpose_y,
                      DenseTensor* dx,
                      DenseTensor* dy) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  if (dx) {
    dev_ctx.template Alloc<T>(dx);
  }
  if (dy) {
    dev_ctx.template Alloc<T>(dy);
  }
  const XPUType* dout_ptr = reinterpret_cast<const XPUType*>(dout.data<T>());
  const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
  const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
  xpu::Context* xpu_ctx = dev_ctx.x_context();
  XpuFcInfo info_forward;
  GetFCInfo(x.dims(), y.dims(), transpose_x, transpose_y, &info_forward);
  xpu::ctx_guard RAII_GUARD(xpu_ctx);
  // begin calculate
  const XPUType* a_1 = reinterpret_cast<const XPUType*>(NULL);
  const XPUType* b_1 = reinterpret_cast<const XPUType*>(NULL);
  const XPUType* a_2 = reinterpret_cast<const XPUType*>(NULL);
  const XPUType* b_2 = reinterpret_cast<const XPUType*>(NULL);
  XPUType* c_1 = (dx == NULL) ? reinterpret_cast<XPUType*>(NULL)
                              : reinterpret_cast<XPUType*>(dx->data<T>());
  XPUType* c_2 = (dy == NULL) ? reinterpret_cast<XPUType*>(NULL)
                              : reinterpret_cast<XPUType*>(dy->data<T>());
  XpuFcInfo info_dx;
  XpuFcInfo info_dy;
  std::tuple<XpuFcInfo,
             XpuFcInfo,
             const XPUType*,
             const XPUType*,
             const XPUType*,
             const XPUType*>
      fc_info = MatmulGradFcInfo(xpu_ctx,
                                 &RAII_GUARD,
                                 info_forward,
                                 transpose_x,
                                 transpose_y,
                                 x_ptr,
                                 y_ptr,
                                 dout_ptr);
  std::tie(info_dx, info_dy, a_1, b_1, a_2, b_2) = fc_info;
  if (dx) {
    MatMulXPUFunction<XPUType>(xpu_ctx, a_1, b_1, c_1, info_dx, 1.0f);
  }
  if (dy) {
    MatMulXPUFunction<XPUType>(xpu_ctx, a_2, b_2, c_2, info_dy, 1.0f);
  }
}

}  // namespace phi

PD_REGISTER_KERNEL(matmul_grad,
                   XPU,
                   ALL_LAYOUT,
                   phi::MatmulGradKernel,
                   float,
                   phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/phi/kernels/matmul_kernel.h"

#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/xpu/xpu_api_wrapper.h"

namespace phi {

template <typename T, typename Context>
void MatmulKernel(const Context& dev_ctx,
                  const DenseTensor& x,
                  const DenseTensor& y,
                  bool transpose_x,
                  bool transpose_y,
                  DenseTensor* out) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  dev_ctx.template Alloc<T>(out);
  const XPUType* x_ptr = reinterpret_cast<const XPUType*>(x.data<T>());
  const XPUType* y_ptr = reinterpret_cast<const XPUType*>(y.data<T>());
  XPUType* out_ptr = reinterpret_cast<XPUType*>(out->data<T>());
  auto x_dims = x.dims();
  auto y_dims = y.dims();
  XpuFcInfo fc_info;
  GetFCInfo(x_dims, y_dims, transpose_x, transpose_y, &fc_info);
  xpu::Context* xpu_ctx = dev_ctx.x_context();
  MatMulXPUFunction<XPUType>(xpu_ctx, x_ptr, y_ptr, out_ptr, fc_info, 1.0f);
}

}  // namespace phi

PD_REGISTER_KERNEL(
    matmul, XPU, ALL_LAYOUT, phi::MatmulKernel, float, phi::dtype::float16) {}
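For reference, a minimal sketch (not part of this commit) of how the functional kernel above could be exercised directly, for example from a C++ unit test. The helper name RunMatmulExample and the tensor shapes are illustrative assumptions; in normal execution the kernel is dispatched through the phi kernel registry rather than called by hand.

    #include "paddle/phi/backends/xpu/xpu_context.h"
    #include "paddle/phi/core/dense_tensor.h"
    #include "paddle/phi/kernels/matmul_kernel.h"

    // Hypothetical driver: assumes an XPU build and an already-initialized
    // phi::XPUContext. Tensor contents would be filled or copied in elsewhere.
    void RunMatmulExample(const phi::XPUContext& dev_ctx) {
      phi::DenseTensor x, y, out;
      x.Resize(phi::make_ddim({2, 3}));
      y.Resize(phi::make_ddim({3, 4}));
      // MatmulKernel allocates `out` but does not infer its shape here,
      // so set it up front in this direct-call sketch.
      out.Resize(phi::make_ddim({2, 4}));
      dev_ctx.Alloc<float>(&x);
      dev_ctx.Alloc<float>(&y);
      phi::MatmulKernel<float, phi::XPUContext>(
          dev_ctx, x, y, /*transpose_x=*/false, /*transpose_y=*/false, &out);
    }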
(This diff is collapsed.)