未验证 提交 0a4ffbc7 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Move dot kernel impl (#38359)

* move dot kernel impl

* remove needless cmake items
上级 ebbd3564
......@@ -20,13 +20,13 @@ limitations under the License. */
// the kernel declare statement is automatically generated according to the
// file name of the kernel, and this header file will be removed
PT_DECLARE_KERNEL(dot, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, CPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PT_DECLARE_KERNEL(dot, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(sign, GPU, ALL_LAYOUT);
PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
......
......@@ -18,6 +18,7 @@
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/include/infermeta.h"
#include "paddle/pten/kernels/cpu/linalg.h"
#include "paddle/pten/kernels/dot_kernel.h"
#include "paddle/pten/kernels/gpu/linalg.h"
namespace pten {
......@@ -31,7 +32,7 @@ DenseTensor Dot(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
Dot<T>(dev_ctx, x, y, &dense_out);
Dot<T, ContextT>(dev_ctx, x, y, &dense_out);
return dense_out;
}
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/pten/kernels/dot_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/complex.h"
namespace pten {
// Computes the dot product of x and y, contracting along the trailing axis:
// the flat inputs are treated as (N / B) rows of length B (B = extent of the
// last axis) and one inner product is written per row into out.
//
// T        - element type (float/double/int/int64_t/complex).
// ContextT - backend device context (CPU here).
// out      - pre-shaped output tensor; its buffer is allocated via
//            mutable_data<T>() and receives N / B scalars.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
         const DenseTensor& x,
         const DenseTensor& y,
         DenseTensor* out) {
  const T* x_ptr = x.data<T>();
  const T* y_ptr = y.data<T>();
  auto* z = out->mutable_data<T>();

  auto&& d = x.dims();
  auto const N = x.numel();
  auto const B = d[d.size() - 1];

  // Guard empty inputs: without this, N == 0 with B == 0 would divide by
  // zero below.
  if (N == 0 || B == 0) return;

  // 64-bit indices: numel() may exceed INT_MAX for large tensors, and the
  // previous int counters would overflow.
  for (int64_t j = 0; j < N / B; j++) {
    T ss = 0;
    // Both cursors advance in lock step over the flat storage, consuming B
    // pairs per output element.
    for (int64_t i = 0; i < B; i++) ss += (*x_ptr++) * (*y_ptr++);
    z[j] = ss;
  }
}
} // namespace pten
// Short aliases so the complex element types fit the registration list.
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
// Registers pten::Dot with the kernel registry for the CPU backend under all
// layouts, instantiated once per element type listed below. The empty braces
// are the (unused) per-kernel customization body the macro expects.
PT_REGISTER_CTX_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
......@@ -25,28 +25,6 @@
namespace pten {
// Dot product over the trailing axis: views both operands as (numel / B)
// rows of length B, where B is the extent of the last dimension, and writes
// one inner product per row into out.
template <typename T>
void Dot(const CPUContext& dev_ctx,
         const DenseTensor& x,
         const DenseTensor& y,
         DenseTensor* out) {
  // Flat cursors over both operands; they advance in lock step below.
  const T* x_cursor = x.data<T>();
  const T* y_cursor = y.data<T>();
  T* result = out->mutable_data<T>();

  const auto& dims = x.dims();
  const auto total = x.numel();
  const auto row_len = dims[dims.size() - 1];
  const auto rows = total / row_len;

  for (int row = 0; row < rows; row++) {
    T acc = 0;
    for (int k = 0; k < row_len; k++) {
      acc += (*x_cursor++) * (*y_cursor++);
    }
    result[row] = acc;
  }
}
template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
......@@ -73,17 +51,6 @@ void Matmul(const CPUContext& dev_ctx,
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot,
CPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL(matmul,
CPU,
ALL_LAYOUT,
......
......@@ -22,12 +22,6 @@
namespace pten {
// CPU-backend dot product kernel: contracts x and y along the trailing axis
// and writes one scalar per leading row into out. Definition lives in the
// corresponding CPU kernel source file.
template <typename T>
void Dot(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T>
void Matmul(const CPUContext& dev_ctx,
const DenseTensor& x,
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
// Backend-generic dot product kernel declaration: contracts x and y along
// the trailing axis, writing one scalar per leading row into out. ContextT
// selects the device context (CPU/GPU); definitions live in the per-backend
// kernel sources, which register this template for their supported types.
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
} // namespace pten
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
#include "paddle/pten/kernels/dot_kernel.h"
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/common.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/operators/eigen/eigen_function.h"
#include "paddle/fluid/platform/complex.h"
namespace pten {
namespace eigen {
template <typename DevCtx, typename T>
void Dot(const DevCtx& dev_ctx,
template <typename T, typename ContextT>
void Dot(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
......@@ -46,5 +47,18 @@ void Dot(const DevCtx& dev_ctx,
}
}
} // namespace eigen
} // namespace pten
// Short aliases so the complex element types fit the registration list.
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
// Registers pten::Dot with the kernel registry for the GPU backend under all
// layouts, instantiated once per element type listed below. The empty braces
// are the (unused) per-kernel customization body the macro expects.
PT_REGISTER_CTX_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
......@@ -15,7 +15,6 @@
#include "paddle/pten/kernels/gpu/linalg.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/hybird/eigen/dot.h"
#include "paddle/pten/kernels/hybird/math/matmul_func.h"
// See Note [ Why still include the fluid headers? ]
......@@ -23,14 +22,6 @@
namespace pten {
// GPU-backend dot product kernel: thin wrapper that forwards to the
// Eigen-based implementation, which performs the reduction on the device.
template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
eigen::Dot<GPUContext, T>(dev_ctx, x, y, out);
}
template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
......@@ -58,17 +49,6 @@ using float16 = paddle::platform::float16;
using complex64 = ::paddle::platform::complex<float>;
using complex128 = ::paddle::platform::complex<double>;
PT_REGISTER_KERNEL(dot,
GPU,
ALL_LAYOUT,
pten::Dot,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL(matmul,
GPU,
ALL_LAYOUT,
......
......@@ -22,12 +22,6 @@
namespace pten {
// GPU-backend dot product kernel declaration; the definition (an Eigen-based
// device reduction) lives in the corresponding GPU kernel source file.
template <typename T>
void Dot(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T>
void Matmul(const GPUContext& dev_ctx,
const DenseTensor& x,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册