move conj kernel impl (#38365)

8da9eff4 · Chen Weihang · GitHub · a3e6f18c · 8da9eff4 · 8da9eff4
11 changed files
--- a/paddle/fluid/operators/conj_op.h
+++ b/paddle/fluid/operators/conj_op.h
@@ -20,8 +20,7 @@
 // only can include the headers in paddle/pten/api dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
 namespace paddle {
 namespace operators {

--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -26,10 +26,10 @@ add_subdirectory(tests)
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
 get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu)
+  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu)
 endif()
 if(WITH_XPU)
  set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)

--- a/paddle/pten/api/lib/kernel_declare.h
+++ b/paddle/pten/api/lib/kernel_declare.h
@@ -23,13 +23,11 @@ limitations under the License. */
 PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
 #endif
 #ifdef PADDLE_WITH_XPU

--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -17,9 +17,8 @@ limitations under the License. */
 // See Note: [ How do we organize the kernel directory ]
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
 #include "paddle/pten/kernels/cpu/math.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
 #include "paddle/pten/kernels/gpu/math.h"
 #include "paddle/pten/kernels/scale_kernel.h"

--- a/paddle/pten/kernels/cpu/conj_kernel.h
+++ b/paddle/pten/kernels/cpu/conj_kernel.h
@@ -14,12 +14,11 @@ limitations under the License. */
 #pragma once
-#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 namespace pten {
-template <typename T>
+template <typename T, typename ContextT>
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out);
 }  // namespace pten
--- a/paddle/pten/kernels/cpu/CMakeLists.txt
+++ b/paddle/pten/kernels/cpu/CMakeLists.txt
@@ -2,4 +2,3 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory
 cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
 cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
 cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
-cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
--- a/paddle/pten/kernels/cpu/conj_kernel.cc
+++ b/paddle/pten/kernels/cpu/conj_kernel.cc
@@ -12,22 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"
 #include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
-namespace pten {
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
-template <typename T>
+PT_REGISTER_CTX_KERNEL(conj,
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, CPUContext>(dev_ctx, x, out);
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(conj,
                       CPU,
                       ALL_LAYOUT,
                       pten::Conj,

--- a/paddle/pten/kernels/gpu/CMakeLists.txt
+++ b/paddle/pten/kernels/gpu/CMakeLists.txt
@@ -3,11 +3,9 @@ if(WITH_GPU)
  nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
  nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
  nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 elseif(WITH_ROCM)
  hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
  hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
  hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
  hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 endif()
--- a/paddle/pten/kernels/gpu/conj_kernel.cu
+++ b/paddle/pten/kernels/gpu/conj_kernel.cu
@@ -12,22 +12,16 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"
 #include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
-namespace pten {
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
-template <typename T>
+PT_REGISTER_CTX_KERNEL(conj,
-void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, GPUContext>(dev_ctx, x, out);
-}
-}  // namespace pten
-PT_REGISTER_KERNEL(conj,
                       GPU,
                       ALL_LAYOUT,
                       pten::Conj,

--- a/paddle/pten/kernels/gpu/conj_kernel.h
+++ b/paddle/pten/kernels/gpu/conj_kernel.h
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-#include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/core/dense_tensor.h"
-namespace pten {
-template <typename T>
-void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
-}  // namespace pten
-#endif
--- a/paddle/pten/kernels/hybird/math/conj_impl.h
+++ b/paddle/pten/kernels/hybird/math/conj_impl.h
@@ -14,14 +14,14 @@
 #pragma once
+// See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/math/complex_functors.h"
-#include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/for_range.h"
 namespace pten {
 template <typename T, typename ContextT>
-void ConjImpl(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
  auto numel = x.numel();
  auto* x_data = x.data<T>();
  auto* out_data = out->mutable_data<T>();