diff --git a/paddle/fluid/operators/conj_op.h b/paddle/fluid/operators/conj_op.h
index 0b5a35f515ef0cde1f5c7d80003dd342b659918c..1012e9383f60795a1f4923ca75799fe606b19014 100644
--- a/paddle/fluid/operators/conj_op.h
+++ b/paddle/fluid/operators/conj_op.h
@@ -20,8 +20,7 @@
 // only can include the headers in paddle/pten/api dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt
index b6ea57fdf964bc3318b40d2b9994e191990d44d4..9605a3b0091a25e30b94909ae71da9a39ef579e3 100644
--- a/paddle/pten/CMakeLists.txt
+++ b/paddle/pten/CMakeLists.txt
@@ -26,10 +26,10 @@ add_subdirectory(tests)
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
 get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu)
+  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu)
 endif()
 if(WITH_XPU)
   set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h
index 975729099999584a46025669ae618dec958d9531..f8874cfb17fef0a2dd2a77c6e1d0c5fd9c682aae 100644
--- a/paddle/pten/api/lib/kernel_declare.h
+++ b/paddle/pten/api/lib/kernel_declare.h
@@ -23,13 +23,11 @@ limitations under the License. */
 PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);
 
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
 #endif
 
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
index 83471692c8746b855839c68dcf9a957fc9ca700a..876834cea7806daf04e381551e8a4d24a4b74bd6 100644
--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -17,9 +17,8 @@ limitations under the License. */
 // See Note: [ How do we organize the kernel directory ]
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
 #include "paddle/pten/kernels/cpu/math.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
 #include "paddle/pten/kernels/gpu/math.h"
 #include "paddle/pten/kernels/scale_kernel.h"
 
diff --git a/paddle/pten/kernels/cpu/conj_kernel.h b/paddle/pten/kernels/complex_kernel.h
similarity index 82%
rename from paddle/pten/kernels/cpu/conj_kernel.h
rename to paddle/pten/kernels/complex_kernel.h
index 49dad8f5b2df6a5f8d5f0d0386d7e81f63956515..b57e6d0fb4e0943afdac499ada06aeefcdac4a29 100644
--- a/paddle/pten/kernels/cpu/conj_kernel.h
+++ b/paddle/pten/kernels/complex_kernel.h
@@ -14,12 +14,11 @@ limitations under the License. */
 
 #pragma once
 
-#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
 
 namespace pten {
 
-template <typename T>
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
+template <typename T, typename ContextT>  // ContextT: type of dev_ctx
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out);
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/CMakeLists.txt b/paddle/pten/kernels/cpu/CMakeLists.txt
index 7a32fab2674c34f6cb7d7218661139977fa2fc1c..f45d511602d71a175c5f917cd955e5be93a8f431 100644
--- a/paddle/pten/kernels/cpu/CMakeLists.txt
+++ b/paddle/pten/kernels/cpu/CMakeLists.txt
@@ -2,4 +2,3 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory
 cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
 cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
 cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
-cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
diff --git a/paddle/pten/kernels/cpu/conj_kernel.cc b/paddle/pten/kernels/cpu/complex_kernel.cc
similarity index 53%
rename from paddle/pten/kernels/cpu/conj_kernel.cc
rename to paddle/pten/kernels/cpu/complex_kernel.cc
index f10d9f761eaed6cdcc12db9bf33846499e3b5c44..9bf27ef22dcd79492105fd97ec46abea49fdf88d 100644
--- a/paddle/pten/kernels/cpu/conj_kernel.cc
+++ b/paddle/pten/kernels/cpu/complex_kernel.cc
@@ -12,28 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"
 
 #include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
 
-namespace pten {
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
 
-template <typename T>
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, CPUContext>(dev_ctx, x, out);
-}
-
-}  // namespace pten
-
-PT_REGISTER_KERNEL(conj,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Conj,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>,
-                   float,
-                   double,
-                   int,
-                   int64_t) {}
+PT_REGISTER_CTX_KERNEL(conj,  // CPU registration of pten::Conj
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Conj,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>,
+                       float,
+                       double,
+                       int,
+                       int64_t) {}  // types above: presumably values of T
diff --git a/paddle/pten/kernels/gpu/CMakeLists.txt b/paddle/pten/kernels/gpu/CMakeLists.txt
index a0646e1cb7879270d25e6bf95dc8d00e82ff470f..041df126c024c8e20f8794042d3904c281fedf2d 100644
--- a/paddle/pten/kernels/gpu/CMakeLists.txt
+++ b/paddle/pten/kernels/gpu/CMakeLists.txt
@@ -3,11 +3,9 @@ if(WITH_GPU)
   nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
   nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
   nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 elseif(WITH_ROCM)
   hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
   hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
   hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
   hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 endif()
diff --git a/paddle/pten/kernels/gpu/conj_kernel.cu b/paddle/pten/kernels/gpu/complex_kernel.cu
similarity index 53%
rename from paddle/pten/kernels/gpu/conj_kernel.cu
rename to paddle/pten/kernels/gpu/complex_kernel.cu
index cb4fef883fdacd1062f1001c26b2b634d548cd9e..5a3c14de4036a5e60e90233b02d8ef0c5bdd9ee1 100644
--- a/paddle/pten/kernels/gpu/conj_kernel.cu
+++ b/paddle/pten/kernels/gpu/complex_kernel.cu
@@ -12,28 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"
 
 #include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
 
-namespace pten {
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
 
-template <typename T>
-void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, GPUContext>(dev_ctx, x, out);
-}
-
-}  // namespace pten
-
-PT_REGISTER_KERNEL(conj,
-                   GPU,
-                   ALL_LAYOUT,
-                   pten::Conj,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>,
-                   float,
-                   double,
-                   int,
-                   int64_t) {}
+PT_REGISTER_CTX_KERNEL(conj,  // GPU registration of pten::Conj
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::Conj,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>,
+                       float,
+                       double,
+                       int,
+                       int64_t) {}  // types above: presumably values of T
diff --git a/paddle/pten/kernels/gpu/conj_kernel.h b/paddle/pten/kernels/gpu/conj_kernel.h
deleted file mode 100644
index 7541f9290d246b37030cc2f9bedf0f229ec2b22f..0000000000000000000000000000000000000000
--- a/paddle/pten/kernels/gpu/conj_kernel.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-
-#include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/core/dense_tensor.h"
-
-namespace pten {
-
-template <typename T>
-void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
-
-}  // namespace pten
-
-#endif
diff --git a/paddle/pten/kernels/hybird/math/conj_impl.h b/paddle/pten/kernels/impl/complex_kernel_impl.h
similarity index 89%
rename from paddle/pten/kernels/hybird/math/conj_impl.h
rename to paddle/pten/kernels/impl/complex_kernel_impl.h
index 84ad0b1a6ce95a08728c49318ca011e2f18bf904..7b5cabd6806e20a1f5c159e01b8aa14f3c25bc86 100644
--- a/paddle/pten/kernels/hybird/math/conj_impl.h
+++ b/paddle/pten/kernels/impl/complex_kernel_impl.h
@@ -14,14 +14,14 @@
 
 #pragma once
 
+// See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/math/complex_functors.h"
-#include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/for_range.h"
 
 namespace pten {
 
 template <typename T, typename ContextT>
-void ConjImpl(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
   auto numel = x.numel();
   auto* x_data = x.data<T>();
   auto* out_data = out->mutable_data<T>();