Unverified commit 8da9eff4, authored by Chen Weihang, committed by GitHub

move conj kernel impl (#38365)

Parent: a3e6f18c
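For orientation, the net effect of this commit on the public conj declaration, extracted from the hunks below (this is an illustrative summary, not a file touched by the commit):

// Before: one backend-specific declaration per device
// (paddle/pten/kernels/cpu/conj_kernel.h and paddle/pten/kernels/gpu/conj_kernel.h).
namespace pten {
template <typename T>
void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
}  // namespace pten

// After: a single device-agnostic declaration in paddle/pten/kernels/complex_kernel.h,
// with the shared body in paddle/pten/kernels/impl/complex_kernel_impl.h and
// per-device registration kept in the CPU/GPU source files.
namespace pten {
template <typename T, typename ContextT>
void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out);
}  // namespace pten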
@@ -20,8 +20,7 @@
 // only can include the headers in paddle/pten/api dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"

 namespace paddle {
 namespace operators {
...
@@ -26,10 +26,10 @@ add_subdirectory(tests)
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
 get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu)
+set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
 if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu conj_kernel_gpu)
+  set(PTEN_DEPS ${PTEN_DEPS} math_gpu linalg_gpu manipulation_gpu)
 endif()
 if(WITH_XPU)
   set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu)
...
@@ -23,13 +23,11 @@ limitations under the License. */
 PT_DECLARE_KERNEL(matmul, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, CPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, CPU, ALL_LAYOUT);

 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 PT_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(cast, GPU, ALL_LAYOUT);
 PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
-PT_DECLARE_KERNEL(conj, GPU, ALL_LAYOUT);
 #endif

 #ifdef PADDLE_WITH_XPU
...
@@ -17,9 +17,8 @@ limitations under the License. */
 // See Note: [ How do we organize the kernel directory ]
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
 #include "paddle/pten/kernels/cpu/math.h"
-#include "paddle/pten/kernels/gpu/conj_kernel.h"
 #include "paddle/pten/kernels/gpu/math.h"
 #include "paddle/pten/kernels/scale_kernel.h"
...
@@ -14,12 +14,11 @@ limitations under the License. */
 #pragma once

-#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"

 namespace pten {

-template <typename T>
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
+template <typename T, typename ContextT>
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out);

 }  // namespace pten
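A minimal usage sketch of the device-agnostic declaration above. This is illustrative only: the wrapper name ConjOnCpu and the pre-existing dev_ctx/x/out objects are assumptions, not part of the commit; the include paths and type names are taken from the hunks in this diff.

// Illustrative sketch: calling the templated Conj with an explicit context type.
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/complex_kernel.h"

// Hypothetical helper, not part of the commit.
void ConjOnCpu(const pten::CPUContext& dev_ctx,
               const pten::DenseTensor& x,
               pten::DenseTensor* out) {
  // The device context is now a template parameter instead of a fixed CPUContext;
  // float is one of the dtypes registered for this kernel in the diff below.
  pten::Conj<float, pten::CPUContext>(dev_ctx, x, out);
}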
@@ -2,4 +2,3 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory
 cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
 cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
 cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
-cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory)
@@ -12,28 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/pten/kernels/cpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"

 #include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
-
-namespace pten {
-
-template <typename T>
-void Conj(const CPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, CPUContext>(dev_ctx, x, out);
-}
-
-}  // namespace pten
-
-PT_REGISTER_KERNEL(conj,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Conj,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>,
-                   float,
-                   double,
-                   int,
-                   int64_t) {}
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
+
+PT_REGISTER_CTX_KERNEL(conj,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Conj,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>,
+                       float,
+                       double,
+                       int,
+                       int64_t) {}
@@ -3,11 +3,9 @@ if(WITH_GPU)
   nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
   nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
   nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  nv_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 elseif(WITH_ROCM)
   hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
   hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
   hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
   hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
-  hip_library(conj_kernel_gpu SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory)
 endif()
@@ -12,28 +12,22 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/pten/kernels/gpu/conj_kernel.h"
+#include "paddle/pten/kernels/complex_kernel.h"
+#include "paddle/pten/kernels/impl/complex_kernel_impl.h"

 #include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/hybird/math/conj_impl.h"
-
-namespace pten {
-
-template <typename T>
-void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out) {
-  ConjImpl<T, GPUContext>(dev_ctx, x, out);
-}
-
-}  // namespace pten
-
-PT_REGISTER_KERNEL(conj,
-                   GPU,
-                   ALL_LAYOUT,
-                   pten::Conj,
-                   paddle::platform::complex<float>,
-                   paddle::platform::complex<double>,
-                   float,
-                   double,
-                   int,
-                   int64_t) {}
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/platform/complex.h"
+
+PT_REGISTER_CTX_KERNEL(conj,
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::Conj,
+                       paddle::platform::complex<float>,
+                       paddle::platform::complex<double>,
+                       float,
+                       double,
+                       int,
+                       int64_t) {}
deleted file: paddle/pten/kernels/gpu/conj_kernel.h
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
namespace pten {
template <typename T>
void Conj(const GPUContext& dev_ctx, const DenseTensor& x, DenseTensor* out);
} // namespace pten
#endif
@@ -14,14 +14,14 @@
 #pragma once

+// See Note [ Why still include the fluid headers? ]
 #include "paddle/fluid/operators/math/complex_functors.h"
-#include "paddle/fluid/platform/complex.h"
 #include "paddle/fluid/platform/for_range.h"

 namespace pten {

 template <typename T, typename ContextT>
-void ConjImpl(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
+void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
   auto numel = x.numel();
   auto* x_data = x.data<T>();
   auto* out_data = out->mutable_data<T>();
...
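The function body above is truncated in this view. Based on the two fluid headers the file keeps (complex_functors.h and for_range.h), the shared implementation presumably finishes by applying a conj functor element-wise via ForRange. The sketch below is an inference from those includes, not the verbatim remainder of the file, and the ConjFunctor constructor argument order is an assumption.

// Inferred sketch of how the truncated body likely continues; not verbatim source.
template <typename T, typename ContextT>
void Conj(const ContextT& dev_ctx, const DenseTensor& x, DenseTensor* out) {
  auto numel = x.numel();
  auto* x_data = x.data<T>();
  auto* out_data = out->mutable_data<T>();

  // Run an element-wise loop on the given device context; for complex T the
  // functor negates the imaginary part, for real T it copies the value through.
  paddle::platform::ForRange<ContextT> for_range(dev_ctx, numel);
  paddle::operators::math::ConjFunctor<T> functor(x_data, numel, out_data);  // argument order assumed
  for_range(functor);
}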