From d9fcdc3a8f441a3d421d5f34bd65adb5a0a219dd Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Mon, 20 Dec 2021 20:28:29 -0600 Subject: [PATCH] [PTen] Remove eigen and blas directory (#38291) * remove eigen and blas dir * fix declare error --- paddle/pten/CMakeLists.txt | 5 +-- paddle/pten/api/lib/kernel_declare.h | 1 + paddle/pten/kernels/CMakeLists.txt | 8 ---- paddle/pten/kernels/blas/CMakeLists.txt | 0 paddle/pten/kernels/cpu/CMakeLists.txt | 2 + .../kernels/{eigen => cpu}/full_kernel.cc | 5 +-- paddle/pten/kernels/cpu/scale_kernel.cc | 35 ++++++++++++++++ paddle/pten/kernels/cuda/CMakeLists.txt | 4 ++ .../kernels/{eigen => cuda}/full_kernel.cu | 5 +-- paddle/pten/kernels/cuda/scale_kernel.cu | 35 ++++++++++++++++ paddle/pten/kernels/eigen/CMakeLists.txt | 10 ----- .../{eigen/full.h => impl/full_kernel_impl.h} | 1 - .../scale_kernel_impl.h} | 40 ++----------------- paddle/pten/tests/api/CMakeLists.txt | 2 +- 14 files changed, 87 insertions(+), 66 deletions(-) delete mode 100644 paddle/pten/kernels/blas/CMakeLists.txt rename paddle/pten/kernels/{eigen => cpu}/full_kernel.cc (93%) create mode 100644 paddle/pten/kernels/cpu/scale_kernel.cc rename paddle/pten/kernels/{eigen => cuda}/full_kernel.cu (93%) create mode 100644 paddle/pten/kernels/cuda/scale_kernel.cu delete mode 100644 paddle/pten/kernels/eigen/CMakeLists.txt rename paddle/pten/kernels/{eigen/full.h => impl/full_kernel_impl.h} (99%) rename paddle/pten/kernels/{eigen/scale_kernel.cc => impl/scale_kernel_impl.h} (54%) diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index cda991913db..eb9a149dd6d 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -24,11 +24,10 @@ add_subdirectory(tests) # make an unity target for compile deps set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context) -set(PTEN_DEPS ${PTEN_DEPS} scale_kernel_eigen full_kernel_eigen) -set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu) +set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu conj_kernel_cpu scale_kernel_cpu full_kernel_cpu) set(PTEN_DEPS ${PTEN_DEPS} nary unary binary) if(WITH_GPU OR WITH_ROCM) - set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda) + set(PTEN_DEPS ${PTEN_DEPS} math_cuda linalg_cuda manipulation_cuda conj_kernel_cuda scale_kernel_cuda full_kernel_cuda) endif() if(WITH_XPU) set(PTEN_DEPS ${PTEN_DEPS} manipulation_xpu) diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h index a4dd3af6f0d..e748a51082c 100644 --- a/paddle/pten/api/lib/kernel_declare.h +++ b/paddle/pten/api/lib/kernel_declare.h @@ -32,6 +32,7 @@ PT_DECLARE_KERNEL(full_like, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(dot, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(flatten, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(sign, CUDA, ALL_LAYOUT); +PT_DECLARE_KERNEL(scale, CUDA, ALL_LAYOUT); PT_DECLARE_KERNEL(conj, CUDA, ALL_LAYOUT); #endif diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt index ebf659da472..d87def812d5 100644 --- a/paddle/pten/kernels/CMakeLists.txt +++ b/paddle/pten/kernels/CMakeLists.txt @@ -3,14 +3,6 @@ add_subdirectory(primitive) # pten hybird functors and functions called by kernels add_subdirectory(hybird) -# pten kernels for different backends -# NOTE(chenweihang): We need to increase the compilation option of WITH_EIGEN, -# which will support splitting eigen at compile time on demand in the future -add_subdirectory(eigen) -# NOTE(chenweihang): We need to increase the compilation option of WITH_BLAS, -# which will support splitting eigen at compile time on demand in the future, -# and if necessary, blas can be split into openblas and cublas -add_subdirectory(blas) add_subdirectory(cpu) if(WITH_GPU OR WITH_ROCM) # NOTE(chenweihang): if hip can split from cuda impl, we should add hip dir diff --git a/paddle/pten/kernels/blas/CMakeLists.txt b/paddle/pten/kernels/blas/CMakeLists.txt deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/paddle/pten/kernels/cpu/CMakeLists.txt b/paddle/pten/kernels/cpu/CMakeLists.txt index 7a32fab2674..036ce68ee43 100644 --- a/paddle/pten/kernels/cpu/CMakeLists.txt +++ b/paddle/pten/kernels/cpu/CMakeLists.txt @@ -2,4 +2,6 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory) cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils) cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary) +cc_library(scale_kernel_cpu SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) +cc_library(full_kernel_cpu SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) cc_library(conj_kernel_cpu SRCS conj_kernel.cc DEPS dense_tensor kernel_context kernel_factory) diff --git a/paddle/pten/kernels/eigen/full_kernel.cc b/paddle/pten/kernels/cpu/full_kernel.cc similarity index 93% rename from paddle/pten/kernels/eigen/full_kernel.cc rename to paddle/pten/kernels/cpu/full_kernel.cc index 7db9a6b181d..4912656bb2a 100644 --- a/paddle/pten/kernels/eigen/full_kernel.cc +++ b/paddle/pten/kernels/cpu/full_kernel.cc @@ -13,11 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/pten/kernels/full_kernel.h" -#include "paddle/pten/kernels/eigen/full.h" +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/kernel_registry.h" - -#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/kernels/impl/full_kernel_impl.h" PT_REGISTER_CTX_KERNEL(full, CPU, diff --git a/paddle/pten/kernels/cpu/scale_kernel.cc b/paddle/pten/kernels/cpu/scale_kernel.cc new file mode 100644 index 00000000000..fe9a0a033bc --- /dev/null +++ b/paddle/pten/kernels/cpu/scale_kernel.cc @@ -0,0 +1,35 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/scale_kernel.h" +#include "paddle/pten/kernels/impl/scale_kernel_impl.h" + +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/core/kernel_registry.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/platform/bfloat16.h" + +PT_REGISTER_CTX_KERNEL(scale, + CPU, + ALL_LAYOUT, + pten::Scale, + float, + double, + paddle::platform::bfloat16, + uint8_t, + int8_t, + int16_t, + int, + int64_t) {} diff --git a/paddle/pten/kernels/cuda/CMakeLists.txt b/paddle/pten/kernels/cuda/CMakeLists.txt index 48b6dc16234..428b2762ca7 100644 --- a/paddle/pten/kernels/cuda/CMakeLists.txt +++ b/paddle/pten/kernels/cuda/CMakeLists.txt @@ -3,11 +3,15 @@ if(WITH_GPU) nv_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) nv_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) nv_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) + nv_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function) + nv_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function) nv_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory) elseif(WITH_ROCM) hip_library(math_cuda SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_cuda) hip_library(linalg_cuda SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) hip_library(utils_cuda SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) hip_library(manipulation_cuda SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_cuda unary) + hip_library(scale_kernel_cuda SRCS scale_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function) + hip_library(full_kernel_cuda SRCS full_kernel.cu DEPS dense_tensor kernel_context kernel_factory eigen_function) hip_library(conj_kernel_cuda SRCS conj_kernel.cu DEPS dense_tensor kernel_context kernel_factory) endif() diff --git a/paddle/pten/kernels/eigen/full_kernel.cu b/paddle/pten/kernels/cuda/full_kernel.cu similarity index 93% rename from paddle/pten/kernels/eigen/full_kernel.cu rename to paddle/pten/kernels/cuda/full_kernel.cu index 32e48f12f90..8a6639a2dc4 100644 --- a/paddle/pten/kernels/eigen/full_kernel.cu +++ b/paddle/pten/kernels/cuda/full_kernel.cu @@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/pten/kernels/eigen/full.h" #include "paddle/pten/kernels/full_kernel.h" +#include "paddle/pten/backends/cuda/cuda_context.h" #include "paddle/pten/core/kernel_registry.h" - -#include "paddle/pten/backends/all_context.h" +#include "paddle/pten/kernels/impl/full_kernel_impl.h" PT_REGISTER_CTX_KERNEL(full, CUDA, diff --git a/paddle/pten/kernels/cuda/scale_kernel.cu b/paddle/pten/kernels/cuda/scale_kernel.cu new file mode 100644 index 00000000000..904976ae956 --- /dev/null +++ b/paddle/pten/kernels/cuda/scale_kernel.cu @@ -0,0 +1,35 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/scale_kernel.h" + +#include "paddle/pten/backends/cuda/cuda_context.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/impl/scale_kernel_impl.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/platform/float16.h" + +PT_REGISTER_CTX_KERNEL(scale, + CUDA, + ALL_LAYOUT, + pten::Scale, + float, + double, + paddle::platform::float16, + uint8_t, + int8_t, + int16_t, + int, + int64_t) {} diff --git a/paddle/pten/kernels/eigen/CMakeLists.txt b/paddle/pten/kernels/eigen/CMakeLists.txt deleted file mode 100644 index 9188d0b21ed..00000000000 --- a/paddle/pten/kernels/eigen/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -if(WITH_GPU) - nv_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) - nv_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function) -elseif(WITH_ROCM) - hip_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) - hip_library(full_kernel_eigen SRCS full_kernel.cc full_kernel.cu DEPS kernel_context kernel_factory dense_tensor eigen_function) -else() - cc_library(scale_kernel_eigen SRCS scale_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) - cc_library(full_kernel_eigen SRCS full_kernel.cc DEPS dense_tensor kernel_context kernel_factory eigen_function) -endif() diff --git a/paddle/pten/kernels/eigen/full.h b/paddle/pten/kernels/impl/full_kernel_impl.h similarity index 99% rename from paddle/pten/kernels/eigen/full.h rename to paddle/pten/kernels/impl/full_kernel_impl.h index 27d18c4a9ab..7076bb51b36 100644 --- a/paddle/pten/kernels/eigen/full.h +++ b/paddle/pten/kernels/impl/full_kernel_impl.h @@ -17,7 +17,6 @@ limitations under the License. */ #include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar_array.h" #include "paddle/pten/core/dense_tensor.h" - #include "paddle/pten/kernels/hybird/eigen/common.h" // See Note [ Why still include the fluid headers? ] diff --git a/paddle/pten/kernels/eigen/scale_kernel.cc b/paddle/pten/kernels/impl/scale_kernel_impl.h similarity index 54% rename from paddle/pten/kernels/eigen/scale_kernel.cc rename to paddle/pten/kernels/impl/scale_kernel_impl.h index 5ec27be3af9..421bb9f7b00 100644 --- a/paddle/pten/kernels/eigen/scale_kernel.cc +++ b/paddle/pten/kernels/impl/scale_kernel_impl.h @@ -12,16 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/pten/kernels/scale_kernel.h" +#pragma once -#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/kernels/hybird/eigen/common.h" // See Note [ Why still include the fluid headers? ] #include "paddle/fluid/operators/eigen/eigen_function.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/float16.h" namespace pten { @@ -50,35 +48,3 @@ void Scale(const ContextT& dev_ctx, } } // namespace pten - -// TODO(chenweihang): Use EigenContext to specialize the ContextT parameter, -// and only register the backend as Eigen's kernel during registration, -// instead of using macros to register the CPU and CUDA kernels separately - -PT_REGISTER_CTX_KERNEL(scale, - CPU, - ALL_LAYOUT, - pten::Scale, - float, - double, - paddle::platform::bfloat16, - uint8_t, - int8_t, - int16_t, - int, - int64_t) {} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PT_REGISTER_CTX_KERNEL(scale, - CUDA, - ALL_LAYOUT, - pten::Scale, - float, - double, - paddle::platform::float16, - uint8_t, - int8_t, - int16_t, - int, - int64_t) {} -#endif diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index 2c494043e27..e85eb4c3294 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -21,5 +21,5 @@ cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_sum_api SRCS test_sum_api.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api_utils) -cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS pten_tensor pten_api pten_api_utils scale_kernel_eigen) +cc_test(test_scale_benchmark SRCS test_scale_benchmark.cc DEPS pten_tensor pten_api pten_api_utils) cc_test(test_conj_api SRCS test_conj_api.cc DEPS pten_tensor pten_api pten_api_utils) -- GitLab