diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 3cc43cdfe643c59beab110dbd9797b8c1b1e4a71..1df77c78a419bb5c99a06a327b3309fdf3c7e6f2 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -275,7 +275,7 @@ if(WITH_PYTHON) if(NOT ON_INFER) cc_library(paddle_eager SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc - DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python) + DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node accumulation_node global_utils utils python) add_dependencies(paddle_eager eager_codegen) add_dependencies(paddle_eager eager_op_function_generator_cmd) list(APPEND PYBIND_DEPS paddle_eager) diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 9605a3b0091a25e30b94909ae71da9a39ef579e3..5e961ce23dbaa144936a338af3beac41b8be0a20 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -25,6 +25,8 @@ add_subdirectory(tests) # make an unity target for compile deps set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context) get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) +# keep this message for debug, remove it later if needless +message(STATUS "All standard pten kernels: ${pten_kernels}") set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels}) set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu) set(PTEN_DEPS ${PTEN_DEPS} nary unary binary) diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt index 9d37effb8d7b312f4d4d7f7cd3f21029ac287d2a..27d274a5b34ad51663414b9571a465d72a5c8df7 100644 --- a/paddle/pten/kernels/CMakeLists.txt +++ b/paddle/pten/kernels/CMakeLists.txt @@ -33,15 +33,6 @@ set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function) # auto build kernel targets by cmake register_kernels(EXCLUDES flatten_kernel DEPS ${COMMON_KERNEL_DEPS}) # TODO(chenweihang): auto parse compile deps by include headers later -set(FLATTEN_DEPS ${COMMON_KERNEL_DEPS} utils_cpu unary) -if(WITH_GPU OR WITH_ROCM) - set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu) -elseif(WITH_XPU) - set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu) -endif() -kernel_library(flatten_kernel DEPS ${FLATTEN_DEPS}) - -get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) -message(STATUS "PTEN_KERNELS: ${pten_kernels}") +kernel_library(flatten_kernel DEPS ${COMMON_KERNEL_DEPS} copy_kernel unary) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) diff --git a/paddle/pten/kernels/cpu/utils.h b/paddle/pten/kernels/copy_kernel.h similarity index 85% rename from paddle/pten/kernels/cpu/utils.h rename to paddle/pten/kernels/copy_kernel.h index 93730692079e3e6ab6c6fac9578c9b108d66c63f..d095d18a371f0913cafa793fae7392326b39934e 100644 --- a/paddle/pten/kernels/cpu/utils.h +++ b/paddle/pten/kernels/copy_kernel.h @@ -14,13 +14,12 @@ limitations under the License. */ #pragma once -#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/kernel_registry.h" namespace pten { -void Copy(const CPUContext& dev_ctx, +template +void Copy(const ContextT& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst); diff --git a/paddle/pten/kernels/cpu/CMakeLists.txt b/paddle/pten/kernels/cpu/CMakeLists.txt index f45d511602d71a175c5f917cd955e5be93a8f431..f7dabf47eb68f929cb733d572dac2931a44fa366 100644 --- a/paddle/pten/kernels/cpu/CMakeLists.txt +++ b/paddle/pten/kernels/cpu/CMakeLists.txt @@ -1,4 +1,3 @@ cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function blas pten_transpose_cpu) cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory) -cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils) -cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary) +cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary) diff --git a/paddle/pten/kernels/cpu/utils.cc b/paddle/pten/kernels/cpu/copy_kernel.cc similarity index 84% rename from paddle/pten/kernels/cpu/utils.cc rename to paddle/pten/kernels/cpu/copy_kernel.cc index 1ca20df4d92dcbc89008c5befa0c6bfb37c36de5..6a81579eb4f0333c78df169ea82a52cec1a44eed 100644 --- a/paddle/pten/kernels/cpu/utils.cc +++ b/paddle/pten/kernels/cpu/copy_kernel.cc @@ -12,15 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/pten/kernels/cpu/utils.h" -#include "paddle/fluid/memory/memcpy.h" +#include "paddle/pten/kernels/copy_kernel.h" + +#include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/kernel_registry.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/memory/memcpy.h" namespace pten { // NOTE(chenweihang): blocking is useless in cpu kernel -void Copy(const CPUContext& dev_ctx, +template +void Copy(const ContextT& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst) { @@ -57,4 +63,5 @@ void Copy(const CPUContext& dev_ctx, } // namespace pten -PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} +PT_REGISTER_GENERAL_KERNEL( + copy, CPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc index b413882c86221d3c5c82a58ff41d5b7f4bf14121..3dba89ea19620b4c8ea41e725e03c512062a0e6c 100644 --- a/paddle/pten/kernels/cpu/manipulation.cc +++ b/paddle/pten/kernels/cpu/manipulation.cc @@ -15,7 +15,7 @@ #include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/infermeta/unary.h" -#include "paddle/pten/kernels/cpu/utils.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/kernels/hybird/general/manipulation.h" #include "paddle/pten/kernels/hybird/math/cast_func.h" diff --git a/paddle/pten/kernels/flatten_kernel.cc b/paddle/pten/kernels/flatten_kernel.cc index 025dc8a83294901cc046cd63f1c53635d1167120..0dc593618637f5f2a9feaaf03aab6a11f8390c2c 100644 --- a/paddle/pten/kernels/flatten_kernel.cc +++ b/paddle/pten/kernels/flatten_kernel.cc @@ -16,10 +16,8 @@ #include "paddle/pten/backends/all_context.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/infermeta/unary.h" -#include "paddle/pten/kernels/cpu/utils.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/kernels/funcs/common_shape.h" -#include "paddle/pten/kernels/gpu/utils.h" -#include "paddle/pten/kernels/xpu/utils.h" namespace pten { diff --git a/paddle/pten/kernels/gpu/CMakeLists.txt b/paddle/pten/kernels/gpu/CMakeLists.txt index 041df126c024c8e20f8794042d3904c281fedf2d..5fdb5359e1fd757311ebc8dbcaa094fd84ff583a 100644 --- a/paddle/pten/kernels/gpu/CMakeLists.txt +++ b/paddle/pten/kernels/gpu/CMakeLists.txt @@ -1,11 +1,9 @@ if(WITH_GPU) nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu) nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) - nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary) + nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary) elseif(WITH_ROCM) hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu) hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory) - hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils) - hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary) + hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary) endif() diff --git a/paddle/pten/kernels/gpu/utils.cu b/paddle/pten/kernels/gpu/copy_kernel.cu similarity index 97% rename from paddle/pten/kernels/gpu/utils.cu rename to paddle/pten/kernels/gpu/copy_kernel.cu index 4d080be11e3ed9164ca58408589b3ace6bcb363b..cb9f8054d3b0f4f13251299c4f0f0c921dba4b06 100644 --- a/paddle/pten/kernels/gpu/utils.cu +++ b/paddle/pten/kernels/gpu/copy_kernel.cu @@ -12,15 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/memory/memcpy.h" +#include "paddle/pten/kernels/copy_kernel.h" + +#include "paddle/pten/backends/gpu/gpu_context.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" #include "paddle/pten/core/kernel_registry.h" -#include "paddle/pten/kernels/gpu/utils.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/memory/memcpy.h" namespace pten { -void Copy(const GPUContext& dev_ctx, +template +void Copy(const ContextT& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst) { @@ -232,6 +237,8 @@ void Copy(const GPUContext& dev_ctx, } } } + } // namespace pten -PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} +PT_REGISTER_GENERAL_KERNEL( + copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/gpu/manipulation.cu b/paddle/pten/kernels/gpu/manipulation.cu index 8c4aa7449a304513fb56131c5f26cad6126fa166..4dbf8e69b450e2ed085f94cc3bfe39dbcd806023 100644 --- a/paddle/pten/kernels/gpu/manipulation.cu +++ b/paddle/pten/kernels/gpu/manipulation.cu @@ -14,8 +14,8 @@ #include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/infermeta/unary.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/kernels/gpu/manipulation.h" -#include "paddle/pten/kernels/gpu/utils.h" #include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h" #include "paddle/pten/kernels/hybird/general/manipulation.h" @@ -85,6 +85,7 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16) PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast) #endif -PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {} PT_REGISTER_NO_TEMPLATE_KERNEL( - reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {} + reshape, GPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {} +PT_REGISTER_NO_TEMPLATE_KERNEL( + reshape_with_xshape, GPU, ALL_LAYOUT, pten::ReshapeWithXShape, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/gpu/utils.h b/paddle/pten/kernels/gpu/utils.h deleted file mode 100644 index 3a455ad70c4dcb2552fa2722a3d9504b64a417a2..0000000000000000000000000000000000000000 --- a/paddle/pten/kernels/gpu/utils.h +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -// CUDA and HIP use same api -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - -#include "paddle/pten/backends/gpu/gpu_context.h" -#include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/kernel_registry.h" - -namespace pten { - -void Copy(const GPUContext& dev_ctx, - const DenseTensor& src, - bool blocking, - DenseTensor* dst); - -} // namespace pten - -#endif diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h index 8c2213ca9b3ce994f975bfd8d98c2b877535260e..a2faf2a5416942900ee8ab6ec895286a06996d50 100644 --- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h +++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h @@ -41,7 +41,7 @@ namespace cub = hipcub; #include "paddle/pten/api/ext/dispatch.h" #include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/kernels/gpu/utils.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/kernels/hybird/math/cast_func.h" // Reduce split or not, Whether to use ReduceHigherDim diff --git a/paddle/pten/kernels/xpu/CMakeLists.txt b/paddle/pten/kernels/xpu/CMakeLists.txt index 3ba070bdd6c96cbb34abc7de6a84d65f7c6cea9f..c6d66b16512785156d4ca846b537d1e9612a0e2f 100644 --- a/paddle/pten/kernels/xpu/CMakeLists.txt +++ b/paddle/pten/kernels/xpu/CMakeLists.txt @@ -1,2 +1 @@ -cc_library(utils_xpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils) -cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_xpu unary) +cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary) diff --git a/paddle/pten/kernels/xpu/utils.cc b/paddle/pten/kernels/xpu/copy_kernel.cc similarity index 89% rename from paddle/pten/kernels/xpu/utils.cc rename to paddle/pten/kernels/xpu/copy_kernel.cc index 5ea3a359ef6d69b2fafaa9b76418c770ebfc9de7..479ef50836622614ec7bde9888acdda059266b96 100644 --- a/paddle/pten/kernels/xpu/utils.cc +++ b/paddle/pten/kernels/xpu/copy_kernel.cc @@ -12,14 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/pten/kernels/xpu/utils.h" -#include "paddle/fluid/memory/memcpy.h" +#include "paddle/pten/kernels/copy_kernel.h" + +#include "paddle/pten/backends/xpu/xpu_context.h" #include "paddle/pten/common/data_type.h" #include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/kernel_registry.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/memory/memcpy.h" namespace pten { -void Copy(const XPUDeviceContext& dev_ctx, +template +void Copy(const ContextT& dev_ctx, const DenseTensor& src, bool blocking, DenseTensor* dst) { @@ -76,4 +82,5 @@ void Copy(const XPUDeviceContext& dev_ctx, } // namespace pten -PT_REGISTER_NO_TEMPLATE_KERNEL(copy, XPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} +PT_REGISTER_GENERAL_KERNEL( + copy, XPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {} diff --git a/paddle/pten/kernels/xpu/manipulation.cc b/paddle/pten/kernels/xpu/manipulation.cc index ecd673015a6770517032218989f9f9947cc2a69f..4d0ed7cb825811f302ee1be399c3f01d9b1f40e2 100644 --- a/paddle/pten/kernels/xpu/manipulation.cc +++ b/paddle/pten/kernels/xpu/manipulation.cc @@ -14,8 +14,8 @@ #include "paddle/pten/kernels/xpu/manipulation.h" #include "paddle/pten/infermeta/unary.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/kernels/hybird/general/manipulation.h" -#include "paddle/pten/kernels/xpu/utils.h" namespace pten { diff --git a/paddle/pten/kernels/xpu/utils.h b/paddle/pten/kernels/xpu/utils.h deleted file mode 100644 index 6e34502eb23a503a3c63ecdc45a73dfc86765780..0000000000000000000000000000000000000000 --- a/paddle/pten/kernels/xpu/utils.h +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#ifdef PADDLE_WITH_XPU - -#include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/kernel_registry.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/platform/device_context.h" -namespace pten { - -using XPUDeviceContext = paddle::platform::XPUDeviceContext; - -void Copy(const XPUDeviceContext& dev_ctx, - const DenseTensor& src, - bool blocking, - DenseTensor* dst); - -} // namespace pten - -#endif diff --git a/paddle/pten/tests/api/test_matmul_api.cc b/paddle/pten/tests/api/test_matmul_api.cc index e29fa11d58d1d4816f475ef3e708aa5bcf009586..bef0e2af4cf920ba6f2b90309b13ab6ccc03bdf6 100644 --- a/paddle/pten/tests/api/test_matmul_api.cc +++ b/paddle/pten/tests/api/test_matmul_api.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" -#include "paddle/pten/kernels/gpu/utils.h" +#include "paddle/pten/kernels/copy_kernel.h" namespace paddle { namespace tests { diff --git a/paddle/pten/tests/kernels/test_copy_dev_api.cc b/paddle/pten/tests/kernels/test_copy_dev_api.cc index 9cc994c569553791dc51a4fea97a267d4b73c5c2..3095c83d97c9801c49d073a873fd8db496d63952 100644 --- a/paddle/pten/tests/kernels/test_copy_dev_api.cc +++ b/paddle/pten/tests/kernels/test_copy_dev_api.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/pten/core/kernel_registry.h" -#include "paddle/pten/kernels/cpu/utils.h" +#include "paddle/pten/kernels/copy_kernel.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h" @@ -28,8 +28,7 @@ namespace framework = paddle::framework; using DDim = paddle::framework::DDim; // TODO(YuanRisheng): This TEST file need to be refactored after 'copy' realized -// in -// 'paddle/api', +// in 'paddle/api' TEST(DEV_API, copy) { // 1. create tensor const auto alloc = std::make_shared(