Unverified commit 73819658, authored by Chen Weihang, committed by GitHub

[PTen] Move copy kernel impl (#38421)

* add register general kernel macro

* move copy kernel impl

* revert needless change

* polish details

* fix xpu compile failed

* fix xpu compile failed

* polish format
Parent e5c7ca48
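What the hunks below boil down to: the backend-specific copy utilities (cpu/utils, gpu/utils, xpu/utils and their utils_* CMake targets) are folded into a single context-templated Copy kernel declared in paddle/pten/kernels/copy_kernel.h, and each backend registers its own instantiation through the new PT_REGISTER_GENERAL_KERNEL macro. A minimal sketch of the resulting pattern, condensed from the diff itself (kernel bodies and exact file layout elided):

    // One declaration for all backends (paddle/pten/kernels/copy_kernel.h)
    #pragma once

    #include "paddle/pten/core/dense_tensor.h"

    namespace pten {

    template <typename ContextT>
    void Copy(const ContextT& dev_ctx,
              const DenseTensor& src,
              bool blocking,
              DenseTensor* dst);

    }  // namespace pten

    // Each backend source then instantiates and registers the template,
    // e.g. for CPU (GPU and XPU follow the same pattern):
    PT_REGISTER_GENERAL_KERNEL(
        copy, CPU, ALL_LAYOUT, pten::Copy<pten::CPUContext>, ALL_DTYPE) {}

Call sites that used to include a backend-specific utils header now include only copy_kernel.h and invoke the same template with whatever device context they hold, e.g. pten::Copy(dev_ctx, x, false, &out) (a hypothetical call, not shown in this diff).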
@@ -275,7 +275,7 @@ if(WITH_PYTHON)
   if(NOT ON_INFER)
     cc_library(paddle_eager
       SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc
-      DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
+      DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node accumulation_node global_utils utils python)
     add_dependencies(paddle_eager eager_codegen)
     add_dependencies(paddle_eager eager_op_function_generator_cmd)
     list(APPEND PYBIND_DEPS paddle_eager)
...
@@ -25,6 +25,8 @@ add_subdirectory(tests)
 # make an unity target for compile deps
 set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
 get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
+# keep this message for debug, remove it later if needless
+message(STATUS "All standard pten kernels: ${pten_kernels}")
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
 set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
 set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
...
@@ -33,15 +33,6 @@ set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function)
 # auto build kernel targets by cmake
 register_kernels(EXCLUDES flatten_kernel DEPS ${COMMON_KERNEL_DEPS})
 # TODO(chenweihang): auto parse compile deps by include headers later
-set(FLATTEN_DEPS ${COMMON_KERNEL_DEPS} utils_cpu unary)
-if(WITH_GPU OR WITH_ROCM)
-  set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu)
-elseif(WITH_XPU)
-  set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu)
-endif()
-kernel_library(flatten_kernel DEPS ${FLATTEN_DEPS})
-get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
-message(STATUS "PTEN_KERNELS: ${pten_kernels}")
+kernel_library(flatten_kernel DEPS ${COMMON_KERNEL_DEPS} copy_kernel unary)

 copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
...
@@ -14,13 +14,12 @@ limitations under the License. */

 #pragma once

-#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/core/kernel_registry.h"

 namespace pten {

-void Copy(const CPUContext& dev_ctx,
+template <typename ContextT>
+void Copy(const ContextT& dev_ctx,
           const DenseTensor& src,
           bool blocking,
           DenseTensor* dst);
...
 cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function blas pten_transpose_cpu)
 cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
-cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
+cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
@@ -12,15 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/pten/kernels/cpu/utils.h"
-#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/kernels/copy_kernel.h"
+
+#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/memory/memcpy.h"

 namespace pten {

 // NOTE(chenweihang): blocking is useless in cpu kernel
-void Copy(const CPUContext& dev_ctx,
+template <typename ContextT>
+void Copy(const ContextT& dev_ctx,
           const DenseTensor& src,
           bool blocking,
           DenseTensor* dst) {
@@ -57,4 +63,5 @@ void Copy(const CPUContext& dev_ctx,

 }  // namespace pten

-PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
+PT_REGISTER_GENERAL_KERNEL(
+    copy, CPU, ALL_LAYOUT, pten::Copy<pten::CPUContext>, ALL_DTYPE) {}
@@ -15,7 +15,7 @@
 #include "paddle/pten/kernels/cpu/manipulation.h"
 #include "paddle/pten/api/ext/dispatch.h"
 #include "paddle/pten/infermeta/unary.h"
-#include "paddle/pten/kernels/cpu/utils.h"
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/hybird/general/manipulation.h"
 #include "paddle/pten/kernels/hybird/math/cast_func.h"
...
@@ -16,10 +16,8 @@
 #include "paddle/pten/backends/all_context.h"
 #include "paddle/pten/core/kernel_registry.h"
 #include "paddle/pten/infermeta/unary.h"
-#include "paddle/pten/kernels/cpu/utils.h"
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/funcs/common_shape.h"
-#include "paddle/pten/kernels/gpu/utils.h"
-#include "paddle/pten/kernels/xpu/utils.h"

 namespace pten {
...
 if(WITH_GPU)
   nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
   nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-  nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-  nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
+  nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
 elseif(WITH_ROCM)
   hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
   hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
-  hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-  hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
+  hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
 endif()
@@ -12,15 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/kernels/copy_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/gpu/utils.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/memory/memcpy.h"

 namespace pten {

-void Copy(const GPUContext& dev_ctx,
+template <typename ContextT>
+void Copy(const ContextT& dev_ctx,
           const DenseTensor& src,
           bool blocking,
           DenseTensor* dst) {
@@ -232,6 +237,8 @@ void Copy(const GPUContext& dev_ctx,
     }
   }
 }

 }  // namespace pten

-PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
+PT_REGISTER_GENERAL_KERNEL(
+    copy, GPU, ALL_LAYOUT, pten::Copy<pten::GPUContext>, ALL_DTYPE) {}
@@ -14,8 +14,8 @@

 #include "paddle/pten/api/ext/dispatch.h"
 #include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/gpu/manipulation.h"
-#include "paddle/pten/kernels/gpu/utils.h"
 #include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h"
 #include "paddle/pten/kernels/hybird/general/manipulation.h"
@@ -85,6 +85,7 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16)
 PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast)
 #endif

-PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {}
 PT_REGISTER_NO_TEMPLATE_KERNEL(
-    reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {}
+    reshape, GPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {}
+PT_REGISTER_NO_TEMPLATE_KERNEL(
+    reshape_with_xshape, GPU, ALL_LAYOUT, pten::ReshapeWithXShape, ALL_DTYPE) {}
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-
-#include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/core/kernel_registry.h"
-
-namespace pten {
-
-void Copy(const GPUContext& dev_ctx,
-          const DenseTensor& src,
-          bool blocking,
-          DenseTensor* dst);
-
-}  // namespace pten
-
-#endif
@@ -41,7 +41,7 @@ namespace cub = hipcub;

 #include "paddle/pten/api/ext/dispatch.h"
 #include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/kernels/gpu/utils.h"
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/hybird/math/cast_func.h"

 // Reduce split or not, Whether to use ReduceHigherDim
...
-cc_library(utils_xpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
-cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_xpu unary)
+cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
@@ -12,14 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/pten/kernels/xpu/utils.h"
-#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/pten/kernels/copy_kernel.h"
+
+#include "paddle/pten/backends/xpu/xpu_context.h"
 #include "paddle/pten/common/data_type.h"
 #include "paddle/pten/core/convert_utils.h"
+#include "paddle/pten/core/kernel_registry.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/memory/memcpy.h"

 namespace pten {

-void Copy(const XPUDeviceContext& dev_ctx,
+template <typename ContextT>
+void Copy(const ContextT& dev_ctx,
           const DenseTensor& src,
           bool blocking,
           DenseTensor* dst) {
@@ -76,4 +82,5 @@ void Copy(const XPUDeviceContext& dev_ctx,

 }  // namespace pten

-PT_REGISTER_NO_TEMPLATE_KERNEL(copy, XPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
+PT_REGISTER_GENERAL_KERNEL(
+    copy, XPU, ALL_LAYOUT, pten::Copy<pten::XPUContext>, ALL_DTYPE) {}
@@ -14,8 +14,8 @@

 #include "paddle/pten/kernels/xpu/manipulation.h"

 #include "paddle/pten/infermeta/unary.h"
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/hybird/general/manipulation.h"
-#include "paddle/pten/kernels/xpu/utils.h"

 namespace pten {
...
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#ifdef PADDLE_WITH_XPU
-
-#include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/core/kernel_registry.h"
-
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/platform/device_context.h"
-
-namespace pten {
-
-using XPUDeviceContext = paddle::platform::XPUDeviceContext;
-
-void Copy(const XPUDeviceContext& dev_ctx,
-          const DenseTensor& src,
-          bool blocking,
-          DenseTensor* dst);
-
-}  // namespace pten
-
-#endif
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/pten/api/lib/utils/allocator.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/gpu/utils.h"
+#include "paddle/pten/kernels/copy_kernel.h"

 namespace paddle {
 namespace tests {
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <memory>

 #include "paddle/pten/core/kernel_registry.h"
-#include "paddle/pten/kernels/cpu/utils.h"
+#include "paddle/pten/kernels/copy_kernel.h"

 #include "paddle/pten/api/lib/utils/allocator.h"
 #include "paddle/pten/core/dense_tensor.h"
@@ -28,8 +28,7 @@ namespace framework = paddle::framework;
 using DDim = paddle::framework::DDim;

 // TODO(YuanRisheng): This TEST file need to be refactored after 'copy' realized
-// in
-// 'paddle/api',
+// in 'paddle/api'
 TEST(DEV_API, copy) {
   // 1. create tensor
   const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
...