Unverified commit 73819658, authored by Chen Weihang, committed by GitHub

[PTen] Move copy kernel impl (#38421)

* add register general kernel macro

* move copy kernel impl

* revert needless change

* polish details

* fix xpu compile failed

* fix xpu compile failed

* polish format
Parent e5c7ca48
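For orientation, the core of this PR is that the per-backend `Copy` declarations in `paddle/pten/kernels/{cpu,gpu,xpu}/utils.h` are replaced by a single templated declaration in `paddle/pten/kernels/copy_kernel.h`, and each backend registers its own instantiation through the new `PT_REGISTER_GENERAL_KERNEL` macro. A minimal sketch of that interface, condensed from the hunks below (comments mark what is simplified):

```cpp
// Sketch condensed from the diff below; the real header also pulls in the
// backend context and kernel-registry headers.
// paddle/pten/kernels/copy_kernel.h
#pragma once

#include "paddle/pten/core/dense_tensor.h"

namespace pten {

// One declaration shared by all backends; the device context is a template
// parameter (CPUContext/GPUContext/XPUContext) instead of a per-backend overload.
template <typename ContextT>
void Copy(const ContextT& dev_ctx,
          const DenseTensor& src,
          bool blocking,
          DenseTensor* dst);

}  // namespace pten

// Each backend's implementation file then registers its instantiation with the
// new macro, e.g. the CPU kernel:
//   PT_REGISTER_GENERAL_KERNEL(
//       copy, CPU, ALL_LAYOUT, pten::Copy<pten::CPUContext>, ALL_DTYPE) {}
```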
......@@ -275,7 +275,7 @@ if(WITH_PYTHON)
if(NOT ON_INFER)
cc_library(paddle_eager
SRCS eager.cc eager_functions.cc eager_method.cc eager_properties.cc eager_utils.cc
DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node math_cpu linalg_cpu utils_cpu manipulation_cpu accumulation_node global_utils utils python)
DEPS eager_api autograd_meta backward grad_node_info pten op_function_common dygraph_function dygraph_node accumulation_node global_utils utils python)
add_dependencies(paddle_eager eager_codegen)
add_dependencies(paddle_eager eager_op_function_generator_cmd)
list(APPEND PYBIND_DEPS paddle_eager)
......
......@@ -25,6 +25,8 @@ add_subdirectory(tests)
# make an unity target for compile deps
set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context)
get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
# keep this message for debug, remove it later if needless
message(STATUS "All standard pten kernels: ${pten_kernels}")
set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
set(PTEN_DEPS ${PTEN_DEPS} math_cpu linalg_cpu manipulation_cpu)
set(PTEN_DEPS ${PTEN_DEPS} nary unary binary)
......
......@@ -33,15 +33,6 @@ set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function)
# auto build kernel targets by cmake
register_kernels(EXCLUDES flatten_kernel DEPS ${COMMON_KERNEL_DEPS})
# TODO(chenweihang): auto parse compile deps by include headers later
set(FLATTEN_DEPS ${COMMON_KERNEL_DEPS} utils_cpu unary)
if(WITH_GPU OR WITH_ROCM)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_gpu)
elseif(WITH_XPU)
set(FLATTEN_DEPS ${FLATTEN_DEPS} utils_xpu)
endif()
kernel_library(flatten_kernel DEPS ${FLATTEN_DEPS})
get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
message(STATUS "PTEN_KERNELS: ${pten_kernels}")
kernel_library(flatten_kernel DEPS ${COMMON_KERNEL_DEPS} copy_kernel unary)
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
......@@ -14,13 +14,12 @@ limitations under the License. */
#pragma once
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
namespace pten {
void Copy(const CPUContext& dev_ctx,
template <typename ContextT>
void Copy(const ContextT& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst);
......
cc_library(math_cpu SRCS math.cc DEPS dense_tensor kernel_context kernel_factory eigen_function blas pten_transpose_cpu)
cc_library(linalg_cpu SRCS linalg.cc DEPS dense_tensor kernel_context kernel_factory)
cc_library(utils_cpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_cpu unary)
cc_library(manipulation_cpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
......@@ -12,15 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/kernels/cpu/utils.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
namespace pten {
// NOTE(chenweihang): blocking is useless in cpu kernel
void Copy(const CPUContext& dev_ctx,
template <typename ContextT>
void Copy(const ContextT& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst) {
......@@ -57,4 +63,5 @@ void Copy(const CPUContext& dev_ctx,
} // namespace pten
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, CPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
PT_REGISTER_GENERAL_KERNEL(
copy, CPU, ALL_LAYOUT, pten::Copy<pten::CPUContext>, ALL_DTYPE) {}
......@@ -15,7 +15,7 @@
#include "paddle/pten/kernels/cpu/manipulation.h"
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/cpu/utils.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/kernels/hybird/general/manipulation.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h"
......
......@@ -16,10 +16,8 @@
#include "paddle/pten/backends/all_context.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/cpu/utils.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/kernels/funcs/common_shape.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/xpu/utils.h"
namespace pten {
......
if(WITH_GPU)
nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
nv_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
nv_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
nv_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
elseif(WITH_ROCM)
hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu)
hip_library(linalg_gpu SRCS linalg.cu DEPS eigen_function dense_tensor kernel_context kernel_factory)
hip_library(utils_gpu SRCS utils.cu DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory utils_gpu unary)
hip_library(manipulation_gpu SRCS manipulation.cu DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
endif()
......@@ -12,15 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/gpu/utils.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
namespace pten {
void Copy(const GPUContext& dev_ctx,
template <typename ContextT>
void Copy(const ContextT& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst) {
......@@ -232,6 +237,8 @@ void Copy(const GPUContext& dev_ctx,
}
}
}
} // namespace pten
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, GPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
PT_REGISTER_GENERAL_KERNEL(
copy, GPU, ALL_LAYOUT, pten::Copy<pten::GPUContext>, ALL_DTYPE) {}
......@@ -14,8 +14,8 @@
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/kernels/gpu/manipulation.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/hybird/cuda/cast_kernel_impl.h"
#include "paddle/pten/kernels/hybird/general/manipulation.h"
......@@ -85,6 +85,7 @@ PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast, paddle::platform::bfloat16)
PTEN_REGISTER_CAST_CUDA_BASE_TYPE(cast)
#endif
PT_REGISTER_NO_TEMPLATE_KERNEL(reshape, GPU, ANY, pten::Reshape, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL(
reshape_with_xshape, GPU, ANY, pten::ReshapeWithXShape, ALL_DTYPE) {}
reshape, GPU, ALL_LAYOUT, pten::Reshape, ALL_DTYPE) {}
PT_REGISTER_NO_TEMPLATE_KERNEL(
reshape_with_xshape, GPU, ALL_LAYOUT, pten::ReshapeWithXShape, ALL_DTYPE) {}
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
namespace pten {
void Copy(const GPUContext& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst);
} // namespace pten
#endif
......@@ -41,7 +41,7 @@ namespace cub = hipcub;
#include "paddle/pten/api/ext/dispatch.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/kernels/hybird/math/cast_func.h"
// Reduce split or not, Whether to use ReduceHigherDim
......
cc_library(utils_xpu SRCS utils.cc DEPS dense_tensor kernel_context kernel_factory memory convert_utils)
cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory utils_xpu unary)
cc_library(manipulation_xpu SRCS manipulation.cc DEPS dense_tensor kernel_context kernel_factory copy_kernel unary)
......@@ -12,14 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/pten/kernels/xpu/utils.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/backends/xpu/xpu_context.h"
#include "paddle/pten/common/data_type.h"
#include "paddle/pten/core/convert_utils.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/memory/memcpy.h"
namespace pten {
void Copy(const XPUDeviceContext& dev_ctx,
template <typename ContextT>
void Copy(const ContextT& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst) {
......@@ -76,4 +82,5 @@ void Copy(const XPUDeviceContext& dev_ctx,
} // namespace pten
PT_REGISTER_NO_TEMPLATE_KERNEL(copy, XPU, ALL_LAYOUT, pten::Copy, ALL_DTYPE) {}
PT_REGISTER_GENERAL_KERNEL(
copy, XPU, ALL_LAYOUT, pten::Copy<pten::XPUContext>, ALL_DTYPE) {}
......@@ -14,8 +14,8 @@
#include "paddle/pten/kernels/xpu/manipulation.h"
#include "paddle/pten/infermeta/unary.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/kernels/hybird/general/manipulation.h"
#include "paddle/pten/kernels/xpu/utils.h"
namespace pten {
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace pten {
using XPUDeviceContext = paddle::platform::XPUDeviceContext;
void Copy(const XPUDeviceContext& dev_ctx,
const DenseTensor& src,
bool blocking,
DenseTensor* dst);
} // namespace pten
#endif
......@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/gpu/utils.h"
#include "paddle/pten/kernels/copy_kernel.h"
namespace paddle {
namespace tests {
......
......@@ -16,7 +16,7 @@ limitations under the License. */
#include <memory>
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/cpu/utils.h"
#include "paddle/pten/kernels/copy_kernel.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
......@@ -28,8 +28,7 @@ namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;
// TODO(YuanRisheng): This TEST file need to be refactored after 'copy' realized
// in
// 'paddle/api',
// in 'paddle/api'
TEST(DEV_API, copy) {
// 1. create tensor
const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
......
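As a usage note, callers (such as the reshape/manipulation kernels and the tests touched above) now include the single `copy_kernel.h` header and depend on the `copy_kernel` target instead of the old `utils_cpu`/`utils_gpu`/`utils_xpu` libraries. Below is a hedged sketch of such a call site, assuming a CPU context; the helper name `CopyOnCpu` is hypothetical and not part of this PR:

```cpp
#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
// Unified header introduced by this PR, replacing the per-backend
// paddle/pten/kernels/{cpu,gpu,xpu}/utils.h includes.
#include "paddle/pten/kernels/copy_kernel.h"

namespace {

// Hypothetical helper for illustration only; real call sites build their
// tensors via paddle::experimental::DefaultAllocator as in the test above.
void CopyOnCpu(const pten::CPUContext& dev_ctx,
               const pten::DenseTensor& src,
               pten::DenseTensor* dst) {
  // `blocking` is ignored by the CPU kernel (see the NOTE in the CPU impl).
  pten::Copy(dev_ctx, src, /*blocking=*/false, dst);
}

}  // namespace
```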