From 64538c8d12f12c77d2373b66c25e9499e8acb3d0 Mon Sep 17 00:00:00 2001
From: Chen Weihang
Date: Tue, 4 Jan 2022 10:17:55 +0800
Subject: [PATCH] [PTen] Move inner empty and cast api to kernel.h (#38587)

* move inner cast api to cast_kernel.h

* resolve conflit
---
 paddle/fluid/operators/cast_op.h              |  2 +-
 paddle/pten/include/creation.h                | 31 ------------
 paddle/pten/include/manipulation.h            | 13 -----
 paddle/pten/kernels/CMakeLists.txt            |  2 +
 paddle/pten/kernels/cast_kernel.h             | 21 ++++++--
 paddle/pten/kernels/cpu/cast_kernel.cc        | 12 ++---
 paddle/pten/kernels/empty_kernel.cc           | 16 +++---
 paddle/pten/kernels/empty_kernel.h            | 49 +++++++++++++++++--
 paddle/pten/kernels/gpu/cast_kernel.cu        | 12 ++---
 .../hybird/cuda/reduce/reduce_cuda_impl.h     |  2 +-
 .../pten/kernels/hybird/general/reduce_impl.h |  2 +-
 11 files changed, 86 insertions(+), 76 deletions(-)

diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h
index 466adfa5f36..4f7fe2854ae 100644
--- a/paddle/fluid/operators/cast_op.h
+++ b/paddle/fluid/operators/cast_op.h
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
         static_cast<framework::proto::VarType::Type>(out_dtype));
 
     // call new kernel
-    pten::Cast<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
+    pten::CastKernel<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
   }
 };
 
diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h
index 73c5999ca92..fa5bd49ca30 100644
--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -23,37 +23,6 @@ namespace pten {
 
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
-template <typename T, typename ContextT>
-DenseTensor Empty(const ContextT& dev_ctx,
-                  const ScalarArray& shape,
-                  DataType dtype = DataType::FLOAT32,
-                  Backend backend = Backend::CPU,  // Is backend needed here?
-                  DataLayout layout = DataLayout::NCHW) {
-  auto out_meta = CreateInferMeta(shape, dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Empty<T>(dev_ctx, shape, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor EmptyLike(
-    const ContextT& dev_ctx,
-    const DenseTensor& x,
-    DataType dtype = DataType::UNDEFINED,
-    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
-    DataLayout layout = DataLayout::UNDEFINED) {
-  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  EmptyLike<T>(dev_ctx, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Full(const ContextT& dev_ctx,
                  const ScalarArray& shape,
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e317964dd1e..a8625e52f56 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
   return dense_out;
 }
 
-template <typename T, typename ContextT>
-DenseTensor Cast(const ContextT& dev_ctx,
-                 const DenseTensor& x,
-                 DataType out_dtype) {
-  auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Cast<T>(dev_ctx, x, out_dtype, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt
index 4c705767f4c..b76d408f89e 100644
--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
 
 set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
+# remove this dep after removing fluid deps on tensor creation
+set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} pten_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
 
 set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
diff --git a/paddle/pten/kernels/cast_kernel.h b/paddle/pten/kernels/cast_kernel.h
index 5243fa05fac..8fdce9cda6f 100644
--- a/paddle/pten/kernels/cast_kernel.h
+++ b/paddle/pten/kernels/cast_kernel.h
@@ -15,13 +15,24 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out);
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out);
+
+template <typename T, typename Context>
+DenseTensor Cast(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 DataType out_dtype) {
+  auto out_meta = CastInferMeta(x.meta(), out_dtype);
+  auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
+  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/cast_kernel.cc b/paddle/pten/kernels/cpu/cast_kernel.cc
index a9964d99eef..c6736cdd1bc 100644
--- a/paddle/pten/kernels/cpu/cast_kernel.cc
+++ b/paddle/pten/kernels/cpu/cast_kernel.cc
@@ -46,11 +46,11 @@ void CastKernelImpl(const CPUContext& dev_ctx,
         CastOpTransformFunctor<InT, OutT>());
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
                        CastKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(cast,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Cast,
+                       pten::CastKernel,
                        float,
                        double,
                        int,
diff --git a/paddle/pten/kernels/empty_kernel.cc b/paddle/pten/kernels/empty_kernel.cc
index 4c6d8706e0f..94886806bcc 100644
--- a/paddle/pten/kernels/empty_kernel.cc
+++ b/paddle/pten/kernels/empty_kernel.cc
@@ -20,14 +20,14 @@ limitations under the License. */
 namespace pten {
 
 template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx,
-           const ScalarArray& shape,
-           DenseTensor* out) {
+void EmptyKernel(const ContextT& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
 }
 
 template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
+void EmptyLikeKernel(const ContextT& dev_ctx, DenseTensor* out) {
   out->mutable_data<T>();
 }
 
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
 PT_REGISTER_CTX_KERNEL(empty,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        CPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
 PT_REGISTER_CTX_KERNEL(empty,
                        GPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        GPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
diff --git a/paddle/pten/kernels/empty_kernel.h b/paddle/pten/kernels/empty_kernel.h
index 7aa5a27765a..3249526805b 100644
--- a/paddle/pten/kernels/empty_kernel.h
+++ b/paddle/pten/kernels/empty_kernel.h
@@ -14,15 +14,56 @@
 
 #pragma once
 
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/infermeta/nary.h"
+#include "paddle/pten/infermeta/unary.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyKernel(const Context& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out);
 
-template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out);
+
+// TODO(chenweihang): the tensor creation method need to be replaced later,
+// all kernel api call Empty here instead of making tensor self
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(meta));
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx,
+                  const ScalarArray& shape,
+                  DataType dtype = DataType::FLOAT32,
+                  Backend backend = Backend::CPU,  // Is backend needed here?
+                  DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyKernel<T, Context>(dev_ctx, shape, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor EmptyLike(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    DataType dtype = DataType::UNDEFINED,
+    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
+    DataLayout layout = DataLayout::UNDEFINED) {
+  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyLikeKernel<T, Context>(dev_ctx, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/gpu/cast_kernel.cu b/paddle/pten/kernels/gpu/cast_kernel.cu
index e413a38d5e0..58adbcc6f35 100644
--- a/paddle/pten/kernels/gpu/cast_kernel.cu
+++ b/paddle/pten/kernels/gpu/cast_kernel.cu
@@ -85,11 +85,11 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
   CastCUDAKernelImplWithPtr(dev_ctx, in_data, out_data, size);
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastCUDAKernelImpl", ([&] {
                        CastCUDAKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
   PT_REGISTER_CTX_KERNEL(cast,             \
                          GPU,              \
                          ALL_LAYOUT,       \
-                         pten::Cast,       \
+                         pten::CastKernel, \
                          float,            \
                          double,           \
                          int,              \
diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
index d4fdb477633..4cfcad9149a 100644
--- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
       AsyncCopy(x, y);
       y->Resize(out_dims);
     } else {
-      pten::Cast(*dev_ctx, x, y->dtype(), y);
+      pten::CastKernel(*dev_ctx, x, y->dtype(), y);
     }
     return;
   }
diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h
index 5dddccd11f2..631ad7f6125 100644
--- a/paddle/pten/kernels/hybird/general/reduce_impl.h
+++ b/paddle/pten/kernels/hybird/general/reduce_impl.h
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
         pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
 
     // cast x tensor to out_dtype
-    pten::Cast(dev_ctx, x, out_dtype, &tmp_tensor);
+    pten::CastKernel(dev_ctx, x, out_dtype, &tmp_tensor);
 
     // do reduce sum
     PD_VISIT_ALL_TYPES(
-- 
GitLab
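
Usage note (illustrative, not part of the patch): after this change the header-only helpers pten::Empty and pten::Cast sit next to the kernel declarations, so a caller can create and cast a DenseTensor without constructing it by hand. The sketch below is a minimal, hypothetical example assuming the helper signatures shown in the diff above; UpcastToFloat64 is not a function in the Paddle codebase.

// Hypothetical usage sketch, assuming the helper signatures in this patch.
#include "paddle/pten/kernels/cast_kernel.h"   // pten::Cast / pten::CastKernel
#include "paddle/pten/kernels/empty_kernel.h"  // pten::Empty / pten::EmptyLike

namespace pten {

// Returns a float64 copy of `x`. Cast builds the output meta via
// CastInferMeta, allocates the result through pten::Empty, and then
// runs CastKernel on it, so no DenseTensor is constructed manually here.
template <typename T, typename Context>
DenseTensor UpcastToFloat64(const Context& dev_ctx, const DenseTensor& x) {
  return pten::Cast<T>(dev_ctx, x, DataType::FLOAT64);
}

}  // namespace pten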