diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h
index 466adfa5f3672853dd3b9e8aa6f1cfae3b0b41aa..4f7fe2854ae874b1fa309074c31fa10cde013850 100644
--- a/paddle/fluid/operators/cast_op.h
+++ b/paddle/fluid/operators/cast_op.h
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<T> {
         static_cast<framework::proto::VarType::Type>(out_dtype));
 
     // call new kernel
-    pten::Cast<T>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
+    pten::CastKernel<T>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
   }
 };
 
diff --git a/paddle/pten/include/creation.h b/paddle/pten/include/creation.h
index 73c5999ca9247ac3d29c3cb852afd7bec8d3fe86..fa5bd49ca3026f26afdcbb67ac6f50036eded6cc 100644
--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -23,37 +23,6 @@ namespace pten {
 
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
-template <typename T, typename ContextT>
-DenseTensor Empty(const ContextT& dev_ctx,
-                  const ScalarArray& shape,
-                  DataType dtype = DataType::FLOAT32,
-                  Backend backend = Backend::CPU,  // Is backend needed here?
-                  DataLayout layout = DataLayout::NCHW) {
-  auto out_meta = CreateInferMeta(shape, dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Empty<T>(dev_ctx, shape, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor EmptyLike(
-    const ContextT& dev_ctx,
-    const DenseTensor& x,
-    DataType dtype = DataType::UNDEFINED,
-    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
-    DataLayout layout = DataLayout::UNDEFINED) {
-  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  EmptyLike<T>(dev_ctx, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Full(const ContextT& dev_ctx,
                  const ScalarArray& shape,
diff --git a/paddle/pten/include/manipulation.h b/paddle/pten/include/manipulation.h
index e317964dd1e236023ac5bc41c981ea64b84fcbcc..a8625e52f561820b7e300f079f6ac8b3ed16ac59 100644
--- a/paddle/pten/include/manipulation.h
+++ b/paddle/pten/include/manipulation.h
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
   return dense_out;
 }
 
-template <typename T, typename ContextT>
-DenseTensor Cast(const ContextT& dev_ctx,
-                 const DenseTensor& x,
-                 DataType out_dtype) {
-  auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Cast<T>(dev_ctx, x, out_dtype, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt
index 4c705767f4c2ff298ce648a63fdebb78d0f69f18..b76d408f89e85ed16372ee064b872059216c526d 100644
--- a/paddle/pten/kernels/CMakeLists.txt
+++ b/paddle/pten/kernels/CMakeLists.txt
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
 
 set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
+# remove this dep after removing fluid deps on tensor creation
+set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} pten_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
 
 set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
diff --git a/paddle/pten/kernels/cast_kernel.h b/paddle/pten/kernels/cast_kernel.h
index 5243fa05fac154b16f85c80b9305424c3a43f2cd..8fdce9cda6f1d1c2f4b3ea6bb6a8c0fd592486b8 100644
--- a/paddle/pten/kernels/cast_kernel.h
+++ b/paddle/pten/kernels/cast_kernel.h
@@ -15,13 +15,24 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/kernels/empty_kernel.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out);
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out);
+
+template <typename T, typename Context>
+DenseTensor Cast(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 DataType out_dtype) {
+  auto out_meta = CastInferMeta(x.meta(), out_dtype);
+  auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
+  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/cpu/cast_kernel.cc b/paddle/pten/kernels/cpu/cast_kernel.cc
index a9964d99eef07e637d2b857b32c13bfb2f6d5875..c6736cdd1bcf0336dc76cffdafd910fd8e49b924 100644
--- a/paddle/pten/kernels/cpu/cast_kernel.cc
+++ b/paddle/pten/kernels/cpu/cast_kernel.cc
@@ -46,11 +46,11 @@ void CastKernelImpl(const CPUContext& dev_ctx,
         CastOpTransformFunctor<InT, OutT>());
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
                        CastKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(cast,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Cast,
+                       pten::CastKernel,
                        float,
                        double,
                        int,
diff --git a/paddle/pten/kernels/empty_kernel.cc b/paddle/pten/kernels/empty_kernel.cc
index 4c6d8706e0ff3e6edb03e68158b339f561ca543a..94886806bccf3f0d063ee7463dbe29a1aca076b9 100644
--- a/paddle/pten/kernels/empty_kernel.cc
+++ b/paddle/pten/kernels/empty_kernel.cc
@@ -20,14 +20,14 @@ limitations under the License. */
 namespace pten {
 
 template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx,
-           const ScalarArray& shape,
-           DenseTensor* out) {
+void EmptyKernel(const ContextT& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
 }
 
 template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
+void EmptyLikeKernel(const ContextT& dev_ctx, DenseTensor* out) {
   out->mutable_data<T>();
 }
 
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
 PT_REGISTER_CTX_KERNEL(empty,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        CPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
 PT_REGISTER_CTX_KERNEL(empty,
                        GPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        GPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
diff --git a/paddle/pten/kernels/empty_kernel.h b/paddle/pten/kernels/empty_kernel.h
index 7aa5a27765a198e071d558d51a6b375e5eb54498..3249526805bfb013c3369ab7fc1f8cbbe76fd201 100644
--- a/paddle/pten/kernels/empty_kernel.h
+++ b/paddle/pten/kernels/empty_kernel.h
@@ -14,15 +14,56 @@
 
 #pragma once
 
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/infermeta/nary.h"
+#include "paddle/pten/infermeta/unary.h"
 
 namespace pten {
 
-template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyKernel(const Context& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out);
 
-template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out);
+
+// TODO(chenweihang): the tensor creation method need to be replaced later,
+// all kernel api call Empty here instead of making tensor self
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(meta));
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx,
+                  const ScalarArray& shape,
+                  DataType dtype = DataType::FLOAT32,
+                  Backend backend = Backend::CPU,  // Is backend needed here?
+                  DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyKernel<T, Context>(dev_ctx, shape, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor EmptyLike(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    DataType dtype = DataType::UNDEFINED,
+    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
+    DataLayout layout = DataLayout::UNDEFINED) {
+  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyLikeKernel<T, Context>(dev_ctx, &dense_out);
+  return dense_out;
+}
 
 }  // namespace pten
diff --git a/paddle/pten/kernels/gpu/cast_kernel.cu b/paddle/pten/kernels/gpu/cast_kernel.cu
index e413a38d5e01e39e58674d326ecf661708fafae5..58adbcc6f3599df56301eb9668467df458785eee 100644
--- a/paddle/pten/kernels/gpu/cast_kernel.cu
+++ b/paddle/pten/kernels/gpu/cast_kernel.cu
@@ -85,11 +85,11 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
   CastCUDAKernelImplWithPtr(dev_ctx, in_data, out_data, size);
 }
 
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
-          const DenseTensor& x,
-          DataType out_dtype,
-          DenseTensor* out) {
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
+                const DenseTensor& x,
+                DataType out_dtype,
+                DenseTensor* out) {
   PD_VISIT_ALL_TYPES(out_dtype, "CastCUDAKernelImpl", ([&] {
                        CastCUDAKernelImpl<T, data_t>(dev_ctx, x, out);
                      }));
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
   PT_REGISTER_CTX_KERNEL(cast,             \
                          GPU,              \
                          ALL_LAYOUT,       \
-                         pten::Cast,       \
+                         pten::CastKernel, \
                          float,            \
                          double,           \
                          int,              \
diff --git a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
index d4fdb477633a3bccaaf62e701c40573dd9a181e4..4cfcad9149a3f0ba4d14704b8c92c6717062c219 100644
--- a/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
+++ b/paddle/pten/kernels/hybird/cuda/reduce/reduce_cuda_impl.h
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
       AsyncCopy(x, y);
       y->Resize(out_dims);
     } else {
-      pten::Cast<Tx>(*dev_ctx, x, y->dtype(), y);
+      pten::CastKernel<Tx>(*dev_ctx, x, y->dtype(), y);
     }
     return;
   }
diff --git a/paddle/pten/kernels/hybird/general/reduce_impl.h b/paddle/pten/kernels/hybird/general/reduce_impl.h
index 5dddccd11f2ef066bc16111e979a8dcadef339fb..631ad7f6125bc1bcf199c3836ddf8d20fa6f90df 100644
--- a/paddle/pten/kernels/hybird/general/reduce_impl.h
+++ b/paddle/pten/kernels/hybird/general/reduce_impl.h
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
         pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
 
     // cast x tensor to out_dtype
-    pten::Cast<T>(dev_ctx, x, out_dtype, &tmp_tensor);
+    pten::CastKernel<T>(dev_ctx, x, out_dtype, &tmp_tensor);
 
     // do reduce sum
     PD_VISIT_ALL_TYPES(
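
Not part of the patch: a rough usage sketch of the renamed API. After this change, pten::Cast<T, Context> infers the output meta, allocates the result through pten::Empty, and forwards to pten::CastKernel, so callers no longer build the output DenseTensor themselves. The helper function ExampleCastToDouble, the CPUContext header path, and the float-to-FLOAT64 choice below are illustrative assumptions; the signatures follow the headers touched above.

#include "paddle/pten/backends/cpu/cpu_context.h"
#include "paddle/pten/kernels/cast_kernel.h"

namespace example {

// Cast a float tensor to float64 with the post-rename API: Cast<T, Context>
// allocates the output via pten::Empty and dispatches to pten::CastKernel,
// which fills the tensor in place and returns it by value.
pten::DenseTensor ExampleCastToDouble(const pten::CPUContext& dev_ctx,
                                      const pten::DenseTensor& x) {
  return pten::Cast<float, pten::CPUContext>(dev_ctx, x,
                                             pten::DataType::FLOAT64);
}

}  // namespace example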