Unverified commit 64538c8d, authored by Chen Weihang, committed by GitHub

[PTen] Move inner empty and cast api to kernel.h (#38587)

* move inner cast api to cast_kernel.h

* resolve conflict
Parent 59888bba
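
For readers skimming the diff below: this commit renames the out-parameter kernels (pten::Cast -> pten::CastKernel, pten::Empty -> pten::EmptyKernel, pten::EmptyLike -> pten::EmptyLikeKernel) and moves the tensor-returning inner wrappers Cast, Empty and EmptyLike into cast_kernel.h and empty_kernel.h. A minimal sketch of the new call-site spelling, assuming a CPU device context and placeholder variables dev_ctx, x, shape and out (illustrative only, not taken from the patch):

// Out-parameter kernels now carry the Kernel suffix; the Context template
// argument is deduced from dev_ctx where it is not spelled explicitly.
pten::CastKernel<float>(dev_ctx, x, pten::DataType::FLOAT64, &out);
pten::EmptyKernel<float, pten::CPUContext>(dev_ctx, shape, &out);
pten::EmptyLikeKernel<float, pten::CPUContext>(dev_ctx, &out);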
@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
         static_cast<framework::proto::VarType::Type>(out_dtype));
     // call new kernel
-    pten::Cast<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
+    pten::CastKernel<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
   }
 };
...
@@ -23,37 +23,6 @@ namespace pten {
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
-template <typename T, typename ContextT>
-DenseTensor Empty(const ContextT& dev_ctx,
-                  const ScalarArray& shape,
-                  DataType dtype = DataType::FLOAT32,
-                  Backend backend = Backend::CPU,  // Is backend needed here?
-                  DataLayout layout = DataLayout::NCHW) {
-  auto out_meta = CreateInferMeta(shape, dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Empty<T, ContextT>(dev_ctx, shape, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor EmptyLike(
-    const ContextT& dev_ctx,
-    const DenseTensor& x,
-    DataType dtype = DataType::UNDEFINED,
-    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
-    DataLayout layout = DataLayout::UNDEFINED) {
-  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  EmptyLike<T, ContextT>(dev_ctx, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Full(const ContextT& dev_ctx,
                  const ScalarArray& shape,
...
@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
   return dense_out;
 }
-template <typename T, typename ContextT>
-DenseTensor Cast(const ContextT& dev_ctx,
-                 const DenseTensor& x,
-                 DataType out_dtype) {
-  auto out_meta = CastInferMeta(x.meta(), out_dtype);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Cast<T, ContextT>(dev_ctx, x, out_dtype, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Reshape(const ContextT& dev_ctx,
                     const DenseTensor& x,
...
@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
 set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
+# remove this dep after removing fluid deps on tensor creation
+set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} pten_api_utils)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
 set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
...
@@ -15,13 +15,24 @@ limitations under the License. */
 #pragma once
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/kernels/empty_kernel.h"
 namespace pten {
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
           const DenseTensor& x,
           DataType out_dtype,
           DenseTensor* out);
+
+template <typename T, typename Context>
+DenseTensor Cast(const Context& dev_ctx,
+                 const DenseTensor& x,
+                 DataType out_dtype) {
+  auto out_meta = CastInferMeta(x.meta(), out_dtype);
+  auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
+  CastKernel<T, Context>(dev_ctx, x, out_dtype, &dense_out);
+  return dense_out;
+}
 }  // namespace pten
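
Usage-wise, the new header lets a caller obtain a casted tensor directly instead of allocating the output itself. A minimal sketch, assuming pten::CPUContext is available and x is a float DenseTensor (the function name and variables are illustrative, not part of the patch):

#include "paddle/pten/kernels/cast_kernel.h"

pten::DenseTensor CastToDouble(const pten::CPUContext& dev_ctx,
                               const pten::DenseTensor& x) {
  // The wrapper builds the output meta via CastInferMeta, creates the tensor
  // through pten::Empty, then runs CastKernel to fill it.
  return pten::Cast<float, pten::CPUContext>(dev_ctx, x, pten::DataType::FLOAT64);
}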
@@ -46,8 +46,8 @@ void CastKernelImpl(const CPUContext& dev_ctx,
                     CastOpTransformFunctor<InT, OutT>());
 }
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
           const DenseTensor& x,
           DataType out_dtype,
           DenseTensor* out) {
@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(cast,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Cast,
+                       pten::CastKernel,
                        float,
                        double,
                        int,
...
@@ -20,14 +20,14 @@ limitations under the License. */
 namespace pten {
 template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx,
+void EmptyKernel(const ContextT& dev_ctx,
            const ScalarArray& shape,
            DenseTensor* out) {
   out->Resize(paddle::framework::make_ddim(shape.GetData()));
 }
 template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
+void EmptyLikeKernel(const ContextT& dev_ctx, DenseTensor* out) {
   out->mutable_data<T>();
 }
@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
 PT_REGISTER_CTX_KERNEL(empty,
                        CPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        CPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
 PT_REGISTER_CTX_KERNEL(empty,
                        GPU,
                        ALL_LAYOUT,
-                       pten::Empty,
+                       pten::EmptyKernel,
                        bool,
                        int,
                        int64_t,
@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
 PT_REGISTER_CTX_KERNEL(empty_like,
                        GPU,
                        ALL_LAYOUT,
-                       pten::EmptyLike,
+                       pten::EmptyLikeKernel,
                        bool,
                        int,
                        int64_t,
...
@@ -14,15 +14,56 @@
 #pragma once
+#include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/infermeta/nary.h"
+#include "paddle/pten/infermeta/unary.h"
 namespace pten {
-template <typename T, typename ContextT>
-void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyKernel(const Context& dev_ctx,
+                 const ScalarArray& shape,
+                 DenseTensor* out);
 
-template <typename T, typename ContextT>
-void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
+template <typename T, typename Context>
+void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out);
+
+// TODO(chenweihang): the tensor creation method need to be replaced later,
+// all kernel api call Empty here instead of making tensor self
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(meta));
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor Empty(const Context& dev_ctx,
+                  const ScalarArray& shape,
+                  DataType dtype = DataType::FLOAT32,
+                  Backend backend = Backend::CPU,  // Is backend needed here?
+                  DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyKernel<T, Context>(dev_ctx, shape, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename Context>
+DenseTensor EmptyLike(
+    const Context& dev_ctx,
+    const DenseTensor& x,
+    DataType dtype = DataType::UNDEFINED,
+    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
+    DataLayout layout = DataLayout::UNDEFINED) {
+  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);
+  auto dense_out = Empty<T, Context>(dev_ctx, std::move(out_meta));
+  EmptyLikeKernel<T, Context>(dev_ctx, &dense_out);
+  return dense_out;
+}
 }  // namespace pten
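
For illustration, the moved creation wrappers can be called as below. A rough sketch assuming pten::CPUContext and assuming ScalarArray is constructible from a std::vector<int64_t>; names and shape values are arbitrary, not from the patch:

#include <cstdint>
#include <vector>

#include "paddle/pten/kernels/empty_kernel.h"

void MakeUninitializedTensors(const pten::CPUContext& dev_ctx,
                              const pten::DenseTensor& x) {
  // Uninitialized float tensor of shape [2, 3]; dtype/backend/layout fall back
  // to the defaults declared above (FLOAT32, CPU, NCHW).
  pten::ScalarArray shape(std::vector<int64_t>{2, 3});
  pten::DenseTensor a = pten::Empty<float, pten::CPUContext>(dev_ctx, shape);

  // Uninitialized tensor whose meta is copied from x via CreateLikeInferMeta.
  pten::DenseTensor b = pten::EmptyLike<float, pten::CPUContext>(dev_ctx, x);
}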
@@ -85,8 +85,8 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
   CastCUDAKernelImplWithPtr(dev_ctx, in_data, out_data, size);
 }
-template <typename T, typename ContextT>
-void Cast(const ContextT& dev_ctx,
+template <typename T, typename Context>
+void CastKernel(const Context& dev_ctx,
           const DenseTensor& x,
           DataType out_dtype,
           DenseTensor* out) {
@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
 PT_REGISTER_CTX_KERNEL(cast,              \
                        GPU,               \
                        ALL_LAYOUT,        \
-                       pten::Cast,        \
+                       pten::CastKernel,  \
                        float,             \
                        double,            \
                        int,               \
...
@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
       AsyncCopy(x, y);
       y->Resize(out_dims);
     } else {
-      pten::Cast<Tx>(*dev_ctx, x, y->dtype(), y);
+      pten::CastKernel<Tx>(*dev_ctx, x, y->dtype(), y);
     }
     return;
   }
...
@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
       pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
   // cast x tensor to out_dtype
-  pten::Cast<T, DeviceContext>(dev_ctx, x, out_dtype, &tmp_tensor);
+  pten::CastKernel<T, DeviceContext>(dev_ctx, x, out_dtype, &tmp_tensor);
   // do reduce sum
   PD_VISIT_ALL_TYPES(
...