未验证 提交 64538c8d 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Move inner empty and cast api to kernel.h (#38587)

* move inner cast api to cast_kernel.h

* resolve conflict
上级 59888bba
......@@ -71,7 +71,7 @@ class CastOpKernel : public framework::OpKernel<InT> {
static_cast<framework::proto::VarType::Type>(out_dtype));
// call new kernel
pten::Cast<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
pten::CastKernel<InT>(dev_ctx, *pt_x.get(), pt_out_dtype, pt_out.get());
}
};
......
......@@ -23,37 +23,6 @@ namespace pten {
// TODO(YuanRisheng) This function name should be same as User API name.
// TODO(zyfncg) Automatic code generation
// Creates a DenseTensor with the requested shape/dtype/layout, backed by
// shared storage on the device's place, and lets the in-place Empty kernel
// size it. NOTE: `backend` is accepted but not used in this body.
template <typename T, typename ContextT>
DenseTensor Empty(const ContextT& dev_ctx,
                  const ScalarArray& shape,
                  DataType dtype = DataType::FLOAT32,
                  Backend backend = Backend::CPU, // Is backend needed here?
                  DataLayout layout = DataLayout::NCHW) {
  auto meta = CreateInferMeta(shape, dtype, layout);
  auto storage = pten::make_intrusive<paddle::experimental::SharedStorage>(
      dev_ctx.GetPlace());
  pten::DenseTensor result(std::move(storage), std::move(meta));
  Empty<T, ContextT>(dev_ctx, shape, &result);
  return result;
}
// Creates an uninitialized tensor whose metadata is derived from `x`
// (dtype/layout may be overridden when not UNDEFINED), then delegates the
// allocation to the in-place EmptyLike kernel.
// NOTE: `backend` is accepted but not used in this body.
template <typename T, typename ContextT>
DenseTensor EmptyLike(
    const ContextT& dev_ctx,
    const DenseTensor& x,
    DataType dtype = DataType::UNDEFINED,
    Backend backend = Backend::UNDEFINED, // Is backend needed here?
    DataLayout layout = DataLayout::UNDEFINED) {
  auto meta = CreateLikeInferMeta(x.meta(), dtype, layout);
  auto storage = pten::make_intrusive<paddle::experimental::SharedStorage>(
      dev_ctx.GetPlace());
  pten::DenseTensor result(std::move(storage), std::move(meta));
  EmptyLike<T, ContextT>(dev_ctx, &result);
  return result;
}
template <typename T, typename ContextT>
DenseTensor Full(const ContextT& dev_ctx,
const ScalarArray& shape,
......
......@@ -37,19 +37,6 @@ DenseTensor Flatten(const ContextT& dev_ctx,
return dense_out;
}
// Returns a new tensor holding `x` converted to `out_dtype`. The output
// metadata comes from the cast infer-meta rule; the element-wise work is
// done by the in-place Cast kernel.
template <typename T, typename ContextT>
DenseTensor Cast(const ContextT& dev_ctx,
                 const DenseTensor& x,
                 DataType out_dtype) {
  auto meta = CastInferMeta(x.meta(), out_dtype);
  auto storage = pten::make_intrusive<paddle::experimental::SharedStorage>(
      dev_ctx.GetPlace());
  pten::DenseTensor result(std::move(storage), std::move(meta));
  Cast<T, ContextT>(dev_ctx, x, out_dtype, &result);
  return result;
}
template <typename T, typename ContextT>
DenseTensor Reshape(const ContextT& dev_ctx,
const DenseTensor& x,
......
......@@ -26,6 +26,8 @@ set_property(GLOBAL PROPERTY PTEN_KERNELS "")
set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} pten_api_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
......
......@@ -15,13 +15,24 @@ limitations under the License. */
#pragma once
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/kernels/empty_kernel.h"
namespace pten {
// Declaration of the cast kernel under its old name: converts the elements
// of `x` to `out_dtype` and writes them into `out`.
// NOTE(review): this commit renames the implementations to CastKernel
// (declared below); this looks like the pre-rename form — confirm it can be
// removed once all callers migrate.
template <typename T, typename ContextT>
void Cast(const ContextT& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out);
// Element-wise cast: converts `x` (element type T) to `out_dtype`, storing
// the result in `out`. The CPU/GPU implementations dispatch on the output
// dtype via PD_VISIT_ALL_TYPES.
template <typename T, typename Context>
void CastKernel(const Context& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out);
// Convenience wrapper around CastKernel: infers the output metadata from
// `x` and `out_dtype`, allocates the result via Empty, and returns it.
template <typename T, typename Context>
DenseTensor Cast(const Context& dev_ctx,
                 const DenseTensor& x,
                 DataType out_dtype) {
  auto meta = CastInferMeta(x.meta(), out_dtype);
  auto result = pten::Empty<T, Context>(dev_ctx, std::move(meta));
  CastKernel<T, Context>(dev_ctx, x, out_dtype, &result);
  return result;
}
} // namespace pten
......@@ -46,11 +46,11 @@ void CastKernelImpl(const CPUContext& dev_ctx,
CastOpTransformFunctor<InT, OutT>());
}
template <typename T, typename ContextT>
void Cast(const ContextT& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out) {
template <typename T, typename Context>
void CastKernel(const Context& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out) {
PD_VISIT_ALL_TYPES(out_dtype, "CastKernelImpl", ([&] {
CastKernelImpl<T, data_t>(dev_ctx, x, out);
}));
......@@ -61,7 +61,7 @@ void Cast(const ContextT& dev_ctx,
PT_REGISTER_CTX_KERNEL(cast,
CPU,
ALL_LAYOUT,
pten::Cast,
pten::CastKernel,
float,
double,
int,
......
......@@ -20,14 +20,14 @@ limitations under the License. */
namespace pten {
template <typename T, typename ContextT>
void Empty(const ContextT& dev_ctx,
const ScalarArray& shape,
DenseTensor* out) {
void EmptyKernel(const ContextT& dev_ctx,
const ScalarArray& shape,
DenseTensor* out) {
out->Resize(paddle::framework::make_ddim(shape.GetData()));
}
template <typename T, typename ContextT>
void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
void EmptyLikeKernel(const ContextT& dev_ctx, DenseTensor* out) {
out->mutable_data<T>();
}
......@@ -36,7 +36,7 @@ void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
PT_REGISTER_CTX_KERNEL(empty,
CPU,
ALL_LAYOUT,
pten::Empty,
pten::EmptyKernel,
bool,
int,
int64_t,
......@@ -47,7 +47,7 @@ PT_REGISTER_CTX_KERNEL(empty,
PT_REGISTER_CTX_KERNEL(empty_like,
CPU,
ALL_LAYOUT,
pten::EmptyLike,
pten::EmptyLikeKernel,
bool,
int,
int64_t,
......@@ -59,7 +59,7 @@ PT_REGISTER_CTX_KERNEL(empty_like,
PT_REGISTER_CTX_KERNEL(empty,
GPU,
ALL_LAYOUT,
pten::Empty,
pten::EmptyKernel,
bool,
int,
int64_t,
......@@ -70,7 +70,7 @@ PT_REGISTER_CTX_KERNEL(empty,
PT_REGISTER_CTX_KERNEL(empty_like,
GPU,
ALL_LAYOUT,
pten::EmptyLike,
pten::EmptyLikeKernel,
bool,
int,
int64_t,
......
......@@ -14,15 +14,56 @@
#pragma once
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/infermeta/nary.h"
#include "paddle/pten/infermeta/unary.h"
namespace pten {
// NOTE(review): Empty/EmptyLike below are the pre-rename kernel
// declarations; EmptyKernel/EmptyLikeKernel are the renamed ones this
// commit registers under "empty"/"empty_like".
template <typename T, typename ContextT>
void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
// Resizes `out` to `shape`; no data is initialized.
template <typename T, typename Context>
void EmptyKernel(const Context& dev_ctx,
const ScalarArray& shape,
DenseTensor* out);
template <typename T, typename ContextT>
void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
// Allocates `out`'s buffer via mutable_data<T>(); the shape is taken from
// the metadata already attached to `out`.
template <typename T, typename Context>
void EmptyLikeKernel(const Context& dev_ctx, DenseTensor* out);
// TODO(chenweihang): the tensor creation method need to be replaced later,
// all kernel api call Empty here instead of making tensor self
// Builds an (unallocated) DenseTensor from a ready-made meta, backed by
// shared storage on the device's place.
template <typename T, typename Context>
DenseTensor Empty(const Context& dev_ctx, DenseTensorMeta&& meta) {
  auto storage = pten::make_intrusive<paddle::experimental::SharedStorage>(
      dev_ctx.GetPlace());
  return pten::DenseTensor(std::move(storage), std::move(meta));
}
// Creates an uninitialized tensor described by `shape`/`dtype`/`layout`.
// The metadata comes from the infer-meta rule; EmptyKernel then sizes the
// output in place. NOTE: `backend` is accepted but not used in this body.
template <typename T, typename Context>
DenseTensor Empty(const Context& dev_ctx,
                  const ScalarArray& shape,
                  DataType dtype = DataType::FLOAT32,
                  Backend backend = Backend::CPU, // Is backend needed here?
                  DataLayout layout = DataLayout::NCHW) {
  auto meta = CreateInferMeta(shape, dtype, layout);
  auto result = Empty<T, Context>(dev_ctx, std::move(meta));
  EmptyKernel<T, Context>(dev_ctx, shape, &result);
  return result;
}
// Creates an uninitialized tensor whose metadata mirrors `x` (dtype/layout
// may be overridden when not UNDEFINED); EmptyLikeKernel performs the
// allocation. NOTE: `backend` is accepted but not used in this body.
template <typename T, typename Context>
DenseTensor EmptyLike(
    const Context& dev_ctx,
    const DenseTensor& x,
    DataType dtype = DataType::UNDEFINED,
    Backend backend = Backend::UNDEFINED, // Is backend needed here?
    DataLayout layout = DataLayout::UNDEFINED) {
  auto meta = CreateLikeInferMeta(x.meta(), dtype, layout);
  auto result = Empty<T, Context>(dev_ctx, std::move(meta));
  EmptyLikeKernel<T, Context>(dev_ctx, &result);
  return result;
}
} // namespace pten
......@@ -85,11 +85,11 @@ void CastCUDAKernelImpl(const GPUContext& dev_ctx,
CastCUDAKernelImplWithPtr(dev_ctx, in_data, out_data, size);
}
template <typename T, typename ContextT>
void Cast(const ContextT& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out) {
template <typename T, typename Context>
void CastKernel(const Context& dev_ctx,
const DenseTensor& x,
DataType out_dtype,
DenseTensor* out) {
PD_VISIT_ALL_TYPES(out_dtype, "CastCUDAKernelImpl", ([&] {
CastCUDAKernelImpl<T, data_t>(dev_ctx, x, out);
}));
......@@ -101,7 +101,7 @@ void Cast(const ContextT& dev_ctx,
PT_REGISTER_CTX_KERNEL(cast, \
GPU, \
ALL_LAYOUT, \
pten::Cast, \
pten::CastKernel, \
float, \
double, \
int, \
......
......@@ -1112,7 +1112,7 @@ void TensorReduceFunctorImpl(const pten::DenseTensor& x,
AsyncCopy(x, y);
y->Resize(out_dims);
} else {
pten::Cast<Tx>(*dev_ctx, x, y->dtype(), y);
pten::CastKernel<Tx>(*dev_ctx, x, y->dtype(), y);
}
return;
}
......
......@@ -60,7 +60,7 @@ void Reduce(const DeviceContext& dev_ctx,
pten::DenseTensorMeta(out_dtype, x.dims(), x.layout()));
// cast x tensor to out_dtype
pten::Cast<T, DeviceContext>(dev_ctx, x, out_dtype, &tmp_tensor);
pten::CastKernel<T, DeviceContext>(dev_ctx, x, out_dtype, &tmp_tensor);
// do reduce sum
PD_VISIT_ALL_TYPES(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册