Unverified commit be874c08, authored by: Y YuanRisheng, committed by: GitHub

[Pten]Modify registered kernel name (#38109)

* Reduce reshape kernel functions in pten

* delete notes

* fix bugs when compiling

* modify register name

* fix compile bugs
Parent 047ee26c
......@@ -68,8 +68,7 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
pten::ElementwiseAdd<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
}
};
......
......@@ -62,8 +62,7 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
pten::ElementwiseDiv<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
pten::Divide<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
}
};
......
......@@ -57,8 +57,7 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
pten::ElementwiseMul<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
pten::Multiply<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"X's type[%s] is not supported by elementwise_op. X's type should be "
......
......@@ -129,8 +129,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
pten::ElementwiseMul<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
pten::Multiply<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"X's type[%s] is not supported by elementwise_op. X's type should be "
......
......@@ -56,8 +56,7 @@ class ElementwiseSubKernel : public framework::OpKernel<T> {
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
pten::ElementwiseSub<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
pten::Subtract<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
}
};
......
......@@ -65,7 +65,7 @@ class FillAnyLikeKernel : public framework::OpKernel<T> {
const auto& dev_ctx = context.template device_context<DeviceContext>();
// call new kernel
pten::FillAnyLike<T>(dev_ctx, value, pt_out.get());
pten::FullLike<T>(dev_ctx, value, pt_out.get());
}
};
......
......@@ -24,7 +24,7 @@ namespace pten {
// TODO(YuanRisheng) This function name should be same as User API name.
// TODO(zyfncg) Automatic code generation
template <typename T, typename ContextT>
DenseTensor FillAnyLike(
DenseTensor FullLike(
const ContextT& dev_ctx,
const DenseTensor& x,
const Scalar& val,
......@@ -36,7 +36,7 @@ DenseTensor FillAnyLike(
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
FillAnyLike<T>(dev_ctx, val, &dense_out);
FullLike<T>(dev_ctx, val, &dense_out);
return dense_out;
}
......
......@@ -100,7 +100,7 @@ DenseTensor Add(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseAdd<T>(dev_ctx, x, y, axis, &dense_out);
Add<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -114,7 +114,7 @@ DenseTensor Subtract(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseSub<T>(dev_ctx, x, y, axis, &dense_out);
Subtract<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -128,7 +128,7 @@ DenseTensor Divide(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
Divide<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
......@@ -142,7 +142,7 @@ DenseTensor Multiply(const ContextT& dev_ctx,
pten::make_intrusive<paddle::experimental::SharedStorage>(
dev_ctx.GetPlace()),
std::move(out_meta));
ElementwiseMul<T>(dev_ctx, x, y, axis, &dense_out);
Multiply<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
} // namespace pten
......@@ -20,9 +20,7 @@
namespace pten {
template <typename T>
void FillAnyLike(const CPUContext& dev_ctx,
const Scalar& val,
DenseTensor* out) {
void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out) {
auto value = val.to<float>();
using CommonType = typename std::common_type<
float,
......@@ -51,10 +49,10 @@ void FillAnyLike(const CPUContext& dev_ctx,
}
template <typename T>
void FillConstant(const CPUContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out) {
void Full(const CPUContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out) {
out->Resize(paddle::framework::make_ddim(shape.GetData()));
eigen::fill<CPUContext, T>(dev_ctx, out, val.to<T>());
}
......@@ -64,7 +62,7 @@ void FillConstant(const CPUContext& dev_ctx,
PT_REGISTER_KERNEL(full_like,
CPU,
ALL_LAYOUT,
pten::FillAnyLike,
pten::FullLike,
float,
double,
int,
......@@ -75,7 +73,7 @@ PT_REGISTER_KERNEL(full_like,
PT_REGISTER_KERNEL(full,
CPU,
ALL_LAYOUT,
pten::FillConstant,
pten::Full,
float,
double,
uint8_t,
......
......@@ -25,14 +25,12 @@ namespace pten {
using CPUContext = paddle::platform::CPUDeviceContext;
template <typename T>
void FillAnyLike(const CPUContext& dev_ctx,
const Scalar& val,
DenseTensor* out);
void FullLike(const CPUContext& dev_ctx, const Scalar& val, DenseTensor* out);
template <typename T>
void FillConstant(const CPUContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out);
void Full(const CPUContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out);
} // namespace pten
......@@ -59,25 +59,25 @@ void Scale(const CPUContext& dev_ctx,
}
template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
void Divide(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
out->mutable_data<T>();
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<general::SameDimsDivFunctor<CPUContext, T>>()(
SameDimsElementwiseCompute<general::SameDimsDivideFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
} else {
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
ElementwiseCompute<general::DivFunctor<T>, T>(
dev_ctx, x, y, axis, general::DivFunctor<T>(), out);
ElementwiseCompute<general::DivideFunctor<T>, T>(
dev_ctx, x, y, axis, general::DivideFunctor<T>(), out);
} else {
ElementwiseCompute<general::InverseDivFunctor<T>, T>(
dev_ctx, x, y, axis, general::InverseDivFunctor<T>(), out);
ElementwiseCompute<general::InverseDivideFunctor<T>, T>(
dev_ctx, x, y, axis, general::InverseDivideFunctor<T>(), out);
}
}
}
......@@ -95,14 +95,14 @@ void Sum(const CPUContext& dev_ctx,
dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
}
// Create the definition of ElementwiseAdd
// Create the definition of Add
DEFINE_CPU_ELEMENTWISE_OP(Add)
// Create the definition of ElementwiseSub
DEFINE_CPU_ELEMENTWISE_OP(Sub)
// Create the definition of Subtract
DEFINE_CPU_ELEMENTWISE_OP(Subtract)
// Create the definition of ElementwiseMul
DEFINE_CPU_ELEMENTWISE_OP(Mul)
// Create the definition of Multiply
DEFINE_CPU_ELEMENTWISE_OP(Multiply)
} // namespace pten
......@@ -128,7 +128,7 @@ PT_REGISTER_KERNEL(scale,
PT_REGISTER_KERNEL(add,
CPU,
ALL_LAYOUT,
pten::ElementwiseAdd,
pten::Add,
float,
double,
int,
......@@ -138,7 +138,7 @@ PT_REGISTER_KERNEL(add,
PT_REGISTER_KERNEL(subtract,
CPU,
ALL_LAYOUT,
pten::ElementwiseSub,
pten::Subtract,
float,
double,
int,
......@@ -148,7 +148,7 @@ PT_REGISTER_KERNEL(subtract,
PT_REGISTER_KERNEL(divide,
CPU,
ALL_LAYOUT,
pten::ElementwiseDiv,
pten::Divide,
float,
double,
int,
......@@ -158,7 +158,7 @@ PT_REGISTER_KERNEL(divide,
PT_REGISTER_KERNEL(multiply,
CPU,
ALL_LAYOUT,
pten::ElementwiseMul,
pten::Multiply,
float,
double,
int,
......
......@@ -47,32 +47,32 @@ void Scale(const CPUContext& dev_ctx,
DenseTensor* out);
template <typename T>
void ElementwiseAdd(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Add(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseSub(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Subtract(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Divide(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseMul(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Multiply(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Sum(const CPUContext& dev_ctx,
const DenseTensor& x,
......@@ -87,11 +87,11 @@ void Sum(const CPUContext& dev_ctx,
#define DEFINE_CPU_ELEMENTWISE_OP(name) \
template <typename T> \
void Elementwise##name(const CPUContext& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
void name(const CPUContext& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
out->mutable_data<T>(); \
if (x.dims() == y.dims()) { \
SameDimsElementwiseCompute< \
......
......@@ -20,9 +20,7 @@
namespace pten {
template <typename T>
void FillAnyLike(const CUDAContext& dev_ctx,
const Scalar& val,
DenseTensor* out) {
void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out) {
auto value = val.to<float>();
using CommonType = typename std::common_type<
float,
......@@ -52,10 +50,10 @@ void FillAnyLike(const CUDAContext& dev_ctx,
}
template <typename T>
void FillConstant(const CUDAContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out) {
void Full(const CUDAContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out) {
out->Resize(paddle::framework::make_ddim(shape.GetData()));
eigen::fill<CUDAContext, T>(dev_ctx, out, val.to<T>());
}
......@@ -65,7 +63,7 @@ void FillConstant(const CUDAContext& dev_ctx,
PT_REGISTER_KERNEL(full_like,
CUDA,
ALL_LAYOUT,
pten::FillAnyLike,
pten::FullLike,
float,
double,
int,
......@@ -76,7 +74,7 @@ PT_REGISTER_KERNEL(full_like,
PT_REGISTER_KERNEL(full,
CUDA,
ALL_LAYOUT,
pten::FillConstant,
pten::Full,
float,
double,
uint8_t,
......
......@@ -28,15 +28,13 @@ namespace pten {
using CUDAContext = paddle::platform::CUDADeviceContext;
template <typename T>
void FillAnyLike(const CUDAContext& dev_ctx,
const Scalar& val,
DenseTensor* out);
void FullLike(const CUDAContext& dev_ctx, const Scalar& val, DenseTensor* out);
template <typename T>
void FillConstant(const CUDAContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out);
void Full(const CUDAContext& dev_ctx,
const ScalarArray& shape,
const Scalar& val,
DenseTensor* out);
} // namespace pten
......
......@@ -87,14 +87,14 @@ void Scale(const CUDAContext& dev_ctx,
dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
}
// Create the definition of ElementwiseAdd
// Create the definition of Add
DEFINE_CUDA_ELEMENTWISE_OP(Add)
// Create the definition of ElementwiseSub
DEFINE_CUDA_ELEMENTWISE_OP(Sub)
// Create the definition of ElementwiseMul
DEFINE_CUDA_ELEMENTWISE_OP(Mul)
// Create the definition of ElementwiseDiv
DEFINE_CUDA_ELEMENTWISE_OP(Div)
// Create the definition of Subtract
DEFINE_CUDA_ELEMENTWISE_OP(Subtract)
// Create the definition of Multiply
DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
// Create the definition of Divide
DEFINE_CUDA_ELEMENTWISE_OP(Divide)
template <typename T>
void Sum(const CUDAContext& dev_ctx,
......@@ -133,7 +133,7 @@ PT_REGISTER_KERNEL(scale,
PT_REGISTER_KERNEL(add,
CUDA,
ALL_LAYOUT,
pten::ElementwiseAdd,
pten::Add,
float,
double,
int,
......@@ -144,7 +144,7 @@ PT_REGISTER_KERNEL(add,
PT_REGISTER_KERNEL(subtract,
CUDA,
ALL_LAYOUT,
pten::ElementwiseSub,
pten::Subtract,
float,
double,
int,
......@@ -155,7 +155,7 @@ PT_REGISTER_KERNEL(subtract,
PT_REGISTER_KERNEL(divide,
CUDA,
ALL_LAYOUT,
pten::ElementwiseDiv,
pten::Divide,
float,
double,
int,
......@@ -166,7 +166,7 @@ PT_REGISTER_KERNEL(divide,
PT_REGISTER_KERNEL(multiply,
CUDA,
ALL_LAYOUT,
pten::ElementwiseMul,
pten::Multiply,
float,
double,
int,
......
......@@ -49,32 +49,32 @@ void Scale(const CUDAContext& dev_ctx,
DenseTensor* out);
template <typename T>
void ElementwiseAdd(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Add(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseSub(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Subtract(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseDiv(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Divide(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void ElementwiseMul(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
void Multiply(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T>
void Sum(const CUDAContext& dev_ctx,
......@@ -90,11 +90,11 @@ void Sum(const CUDAContext& dev_ctx,
#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
template <typename T> \
void Elementwise##name(const CUDAContext& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
void name(const CUDAContext& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
std::vector<const DenseTensor*> inputs; \
std::vector<DenseTensor*> outputs; \
inputs.emplace_back(&x); \
......
......@@ -72,7 +72,7 @@ struct InverseAddFunctor {
// Subtract
template <typename DevCtx, typename T, class Enable = void>
struct SameDimsSubFunctor {
struct SameDimsSubtractFunctor {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
......@@ -80,7 +80,7 @@ struct SameDimsSubFunctor {
};
template <typename DevCtx, typename T>
struct SameDimsSubFunctor<
struct SameDimsSubtractFunctor<
DevCtx,
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
......@@ -93,7 +93,7 @@ struct SameDimsSubFunctor<
};
template <typename DevCtx, typename T>
struct SameDimsSubFunctor<
struct SameDimsSubtractFunctor<
DevCtx,
T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
......@@ -106,17 +106,17 @@ struct SameDimsSubFunctor<
};
template <typename T>
struct SubFunctor {
struct SubtractFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a - b; }
};
template <typename T>
struct InverseSubFunctor {
struct InverseSubtractFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
};
// Divide
template <typename DevCtx, typename T, class Enable = void>
struct SameDimsDivFunctor {
struct SameDimsDivideFunctor {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
......@@ -124,7 +124,7 @@ struct SameDimsDivFunctor {
};
template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
struct SameDimsDivideFunctor<
DevCtx,
T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
......@@ -133,12 +133,13 @@ struct SameDimsDivFunctor<
const DenseTensor& y,
DenseTensor* z) {
paddle::platform::errors::InvalidArgument(
"If use SameDimsDivFunctor, template args(T) must be floating point. ");
"If use SameDimsDivideFunctor, template args(T) must be floating "
"point. ");
}
};
template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
struct SameDimsDivideFunctor<
DevCtx,
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
......@@ -155,13 +156,14 @@ struct SameDimsDivFunctor<
"(floor) divide. Please check the input value."
template <typename T, typename Enable = void>
struct DivFunctor {
struct DivideFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};
template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
struct DivideFunctor<
T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
// For int32/int64, need to check whether the divison is zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
......@@ -170,13 +172,13 @@ struct DivFunctor<T,
};
template <typename T, typename Enable = void>
struct InverseDivFunctor {
struct InverseDivideFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
};
// Multiply
template <typename DevCtx, typename T, class Enable = void>
struct SameDimsMulFunctor {
struct SameDimsMultiplyFunctor {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
......@@ -184,7 +186,7 @@ struct SameDimsMulFunctor {
};
template <typename DevCtx, typename T>
struct SameDimsMulFunctor<
struct SameDimsMultiplyFunctor<
DevCtx,
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
......@@ -197,7 +199,7 @@ struct SameDimsMulFunctor<
};
template <typename DevCtx, typename T>
struct SameDimsMulFunctor<
struct SameDimsMultiplyFunctor<
DevCtx,
T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
......@@ -209,11 +211,11 @@ struct SameDimsMulFunctor<
}
};
template <typename T>
struct MulFunctor {
struct MultiplyFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; }
};
template <typename T>
struct InverseMulFunctor {
struct InverseMultiplyFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b * a; }
};
......
......@@ -44,7 +44,7 @@ TEST(DEV_API, fill_any_like) {
auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
// 2. test API
auto out = pten::FillAnyLike<float>(
auto out = pten::FullLike<float>(
*(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
dense_x,
val);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register