Unverified commit 3c2420a3, authored by ykkk2333, committed by GitHub

xpu kernels support api int64 vector inputs, test=kunlun (#49336)

Parent 418edae5
CMake: the pinned XPU SDK snapshot is bumped, presumably to pick up the XDNN build that provides the int64-shape overloads used by the kernel changes below.

@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
...
@@ -52,22 +52,30 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op");
 }
 
 #define DEFINE_XPU_COMPARE_KERNEL(name, functor)                            \
   template <typename T, typename Context>                                   \
   void name##RawKernel(const Context& dev_ctx,                              \
                        const DenseTensor& x,                                \
                        const DenseTensor& y,                                \
                        int axis,                                            \
                        DenseTensor* out) {                                  \
     using XPUType = typename XPUTypeTrait<T>::Type;                         \
-    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \
+    auto f = [](xpu::Context* ctx,                                          \
+                const XPUType* x,                                           \
+                const XPUType* y,                                           \
+                bool* z,                                                    \
+                const std::vector<int>& xshape,                             \
+                const std::vector<int>& yshape) {                           \
+      return functor(ctx, x, y, z, xshape, yshape);                         \
+    };                                                                      \
+    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, f);       \
   }                                                                         \
   template <typename T, typename Context>                                   \
   void name##Kernel(const Context& dev_ctx,                                 \
                     const DenseTensor& x,                                   \
                     const DenseTensor& y,                                   \
                     DenseTensor* out) {                                     \
     name##RawKernel<T, Context>(dev_ctx, x, y, -1, out);                    \
   }
 
 DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal<XPUType>)
...
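Why every call site now wraps the XDNN functor in a lambda: with the updated SDK the broadcast entry points presumably accept both std::vector<int> and std::vector<int64_t> shapes, so a bare name like xpu::broadcast_equal<XPUType> becomes an overload set that cannot be deduced as a template argument. The lambda pins one signature. That is our reading of the change; a standalone sketch with hypothetical names, not Paddle code:

    #include <cstdint>
    #include <vector>

    // Two overloads, as the new SDK presumably provides.
    int broadcast_op(const std::vector<int>& shape) { return 0; }
    int broadcast_op(const std::vector<int64_t>& shape) { return 0; }

    template <typename F>
    int call_with_int_shape(F f) {
      return f(std::vector<int>{2, 3});
    }

    int main() {
      // call_with_int_shape(broadcast_op);  // error: F cannot be deduced
      //                                     // from an overload set
      auto f = [](const std::vector<int>& s) { return broadcast_op(s); };
      return call_with_int_shape(f);  // OK: the lambda selects one overload
    }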
@@ -54,8 +54,17 @@ void AddRawKernel(const Context& dev_ctx,
                   int axis,
                   DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_add<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
...
@@ -35,15 +35,21 @@ void DivideGradKernel(const Context& dev_ctx,
                       DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_div_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
...
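The gradient kernels in this commit all hand XPUElementwiseGrad a lambda with the same nine-parameter shape; only the XDNN functor inside differs. For reference, a sketch of that shape (the alias name BroadcastGradFn is ours, not Paddle's):

    #include <vector>

    template <typename XPUType>
    using BroadcastGradFn = int (*)(xpu::Context* ctx,
                                    const XPUType* x,   // forward input x
                                    const XPUType* y,   // forward input y
                                    const XPUType* z,   // forward output
                                    const XPUType* dz,  // gradient of output
                                    XPUType* dy,        // gradient w.r.t. y
                                    XPUType* dx,        // gradient w.r.t. x
                                    const std::vector<int>& xshape,
                                    const std::vector<int>& yshape);

The trailing boolean argument of XPUElementwiseGrad (true here, false for subtract below) is forwarded unchanged from the old call sites.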
@@ -31,8 +31,16 @@ void DivideRawKernel(const Context& dev_ctx,
                      int axis,
                      DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_div<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
...
@@ -29,15 +29,21 @@ void MaximumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_max_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 template <typename T, typename Context>
@@ -49,15 +55,21 @@ void MinimumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_min_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
...
@@ -27,8 +27,16 @@ void FloorDivideRawKernel(const Context& dev_ctx,
                           int axis,
                           DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_floordiv<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -38,8 +46,16 @@ void MaximumRawKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_max<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -49,8 +65,16 @@ void MinimumRawKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_min<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -60,8 +84,16 @@ void RemainderRawKernel(const Context& dev_ctx,
                         int axis,
                         DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mod<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -71,8 +103,16 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
                              int axis,
                              DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_pow<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
...
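The five hunks above are mechanical: only the XDNN functor changes (broadcast_floordiv, broadcast_max, broadcast_min, broadcast_mod, broadcast_pow). The repetition could be factored with a small macro in the spirit of DEFINE_XPU_COMPARE_KERNEL; a sketch only, not part of this commit:

    // Hypothetical helper macro; expands to a lambda that pins the
    // std::vector<int> overload of the given XDNN broadcast op.
    #define WRAP_BROADCAST_FUNCTOR(op)                              \
      [](xpu::Context* ctx,                                         \
         const XPUType* x,                                          \
         const XPUType* y,                                          \
         XPUType* z,                                                \
         const std::vector<int>& xshape,                            \
         const std::vector<int>& yshape) {                          \
        return xpu::op<XPUType>(ctx, x, y, z, xshape, yshape);      \
      }

    // Usage, e.g. in RemainderRawKernel:
    //   XPUElementwise<T, XPUType>(
    //       dev_ctx, x, y, axis, out, WRAP_BROADCAST_FUNCTOR(broadcast_mod));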
@@ -34,15 +34,20 @@ void MultiplyGradKernel(const Context& dev_ctx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_mul_grad<XPUType>,
-                                 true);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
...
@@ -31,8 +31,16 @@ void MultiplyRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mul<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
...
@@ -28,15 +28,22 @@ void SubtractGradKernel(const Context& dev_ctx,
                         DenseTensor* dx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                      x,
-                                      y,
-                                      dout,
-                                      axis,
-                                      dx,
-                                      dy,
-                                      xpu::broadcast_sub_grad<XPUType>,
-                                      false);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+  phi::XPUElementwiseGrad<T, XPUType>(
+      dev_ctx, x, y, dout, axis, dx, dy, f, false);
 }
 
 }  // namespace phi
...
@@ -26,8 +26,16 @@ void SubtractRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_sub<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+  phi::XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
...
@@ -29,13 +29,18 @@ void ProdRawKernel(const Context& dev_ctx,
                    bool reduce_all,
                    DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_prod<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_prod<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod");
 }

(Note: this call still instantiates XPUReduce with T, while the other reduce kernels below switch to XPUType.)
...
@@ -29,13 +29,17 @@ void MaxRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_max<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_max<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, XPUType>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
 }
...
@@ -29,13 +29,18 @@ void MeanRawKernel(const Context& dev_ctx,
                    bool reduce_all,
                    DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_mean<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_mean<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, XPUType>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean");
 }
...
@@ -29,13 +29,18 @@ void MinRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_min<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_min<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, XPUType>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min");
 }
...
@@ -30,13 +30,17 @@ void SumRawKernel(const Context& dev_ctx,
                   DataType out_dtype,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_sum<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_sum<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, XPUType>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
 }
...
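All five reduce kernels pass dims.GetData(), which for phi::IntArray yields a std::vector<int64_t>; the shared XPUReduce helper is what actually accepts the int64 vector and drives the functor, which matches the commit title's "api int64 vector inputs". An assumed outline of its declaration, inferred from these call sites (the real one lives in the shared XPU reduce header):

    // Sketch only: signature inferred from the calls above.
    template <typename Context, typename T, typename Func>
    int XPUReduce(const Context& dev_ctx,
                  const DenseTensor& x,
                  const std::vector<int64_t>& dims,  // int64 reduce dims
                  bool keep_dim,
                  bool reduce_all,
                  DenseTensor* out,
                  Func functor);  // lambda as shown in each kernel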