Unverified commit 3c2420a3, authored by ykkk2333, committed by GitHub

xpu kernels support api int64 vector inputs, test=kunlun (#49336)

Parent 418edae5
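Every hunk below follows one pattern: a call site that used to pass an XDNN template function (e.g. `xpu::broadcast_add<XPUType>`) straight into a helper such as `XPUElementwise` now wraps it in a lambda whose shape parameters are explicitly `const std::vector<int>&`. Judging by the commit title (an inference, not stated in the diff), the updated SDK adds `std::vector<int64_t>` overloads to these APIs, so the bare function name becomes an overload set that can no longer be deduced as a template argument; the lambda pins one overload. A minimal, self-contained sketch of that failure mode, with hypothetical stand-ins (`broadcast_add`, `RunElementwise`) for the XDNN and phi helpers:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-ins for the XDNN API: suppose the updated SDK ships both
// an int and an int64_t shape-vector overload of each broadcast op.
template <typename T>
int broadcast_add(const T* x, const T* y, T* z,
                  const std::vector<int>& xshape,
                  const std::vector<int>& yshape) {
  z[0] = x[0] + y[0];  // toy body; the real op broadcasts over the shapes
  return 0;            // XDNN-style success code
}

template <typename T>
int broadcast_add(const T* x, const T* y, T* z,
                  const std::vector<int64_t>& xshape,
                  const std::vector<int64_t>& yshape) {
  z[0] = x[0] + y[0];
  return 0;
}

// A caller that accepts the op as a generic callable, the way phi's
// XPUElementwise helper does.
template <typename T, typename F>
int RunElementwise(F func, const T* x, const T* y, T* z) {
  std::vector<int> xshape{1}, yshape{1};
  return func(x, y, z, xshape, yshape);
}

int main() {
  float x = 1.f, y = 2.f, z = 0.f;

  // RunElementwise<float>(broadcast_add<float>, &x, &y, &z);
  // ^ fails to compile once the second overload exists: broadcast_add<float>
  //   names an overload set, so F cannot be deduced.

  // The lambda from the diff pins the std::vector<int> overload:
  auto f = [](const float* a, const float* b, float* c,
              const std::vector<int>& xs, const std::vector<int>& ys) {
    return broadcast_add<float>(a, b, c, xs, ys);
  };
  int r = RunElementwise<float>(f, &x, &y, &z);
  std::cout << "ret=" << r << ", z=" << z << std::endl;  // ret=0, z=3
  return 0;
}
```

Under that assumption the lambdas are behavior-preserving: each kernel still calls the `int`-shape overload it called before the SDK bump.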
......@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
+set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227")
else()
set(XPU_BASE_URL "${XPU_BASE_URL}")
endif()
......
......@@ -52,22 +52,30 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op");
}
-#define DEFINE_XPU_COMPARE_KERNEL(name, functor) \
-template <typename T, typename Context> \
-void name##RawKernel(const Context& dev_ctx, \
-const DenseTensor& x, \
-const DenseTensor& y, \
-int axis, \
-DenseTensor* out) { \
-using XPUType = typename XPUTypeTrait<T>::Type; \
-XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \
-} \
-template <typename T, typename Context> \
-void name##Kernel(const Context& dev_ctx, \
-const DenseTensor& x, \
-const DenseTensor& y, \
-DenseTensor* out) { \
-name##RawKernel<T, Context>(dev_ctx, x, y, -1, out); \
+#define DEFINE_XPU_COMPARE_KERNEL(name, functor) \
+template <typename T, typename Context> \
+void name##RawKernel(const Context& dev_ctx, \
+const DenseTensor& x, \
+const DenseTensor& y, \
+int axis, \
+DenseTensor* out) { \
+using XPUType = typename XPUTypeTrait<T>::Type; \
+auto f = [](xpu::Context* ctx, \
+const XPUType* x, \
+const XPUType* y, \
+bool* z, \
+const std::vector<int>& xshape, \
+const std::vector<int>& yshape) { \
+return functor(ctx, x, y, z, xshape, yshape); \
+}; \
+XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, f); \
+} \
+template <typename T, typename Context> \
+void name##Kernel(const Context& dev_ctx, \
+const DenseTensor& x, \
+const DenseTensor& y, \
+DenseTensor* out) { \
+name##RawKernel<T, Context>(dev_ctx, x, y, -1, out); \
}
DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal<XPUType>)
......
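For the comparison kernels the same wrapper is generated by a macro, and the output pointer is `bool*` rather than `XPUType*`. A self-contained miniature of the `DEFINE_XPU_COMPARE_KERNEL` idea (stand-in types and functors only; the real macro targets phi `DenseTensor`s and `xpu::Context`):

```cpp
#include <iostream>

// Stand-in comparison ops: note the bool* output, unlike the arithmetic ops.
template <typename T>
int broadcast_equal(const T* x, const T* y, bool* z, int n) {
  for (int i = 0; i < n; ++i) z[i] = (x[i] == y[i]);
  return 0;
}
template <typename T>
int broadcast_less_than(const T* x, const T* y, bool* z, int n) {
  for (int i = 0; i < n; ++i) z[i] = (x[i] < y[i]);
  return 0;
}

// Shared impl every generated kernel forwards to, as XPUCompareKernelImpl
// does in the diff.
template <typename T, typename F>
void CompareKernelImpl(const T* x, const T* y, bool* z, int n, F f) {
  f(x, y, z, n);
}

#define DEFINE_MINI_COMPARE_KERNEL(name, functor)             \
  template <typename T>                                       \
  void name##Kernel(const T* x, const T* y, bool* z, int n) { \
    auto f = [](const T* a, const T* b, bool* c, int m) {     \
      return functor(a, b, c, m);                             \
    };                                                        \
    CompareKernelImpl<T>(x, y, z, n, f);                      \
  }

DEFINE_MINI_COMPARE_KERNEL(Equal, broadcast_equal<T>)
DEFINE_MINI_COMPARE_KERNEL(LessThan, broadcast_less_than<T>)

int main() {
  float x[2] = {1.f, 2.f}, y[2] = {1.f, 3.f};
  bool z[2] = {false, false};
  EqualKernel(x, y, z, 2);
  std::cout << std::boolalpha << z[0] << ' ' << z[1] << '\n';  // true false
  LessThanKernel(x, y, z, 2);
  std::cout << std::boolalpha << z[0] << ' ' << z[1] << '\n';  // false true
  return 0;
}
```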
......@@ -54,8 +54,17 @@ void AddRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_add<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
} // namespace phi
......
......@@ -35,15 +35,21 @@ void DivideGradKernel(const Context& dev_ctx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
funcs::ElementwiseGradPreProcess(dout, dx);
-XPUElementwiseGrad<T, XPUType>(dev_ctx,
-x,
-y,
-dout,
-axis,
-dx,
-dy,
-xpu::broadcast_div_grad<XPUType>,
-true);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+const XPUType* z,
+const XPUType* dz,
+XPUType* dy,
+XPUType* dx,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_div_grad<XPUType>(
+ctx, x, y, z, dz, dy, dx, xshape, yshape);
+};
+XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
}
} // namespace phi
......
......@@ -31,8 +31,16 @@ void DivideRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_div<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_div<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
} // namespace phi
......
......@@ -29,15 +29,21 @@ void MaximumGradKernel(const Context& dev_ctx,
DenseTensor* dx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwiseGrad<T, XPUType>(dev_ctx,
-x,
-y,
-dout,
-axis,
-dx,
-dy,
-xpu::broadcast_max_grad<XPUType>,
-true);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+const XPUType* z,
+const XPUType* dz,
+XPUType* dy,
+XPUType* dx,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_max_grad<XPUType>(
+ctx, x, y, z, dz, dy, dx, xshape, yshape);
+};
+XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
}
template <typename T, typename Context>
......@@ -49,15 +55,21 @@ void MinimumGradKernel(const Context& dev_ctx,
DenseTensor* dx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwiseGrad<T, XPUType>(dev_ctx,
-x,
-y,
-dout,
-axis,
-dx,
-dy,
-xpu::broadcast_min_grad<XPUType>,
-true);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+const XPUType* z,
+const XPUType* dz,
+XPUType* dy,
+XPUType* dx,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_min_grad<XPUType>(
+ctx, x, y, z, dz, dy, dx, xshape, yshape);
+};
+XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
}
} // namespace phi
......
......@@ -27,8 +27,16 @@ void FloorDivideRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_floordiv<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
template <typename T, typename Context>
......@@ -38,8 +46,16 @@ void MaximumRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_max<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
template <typename T, typename Context>
......@@ -49,8 +65,16 @@ void MinimumRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_min<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
template <typename T, typename Context>
......@@ -60,8 +84,16 @@ void RemainderRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_mod<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
template <typename T, typename Context>
......@@ -71,8 +103,16 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_pow<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
} // namespace phi
......
......@@ -34,15 +34,20 @@ void MultiplyGradKernel(const Context& dev_ctx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
funcs::ElementwiseGradPreProcess(dout, dx);
-XPUElementwiseGrad<T, XPUType>(dev_ctx,
-x,
-y,
-dout,
-axis,
-dx,
-dy,
-xpu::broadcast_mul_grad<XPUType>,
-true);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+const XPUType* z,
+const XPUType* dz,
+XPUType* dy,
+XPUType* dx,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_mul_grad<XPUType>(
+ctx, x, y, z, dz, dy, dx, xshape, yshape);
+};
+XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
}
} // namespace phi
......
......@@ -31,8 +31,16 @@ void MultiplyRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_mul<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_mul<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
} // namespace phi
......
......@@ -28,15 +28,22 @@ void SubtractGradKernel(const Context& dev_ctx,
DenseTensor* dx,
DenseTensor* dy) {
using XPUType = typename XPUTypeTrait<T>::Type;
-phi::XPUElementwiseGrad<T, XPUType>(dev_ctx,
-x,
-y,
-dout,
-axis,
-dx,
-dy,
-xpu::broadcast_sub_grad<XPUType>,
-false);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+const XPUType* z,
+const XPUType* dz,
+XPUType* dy,
+XPUType* dx,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_sub_grad<XPUType>(
+ctx, x, y, z, dz, dy, dx, xshape, yshape);
+};
+phi::XPUElementwiseGrad<T, XPUType>(
+dev_ctx, x, y, dout, axis, dx, dy, f, false);
}
} // namespace phi
......
......@@ -26,8 +26,16 @@ void SubtractRawKernel(const Context& dev_ctx,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
-phi::XPUElementwise<T, XPUType>(
-dev_ctx, x, y, axis, out, xpu::broadcast_sub<XPUType>);
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+const XPUType* y,
+XPUType* z,
+const std::vector<int>& xshape,
+const std::vector<int>& yshape) {
+return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
+};
+phi::XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
}
} // namespace phi
......
......@@ -29,13 +29,18 @@ void ProdRawKernel(const Context& dev_ctx,
bool reduce_all,
DenseTensor* out) {
reduce_all = recompute_reduce_all(x, dims, reduce_all);
-int r = XPUReduce<Context, T>(dev_ctx,
-x,
-dims.GetData(),
-keep_dim,
-reduce_all,
-out,
-xpu::reduce_prod<T>);
+using XPUType = typename XPUTypeTrait<T>::Type;
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+XPUType* y,
+const std::vector<int>& xdims,
+const std::vector<int>& reduce_dims) {
+return xpu::reduce_prod<XPUType>(ctx, x, y, xdims, reduce_dims);
+};
+int r = XPUReduce<Context, T>(
+dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod");
}
......
......@@ -29,13 +29,17 @@ void MaxRawKernel(const Context& dev_ctx,
bool reduce_all,
DenseTensor* out) {
reduce_all = recompute_reduce_all(x, dims, reduce_all);
-int r = XPUReduce<Context, T>(dev_ctx,
-x,
-dims.GetData(),
-keep_dim,
-reduce_all,
-out,
-xpu::reduce_max<T>);
+using XPUType = typename XPUTypeTrait<T>::Type;
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+XPUType* y,
+const std::vector<int>& xdims,
+const std::vector<int>& reduce_dims) {
+return xpu::reduce_max<XPUType>(ctx, x, y, xdims, reduce_dims);
+};
+int r = XPUReduce<Context, XPUType>(
+dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
}
......
......@@ -29,13 +29,18 @@ void MeanRawKernel(const Context& dev_ctx,
bool reduce_all,
DenseTensor* out) {
reduce_all = recompute_reduce_all(x, dims, reduce_all);
-int r = XPUReduce<Context, T>(dev_ctx,
-x,
-dims.GetData(),
-keep_dim,
-reduce_all,
-out,
-xpu::reduce_mean<T>);
+using XPUType = typename XPUTypeTrait<T>::Type;
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+XPUType* y,
+const std::vector<int>& xdims,
+const std::vector<int>& reduce_dims) {
+return xpu::reduce_mean<XPUType>(ctx, x, y, xdims, reduce_dims);
+};
+int r = XPUReduce<Context, XPUType>(
+dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean");
}
......
......@@ -29,13 +29,18 @@ void MinRawKernel(const Context& dev_ctx,
bool reduce_all,
DenseTensor* out) {
reduce_all = recompute_reduce_all(x, dims, reduce_all);
-int r = XPUReduce<Context, T>(dev_ctx,
-x,
-dims.GetData(),
-keep_dim,
-reduce_all,
-out,
-xpu::reduce_min<T>);
+using XPUType = typename XPUTypeTrait<T>::Type;
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+XPUType* y,
+const std::vector<int>& xdims,
+const std::vector<int>& reduce_dims) {
+return xpu::reduce_min<XPUType>(ctx, x, y, xdims, reduce_dims);
+};
+int r = XPUReduce<Context, XPUType>(
+dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min");
}
......
......@@ -30,13 +30,17 @@ void SumRawKernel(const Context& dev_ctx,
DataType out_dtype,
DenseTensor* out) {
reduce_all = recompute_reduce_all(x, dims, reduce_all);
-int r = XPUReduce<Context, T>(dev_ctx,
-x,
-dims.GetData(),
-keep_dim,
-reduce_all,
-out,
-xpu::reduce_sum<T>);
+using XPUType = typename XPUTypeTrait<T>::Type;
+auto f = [](xpu::Context* ctx,
+const XPUType* x,
+XPUType* y,
+const std::vector<int>& xdims,
+const std::vector<int>& reduce_dims) {
+return xpu::reduce_sum<XPUType>(ctx, x, y, xdims, reduce_dims);
+};
+int r = XPUReduce<Context, XPUType>(
+dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
}
......
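One detail worth noting in the reduce hunks: besides adding the lambda, they switch the functor instantiation from `xpu::reduce_*<T>` to `xpu::reduce_*<XPUType>`, and most call sites from `XPUReduce<Context, T>` to `XPUReduce<Context, XPUType>`. A plausible reason (an inference; the diff does not spell it out) is that phi's host dtypes such as `phi::dtype::float16` are not the types the XDNN kernels are instantiated for, and `XPUTypeTrait` supplies the mapping. A hedged sketch with stand-in types (`phi_float16`, `xdnn_float16`, `reduce_sum` are all hypothetical):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-ins: phi_float16 plays phi::dtype::float16 (the host dtype) and
// xdnn_float16 plays the device half type the XDNN kernels are built for.
struct phi_float16 { uint16_t bits; };
struct xdnn_float16 { uint16_t bits; };

template <typename T>
struct XPUTypeTrait { using Type = T; };  // most dtypes map to themselves

template <>
struct XPUTypeTrait<phi_float16> { using Type = xdnn_float16; };

// Stand-in for xpu::reduce_sum<XPUType>.
template <typename T>
int reduce_sum(const T* x, T* y, const std::vector<int>& xdims,
               const std::vector<int>& reduce_dims) {
  return 0;  // XDNN-style success code
}

template <typename T>
void SumRawKernel(const T* x, T* y) {
  using XPUType = typename XPUTypeTrait<T>::Type;
  // Building the lambda (and the instantiation) from XPUType, as the diff
  // does, keeps phi-only dtypes out of the device-side template.
  auto f = [](const XPUType* a, XPUType* b, const std::vector<int>& xdims,
              const std::vector<int>& reduce_dims) {
    return reduce_sum<XPUType>(a, b, xdims, reduce_dims);
  };
  int r = f(reinterpret_cast<const XPUType*>(x),
            reinterpret_cast<XPUType*>(y),
            std::vector<int>{1}, std::vector<int>{0});
  std::cout << "ret=" << r << std::endl;
}

int main() {
  phi_float16 x{0x3c00}, y{0};  // 0x3c00 is 1.0 in IEEE half
  SumRawKernel(&x, &y);         // the functor runs on xdnn_float16
  return 0;
}
```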