From 3c2420a39db3e91076789f8041b8a7a64202a971 Mon Sep 17 00:00:00 2001 From: ykkk2333 <77383312+ykkk2333@users.noreply.github.com> Date: Thu, 29 Dec 2022 15:45:57 +0800 Subject: [PATCH] xpu kernels support api int64 vector inputs, test=kunlun (#49336) --- cmake/external/xpu.cmake | 2 +- paddle/phi/kernels/xpu/compare_kernel.cc | 40 ++++++++----- .../phi/kernels/xpu/elementwise_add_kernel.cc | 13 +++- .../xpu/elementwise_divide_grad_kernel.cc | 24 +++++--- .../kernels/xpu/elementwise_divide_kernel.cc | 12 +++- .../kernels/xpu/elementwise_grad_kernel.cc | 48 +++++++++------ paddle/phi/kernels/xpu/elementwise_kernel.cc | 60 +++++++++++++++---- .../xpu/elementwise_multiply_grad_kernel.cc | 23 ++++--- .../xpu/elementwise_multiply_kernel.cc | 12 +++- .../xpu/elementwise_subtract_grad_kernel.cc | 25 +++++--- .../xpu/elementwise_subtract_kernel.cc | 12 +++- paddle/phi/kernels/xpu/prod_kernel.cc | 19 +++--- paddle/phi/kernels/xpu/reduce_max_kernel.cc | 18 +++--- paddle/phi/kernels/xpu/reduce_mean_kernel.cc | 19 +++--- paddle/phi/kernels/xpu/reduce_min_kernel.cc | 19 +++--- paddle/phi/kernels/xpu/reduce_sum_kernel.cc | 18 +++--- 16 files changed, 249 insertions(+), 115 deletions(-) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index daaf066689..5fbfe57cbf 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so") if(NOT DEFINED XPU_BASE_URL) set(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev") - set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215") + set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227") else() set(XPU_BASE_URL "${XPU_BASE_URL}") endif() diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc index 7e46b9da64..a433329c92 100644 --- a/paddle/phi/kernels/xpu/compare_kernel.cc +++ b/paddle/phi/kernels/xpu/compare_kernel.cc @@ -52,22 +52,30 @@ void XPUCompareKernelImpl(const Context& dev_ctx, PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op"); } -#define DEFINE_XPU_COMPARE_KERNEL(name, functor) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - using XPUType = typename XPUTypeTrait::Type; \ - XPUCompareKernelImpl(dev_ctx, x, y, out, functor); \ - } \ - template \ - void name##Kernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - DenseTensor* out) { \ - name##RawKernel(dev_ctx, x, y, -1, out); \ +#define DEFINE_XPU_COMPARE_KERNEL(name, functor) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + using XPUType = typename XPUTypeTrait::Type; \ + auto f = [](xpu::Context* ctx, \ + const XPUType* x, \ + const XPUType* y, \ + bool* z, \ + const std::vector& xshape, \ + const std::vector& yshape) { \ + return functor(ctx, x, y, z, xshape, yshape); \ + }; \ + XPUCompareKernelImpl(dev_ctx, x, y, out, f); \ + } \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + DenseTensor* out) { \ + name##RawKernel(dev_ctx, x, y, -1, out); \ } DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal) diff --git a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc index 1e838acaa8..0e19c59d26 100644 --- a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc @@ -54,8 +54,17 @@ void AddRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_add); + + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_add(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc index 9251286e49..3b20874b5f 100644 --- a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc @@ -35,15 +35,21 @@ void DivideGradKernel(const Context& dev_ctx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; funcs::ElementwiseGradPreProcess(dout, dx); - XPUElementwiseGrad(dev_ctx, - x, - y, - dout, - axis, - dx, - dy, - xpu::broadcast_div_grad, - true); + + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + const XPUType* z, + const XPUType* dz, + XPUType* dy, + XPUType* dx, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_div_grad( + ctx, x, y, z, dz, dy, dx, xshape, yshape); + }; + + XPUElementwiseGrad(dev_ctx, x, y, dout, axis, dx, dy, f, true); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc index ebefd05a02..0ec748b817 100644 --- a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc @@ -31,8 +31,16 @@ void DivideRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_div); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_div(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc index 9b1d2a6957..47da6b25de 100644 --- a/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc @@ -29,15 +29,21 @@ void MaximumGradKernel(const Context& dev_ctx, DenseTensor* dx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwiseGrad(dev_ctx, - x, - y, - dout, - axis, - dx, - dy, - xpu::broadcast_max_grad, - true); + + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + const XPUType* z, + const XPUType* dz, + XPUType* dy, + XPUType* dx, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_max_grad( + ctx, x, y, z, dz, dy, dx, xshape, yshape); + }; + + XPUElementwiseGrad(dev_ctx, x, y, dout, axis, dx, dy, f, true); } template @@ -49,15 +55,21 @@ void MinimumGradKernel(const Context& dev_ctx, DenseTensor* dx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwiseGrad(dev_ctx, - x, - y, - dout, - axis, - dx, - dy, - xpu::broadcast_min_grad, - true); + + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + const XPUType* z, + const XPUType* dz, + XPUType* dy, + XPUType* dx, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_min_grad( + ctx, x, y, z, dz, dy, dx, xshape, yshape); + }; + + XPUElementwiseGrad(dev_ctx, x, y, dout, axis, dx, dy, f, true); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_kernel.cc b/paddle/phi/kernels/xpu/elementwise_kernel.cc index 87edfb22e5..f70f9e743a 100644 --- a/paddle/phi/kernels/xpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_kernel.cc @@ -27,8 +27,16 @@ void FloorDivideRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_floordiv); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_floordiv(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } template @@ -38,8 +46,16 @@ void MaximumRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_max); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_max(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } template @@ -49,8 +65,16 @@ void MinimumRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_min); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_min(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } template @@ -60,8 +84,16 @@ void RemainderRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_mod); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_mod(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } template @@ -71,8 +103,16 @@ void ElementwisePowRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_pow); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_pow(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc index b111630506..ebe190827d 100644 --- a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc @@ -34,15 +34,20 @@ void MultiplyGradKernel(const Context& dev_ctx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; funcs::ElementwiseGradPreProcess(dout, dx); - XPUElementwiseGrad(dev_ctx, - x, - y, - dout, - axis, - dx, - dy, - xpu::broadcast_mul_grad, - true); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + const XPUType* z, + const XPUType* dz, + XPUType* dy, + XPUType* dx, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_mul_grad( + ctx, x, y, z, dz, dy, dx, xshape, yshape); + }; + + XPUElementwiseGrad(dev_ctx, x, y, dout, axis, dx, dy, f, true); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc index e3b62d5394..aa7f525c9b 100644 --- a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc @@ -31,8 +31,16 @@ void MultiplyRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_mul); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_mul(ctx, x, y, z, xshape, yshape); + }; + + XPUElementwise(dev_ctx, x, y, axis, out, f); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc index 0fb0ced46b..d22b369619 100644 --- a/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc @@ -28,15 +28,22 @@ void SubtractGradKernel(const Context& dev_ctx, DenseTensor* dx, DenseTensor* dy) { using XPUType = typename XPUTypeTrait::Type; - phi::XPUElementwiseGrad(dev_ctx, - x, - y, - dout, - axis, - dx, - dy, - xpu::broadcast_sub_grad, - false); + + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + const XPUType* z, + const XPUType* dz, + XPUType* dy, + XPUType* dx, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_sub_grad( + ctx, x, y, z, dz, dy, dx, xshape, yshape); + }; + + phi::XPUElementwiseGrad( + dev_ctx, x, y, dout, axis, dx, dy, f, false); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc index 4e18264d71..866d9cf620 100644 --- a/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc @@ -26,8 +26,16 @@ void SubtractRawKernel(const Context& dev_ctx, int axis, DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; - phi::XPUElementwise( - dev_ctx, x, y, axis, out, xpu::broadcast_sub); + auto f = [](xpu::Context* ctx, + const XPUType* x, + const XPUType* y, + XPUType* z, + const std::vector& xshape, + const std::vector& yshape) { + return xpu::broadcast_sub(ctx, x, y, z, xshape, yshape); + }; + + phi::XPUElementwise(dev_ctx, x, y, axis, out, f); } } // namespace phi diff --git a/paddle/phi/kernels/xpu/prod_kernel.cc b/paddle/phi/kernels/xpu/prod_kernel.cc index cf237afb22..ebc9abc049 100644 --- a/paddle/phi/kernels/xpu/prod_kernel.cc +++ b/paddle/phi/kernels/xpu/prod_kernel.cc @@ -29,13 +29,18 @@ void ProdRawKernel(const Context& dev_ctx, bool reduce_all, DenseTensor* out) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - int r = XPUReduce(dev_ctx, - x, - dims.GetData(), - keep_dim, - reduce_all, - out, - xpu::reduce_prod); + using XPUType = typename XPUTypeTrait::Type; + + auto f = [](xpu::Context* ctx, + const XPUType* x, + XPUType* y, + const std::vector& xdims, + const std::vector& reduce_dims) { + return xpu::reduce_prod(ctx, x, y, xdims, reduce_dims); + }; + + int r = XPUReduce( + dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f); PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod"); } diff --git a/paddle/phi/kernels/xpu/reduce_max_kernel.cc b/paddle/phi/kernels/xpu/reduce_max_kernel.cc index 8db710a24a..ba4aec72cd 100644 --- a/paddle/phi/kernels/xpu/reduce_max_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_max_kernel.cc @@ -29,13 +29,17 @@ void MaxRawKernel(const Context& dev_ctx, bool reduce_all, DenseTensor* out) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - int r = XPUReduce(dev_ctx, - x, - dims.GetData(), - keep_dim, - reduce_all, - out, - xpu::reduce_max); + using XPUType = typename XPUTypeTrait::Type; + auto f = [](xpu::Context* ctx, + const XPUType* x, + XPUType* y, + const std::vector& xdims, + const std::vector& reduce_dims) { + return xpu::reduce_max(ctx, x, y, xdims, reduce_dims); + }; + + int r = XPUReduce( + dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f); PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max"); } diff --git a/paddle/phi/kernels/xpu/reduce_mean_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc index d29db35517..b646cd7ebf 100644 --- a/paddle/phi/kernels/xpu/reduce_mean_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc @@ -29,13 +29,18 @@ void MeanRawKernel(const Context& dev_ctx, bool reduce_all, DenseTensor* out) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - int r = XPUReduce(dev_ctx, - x, - dims.GetData(), - keep_dim, - reduce_all, - out, - xpu::reduce_mean); + using XPUType = typename XPUTypeTrait::Type; + auto f = [](xpu::Context* ctx, + const XPUType* x, + XPUType* y, + const std::vector& xdims, + const std::vector& reduce_dims) { + return xpu::reduce_mean(ctx, x, y, xdims, reduce_dims); + }; + + int r = XPUReduce( + dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean"); } diff --git a/paddle/phi/kernels/xpu/reduce_min_kernel.cc b/paddle/phi/kernels/xpu/reduce_min_kernel.cc index e330e30bec..e5294e4353 100644 --- a/paddle/phi/kernels/xpu/reduce_min_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_min_kernel.cc @@ -29,13 +29,18 @@ void MinRawKernel(const Context& dev_ctx, bool reduce_all, DenseTensor* out) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - int r = XPUReduce(dev_ctx, - x, - dims.GetData(), - keep_dim, - reduce_all, - out, - xpu::reduce_min); + using XPUType = typename XPUTypeTrait::Type; + + auto f = [](xpu::Context* ctx, + const XPUType* x, + XPUType* y, + const std::vector& xdims, + const std::vector& reduce_dims) { + return xpu::reduce_min(ctx, x, y, xdims, reduce_dims); + }; + + int r = XPUReduce( + dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f); PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min"); } diff --git a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc index 952ed101cd..ac13dc3de3 100644 --- a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc +++ b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc @@ -30,13 +30,17 @@ void SumRawKernel(const Context& dev_ctx, DataType out_dtype, DenseTensor* out) { reduce_all = recompute_reduce_all(x, dims, reduce_all); - int r = XPUReduce(dev_ctx, - x, - dims.GetData(), - keep_dim, - reduce_all, - out, - xpu::reduce_sum); + using XPUType = typename XPUTypeTrait::Type; + + auto f = [](xpu::Context* ctx, + const XPUType* x, + XPUType* y, + const std::vector& xdims, + const std::vector& reduce_dims) { + return xpu::reduce_sum(ctx, x, y, xdims, reduce_dims); + }; + int r = XPUReduce( + dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f); PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum"); } -- GitLab