diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake
index daaf06668920a1436d91249f0d8e20071754058b..5fbfe57cbf29616bf0b79c875f74d0b8f442a2e7 100644
--- a/cmake/external/xpu.cmake
+++ b/cmake/external/xpu.cmake
@@ -10,7 +10,7 @@ set(XPU_RT_LIB_NAME "libxpurt.so")
 if(NOT DEFINED XPU_BASE_URL)
   set(XPU_BASE_URL_WITHOUT_DATE
       "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
-  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221215")
+  set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20221227")
 else()
   set(XPU_BASE_URL "${XPU_BASE_URL}")
 endif()
diff --git a/paddle/phi/kernels/xpu/compare_kernel.cc b/paddle/phi/kernels/xpu/compare_kernel.cc
index 7e46b9da647867bde953680122d34a42709674da..a433329c924d0c6c1ddd35bf5190777eb6a73eb2 100644
--- a/paddle/phi/kernels/xpu/compare_kernel.cc
+++ b/paddle/phi/kernels/xpu/compare_kernel.cc
@@ -52,22 +52,30 @@ void XPUCompareKernelImpl(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "compare op");
 }
 
-#define DEFINE_XPU_COMPARE_KERNEL(name, functor)                            \
-  template <typename T, typename Context>                                   \
-  void name##RawKernel(const Context& dev_ctx,                              \
-                       const DenseTensor& x,                                \
-                       const DenseTensor& y,                                \
-                       int axis,                                            \
-                       DenseTensor* out) {                                  \
-    using XPUType = typename XPUTypeTrait<T>::Type;                         \
-    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, functor); \
-  }                                                                         \
-  template <typename T, typename Context>                                   \
-  void name##Kernel(const Context& dev_ctx,                                 \
-                    const DenseTensor& x,                                   \
-                    const DenseTensor& y,                                   \
-                    DenseTensor* out) {                                     \
-    name##RawKernel<T, Context>(dev_ctx, x, y, -1, out);                    \
+#define DEFINE_XPU_COMPARE_KERNEL(name, functor)                      \
+  template <typename T, typename Context>                             \
+  void name##RawKernel(const Context& dev_ctx,                        \
+                       const DenseTensor& x,                          \
+                       const DenseTensor& y,                          \
+                       int axis,                                      \
+                       DenseTensor* out) {                            \
+    using XPUType = typename XPUTypeTrait<T>::Type;                   \
+    auto f = [](xpu::Context* ctx,                                    \
+                const XPUType* x,                                     \
+                const XPUType* y,                                     \
+                bool* z,                                              \
+                const std::vector<int>& xshape,                       \
+                const std::vector<int>& yshape) {                     \
+      return functor(ctx, x, y, z, xshape, yshape);                   \
+    };                                                                \
+    XPUCompareKernelImpl<T, XPUType, Context>(dev_ctx, x, y, out, f); \
+  }                                                                   \
+  template <typename T, typename Context>                             \
+  void name##Kernel(const Context& dev_ctx,                           \
+                    const DenseTensor& x,                             \
+                    const DenseTensor& y,                             \
+                    DenseTensor* out) {                               \
+    name##RawKernel<T, Context>(dev_ctx, x, y, -1, out);              \
   }
 
 DEFINE_XPU_COMPARE_KERNEL(Equal, xpu::broadcast_equal<XPUType>)
diff --git a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
index 1e838acaa8a88c4c53dc9882f1937acad603c6b3..0e19c59d26c91bb69daa08842751f3907f349dbb 100644
--- a/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_add_kernel.cc
@@ -54,8 +54,17 @@ void AddRawKernel(const Context& dev_ctx,
                   int axis,
                   DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_add<XPUType>);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_add<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
index 9251286e495a89b4426d04a064c5d5cf118a6681..3b20874b5f312e892a969937a9a4bfba2c1b5188 100644
--- a/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_divide_grad_kernel.cc
@@ -35,15 +35,21 @@ void DivideGradKernel(const Context& dev_ctx,
                       DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_div_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
index ebefd05a02af4dd5933bf1c176c44ce86a88b6f5..0ec748b817effae91ce16386a7c932188145896c 100644
--- a/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_divide_kernel.cc
@@ -31,8 +31,16 @@ void DivideRawKernel(const Context& dev_ctx,
                      int axis,
                      DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_div<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_div<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc
index 9b1d2a6957f727686fa213addf5e59a274c8ffbe..47da6b25de92017ae6aa817c4bdc547004447809 100644
--- a/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_grad_kernel.cc
@@ -29,15 +29,21 @@ void MaximumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_max_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 template <typename T, typename Context>
@@ -49,15 +55,21 @@ void MinimumGradKernel(const Context& dev_ctx,
                        DenseTensor* dx,
                        DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_min_grad<XPUType>,
-                                 true);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_kernel.cc b/paddle/phi/kernels/xpu/elementwise_kernel.cc
index 87edfb22e502d2bdc8ea8158d445bbbc79d32935..f70f9e743a41147d9ff91cd9ea351aeadeee75af 100644
--- a/paddle/phi/kernels/xpu/elementwise_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_kernel.cc
@@ -27,8 +27,16 @@ void FloorDivideRawKernel(const Context& dev_ctx,
                           int axis,
                           DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_floordiv<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -38,8 +46,16 @@ void MaximumRawKernel(const Context& dev_ctx,
                       int axis,
                       DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_max<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -49,8 +65,16 @@ void MinimumRawKernel(const Context& dev_ctx,
                      int axis,
                      DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_min<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -60,8 +84,16 @@ void RemainderRawKernel(const Context& dev_ctx,
                         int axis,
                         DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mod<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 template <typename T, typename Context>
@@ -71,8 +103,16 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
                              int axis,
                              DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_pow<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
index b111630506f8388c370e46a9cc11da964a11a4b4..ebe190827d69d06bd05c1be00d339e5d6a8d2fca 100644
--- a/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_multiply_grad_kernel.cc
@@ -34,15 +34,20 @@ void MultiplyGradKernel(const Context& dev_ctx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
   funcs::ElementwiseGradPreProcess(dout, dx);
-  XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                 x,
-                                 y,
-                                 dout,
-                                 axis,
-                                 dx,
-                                 dy,
-                                 xpu::broadcast_mul_grad<XPUType>,
-                                 true);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  XPUElementwiseGrad<T, XPUType>(dev_ctx, x, y, dout, axis, dx, dy, f, true);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
index e3b62d539486f8ed6bfdbc2822304ea1615f615e..aa7f525c9b5f15b0f599a24bb2eb2f4b9e383391 100644
--- a/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_multiply_kernel.cc
@@ -31,8 +31,16 @@ void MultiplyRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_mul<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_mul<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc
index 0fb0ced46b8439753f9a9115412a1a7973159387..d22b369619d40da7219e051bb495993139377b08 100644
--- a/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_subtract_grad_kernel.cc
@@ -28,15 +28,22 @@ void SubtractGradKernel(const Context& dev_ctx,
                         DenseTensor* dx,
                         DenseTensor* dy) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwiseGrad<T, XPUType>(dev_ctx,
-                                      x,
-                                      y,
-                                      dout,
-                                      axis,
-                                      dx,
-                                      dy,
-                                      xpu::broadcast_sub_grad<XPUType>,
-                                      false);
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              const XPUType* z,
+              const XPUType* dz,
+              XPUType* dy,
+              XPUType* dx,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub_grad<XPUType>(
+        ctx, x, y, z, dz, dy, dx, xshape, yshape);
+  };
+
+  phi::XPUElementwiseGrad<T, XPUType>(
+      dev_ctx, x, y, dout, axis, dx, dy, f, false);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc
index 4e18264d71343119d24b20a88a8ffa0b72d9c1c7..866d9cf6206eda6968e3813cd5d88a9d229102d7 100644
--- a/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc
+++ b/paddle/phi/kernels/xpu/elementwise_subtract_kernel.cc
@@ -26,8 +26,16 @@ void SubtractRawKernel(const Context& dev_ctx,
                        int axis,
                        DenseTensor* out) {
   using XPUType = typename XPUTypeTrait<T>::Type;
-  phi::XPUElementwise<T, XPUType>(
-      dev_ctx, x, y, axis, out, xpu::broadcast_sub<XPUType>);
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              const XPUType* y,
+              XPUType* z,
+              const std::vector<int>& xshape,
+              const std::vector<int>& yshape) {
+    return xpu::broadcast_sub<XPUType>(ctx, x, y, z, xshape, yshape);
+  };
+
+  phi::XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
 }
 
 }  // namespace phi
diff --git a/paddle/phi/kernels/xpu/prod_kernel.cc b/paddle/phi/kernels/xpu/prod_kernel.cc
index cf237afb227975b7f7485262e739be9fbfac8d0c..ebc9abc049c0e3fe976bb5d64a55418dabe65317 100644
--- a/paddle/phi/kernels/xpu/prod_kernel.cc
+++ b/paddle/phi/kernels/xpu/prod_kernel.cc
@@ -29,13 +29,18 @@ void ProdRawKernel(const Context& dev_ctx,
                    bool reduce_all,
                    DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_prod<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_prod<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
 
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_prod");
 }
diff --git a/paddle/phi/kernels/xpu/reduce_max_kernel.cc b/paddle/phi/kernels/xpu/reduce_max_kernel.cc
index 8db710a24adce8b8055b4bb4fe2476e13b5fcb51..ba4aec72cd38c829fc0cacfa5c41bb7b69484fda 100644
--- a/paddle/phi/kernels/xpu/reduce_max_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_max_kernel.cc
@@ -29,13 +29,17 @@ void MaxRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_max<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_max<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
 
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_max");
 }
diff --git a/paddle/phi/kernels/xpu/reduce_mean_kernel.cc b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc
index d29db35517f3725c879a6e04581744a49b7d79ec..b646cd7ebfbc74d43d62ac097432d66917af7576 100644
--- a/paddle/phi/kernels/xpu/reduce_mean_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_mean_kernel.cc
@@ -29,13 +29,18 @@ void MeanRawKernel(const Context& dev_ctx,
                    bool reduce_all,
                    DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_mean<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_mean<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
+
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_mean");
 }
 
diff --git a/paddle/phi/kernels/xpu/reduce_min_kernel.cc b/paddle/phi/kernels/xpu/reduce_min_kernel.cc
index e330e30becdcfee17c993ba2ee4051c029e0bad8..e5294e43537b485ad454a157899d22361d468698 100644
--- a/paddle/phi/kernels/xpu/reduce_min_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_min_kernel.cc
@@ -29,13 +29,18 @@ void MinRawKernel(const Context& dev_ctx,
                   bool reduce_all,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_min<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_min<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
 
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_min");
 }
diff --git a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
index 952ed101cdcb8eb9616072a5ed2fc22d91cfc31d..ac13dc3de3e0dd9f38dee199fa8d2794bc19e00c 100644
--- a/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
+++ b/paddle/phi/kernels/xpu/reduce_sum_kernel.cc
@@ -30,13 +30,17 @@ void SumRawKernel(const Context& dev_ctx,
                   DataType out_dtype,
                   DenseTensor* out) {
   reduce_all = recompute_reduce_all(x, dims, reduce_all);
-  int r = XPUReduce<Context, T>(dev_ctx,
-                                x,
-                                dims.GetData(),
-                                keep_dim,
-                                reduce_all,
-                                out,
-                                xpu::reduce_sum<T>);
+  using XPUType = typename XPUTypeTrait<T>::Type;
+
+  auto f = [](xpu::Context* ctx,
+              const XPUType* x,
+              XPUType* y,
+              const std::vector<int>& xdims,
+              const std::vector<int>& reduce_dims) {
+    return xpu::reduce_sum<XPUType>(ctx, x, y, xdims, reduce_dims);
+  };
+  int r = XPUReduce<Context, T>(
+      dev_ctx, x, dims.GetData(), keep_dim, reduce_all, out, f);
 
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_sum");
 }