diff --git a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc index 95d8abd6bcf5c50e829ac2be0c44211a77649907..2906a1f226ce7b96648d42df5e442c39548dbc21 100644 --- a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc @@ -98,7 +98,7 @@ template void CoalesceKernel(const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "CoalesceCPUKernel", ([&] { CoalesceCPUKernel(dev_ctx, x, out); })); diff --git a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc index 44ad2fa588b55c4410c17633b0c789bfbac73d5d..753f6f97b5bc8ba9d82f9f39312e6dd1fe3c3f70 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc @@ -196,7 +196,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx, const std::string& key, SparseCooTensor* x_grad, DenseTensor* kernel_grad) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "Conv3dCooGradCPUKernel", ([&] { Conv3dCooGradCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc index f15a636f96d45e1868d0db5f43f2c62f66edcbaf..217aca56e6dd3aed03f838e6408677b16d23565d 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc @@ -186,7 +186,7 @@ void Conv3dCooKernel(const Context& dev_ctx, SparseCooTensor* out, DenseTensor* rulebook, DenseTensor* counter) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "Conv3dCooCPUKernel", ([&] { Conv3dCooCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc index 972b4537b95547509eb7e486907306c310f24ab7..2abd4957de49bcb2382070ab80f57348dec7cf24 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc @@ -236,7 +236,7 @@ void ElementWiseDivideCsrGradKernel(const Context& dev_ctx, const SparseCsrTensor& dout, SparseCsrTensor* dx, SparseCsrTensor* dy) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] { ElementWiseDivideCsrGradCPUKernel( dev_ctx, x, y, out, dout, dx, dy); @@ -250,7 +250,7 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx, const SparseCooTensor& dout, SparseCooTensor* dx, SparseCooTensor* dy) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] { ElementWiseDivideCooGradCPUKernel( dev_ctx, x, y, out, dout, dx, dy); @@ -262,36 +262,38 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx, \ DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) -#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \ - template \ - void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \ - const SparseCsrTensor& x, \ - const SparseCsrTensor& y, \ - const SparseCsrTensor& dout, \ - SparseCsrTensor* dx, \ - SparseCsrTensor* dy) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), \ - "ElementWise##name##CsrGradCPUKernel", \ - ([&] { \ - ElementWise##name##CsrGradCPUKernel( \ - dev_ctx, x, y, dout, dx, dy); \ - })); \ +#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \ + template \ + void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \ + const SparseCsrTensor& x, \ + const SparseCsrTensor& y, \ + const SparseCsrTensor& dout, \ + SparseCsrTensor* dx, \ + SparseCsrTensor* dy) { \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ + x.non_zero_crows().dtype(), \ + "ElementWise##name##CsrGradCPUKernel", \ + ([&] { \ + ElementWise##name##CsrGradCPUKernel( \ + dev_ctx, x, y, dout, dx, dy); \ + })); \ } -#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \ - template \ - void ElementWise##name##CooGradKernel(const Context& dev_ctx, \ - const SparseCooTensor& x, \ - const SparseCooTensor& y, \ - const SparseCooTensor& dout, \ - SparseCooTensor* dx, \ - SparseCooTensor* dy) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ - "ElementWise##name##CooGradCPUKernel", \ - ([&] { \ - ElementWise##name##CooGradCPUKernel( \ - dev_ctx, x, y, dout, dx, dy); \ - })); \ +#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \ + template \ + void ElementWise##name##CooGradKernel(const Context& dev_ctx, \ + const SparseCooTensor& x, \ + const SparseCooTensor& y, \ + const SparseCooTensor& dout, \ + SparseCooTensor* dx, \ + SparseCooTensor* dy) { \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ + x.non_zero_indices().dtype(), \ + "ElementWise##name##CooGradCPUKernel", \ + ([&] { \ + ElementWise##name##CooGradCPUKernel( \ + dev_ctx, x, y, dout, dx, dy); \ + })); \ } DEFINE_ELEMENTWISE_GRAD_KERNEL(Add) diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc index fc8592cbc9d4de51d64baaf41e833e166d60fd1d..3f7b26ee92e1e0337dabfb2e184e1574e1804cd8 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc @@ -57,11 +57,12 @@ void Merge(const IntT el_len, const IntT len_b_max, IntT* c_index, T* c_values, - IntT& nnz, + IntT* out_nnz, const Functor& functor_org, const bool is_divide) { IntT a = 0; IntT b = 0; + IntT& nnz = (*out_nnz); nnz = 0; const IntT* b_index = nullptr; std::vector b_full_index; @@ -94,9 +95,7 @@ void Merge(const IntT el_len, } ++a; ++b; - } - // coordinate x[a] < coordinate y[b] - else if (a_index[a] < b_index[b]) { + } else if (a_index[a] < b_index[b]) { // coordinate x[a] < coordinate y[b] if (!functor(a_values + a * el_len, zero.data(), c_values + nnz * el_len, @@ -105,9 +104,7 @@ void Merge(const IntT el_len, ++nnz; } ++a; - } - // coordinate x[a] > coordinate y[b] - else if (a_index[a] > b_index[b]) { + } else if (a_index[a] > b_index[b]) { // coordinate x[a] > coordinate y[b] if (!functor(zero.data(), b_values[b_index[b]], c_values + nnz * el_len, @@ -215,7 +212,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, max_len, out_indexs.data(), out_values_vec.data(), - nnz, + &nnz, functor, is_divide); @@ -292,7 +289,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, const SparseCsrTensor& x, \ const SparseCsrTensor& y, \ SparseCsrTensor* out) { \ - PD_VISIT_INTEGRAL_TYPES( \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ x.non_zero_crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \ ElementWise##name##CsrCPUKernel(dev_ctx, x, y, out); \ })); \ @@ -309,18 +306,18 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, dev_ctx, x, y, out, functor); \ } -#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \ - template \ - void ElementWise##name##CooKernel(const Context& dev_ctx, \ - const SparseCooTensor& x, \ - const SparseCooTensor& y, \ - SparseCooTensor* out) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ - "ElementWise##name##CooCPUKernel", \ - ([&] { \ - ElementWise##name##CooCPUKernel( \ - dev_ctx, x, y, out); \ - })); \ +#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \ + template \ + void ElementWise##name##CooKernel(const Context& dev_ctx, \ + const SparseCooTensor& x, \ + const SparseCooTensor& y, \ + SparseCooTensor* out) { \ + PD_VISIT_BASE_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ + "ElementWise##name##CooCPUKernel", \ + ([&] { \ + ElementWise##name##CooCPUKernel( \ + dev_ctx, x, y, out); \ + })); \ } DEFINE_CSR_ELEMENTWISE_CPU_KERNEL(Add) diff --git a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc index 92c015101264c0fc259434a06ea8a59355ac381f..8dbf7a84c19388e0874748c7172fab7726450d30 100644 --- a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc @@ -79,7 +79,7 @@ void SparseMaskKernel(const Context& dev_ctx, const DenseTensor& x, const SparseCooTensor& mask, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( mask.non_zero_indices().dtype(), "SparseMaskCPUKernel", ([&] { SparseMaskCPUKernel(dev_ctx, x, mask, out); })); @@ -146,7 +146,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx, const SparseCooTensor& x, const DenseTensor& mask_indices, DenseTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] { SparseMaskHelperCPUKernel(dev_ctx, x, mask_indices, out); })); diff --git a/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc index 077ac07e8d38edde198cc1c8a88b5967f013454d..0d68e712cdd540e001790b3c02de6f9c4bf56809 100644 --- a/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc @@ -83,7 +83,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx, const SparseCooTensor& out_grad, const std::vector& kernel_sizes, SparseCooTensor* x_grad) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] { MaxPoolCooGradCPUKernel( dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad); diff --git a/paddle/phi/kernels/sparse/cpu/pool_kernel.cc b/paddle/phi/kernels/sparse/cpu/pool_kernel.cc index f01017bba56f5d01dee00284f927c92ef6338de0..8479036789766c92efddef6ad2136a915539a2a7 100644 --- a/paddle/phi/kernels/sparse/cpu/pool_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/pool_kernel.cc @@ -109,7 +109,7 @@ void MaxPoolCooKernel(const Context& dev_ctx, SparseCooTensor* out, DenseTensor* rulebook, DenseTensor* counter) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "MaxPoolCooCPUKernel", ([&] { MaxPoolCooCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc index 18d4f4a9c246b8ad37f48decbf30ee5ffdd958ae..e56fe869705aae8cf1caa516aae5dd000a7c6f9c 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc @@ -62,7 +62,7 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx, T* dx_data = dx_values->data(); // dx = (dout - sum(dout * out)) * out - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] { const data_t* out_crows_data = out_crows.data(); for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc index 5f7342b52addacb7223cd2b410f23a17060dfd94..96b6470e0f39e966fbe5b8c3a2dc582e08fe2548 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc @@ -60,7 +60,7 @@ void SoftmaxCsrKernel(const Context& dev_ctx, T* out_data = out_values->data(); // out = exp(x-x_max) / sum( exp(x-x_max )) - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] { const data_t* x_crows_data = x_crows.data(); for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index 8bf0104ef0baf755794fa6962e3ca80385380098..85227b7cf64fcb9e36eecf8b1d435c9a0bd530e2 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -160,7 +160,7 @@ template void SparseCsrToCooKernel(const Context& dev_ctx, const SparseCsrTensor& x, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "SparseCsrToCooCPUKernel", ([&] { SparseCsrToCooCPUKernel(dev_ctx, x, out); })); @@ -250,7 +250,7 @@ template void SparseCooToCsrKernel(const Context& dev_ctx, const SparseCooTensor& x, SparseCsrTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseCooToCsrCPUKernel", ([&] { SparseCooToCsrCPUKernel(dev_ctx, x, out); })); @@ -304,7 +304,7 @@ template void SparseCooToDenseKernel(const Context& dev_ctx, const SparseCooTensor& x, DenseTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseCooToDenseCPUKernel", ([&] { SparseCooToDenseCPUKernel(dev_ctx, x, out); }));