From ab5831733d86871b38b472d3f144a1e9ee7aff36 Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Tue, 16 Aug 2022 13:26:16 +0800 Subject: [PATCH] Use base visit in cpu kernel (#45062) --- .../phi/kernels/sparse/cpu/coalesce_kernel.cc | 2 +- .../kernels/sparse/cpu/conv_grad_kernel.cc | 2 +- paddle/phi/kernels/sparse/cpu/conv_kernel.cc | 2 +- .../sparse/cpu/elementwise_grad_kernel.cc | 62 ++++++++++--------- .../kernels/sparse/cpu/elementwise_kernel.cc | 39 ++++++------ paddle/phi/kernels/sparse/cpu/mask_kernel.cc | 4 +- .../kernels/sparse/cpu/pool_grad_kernel.cc | 2 +- paddle/phi/kernels/sparse/cpu/pool_kernel.cc | 2 +- .../kernels/sparse/cpu/softmax_grad_kernel.cc | 2 +- .../phi/kernels/sparse/cpu/softmax_kernel.cc | 2 +- .../kernels/sparse/cpu/sparse_utils_kernel.cc | 6 +- 11 files changed, 62 insertions(+), 63 deletions(-) diff --git a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc index 95d8abd6bc..2906a1f226 100644 --- a/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/coalesce_kernel.cc @@ -98,7 +98,7 @@ template void CoalesceKernel(const Context& dev_ctx, const SparseCooTensor& x, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "CoalesceCPUKernel", ([&] { CoalesceCPUKernel(dev_ctx, x, out); })); diff --git a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc index 44ad2fa588..753f6f97b5 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_grad_kernel.cc @@ -196,7 +196,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx, const std::string& key, SparseCooTensor* x_grad, DenseTensor* kernel_grad) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "Conv3dCooGradCPUKernel", ([&] { Conv3dCooGradCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc index f15a636f96..217aca56e6 100644 --- a/paddle/phi/kernels/sparse/cpu/conv_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/conv_kernel.cc @@ -186,7 +186,7 @@ void Conv3dCooKernel(const Context& dev_ctx, SparseCooTensor* out, DenseTensor* rulebook, DenseTensor* counter) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "Conv3dCooCPUKernel", ([&] { Conv3dCooCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc index 972b4537b9..2abd4957de 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_grad_kernel.cc @@ -236,7 +236,7 @@ void ElementWiseDivideCsrGradKernel(const Context& dev_ctx, const SparseCsrTensor& dout, SparseCsrTensor* dx, SparseCsrTensor* dy) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "ElementWiseDivideCsrGradCPUKernel", ([&] { ElementWiseDivideCsrGradCPUKernel( dev_ctx, x, y, out, dout, dx, dy); @@ -250,7 +250,7 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx, const SparseCooTensor& dout, SparseCooTensor* dx, SparseCooTensor* dy) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "ElementWiseDivideCooGradCPUKernel", ([&] { ElementWiseDivideCooGradCPUKernel( dev_ctx, x, y, out, dout, dx, dy); @@ -262,36 +262,38 @@ void ElementWiseDivideCooGradKernel(const Context& dev_ctx, \ DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) -#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \ - template \ - void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \ - const SparseCsrTensor& x, \ - const SparseCsrTensor& y, \ - const SparseCsrTensor& dout, \ - SparseCsrTensor* dx, \ - SparseCsrTensor* dy) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), \ - "ElementWise##name##CsrGradCPUKernel", \ - ([&] { \ - ElementWise##name##CsrGradCPUKernel( \ - dev_ctx, x, y, dout, dx, dy); \ - })); \ +#define DEFINE_ELEMENTWISE_GRAD_KERNEL_CSR(name) \ + template \ + void ElementWise##name##CsrGradKernel(const Context& dev_ctx, \ + const SparseCsrTensor& x, \ + const SparseCsrTensor& y, \ + const SparseCsrTensor& dout, \ + SparseCsrTensor* dx, \ + SparseCsrTensor* dy) { \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ + x.non_zero_crows().dtype(), \ + "ElementWise##name##CsrGradCPUKernel", \ + ([&] { \ + ElementWise##name##CsrGradCPUKernel( \ + dev_ctx, x, y, dout, dx, dy); \ + })); \ } -#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \ - template \ - void ElementWise##name##CooGradKernel(const Context& dev_ctx, \ - const SparseCooTensor& x, \ - const SparseCooTensor& y, \ - const SparseCooTensor& dout, \ - SparseCooTensor* dx, \ - SparseCooTensor* dy) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ - "ElementWise##name##CooGradCPUKernel", \ - ([&] { \ - ElementWise##name##CooGradCPUKernel( \ - dev_ctx, x, y, dout, dx, dy); \ - })); \ +#define DEFINE_ELEMENTWISE_GRAD_KERNEL_COO(name) \ + template \ + void ElementWise##name##CooGradKernel(const Context& dev_ctx, \ + const SparseCooTensor& x, \ + const SparseCooTensor& y, \ + const SparseCooTensor& dout, \ + SparseCooTensor* dx, \ + SparseCooTensor* dy) { \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ + x.non_zero_indices().dtype(), \ + "ElementWise##name##CooGradCPUKernel", \ + ([&] { \ + ElementWise##name##CooGradCPUKernel( \ + dev_ctx, x, y, dout, dx, dy); \ + })); \ } DEFINE_ELEMENTWISE_GRAD_KERNEL(Add) diff --git a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc index fc8592cbc9..3f7b26ee92 100644 --- a/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/elementwise_kernel.cc @@ -57,11 +57,12 @@ void Merge(const IntT el_len, const IntT len_b_max, IntT* c_index, T* c_values, - IntT& nnz, + IntT* out_nnz, const Functor& functor_org, const bool is_divide) { IntT a = 0; IntT b = 0; + IntT& nnz = (*out_nnz); nnz = 0; const IntT* b_index = nullptr; std::vector b_full_index; @@ -94,9 +95,7 @@ void Merge(const IntT el_len, } ++a; ++b; - } - // coordinate x[a] < coordinate y[b] - else if (a_index[a] < b_index[b]) { + } else if (a_index[a] < b_index[b]) { // coordinate x[a] < coordinate y[b] if (!functor(a_values + a * el_len, zero.data(), c_values + nnz * el_len, @@ -105,9 +104,7 @@ void Merge(const IntT el_len, ++nnz; } ++a; - } - // coordinate x[a] > coordinate y[b] - else if (a_index[a] > b_index[b]) { + } else if (a_index[a] > b_index[b]) { // coordinate x[a] > coordinate y[b] if (!functor(zero.data(), b_values[b_index[b]], c_values + nnz * el_len, @@ -215,7 +212,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, max_len, out_indexs.data(), out_values_vec.data(), - nnz, + &nnz, functor, is_divide); @@ -292,7 +289,7 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, const SparseCsrTensor& x, \ const SparseCsrTensor& y, \ SparseCsrTensor* out) { \ - PD_VISIT_INTEGRAL_TYPES( \ + PD_VISIT_BASE_INTEGRAL_TYPES( \ x.non_zero_crows().dtype(), "ElementWise##name##CsrCPUKernel", ([&] { \ ElementWise##name##CsrCPUKernel(dev_ctx, x, y, out); \ })); \ @@ -309,18 +306,18 @@ void ElementWiseCooKernelImpl(const Context& dev_ctx, dev_ctx, x, y, out, functor); \ } -#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \ - template \ - void ElementWise##name##CooKernel(const Context& dev_ctx, \ - const SparseCooTensor& x, \ - const SparseCooTensor& y, \ - SparseCooTensor* out) { \ - PD_VISIT_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ - "ElementWise##name##CooCPUKernel", \ - ([&] { \ - ElementWise##name##CooCPUKernel( \ - dev_ctx, x, y, out); \ - })); \ +#define DEFINE_COO_ELEMENTWISE_KERNEL(name) \ + template \ + void ElementWise##name##CooKernel(const Context& dev_ctx, \ + const SparseCooTensor& x, \ + const SparseCooTensor& y, \ + SparseCooTensor* out) { \ + PD_VISIT_BASE_INTEGRAL_TYPES(x.non_zero_indices().dtype(), \ + "ElementWise##name##CooCPUKernel", \ + ([&] { \ + ElementWise##name##CooCPUKernel( \ + dev_ctx, x, y, out); \ + })); \ } DEFINE_CSR_ELEMENTWISE_CPU_KERNEL(Add) diff --git a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc index 92c0151012..8dbf7a84c1 100644 --- a/paddle/phi/kernels/sparse/cpu/mask_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/mask_kernel.cc @@ -79,7 +79,7 @@ void SparseMaskKernel(const Context& dev_ctx, const DenseTensor& x, const SparseCooTensor& mask, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( mask.non_zero_indices().dtype(), "SparseMaskCPUKernel", ([&] { SparseMaskCPUKernel(dev_ctx, x, mask, out); })); @@ -146,7 +146,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx, const SparseCooTensor& x, const DenseTensor& mask_indices, DenseTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseMaskHelperCPUKernel", ([&] { SparseMaskHelperCPUKernel(dev_ctx, x, mask_indices, out); })); diff --git a/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc index 077ac07e8d..0d68e712cd 100644 --- a/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/pool_grad_kernel.cc @@ -83,7 +83,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx, const SparseCooTensor& out_grad, const std::vector& kernel_sizes, SparseCooTensor* x_grad) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "MaxPoolCooGradCPUKernel", ([&] { MaxPoolCooGradCPUKernel( dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad); diff --git a/paddle/phi/kernels/sparse/cpu/pool_kernel.cc b/paddle/phi/kernels/sparse/cpu/pool_kernel.cc index f01017bba5..8479036789 100644 --- a/paddle/phi/kernels/sparse/cpu/pool_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/pool_kernel.cc @@ -109,7 +109,7 @@ void MaxPoolCooKernel(const Context& dev_ctx, SparseCooTensor* out, DenseTensor* rulebook, DenseTensor* counter) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "MaxPoolCooCPUKernel", ([&] { MaxPoolCooCPUKernel(dev_ctx, x, diff --git a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc index 18d4f4a9c2..e56fe86970 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_grad_kernel.cc @@ -62,7 +62,7 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx, T* dx_data = dx_values->data(); // dx = (dout - sum(dout * out)) * out - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] { const data_t* out_crows_data = out_crows.data(); for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc index 5f7342b52a..96b6470e0f 100644 --- a/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/softmax_kernel.cc @@ -60,7 +60,7 @@ void SoftmaxCsrKernel(const Context& dev_ctx, T* out_data = out_values->data(); // out = exp(x-x_max) / sum( exp(x-x_max )) - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] { const data_t* x_crows_data = x_crows.data(); for (int i = 0; i < batch_size; ++i) { diff --git a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc index 8bf0104ef0..85227b7cf6 100644 --- a/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc +++ b/paddle/phi/kernels/sparse/cpu/sparse_utils_kernel.cc @@ -160,7 +160,7 @@ template void SparseCsrToCooKernel(const Context& dev_ctx, const SparseCsrTensor& x, SparseCooTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_crows().dtype(), "SparseCsrToCooCPUKernel", ([&] { SparseCsrToCooCPUKernel(dev_ctx, x, out); })); @@ -250,7 +250,7 @@ template void SparseCooToCsrKernel(const Context& dev_ctx, const SparseCooTensor& x, SparseCsrTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseCooToCsrCPUKernel", ([&] { SparseCooToCsrCPUKernel(dev_ctx, x, out); })); @@ -304,7 +304,7 @@ template void SparseCooToDenseKernel(const Context& dev_ctx, const SparseCooTensor& x, DenseTensor* out) { - PD_VISIT_INTEGRAL_TYPES( + PD_VISIT_BASE_INTEGRAL_TYPES( x.non_zero_indices().dtype(), "SparseCooToDenseCPUKernel", ([&] { SparseCooToDenseCPUKernel(dev_ctx, x, out); })); -- GitLab