Unverified commit f419e341, authored by zhangkaihuo, committed by GitHub

add sparse visit (#44847)

Parent d4ca7ffb
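This commit adds a narrower dispatch macro, PD_VISIT_BASE_INTEGRAL_TYPES, and switches the sparse GPU kernels from PD_VISIT_INTEGRAL_TYPES to it: the index arrays of SparseCooTensor/SparseCsrTensor (non_zero_indices / non_zero_crows) are only ever int32 or int64, so the visitor lambdas need not be instantiated for the remaining integral types. Below is a minimal, self-contained sketch of the dispatch pattern, not Paddle's actual implementation: the DataType enum, the macro name, and the plain throw are stand-ins for paddle::DataType, PD_PRIVATE_CASE_TYPE, and PD_THROW.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <vector>

// Stand-in for paddle::DataType; only the two index dtypes matter here.
enum class DataType { INT32, INT64 };

// Sketch of the dispatch pattern: switch on the runtime dtype tag and
// expand the visitor lambda with `data_t` aliased to the concrete type.
// NAME is assumed to be a string literal in this simplified version;
// the real macro stringizes an identifier with #NAME and reports errors
// through PD_THROW.
#define VISIT_BASE_INTEGRAL_TYPES(TYPE, NAME, ...)             \
  [&] {                                                        \
    const auto& __dtype__ = TYPE;                              \
    switch (__dtype__) {                                       \
      case DataType::INT32: {                                  \
        using data_t = int32_t;                                \
        return __VA_ARGS__();                                  \
      }                                                        \
      case DataType::INT64: {                                  \
        using data_t = int64_t;                                \
        return __VA_ARGS__();                                  \
      }                                                        \
      default:                                                 \
        throw std::runtime_error(                              \
            "function " NAME " is not implemented for dtype"); \
    }                                                          \
  }()

int main() {
  // Pretend this tag came from x.non_zero_indices().dtype().
  DataType index_dtype = DataType::INT64;
  VISIT_BASE_INTEGRAL_TYPES(index_dtype, "DemoKernel", ([&] {
                              // Inside the visitor, data_t is the concrete
                              // index type selected at runtime (int64_t).
                              std::vector<data_t> indices = {0, 2, 5};
                              std::cout << sizeof(data_t) << "-byte indices, "
                                        << indices.size() << " stored\n";
                            }));
  return 0;
}
```

The pattern hinges on textual expansion: the caller's lambda is pasted inside each case, where `using data_t = ...` is in scope, so `data_t` resolves to the dtype chosen at runtime. That is why the kernels in the diff below can write calls like `CoalesceGPUKernel<T, data_t>(...)` inside the visitor.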
@@ -87,6 +87,20 @@ namespace phi {
     }                                                                       \
   }()
 
+#define PD_VISIT_BASE_INTEGRAL_TYPES(TYPE, NAME, ...)                         \
+  [&] {                                                                       \
+    const auto& __dtype__ = TYPE;                                             \
+    switch (__dtype__) {                                                      \
+      PD_PRIVATE_CASE_TYPE(NAME, ::paddle::DataType::INT32, int, __VA_ARGS__) \
+      PD_PRIVATE_CASE_TYPE(                                                   \
+          NAME, ::paddle::DataType::INT64, int64_t, __VA_ARGS__)              \
+      default:                                                                \
+        PD_THROW("function " #NAME " is not implemented for data type `",     \
+                 __dtype__,                                                   \
+                 "`");                                                        \
+    }                                                                         \
+  }()
+
 ///////// Complex Dispatch Marco ///////////
 
 #define PD_VISIT_COMPLEX_TYPES(TYPE, NAME, ...)                               \
...
@@ -174,7 +174,7 @@ class CuSparseSpMatDescriptor {
   explicit CuSparseSpMatDescriptor(const phi::SparseCsrTensor& x,
                                    const phi::GPUContext& dev_ctx)
       : dev_ctx_(dev_ctx) {
-    PD_VISIT_INTEGRAL_TYPES(
+    PD_VISIT_BASE_INTEGRAL_TYPES(
         x.non_zero_crows().dtype(), "Csr CuSparseSpMatDescriptor", ([&] {
           CreateCsrDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
         }));
@@ -184,7 +184,7 @@ class CuSparseSpMatDescriptor {
   explicit CuSparseSpMatDescriptor(const phi::SparseCooTensor& x,
                                    const phi::GPUContext& dev_ctx)
       : dev_ctx_(dev_ctx) {
-    PD_VISIT_INTEGRAL_TYPES(
+    PD_VISIT_BASE_INTEGRAL_TYPES(
         x.non_zero_indices().dtype(), "Coo CuSparseSpMatDescriptor", ([&] {
           CreateCooDescriptor<T, data_t>(x, dev_ctx_, &descriptor_);
         }));
...
@@ -175,7 +175,7 @@ template <typename T, typename Context>
 void CoalesceKernel(const Context& dev_ctx,
                     const SparseCooTensor& x,
                     SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "CoalesceGPUKernel", ([&] {
         CoalesceGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
...
@@ -233,7 +233,7 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                          const std::string& key,
                          SparseCooTensor* x_grad,
                          DenseTensor* kernel_grad) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "Conv3dCooGradGPUKernel", ([&] {
         Conv3dCooGradGPUKernel<T, data_t>(dev_ctx,
                                           x,
...
@@ -221,7 +221,7 @@ void Conv3dCooKernel(const Context& dev_ctx,
                      SparseCooTensor* out,
                      DenseTensor* rulebook,
                      DenseTensor* counter) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "Conv3dCooGPUKernel", ([&] {
         Conv3dCooGPUKernel<T, data_t>(dev_ctx,
                                       x,
...
@@ -111,7 +111,7 @@ void SparseMaskKernel(const Context& dev_ctx,
                       const DenseTensor& x,
                       const SparseCooTensor& mask,
                       SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       mask.non_zero_indices().dtype(), "SparseMaskGPUKernel", ([&] {
         SparseMaskGPUKernel<T, data_t>(dev_ctx, x, mask, out);
       }));
@@ -270,7 +270,7 @@ void SparseMaskHelperKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             const DenseTensor& mask_indices,
                             DenseTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseMaskHelperGPUKernel", ([&] {
         SparseMaskHelperGPUKernel<T, data_t>(dev_ctx, x, mask_indices, out);
       }));
...
@@ -70,7 +70,7 @@ void MvCooGradKernel(const Context &dev_ctx,
   // InferMeta of SparseCooTensor 'dx', CreateLikeInferMeta
   EmptyLikeCooKernel<T, Context>(dev_ctx, x, dx);
   auto config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, dx->nnz());
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       dx->non_zero_indices().dtype(), "MvCooGradKernel", ([&] {
         MvCooGradGpuKernel<T>
             <<<config.block_per_grid.x,
@@ -117,7 +117,7 @@ void MvCsrGradKernel(const Context &dev_ctx,
   int col_number = dx->dims()[1];
   auto config = phi::backends::gpu::GetGpuLaunchConfig2D(
       dev_ctx, col_number, row_number);
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       dx->non_zero_crows().dtype(), "MvCsrGradKernel", ([&] {
         MvCsrGradGpuKernel<T>
             <<<config.block_per_grid.x,
...
@@ -116,7 +116,7 @@ void MaxPoolCooGradKernel(const Context& dev_ctx,
                           const SparseCooTensor& out_grad,
                           const std::vector<int>& kernel_sizes,
                           SparseCooTensor* x_grad) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "MaxPoolCooGradGPUKernel", ([&] {
         MaxPoolCooGradGPUKernel<T, data_t>(
             dev_ctx, x, rulebook, counter, out, out_grad, kernel_sizes, x_grad);
...
@@ -139,7 +139,7 @@ void MaxPoolCooKernel(const Context& dev_ctx,
                       SparseCooTensor* out,
                       DenseTensor* rulebook,
                       DenseTensor* counter) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "MaxPoolCooGPUKernel", ([&] {
         MaxPoolCooGPUKernel<T, data_t>(dev_ctx,
                                        x,
...
@@ -92,7 +92,7 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
   dim3 grid((total_row_number + 3) / 4);
   dim3 block(32, 4);
 
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       out.non_zero_crows().dtype(), "SoftmaxCsrGradKernel", ([&] {
         SoftmaxGradGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             out.non_zero_crows().data<data_t>(),
...
@@ -105,9 +105,9 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
   dim3 grid((total_row_number + 3) / 4);
   dim3 block(32, 4);
 
-  PD_VISIT_INTEGRAL_TYPES(x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
-                            SoftmaxGpuKernel<T, data_t>
-                                <<<grid, block, 0, dev_ctx.stream()>>>(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
+      x.non_zero_crows().dtype(), "CsrSoftmaxKernel", ([&] {
+        SoftmaxGpuKernel<T, data_t><<<grid, block, 0, dev_ctx.stream()>>>(
             x.non_zero_crows().data<data_t>(),
             x.non_zero_elements().data<T>(),
             out->mutable_non_zero_elements()->data<T>(),
...
@@ -277,7 +277,7 @@ template <typename T, typename Context>
 void SparseCsrToCooKernel(const Context& dev_ctx,
                           const SparseCsrTensor& x,
                           SparseCooTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_crows().dtype(), "SparseCsrToCooGPUKernel", ([&] {
         SparseCsrToCooGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
@@ -421,7 +421,7 @@ template <typename T, typename Context>
 void SparseCooToCsrKernel(const Context& dev_ctx,
                           const SparseCooTensor& x,
                           SparseCsrTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseCooToCsrGPUKernel", ([&] {
         SparseCooToCsrGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
@@ -510,7 +510,7 @@ template <typename T, typename Context>
 void SparseCooToDenseKernel(const Context& dev_ctx,
                             const SparseCooTensor& x,
                             DenseTensor* out) {
-  PD_VISIT_INTEGRAL_TYPES(
+  PD_VISIT_BASE_INTEGRAL_TYPES(
       x.non_zero_indices().dtype(), "SparseCooToDenseGPUKernel", ([&] {
         SparseCooToDenseGPUKernel<T, data_t>(dev_ctx, x, out);
       }));
...
@@ -160,7 +160,7 @@ class TestSparseUnary(unittest.TestCase):
     def test_sparse_cast(self):
         self.compare_with_dense_two_attr(paddle.cast,
-                                         paddle.incubate.sparse.cast, 'int16',
+                                         paddle.incubate.sparse.cast, 'int32',
                                          'float32')
         self.compare_with_dense_two_attr(paddle.cast,
                                          paddle.incubate.sparse.cast, 'int32',
...