[cherry-pick] remove if constexpr(), which is not supported on gcc54 (#50421)

att， cherry-pick #48563

[cherry-pick] remove if constexpr(), which is not supported on gcc54 (#50421)
att， cherry-pick #48563
913f40ee · zhangkaihuo · GitHub · eb610740 · 913f40ee · 913f40ee
Showing with 73 addition and 54 deletion

paddle/phi/kernels/sparse/gpu/conv_kernel.cu paddle/phi/kernels/sparse/gpu/conv_kernel.cu +12 -54

paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h +61 -0

未找到文件。
--- a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu
+++ b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu
@@ -150,31 +150,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
      const IntT* gather_indices = rulebook_ptr + h_offsets_ptr[i];
      const IntT* scatter_indices =
          rulebook_ptr + rulebook_len + h_offsets_ptr[i];
+      dispatchKernel(dev_ctx,
-      if constexpr (std::is_same<T, phi::dtype::float16>::value &&
-                    std::is_same<IntT, int32_t>::value) {
-        fp16_gather_gemm_scatter gather_gemm_scatter =
-            getBestFp16Kernel(M, N, K);
-        gather_gemm_scatter(
-            dev_ctx,
-            reinterpret_cast<const cutlass::half_t*>(
-                x.non_zero_elements().data<T>()),
-            reinterpret_cast<const cutlass::half_t*>(tmp_kernel_ptr),
-            reinterpret_cast<cutlass::half_t*>(out_values_ptr),
-            reinterpret_cast<cutlass::half_t*>(out_values_ptr),
-            M,
-            N,
-            K,
-            static_cast<const int32_t*>(gather_indices),
-            static_cast<const int32_t*>(scatter_indices),
-            static_cast<cutlass::half_t>(1),
-            static_cast<cutlass::half_t>(1));
-      }
-      if constexpr (std::is_same<T, float>::value &&
-                    std::is_same<IntT, int32_t>::value) {
-        fp32_gather_gemm_scatter gather_gemm_scatter =
-            getBestFp32Kernel(M, N, K, dev_ctx.GetComputeCapability());
-        gather_gemm_scatter(dev_ctx,
                     x.non_zero_elements().data<T>(),
                     tmp_kernel_ptr,
                     out_values_ptr,
@@ -184,26 +160,8 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
                     K,
                     gather_indices,
                     scatter_indices,
-                            static_cast<T>(1),
+                     cutlass,
-                            static_cast<T>(1));
+                     x.dtype());
-      }
-      if constexpr (std::is_same<T, double>::value &&
-                    std::is_same<IntT, int32_t>::value) {
-        fp64_gather_gemm_scatter gather_gemm_scatter =
-            getBestFp64Kernel(M, N, K);
-        gather_gemm_scatter(dev_ctx,
-                            x.non_zero_elements().data<T>(),
-                            tmp_kernel_ptr,
-                            out_values_ptr,
-                            out_values_ptr,
-                            M,
-                            N,
-                            K,
-                            gather_indices,
-                            scatter_indices,
-                            static_cast<T>(1),
-                            static_cast<T>(1));
-      }
    }
  } else {
 #endif

--- a/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h
+++ b/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h
@@ -23,6 +23,7 @@
 #include "cutlass/util/device_memory.h"
 #include "examples/common/helper.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/common/data_type.h"
 namespace phi {
 namespace sparse {
 typedef void (*fp16_gather_gemm_scatter)(const GPUContext& dev_ctx,
@@ -115,6 +116,66 @@ void launchKernel(const GPUContext& dev_ctx,
  CUTLASS_CHECK(status);
  gemm_op(dev_ctx.stream());
 }
+static void dispatchKernel(const GPUContext& dev_ctx,
+                           const void* const a,
+                           const void* const b,
+                           const void* const c,
+                           void* const d,
+                           const int m,
+                           const int n,
+                           const int k,
+                           const void* a_indices,
+                           const void* c_d_indices,
+                           const bool cutlass,
+                           const phi::DataType type) {
+  if (!cutlass) return;
+  if (type == phi::DataType::FLOAT16) {
+    fp16_gather_gemm_scatter gather_gemm_scatter = getBestFp16Kernel(m, n, k);
+    gather_gemm_scatter(dev_ctx,
+                        static_cast<const cutlass::half_t*>(a),
+                        static_cast<const cutlass::half_t*>(b),
+                        static_cast<const cutlass::half_t*>(c),
+                        static_cast<cutlass::half_t*>(d),
+                        m,
+                        n,
+                        k,
+                        static_cast<const int32_t*>(a_indices),
+                        static_cast<const int32_t*>(c_d_indices),
+                        static_cast<cutlass::half_t>(1),
+                        static_cast<cutlass::half_t>(1));
+  } else if (type == phi::DataType::FLOAT32) {
+    fp32_gather_gemm_scatter gather_gemm_scatter =
+        getBestFp32Kernel(m, n, k, dev_ctx.GetComputeCapability());
+    gather_gemm_scatter(dev_ctx,
+                        static_cast<const float*>(a),
+                        static_cast<const float*>(b),
+                        static_cast<const float*>(c),
+                        static_cast<float*>(d),
+                        m,
+                        n,
+                        k,
+                        static_cast<const int32_t*>(a_indices),
+                        static_cast<const int32_t*>(c_d_indices),
+                        static_cast<float>(1),
+                        static_cast<float>(1));
+  } else if (type == phi::DataType::FLOAT64) {
+    fp64_gather_gemm_scatter gather_gemm_scatter = getBestFp64Kernel(m, n, k);
+    gather_gemm_scatter(dev_ctx,
+                        static_cast<const double*>(a),
+                        static_cast<const double*>(b),
+                        static_cast<const double*>(c),
+                        static_cast<double*>(d),
+                        m,
+                        n,
+                        k,
+                        static_cast<const int32_t*>(a_indices),
+                        static_cast<const int32_t*>(c_d_indices),
+                        static_cast<double>(1),
+                        static_cast<double>(1));
+  }
+}
 struct cutlass_tensorop_h1688gemm_128x64_32x2_nn_align8 {
  using Gemm = cutlass::gemm::device::GemmUniversal<
      cutlass::half_t,