From e7a38f15ec696d3254094f321b02b68514ec1f68 Mon Sep 17 00:00:00 2001 From: umiswing Date: Mon, 5 Jun 2023 14:06:14 +0800 Subject: [PATCH] Add macro SPCONV_WITH_CUTLASS (#54274) --- cmake/external/cutlass.cmake | 1 + paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu | 14 +++++++------- paddle/phi/kernels/sparse/gpu/conv_kernel.cu | 6 +++--- .../kernels/sparse/gpu/cutlass_generator/common.h | 2 +- .../gather_gemm_scatter_manifest.py | 4 ++-- .../gather_gemm_scatter_operation.py | 2 +- .../phi/kernels/sparse/gpu/gather_gemm_scatter.h | 2 +- 7 files changed, 16 insertions(+), 15 deletions(-) diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake index ae7bf6ce6fa..55aa4ef3b53 100644 --- a/cmake/external/cutlass.cmake +++ b/cmake/external/cutlass.cmake @@ -25,6 +25,7 @@ include_directories( "${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/tools/util/include/") add_definitions("-DPADDLE_WITH_CUTLASS") +add_definitions("-DSPCONV_WITH_CUTLASS=0") if(NOT PYTHON_EXECUTABLE) find_package(PythonInterp REQUIRED) diff --git a/paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu b/paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu index 5128348b5d5..852eda01223 100644 --- a/paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/conv_grad_kernel.cu @@ -24,7 +24,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/sparse/gpu/conv.cu.h" -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS #include "paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h" #endif @@ -134,7 +134,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx, phi::backends::gpu::GpuMemsetAsync( out_index_ptr, 0, sizeof(int) * x.nnz() * 2, dev_ctx.stream()); -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS bool cutlass = true; if (dev_ctx.GetComputeCapability() < 80) cutlass = false; @@ -177,7 +177,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx, out_channels, out_grad_features_ptr); -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS } #endif const T* kernel_ptr = kernel.data(); @@ -195,7 +195,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx, T* tmp_d_x_ptr = d_x_features_ptr + offsets[i] * in_channels; T* tmp_d_kernel_ptr = d_kernel_ptr + i * in_channels * out_channels; -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS if (cutlass) { const IntT* gather_x_indices = rulebook_ptr + offsets[i]; const IntT* scatter_x_indices = rulebook_ptr + offsets[i]; @@ -266,13 +266,13 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx, tmp_kernel_ptr, static_cast(0), tmp_d_x_ptr); -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS } #endif } // 4. scatter -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS if (!cutlass) { #endif phi::funcs::sparse::ScatterV2(dev_ctx, @@ -284,7 +284,7 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx, in_channels, 2, x_grad_values_ptr); -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS } #endif } diff --git a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu index adefddd5af1..58c2b3bd5a8 100644 --- a/paddle/phi/kernels/sparse/gpu/conv_kernel.cu +++ b/paddle/phi/kernels/sparse/gpu/conv_kernel.cu @@ -23,7 +23,7 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/scatter.cu.h" #include "paddle/phi/kernels/funcs/sparse/scatter.cu.h" #include "paddle/phi/kernels/sparse/gpu/conv.cu.h" -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS #include "paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h" #endif @@ -159,7 +159,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx, dev_ctx, x, key, tmp_rulebook, h_counter, out, rulebook, counter); } -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS bool mixed_precision = dev_ctx.GetComputeCapability() >= 75 && dev_ctx.GetComputeCapability() < 80 && std::is_same::value; @@ -273,7 +273,7 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx, out_channels, 1, out_values_ptr); -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS } #endif } diff --git a/paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h b/paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h index 71d9aa3084a..79ddd5cf1b9 100644 --- a/paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h +++ b/paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h @@ -14,7 +14,7 @@ #pragma once -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS #include "cutlass/arch/mma.h" #include "cutlass/device_kernel.h" #include "cutlass/epilogue/thread/linear_combination.h" diff --git a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_manifest.py b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_manifest.py index ac0b3baf325..a93e042cf22 100644 --- a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_manifest.py +++ b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_manifest.py @@ -28,13 +28,13 @@ class GatherGemmScatterEmitOperationKindLibrary(EmitOperationKindLibrary): self.emitters = { OperationKind.Gemm: EmitGatherGemmScatterConfigurationLibrary } - self.header_template = "#pragma once\n#ifdef PADDLE_WITH_CUTLASS\n#include \"paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h\"\n" + self.header_template = "#pragma once\n#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS\n#include \"paddle/phi/kernels/sparse/gpu/cutlass_generator/common.h\"\n" self.configuration_header_template = """ /* Generated by gemm_operation.py - Do not edit. */ #pragma once -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS """ self.entry_template = "" self.configuration_prototype_template = "" diff --git a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py index 8cfa2c3373b..22b259742f4 100644 --- a/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py +++ b/paddle/phi/kernels/sparse/gpu/cutlass_generator/gather_gemm_scatter_operation.py @@ -232,7 +232,7 @@ class EmitGatherGemmScatterConfigurationLibrary(EmitGemmConfigurationLibrary): Generated by gemm_operation.py - Do not edit. */ #pragma once -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS """ self.namespace_template = """ diff --git a/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h b/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h index 73b7c3f3003..9cfad6db7e1 100644 --- a/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h +++ b/paddle/phi/kernels/sparse/gpu/gather_gemm_scatter.h @@ -14,7 +14,7 @@ #pragma once #include -#ifdef PADDLE_WITH_CUTLASS +#if defined(PADDLE_WITH_CUTLASS) && SPCONV_WITH_CUTLASS #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/kernels/autotune/auto_tune_base.h" -- GitLab