CMakeLists.txt 5.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
# Scratch copy of the generated kernel declarations header. The path is stored
# as an INTERNAL cache entry, so it is not limited to this directory scope.
set(kernel_declare_file
    ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h.tmp
    CACHE INTERNAL "declarations.h file")
# Final location of the declarations header.
set(kernel_declare_file_final
    ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h)
# (Re)create the scratch header on every configure run: banner, "#pragma once"
# and the kernel registry include. file(WRITE) concatenates its content
# arguments, so both pieces are emitted in a single call.
file(
  WRITE "${kernel_declare_file}"
  "// Generated by the paddle/phi/kernels/CMakeLists.txt.  DO NOT EDIT!\n\n#pragma once\n\n"
  "#include \"paddle/phi/core/kernel_registry.h\"\n\n")
12 13 14
# Path of the ".prune" variant of the declarations header, stored as an
# INTERNAL cache entry.
# NOTE(review): presumably consumed by prune_declaration_h() - confirm. The
# cache docstring is the same text used for kernel_declare_file.
set(kernel_declare_file_prune
    ${PADDLE_BINARY_DIR}/paddle/phi/kernels/declarations.h.prune
    CACHE INTERNAL "declarations.h file")
15

16
# Functors and helper functions that the phi kernels call into.
add_subdirectory(funcs)

# Kernel autotune support.
add_subdirectory(autotune)

22
# Hand the scratch declarations header and its final path to the project helper
# copy_if_different (defined outside this file; presumably it skips the copy
# when the contents already match - confirm).
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
23 24 25 26 27

# Header globs. No RELATIVE option is passed, so the patterns are resolved
# against the current source directory and the results are absolute paths.
file(
  GLOB kernel_h
  "*.h"
  "selected_rows/*.h"
  "sparse/*.h"
  "strings/*.h")
file(
  GLOB kernel_impl_h
  "impl/*.h"
  "selected_rows/impl/*.h")
file(
  GLOB kernel_primitive_h
  "primitive/*.h")

28
# GPU kernel sources, fusion ops included. RELATIVE keeps the entries relative
# to this directory, which is the form the list(REMOVE_ITEM kernel_cu ...)
# calls in this file use when naming individual files.
file(
  GLOB kernel_cu
  RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
  "gpu/*.cu"
  "gpu/*.cu.cc"
  "gpudnn/*.cu"
  "kps/*.cu"
  "legacy/kps/*.cu"
  "legacy/gpu/*.cu"
  "selected_rows/gpu/*.cu"
  "sparse/gpu/*.cu"
  "strings/gpu/*.cu"
  "fusion/gpu/*.cu")
42

43 44 45 46
# When APPLE or WIN32 is set, drop the fusion_group kernel from the GPU source
# list. REMOVE_ITEM is a no-op if the file was not globbed.
if(APPLE OR WIN32)
  list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu")
endif()

H
huangjiyi 已提交
47 48 49 50
# The DGC kernel is only kept in the GPU source list when WITH_DGC is enabled.
if(NOT WITH_DGC)
  list(REMOVE_ITEM kernel_cu "gpu/dgc_kernel.cu")
endif()

51 52 53 54 55
# When REDUCE_INFERENCE_LIB_SIZE is defined, drop the *_grad_kernel sources
# from the GPU list. kernel_cu is globbed from "*.cu" and "*.cu.cc" patterns
# only, so a filter anchored on "_grad_kernel.cc" alone can never match a
# "*_grad_kernel.cu.cc" file; the ".cu.cc" suffix is therefore matched
# explicitly. The plain ".cc" and ".cu" suffixes are kept, so everything that
# was excluded before is still excluded.
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
  list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.(cc|cu|cu\\.cc)$")
endif()

56
# CUTLASS-backed fusion kernels: run the Python generators at configure time,
# then append the generated and hand-written .cu files to kernel_cu.
if(WITH_CUTLASS)
  # Create the output directory before any generator starts. Several COMMANDs
  # inside a single execute_process() run concurrently as a pipeline (stdout of
  # one piped into stdin of the next), so the directory creation and every
  # generator get their own sequential step, each with its exit status checked.
  file(MAKE_DIRECTORY
       "${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/conv2d/generated")

  foreach(cutlass_conv2d_generator
          "conv2d_bias_act.py" "conv2d_bias_residual.py"
          "conv2d_depthwise_bias_act.py")
    execute_process(
      COMMAND ${PYTHON_EXECUTABLE} "${cutlass_conv2d_generator}"
      WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/fusion/cutlass/conv2d"
      RESULT_VARIABLE cutlass_conv2d_gen_res)
    if(NOT cutlass_conv2d_gen_res EQUAL 0)
      message(
        FATAL_ERROR
          "The cutlass conv2d kernel generation failed in ${cutlass_conv2d_generator} (result: ${cutlass_conv2d_gen_res})"
      )
    endif()
  endforeach()

  # Both memory-efficient-attention generators report through the same result
  # variable, so it is checked right after each run; otherwise the second run
  # would overwrite a failure of the first.
  foreach(memory_efficient_attention_generator
          "generate_kernels.py" "generate_variable_forward_kernels.py")
    execute_process(
      COMMAND
        ${PYTHON_EXECUTABLE}
        ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/${memory_efficient_attention_generator}
        --cuda_arch "${NVCC_ARCH_BIN}"
      RESULT_VARIABLE memory_efficient_attention_gen_res)

    if(NOT memory_efficient_attention_gen_res EQUAL 0)
      message(
        FATAL_ERROR
          "The memory efficient attention kernel generation (${memory_efficient_attention_generator}) errors with NVCC_ARCH_BIN=${NVCC_ARCH_BIN}"
      )
    endif()
  endforeach()

  # Pick up the generated and the checked-in cutlass sources, relative to this
  # directory like the rest of kernel_cu.
  file(
    GLOB cutlass_cu
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "fusion/cutlass/conv2d/generated/*.cu"
    "fusion/cutlass/conv2d/*.cu"
    "fusion/cutlass/*.cu"
    "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu"
    "fusion/cutlass/memory_efficient_attention/autogen_variable/impl/*.cu")
  list(APPEND kernel_cu ${cutlass_cu})
endif()

97 98 99 100 101
# The fused scale/bias/relu/conv/bnstats kernel is only kept in the GPU source
# list when WITH_CUDNN_FRONTEND is enabled.
if(NOT WITH_CUDNN_FRONTEND)
  list(REMOVE_ITEM kernel_cu
       "fusion/gpu/fused_scale_bias_relu_conv_bnstats_kernel.cu")
endif()

Y
YuanRisheng 已提交
102
# Glob patterns for the .cc kernel sources, relative to this directory. Each
# pattern is listed exactly once ("legacy/*.cc" and "legacy/cpu/*.cc" used to
# appear twice).
set(cc_search_pattern
    "*.cc"
    "cpu/*.cc"
    "legacy/*.cc"
    "legacy/cpu/*.cc"
    "selected_rows/*.cc"
    "selected_rows/cpu/*.cc"
    "sparse/*.cc"
    "sparse/cpu/*.cc"
    "strings/*.cc"
    "strings/cpu/*.cc"
    "fusion/*.cc"
    "stride/*.cc"
    "fusion/cpu/*.cc")
Y
YuanRisheng 已提交
118 119 120 121

# oneDNN kernels are only searched for when WITH_MKLDNN is enabled.
if(WITH_MKLDNN)
  list(
    APPEND
    cc_search_pattern
    "legacy/onednn/*.cc"
    "onednn/*.cc"
    "fusion/onednn/*.cc")
endif()

Y
YuanRisheng 已提交
124 125 126 127 128
# Expand cc_search_pattern into kernel_cc; RELATIVE makes every entry a path
# relative to this source directory.
file(
  GLOB kernel_cc
  RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
  ${cc_search_pattern})

129 130 131 132
# When REDUCE_INFERENCE_LIB_SIZE is defined (its value is not inspected), drop
# every source whose name ends in "_grad_kernel.cc" from kernel_cc.
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
  list(FILTER kernel_cc EXCLUDE REGEX ".*_grad_kernel\\.cc$")
endif()

133
# XPU kernel sources, as paths relative to this source directory.
file(
  GLOB kernel_xpu
  RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
  "xpu/*.cc"
  "legacy/xpu/*.cc"
  "selected_rows/xpu/*.cc"
  "fusion/xpu/*.cc"
  "sparse/xpu/*.cc")
138 139

# CUDA and ROCm builds both consume the kernel_cu list: the sources are added
# to kernels_srcs and passed (as one list argument) to kernel_declare.
if(WITH_GPU OR WITH_ROCM)
  collect_srcs(kernels_srcs SRCS ${kernel_cu})
  kernel_declare("${kernel_cu}")
endif()

# XPU builds. With WITH_XPU_KP the KPS sources and the generic .cc kernels are
# first staged in the binary directory and collected from there.
if(WITH_XPU)
  if(WITH_XPU_KP)
    # Stage kps/ and legacy/kps/ together in <binary dir>/kps/.
    file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/kps/"
         DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/kps/")
    file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/legacy/kps/"
         DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/kps/")

    # Rename every staged *.cu to <name>.kps (NAME_WE strips the extension),
    # then glob the renamed files.
    file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.cu")
    foreach(kernel ${kernel_xpu_kps})
      get_filename_component(name "${kernel}" NAME_WE)
      file(RENAME "${kernel}" "${CMAKE_CURRENT_BINARY_DIR}/kps/${name}.kps")
    endforeach()
    file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.kps")
    collect_generated_srcs(kernels_srcs SRCS ${kernel_xpu_kps})

    # Copy each .cc kernel verbatim into the binary tree, keeping its relative
    # path, and collect every .cc found under the binary directory.
    foreach(kernel ${kernel_cc})
      configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${kernel}"
                     "${CMAKE_CURRENT_BINARY_DIR}/${kernel}" COPYONLY)
    endforeach()
    file(GLOB_RECURSE kernel_xpu_cc "${CMAKE_CURRENT_BINARY_DIR}/*.cc")
    collect_generated_srcs(kernels_srcs SRCS ${kernel_xpu_cc})
    # The staged copies replace the in-source list, so the later
    # collect_srcs/kernel_declare calls on kernel_cc receive nothing.
    set(kernel_cc "")

  endif()
  collect_srcs(kernels_srcs SRCS ${kernel_xpu})
  # kernel_xpu_kps and kernel_xpu_cc are only set in the WITH_XPU_KP branch
  # above.
  kernel_declare("${kernel_xpu}")
  kernel_declare("${kernel_xpu_kps}")
  kernel_declare("${kernel_xpu_cc}")
endif()

173 174
# Add the .cc kernels to kernels_srcs and pass them to kernel_declare.
# kernel_cc is reset to an empty string earlier in this file when both
# WITH_XPU and WITH_XPU_KP are enabled.
collect_srcs(kernels_srcs SRCS ${kernel_cc})
kernel_declare("${kernel_cc}")
175 176 177 178

# Call prune_declaration_h() (defined outside this file) only when KERNEL_LIST
# is a non-empty string.
if(NOT "${KERNEL_LIST}" STREQUAL "")
  prune_declaration_h()
endif()