# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# generate_unify_header(<DIR_NAME> [HEADER_NAME <name>] [SKIP_SUFFIX <suffix>])
#
# Writes ${CMAKE_CURRENT_SOURCE_DIR}/include/<name>.h with one #include line
# per top-level *.h file found in ${CMAKE_CURRENT_SOURCE_DIR}/<DIR_NAME>, then
# appends an #include of that generated header to
# ${phi_extension_header_file}.
#
#   DIR_NAME     directory (relative to the current source dir) to scan
#   HEADER_NAME  base name of the generated header; defaults to DIR_NAME
#   SKIP_SUFFIX  when given, headers whose path contains "<suffix>.h" are
#                left out of the generated header
#
# All emitted include paths have the "${PADDLE_SOURCE_DIR}/" prefix removed.
function(generate_unify_header DIR_NAME)
    cmake_parse_arguments(generate_unify_header "" "HEADER_NAME;SKIP_SUFFIX" "" ${ARGN})

    # the generated header is named after the directory unless overridden
    if("${generate_unify_header_HEADER_NAME}" STREQUAL "")
        set(header_name "${DIR_NAME}")
    else()
        set(header_name "${generate_unify_header_HEADER_NAME}")
    endif()
    # empty when SKIP_SUFFIX was not passed
    set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}")

    # start the target header file from scratch
    set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
    file(WRITE ${header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")

    # collect all top-level headers and write them into the header file
    file(GLOB top_level_headers "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
    foreach(header ${top_level_headers})
        # -1 means "keep this header"; only search when a suffix was requested
        set(skip_suffix_found -1)
        if(NOT "${skip_suffix}" STREQUAL "")
            string(FIND "${header}" "${skip_suffix}.h" skip_suffix_found)
        endif()
        if(${skip_suffix_found} EQUAL -1)
            string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
            file(APPEND ${header_file} "#include \"${header}\"\n")
        endif()
    endforeach()

    # append the generated header into extension.h
    string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
    file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n")
endfunction()

# Callers of kernel_declare must make sure that the input source files exist.
# kernel_declare(<kernel_src>...)
#
# For each given kernel source file, finds the first PD_REGISTER_KERNEL /
# PD_REGISTER_GENERAL_KERNEL registration in its text and appends a matching
#   PD_DECLARE_KERNEL(<kernel_name>, <BACKEND>, ALL_LAYOUT);
# line to ${kernel_declare_file}. Files without a registration are skipped.
#
# <BACKEND> is chosen from the directory component found in the file path:
# cpu/ -> CPU, gpu/ -> GPU, xpu/ -> XPU, gpudnn/ -> GPUDNN, kps/ -> KPS;
# anything else falls back to CPU.
function(kernel_declare TARGET_LIST)
    # Callers expand their source list unquoted, so only the first file is
    # bound to TARGET_LIST and the remaining ones arrive through ARGN.
    # Iterate over both so that no source file is silently ignored.
    foreach(kernel_path ${TARGET_LIST} ${ARGN})
        file(READ "${kernel_path}" kernel_impl)
        string(REGEX MATCH "(PD_REGISTER_KERNEL|PD_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*,[ \t\r\n\/]*[a-z0-9_]*" first_registry "${kernel_impl}")
        if (NOT first_registry STREQUAL "")
            # some gpu kernels can only run on cuda and do not support rocm,
            # so registrations tagged "cuda_only" are skipped for ROCm builds
            if (WITH_ROCM)
                string(FIND "${first_registry}" "cuda_only" pos)
                if (NOT pos EQUAL -1)
                    continue()
                endif()
            endif()

            # parse the first kernel name: drop the macro name, the comma,
            # all whitespace and an optional "//cuda_only" marker
            string(REPLACE "PD_REGISTER_KERNEL(" "" kernel_name "${first_registry}")
            string(REPLACE "PD_REGISTER_GENERAL_KERNEL(" "" kernel_name "${kernel_name}")
            string(REPLACE "," "" kernel_name "${kernel_name}")
            string(REGEX REPLACE "[ \t\r\n]+" "" kernel_name "${kernel_name}")
            string(REGEX REPLACE "//cuda_only" "" kernel_name "${kernel_name}")

            # pick the backend from the directory the source file lives in
            # TODO(chenweihang): default declare ALL_LAYOUT for each kernel
            if ("${kernel_path}" MATCHES "./cpu\/")
                set(kernel_backend "CPU")
            elseif ("${kernel_path}" MATCHES "./gpu\/")
                set(kernel_backend "GPU")
            elseif ("${kernel_path}" MATCHES "./xpu\/")
                set(kernel_backend "XPU")
            elseif ("${kernel_path}" MATCHES "./gpudnn\/")
                set(kernel_backend "GPUDNN")
            elseif ("${kernel_path}" MATCHES "./kps\/")
                set(kernel_backend "KPS")
            else ()
                # device-independent kernel, CPU is used temporarily
                set(kernel_backend "CPU")
            endif()

            # append the kernel declaration into declarations.h
            file(APPEND ${kernel_declare_file} "PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ALL_LAYOUT);\n")
        endif()
    endforeach()
endfunction()

# kernel_library(<TARGET> [SUB_DIR <dir>] [SRCS <src>...] [DEPS <dep>...])
#
# Creates the library target(s) for the kernel whose header is
# ${CMAKE_CURRENT_SOURCE_DIR}/<TARGET>.h.
#
#   TARGET   kernel base name; implementation files are looked up next to
#            the header as <TARGET>.cc, cpu/<TARGET>.cc, gpu/<TARGET>.cu,
#            gpu/<TARGET>.cu.cc, kps/<TARGET>.cu, gpudnn/<TARGET>.cu and
#            xpu/<TARGET>.cc, depending on the WITH_* switches
#   SUB_DIR  kernel sub-directory; "selected_rows" / "sparse" add the
#            "_sr" / "_sp" suffix to every created target name
#   SRCS     parsed, but explicit sources are not supported yet (see TODO)
#   DEPS     extra dependencies handed to the created libraries
#
# Dependencies on other kernels are derived from the
# `#include "paddle/phi/kernels/..._kernel.h"` lines found in the sources.
# When a library is created, its name is appended to the global property
# PHI_KERNELS and kernel_declare() is called for every source group.
function(kernel_library TARGET)
    set(common_srcs)
    set(cpu_srcs)
    set(gpu_srcs)
    set(xpu_srcs)
    set(gpudnn_srcs)
    set(kps_srcs)
    # parse and save the deps kernel targets
    set(all_srcs)
    set(kernel_deps)

    # `options` must be set here; otherwise whatever value the caller's scope
    # happens to hold would be passed to cmake_parse_arguments
    set(options "")
    set(oneValueArgs SUB_DIR)
    set(multiValueArgs SRCS DEPS)
    set(target_build_flag 1)

    cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    # used for cc_library selected_rows / sparse dir targets
    set(target_suffix "")
    if ("${kernel_library_SUB_DIR}" STREQUAL "selected_rows")
        set(target_suffix "_sr")
    endif()
    if ("${kernel_library_SUB_DIR}" STREQUAL "sparse")
        set(target_suffix "_sp")
    endif()

    list(LENGTH kernel_library_SRCS kernel_library_SRCS_len)
    # one kernel only matches one impl file in each backend
    if (${kernel_library_SRCS_len} EQUAL 0)
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
            list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
        endif()
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc AND NOT WITH_XPU_KP)
            list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
        endif()
        if (WITH_GPU OR WITH_ROCM)
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
                list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
                list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
                list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
                list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
            endif()
        endif()
        if (WITH_XPU)
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
                list(APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
            endif()
        endif()
        # Not every kernel has a kps implementation: only copy the file when
        # it exists, otherwise file(COPY) would abort the configure step.
        if (WITH_XPU_KP AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
            # Change XPU2 file suffix
            # NOTE(chenweihang): If we can be sure that the *.kps suffix is no longer used, it can be copied directly to *.xpu
            file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps)
            file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.cu ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
            list(APPEND kps_srcs ${CMAKE_CURRENT_BINARY_DIR}/kps/${TARGET}.kps)
        endif()
    else()
        # TODO(chenweihang): impl compile by source later
    endif()

    # every file whose includes are scanned for kernel dependencies
    list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.h)
    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
        list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
    endif()
    list(APPEND all_srcs ${common_srcs})
    list(APPEND all_srcs ${cpu_srcs})
    list(APPEND all_srcs ${gpu_srcs})
    list(APPEND all_srcs ${xpu_srcs})
    list(APPEND all_srcs ${gpudnn_srcs})
    list(APPEND all_srcs ${kps_srcs})

    # step 1: collect the kernel header includes of all source files
    set(all_include_kernels)
    foreach(src ${all_srcs})
        file(READ ${src} target_content)
        # "kernels/xxx"(DenseTensor Kernel) can only include each other, but can't include "SUB_DIR/xxx" (such as selected_rows Kernel)
        # NOTE: the content is quoted so that an empty file or a file
        # containing ';' is still passed as exactly one input argument
        string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels "${target_content}")
        list(APPEND all_include_kernels ${include_kernels})

        # "SUB_DIR/xxx" can include "kernels/xx" and "SUB_DIR/xxx"
        if (NOT "${kernel_library_SUB_DIR}" STREQUAL "")
            string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels "${target_content}")
            list(APPEND all_include_kernels ${include_kernels})
        endif()
    endforeach()

    # step 2: turn every collected include line into a kernel target name
    foreach(include_kernel ${all_include_kernels})
        # NOTE(dev): we should firstly match kernel_library_SUB_DIR.
        if (NOT "${kernel_library_SUB_DIR}" STREQUAL "" AND
            "${include_kernel}" MATCHES "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/")
            string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name "${include_kernel}")
            # for selected_rows / sparse directory, add ${target_suffix}.
            string(REGEX REPLACE ".h\"" "${target_suffix}" kernel_name "${kernel_name}")
        else()
            string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name "${include_kernel}")
            string(REGEX REPLACE ".h\"" "" kernel_name "${kernel_name}")
        endif()
        list(APPEND kernel_deps ${kernel_name})
    endforeach()

    # a kernel must not depend on itself, and each dependency is listed once
    list(LENGTH kernel_deps kernel_deps_len)
    if (${kernel_deps_len} GREATER 0)
        list(REMOVE_DUPLICATES kernel_deps)
        list(REMOVE_ITEM kernel_deps ${TARGET}${target_suffix})
    endif()

    list(LENGTH common_srcs common_srcs_len)
    list(LENGTH cpu_srcs cpu_srcs_len)
    list(LENGTH gpu_srcs gpu_srcs_len)
    list(LENGTH xpu_srcs xpu_srcs_len)
    list(LENGTH gpudnn_srcs gpudnn_srcs_len)
    list(LENGTH kps_srcs kps_srcs_len)

    # kernel source file level
    # level 1: base device kernel (if any device or dnn kernel exists, the cpu kernel must exist too!)
    # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
    # = dnn srcs: gpudnn_srcs
    # level 2: device-independent kernel
    # - common_srcs

    set(partial_build_flag 0)
    set(base_build_flag 0)
    if (${common_srcs_len} GREATER 0)
        set(partial_build_flag 1)
    endif()
    if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
        set(base_build_flag 1)
    endif()

    # names of the targets this call may create
    set(lib_name ${TARGET}${target_suffix})
    set(base_lib_name ${TARGET}_base${target_suffix})
    set(dnn_lib_name ${TARGET}_gpudnn${target_suffix})
    # dependencies shared by every library that compiles kernel sources
    set(lib_deps ${kernel_library_DEPS} ${kernel_deps})

    # gpudnn or mkldnn needs to be compiled separately
    set(dnn_kernels)
    if (${gpudnn_srcs_len} GREATER 0)
        if (WITH_GPU)
            nv_library(${dnn_lib_name} SRCS ${gpudnn_srcs} DEPS ${lib_deps})
        elseif (WITH_ROCM)
            hip_library(${dnn_lib_name} SRCS ${gpudnn_srcs} DEPS ${lib_deps})
        endif()
        list(APPEND dnn_kernels ${dnn_lib_name})
    endif()
    list(LENGTH dnn_kernels dnn_kernels_len)

    if (${partial_build_flag} EQUAL 0 AND ${base_build_flag} EQUAL 1)
        # only device kernels exist
        if (WITH_GPU)
            if (${dnn_kernels_len} GREATER 0)
                nv_library(${base_lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
                nv_library(${lib_name} DEPS ${base_lib_name} ${dnn_kernels})
            else()
                nv_library(${lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
            endif()
        elseif (WITH_ROCM)
            if (${dnn_kernels_len} GREATER 0)
                hip_library(${base_lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
                hip_library(${lib_name} DEPS ${base_lib_name} ${dnn_kernels})
            else()
                hip_library(${lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
            endif()
        elseif (WITH_XPU_KP)
            xpu_library(${lib_name} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${lib_deps})
        else()
            cc_library(${lib_name} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${lib_deps})
        endif()
    elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 1)
        # device kernels plus a device-independent part built on top of them
        if (WITH_GPU)
            nv_library(${base_lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
            nv_library(${lib_name} SRCS ${common_srcs} DEPS ${base_lib_name} ${dnn_kernels})
        elseif (WITH_ROCM)
            hip_library(${base_lib_name} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${lib_deps})
            hip_library(${lib_name} SRCS ${common_srcs} DEPS ${base_lib_name} ${dnn_kernels})
        elseif (WITH_XPU_KP)
            xpu_library(${base_lib_name} SRCS ${cpu_srcs} ${kps_srcs} DEPS ${lib_deps})
            xpu_library(${lib_name} SRCS ${common_srcs} DEPS ${base_lib_name})
        else()
            cc_library(${base_lib_name} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${lib_deps})
            cc_library(${lib_name} SRCS ${common_srcs} DEPS ${base_lib_name})
        endif()
    elseif (${partial_build_flag} EQUAL 1 AND ${base_build_flag} EQUAL 0)
        # only a device-independent implementation exists
        if (WITH_GPU)
            nv_library(${lib_name} SRCS ${common_srcs} DEPS ${lib_deps})
        elseif (WITH_ROCM)
            hip_library(${lib_name} SRCS ${common_srcs} DEPS ${lib_deps})
        elseif (WITH_XPU_KP)
            xpu_library(${lib_name} SRCS ${common_srcs} DEPS ${lib_deps})
        else()
            cc_library(${lib_name} SRCS ${common_srcs} DEPS ${lib_deps})
        endif()
    else()
        # neither common nor base device sources: no main library is built
        set(target_build_flag 0)
    endif()

    if (${target_build_flag} EQUAL 1)
        # append target into PHI_KERNELS property; reaching this point means
        # at least one of the common / base device source lists is non-empty
        set_property(GLOBAL APPEND PROPERTY PHI_KERNELS ${lib_name})

        # parse kernel name and auto generate kernel declaration
        # here, we don't need to check WITH_XXX, because if not WITH_XXX, the
        # xxx_srcs_len will be equal to 0
        foreach(src_group common_srcs cpu_srcs gpu_srcs xpu_srcs gpudnn_srcs kps_srcs)
            list(LENGTH ${src_group} src_group_len)
            if (${src_group_len} GREATER 0)
                kernel_declare(${${src_group}})
            endif()
        endforeach()
    endif()
endfunction()

# register_kernels([SUB_DIR <dir>] [EXCLUDES <kernel>...] [DEPS <dep>...])
#
# Calls kernel_library() once for every "*_kernel.h" header found directly in
# ${CMAKE_CURRENT_SOURCE_DIR}, using the header name without ".h" as the
# kernel target name.
#
#   SUB_DIR   forwarded unchanged to kernel_library()
#   EXCLUDES  kernel target names that must not be registered
#   DEPS      forwarded to kernel_library() when at least one is given
function(register_kernels)
    set(options "")
    set(oneValueArgs SUB_DIR)
    set(multiValueArgs EXCLUDES DEPS)
    cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    # kernel target names = kernel header file names with ".h" removed
    file(GLOB kernel_names RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_kernel.h")
    string(REPLACE ".h" "" kernel_names "${kernel_names}")

    # only pass the DEPS keyword along when there is something to pass
    set(deps_args)
    list(LENGTH register_kernels_DEPS deps_count)
    if (${deps_count} GREATER 0)
        set(deps_args DEPS ${register_kernels_DEPS})
    endif()

    foreach(kernel_name ${kernel_names})
        list(FIND register_kernels_EXCLUDES ${kernel_name} excluded_index)
        if (NOT ${excluded_index} EQUAL -1)
            # explicitly excluded by the caller
            continue()
        endif()
        kernel_library(${kernel_name} ${deps_args} SUB_DIR ${register_kernels_SUB_DIR})
    endforeach()
endfunction()

# append_op_util_declare(<TARGET>)
#
# Reads ${CMAKE_CURRENT_SOURCE_DIR}/<TARGET>, takes the first
# PD_REGISTER_BASE_KERNEL_NAME(...) or PD_REGISTER_ARG_MAPPING_FN(...)
# registration found in it and appends the corresponding PD_DECLARE_...(<name>);
# line to ${op_utils_header}. Nothing is written when the file contains no
# such registration.
function(append_op_util_declare TARGET)
    file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET} target_content)
    string(REGEX MATCH "(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" util_registrar "${target_content}")
    # without a match the declaration would degenerate to a bare ");" line,
    # which must not end up in the generated header
    if (NOT util_registrar STREQUAL "")
        string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" util_declare "${util_registrar}")
        string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" util_declare "${util_declare}")
        # the match stops after the first macro argument; close the call
        string(APPEND util_declare ");\n")
        file(APPEND ${op_utils_header} "${util_declare}")
    endif()
endfunction()

# register_op_utils(<TARGET_NAME> [EXCLUDES <item>...] [DEPS <dep>...])
#
# Runs append_op_util_declare() on every "*_sig.cc" file found directly in
# ${CMAKE_CURRENT_SOURCE_DIR} and builds all of those files into the
# cc_library <TARGET_NAME>, linked against DEPS.
#
# NOTE: EXCLUDES is accepted by the argument parser but is not consulted
# anywhere in this function.
function(register_op_utils TARGET_NAME)
    set(options "")
    set(oneValueArgs "")
    set(multiValueArgs EXCLUDES DEPS)
    cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    # signature files, as paths relative to the current source directory
    file(GLOB signature_files RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_sig.cc")

    set(utils_srcs)
    foreach(signature_file ${signature_files})
        # emit the declaration first, then record the absolute source path
        append_op_util_declare(${signature_file})
        list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${signature_file})
    endforeach()

    cc_library(${TARGET_NAME} SRCS ${utils_srcs} DEPS ${register_op_utils_DEPS})
endfunction()