pten.cmake 14.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
function(generate_unify_header DIR_NAME)
    set(options "")
    set(oneValueArgs HEADER_NAME SKIP_SUFFIX)
    set(multiValueArgs "")
    cmake_parse_arguments(generate_unify_header "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    # get header name and suffix
    set(header_name "${DIR_NAME}")
    list(LENGTH generate_unify_header_HEADER_NAME generate_unify_header_HEADER_NAME_len)
    if(${generate_unify_header_HEADER_NAME_len} GREATER 0)
        set(header_name "${generate_unify_header_HEADER_NAME}")
    endif()
    set(skip_suffix "")
    list(LENGTH generate_unify_header_SKIP_SUFFIX generate_unify_header_SKIP_SUFFIX_len)
    if(${generate_unify_header_SKIP_SUFFIX_len} GREATER 0)
        set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}")
    endif()

    # generate target header file
    set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
36
    file(WRITE ${header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56

    # get all top-level headers and write into header file
    file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
    foreach(header ${HEADERS})
        if("${skip_suffix}" STREQUAL "")
            string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
            file(APPEND ${header_file} "#include \"${header}\"\n")
        else()
            string(FIND "${header}" "${skip_suffix}.h" skip_suffix_found)
            if(${skip_suffix_found} EQUAL -1)
                string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header "${header}")
                file(APPEND ${header_file} "#include \"${header}\"\n")
            endif()
        endif()
    endforeach()
    # append header into extension.h
    string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
    file(APPEND ${pten_extension_header_file} "#include \"${header_file}\"\n")
endfunction()

57
# call kernel_declare need to make sure whether the target of input exists
58 59 60
function(kernel_declare TARGET_LIST)
    foreach(kernel_path ${TARGET_LIST})
        file(READ ${kernel_path} kernel_impl)
61
        # TODO(chenweihang): rename PT_REGISTER_KERNEL to PT_REGISTER_KERNEL
62
        # NOTE(chenweihang): now we don't recommend to use digit in kernel name
63
        string(REGEX MATCH "(PT_REGISTER_KERNEL|PT_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*," first_registry "${kernel_impl}")
64 65
        if (NOT first_registry STREQUAL "")
            # parse the first kernel name
66
            string(REPLACE "PT_REGISTER_KERNEL(" "" kernel_name "${first_registry}")
67
            string(REPLACE "PT_REGISTER_GENERAL_KERNEL(" "" kernel_name "${kernel_name}")
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
            string(REPLACE "," "" kernel_name "${kernel_name}")
            string(REGEX REPLACE "[ \t\r\n]+" "" kernel_name "${kernel_name}")
            # append kernel declare into declarations.h
            # TODO(chenweihang): default declare ALL_LAYOUT for each kernel
            if (${kernel_path} MATCHES "./cpu\/")
                file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n")
            elseif (${kernel_path} MATCHES "./gpu\/")
                file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, GPU, ALL_LAYOUT);\n")
            elseif (${kernel_path} MATCHES "./xpu\/")
                file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, XPU, ALL_LAYOUT);\n")
            else ()
                # deal with device independent kernel, now we use CPU temporaary
                file(APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name}, CPU, ALL_LAYOUT);\n")
            endif()
        endif()
    endforeach()
endfunction()

function(kernel_library TARGET)
    set(common_srcs)
    set(cpu_srcs)
    set(gpu_srcs)
    set(xpu_srcs)
91
    set(selected_rows_srcs)
92 93 94
    # parse and save the deps kerenl targets
    set(all_srcs)
    set(kernel_deps)
95

96
    set(oneValueArgs SUB_DIR)
97 98 99 100 101 102 103 104 105 106 107 108 109
    set(multiValueArgs SRCS DEPS)
    cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    list(LENGTH kernel_library_SRCS kernel_library_SRCS_len)
    # one kernel only match one impl file in each backend
    if (${kernel_library_SRCS_len} EQUAL 0)
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
            list(APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
        endif()
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
            list(APPEND cpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/cpu/${TARGET}.cc)
        endif()
110 111 112
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
            list(APPEND selected_rows_srcs ${CMAKE_CURRENT_SOURCE_DIR}/selected_rows/${TARGET}.cc)
        endif()
113 114 115 116
        if (WITH_GPU OR WITH_ROCM)
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
                list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu)
            endif()
117 118 119
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
                list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpu/${TARGET}.cu.cc)
            endif()
120 121 122 123 124 125 126 127 128 129
        endif()
        if (WITH_XPU)
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
                list(APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${TARGET}.cc)
            endif()
        endif()
    else()
        # TODO(chenweihang): impl compile by source later
    endif()

130
    list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.h)
131 132 133
    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
        list(APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR}/impl/${TARGET}_impl.h)
    endif()
134 135 136 137 138 139
    list(APPEND all_srcs ${common_srcs})
    list(APPEND all_srcs ${cpu_srcs})
    list(APPEND all_srcs ${gpu_srcs})
    list(APPEND all_srcs ${xpu_srcs})
    foreach(src ${all_srcs})
        file(READ ${src} target_content)
140
        string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
141
        if ("${kernel_library_SUB_DIR}" STREQUAL "")
142
            string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
143
        else()
144
            string(REGEX MATCHALL "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content})
145
        endif()
146
        foreach(include_kernel ${include_kernels})
147
        if ("${kernel_library_SUB_DIR}" STREQUAL "")
148
            string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/" "" kernel_name ${include_kernel})
149
        else()
150
            string(REGEX REPLACE "#include \"paddle\/phi\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel})
151
        endif()
152 153 154 155 156 157 158
            string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name})
            list(APPEND kernel_deps ${kernel_name})
        endforeach()
    endforeach()
    list(REMOVE_DUPLICATES kernel_deps)
    list(REMOVE_ITEM kernel_deps ${TARGET})

159 160 161 162
    list(LENGTH common_srcs common_srcs_len)
    list(LENGTH cpu_srcs cpu_srcs_len)
    list(LENGTH gpu_srcs gpu_srcs_len)
    list(LENGTH xpu_srcs xpu_srcs_len)
163
    list(LENGTH selected_rows_srcs selected_rows_srcs_len)
164

165 166
    # Build Target according different src organization
    if((${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
167
        ${xpu_srcs_len} GREATER 0) AND (${common_srcs_len} GREATER 0 OR
168 169
        ${selected_rows_srcs_len} GREATER 0))
        # If the common_srcs/selected_rows_srcs depends on specific device srcs, build target using this rule.
170 171 172
        if (WITH_GPU)
            if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
                nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
173
                nv_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
174 175 176 177
            endif()
        elseif (WITH_ROCM)
            if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
                hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
178
                hip_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
179 180 181 182
            endif()
        else()
            if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
                cc_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
183
                cc_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
184 185
            endif()
        endif()
186
    # If there are only specific device srcs, build target using this rule.
187
    elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
188
        if (WITH_GPU)
189 190 191
            if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
                nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            endif()
192
        elseif (WITH_ROCM)
193 194 195
            if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
                hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            endif()
196
        else()
197 198 199
            if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
                cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            endif()
200
        endif()
201 202 203 204 205 206 207 208
    # If the selected_rows_srcs depends on common_srcs, build target using this rule.
    elseif (${common_srcs_len} GREATER 0 AND ${selected_rows_srcs_len} GREATER 0)
        if (WITH_GPU)
            nv_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
        elseif (WITH_ROCM)
            hip_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
209
        else()
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
            cc_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
            cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
        endif()
    # If there are only common_srcs or selected_rows_srcs, build target using below rules.
    elseif (${common_srcs_len} GREATER 0)
        if (WITH_GPU)
            nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        elseif (WITH_ROCM)
            hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        else()
            cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        endif()
    elseif (${selected_rows_srcs_len} GREATER 0)
        if (WITH_GPU)
            nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        elseif (WITH_ROCM)
            hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        else()
            cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
        endif()
    else()
231
        message(FATAL_ERROR "Cannot find any implementation for ${TARGET}")
232 233
    endif()

C
Chen Weihang 已提交
234
    if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
235
        ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR
236
        ${selected_rows_srcs_len} GREATER 0)
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
        # append target into PTEN_KERNELS property
        get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
        set(pten_kernels ${pten_kernels} ${TARGET})
        set_property(GLOBAL PROPERTY PTEN_KERNELS ${pten_kernels})
    endif()

    # parse kernel name and auto generate kernel declaration
    # here, we don't need to check WITH_XXX, because if not WITH_XXX, the
    # xxx_srcs_len will be equal to 0
    if (${common_srcs_len} GREATER 0)
        kernel_declare(${common_srcs})
    endif()
    if (${cpu_srcs_len} GREATER 0)
        kernel_declare(${cpu_srcs})
    endif()
    if (${gpu_srcs_len} GREATER 0)
        kernel_declare(${gpu_srcs})
    endif()
    if (${xpu_srcs_len} GREATER 0)
        kernel_declare(${xpu_srcs})
    endif()
258 259 260
    if (${selected_rows_srcs_len} GREATER 0)
        kernel_declare(${selected_rows_srcs})
    endif()
261 262 263 264
endfunction()

function(register_kernels)
    set(options "")
265
    set(oneValueArgs SUB_DIR)
266 267 268 269 270 271 272 273 274 275 276 277
    set(multiValueArgs EXCLUDES DEPS)
    cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    file(GLOB KERNELS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_kernel.h")
    string(REPLACE ".h" "" KERNELS "${KERNELS}")
    list(LENGTH register_kernels_DEPS register_kernels_DEPS_len)

    foreach(target ${KERNELS})
        list(FIND register_kernels_EXCLUDES ${target} _index)
        if (${_index} EQUAL -1)
            if (${register_kernels_DEPS_len} GREATER 0)
278
                kernel_library(${target} DEPS ${register_kernels_DEPS} SUB_DIR ${register_kernels_SUB_DIR})
279
            else()
280
                kernel_library(${target} SUB_DIR ${register_kernels_SUB_DIR})
281 282 283 284
            endif()
        endif()
    endforeach()
endfunction()
285 286 287

function(append_op_util_declare TARGET)
    file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET} target_content)
288
    string(REGEX MATCH "(PT_REGISTER_BASE_KERNEL_NAME|PT_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" util_registrar "${target_content}")
289
    string(REPLACE "PT_REGISTER_ARG_MAPPING_FN" "PT_DECLARE_ARG_MAPPING_FN" util_declare "${util_registrar}")
290
    string(REPLACE "PT_REGISTER_BASE_KERNEL_NAME" "PT_DECLARE_BASE_KERNEL_NAME" util_declare "${util_declare}")
291
    string(APPEND util_declare ");\n")
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
    file(APPEND ${op_utils_header} "${util_declare}")
endfunction()

function(register_op_utils TARGET_NAME)
    set(utils_srcs)
    set(options "")
    set(oneValueArgs "")
    set(multiValueArgs EXCLUDES DEPS)
    cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}"
        "${multiValueArgs}" ${ARGN})

    file(GLOB SIGNATURES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_sig.cc")
    foreach(target ${SIGNATURES})
        append_op_util_declare(${target})
        list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${target})
    endforeach()

    cc_library(${TARGET_NAME} SRCS ${utils_srcs} DEPS ${register_op_utils_DEPS})
endfunction()