operators.cmake 17.9 KB
Newer Older
1 2
# CMake file `unity_build` is used to handle Unity Build compilation.
include(unity_build)
W
Wu Yi 已提交
3 4 5 6 7 8 9 10 11 12
set(PART_CUDA_KERNEL_FILES)
function(op_library TARGET)
    # op_library is a function to create op library. The interface is same as
    # cc_library. But it handle split GPU/CPU code and link some common library
    # for ops.
    set(cc_srcs)
    set(cu_srcs)
    set(hip_cu_srcs)
    set(miopen_hip_cc_srcs)
    set(cu_cc_srcs)
13
    set(xpu_cc_srcs)
14
    set(npu_cc_srcs)
W
Wu Yi 已提交
15
    set(cudnn_cu_cc_srcs)
L
liym27 已提交
16
    set(cudnn_cu_srcs)
W
Wu Yi 已提交
17 18 19
    set(CUDNN_FILE)
    set(mkldnn_cc_srcs)
    set(MKLDNN_FILE)
20
    set(op_common_deps operator op_registry math_function layer common_infer_shape_functions)
21 22 23
    if (WITH_ASCEND_CL)
      set(op_common_deps ${op_common_deps} npu_op_runner)
    endif()
24 25
    # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
    set(options UNITY)
W
Wu Yi 已提交
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS)
    set(pybind_flag 0)
    cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
            "${multiValueArgs}" ${ARGN})

    list(LENGTH op_library_SRCS op_library_SRCS_len)
    if (${op_library_SRCS_len} EQUAL 0)
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
            list(APPEND cc_srcs ${TARGET}.cc)
        endif()
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
            list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
        endif()
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
            list(APPEND cu_srcs ${TARGET}.cu)
        endif()
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
            set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
                    ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
            list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
        endif()

        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
            list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
        endif()
        string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
            list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
        endif()
L
liym27 已提交
56 57 58
        if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
            list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
        endif()
Y
Y_Xuan 已提交
59 60 61 62 63 64 65 66 67 68 69 70
        if(WITH_ROCM_PLATFORM)
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu.cc)
                list(APPEND hip_cu_cc_srcs ${TARGET}.hip.cu.cc)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
                list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu)
                set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu
                        ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
                list(APPEND hip_cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu)
            endif()
W
Wu Yi 已提交
71
            string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}")
Y
Y_Xuan 已提交
72 73 74 75 76
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu.cc)
                list(APPEND miopen_hip_cu_cc_srcs ${MIOPEN_FILE}.hip.cu.cc)
            endif()
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu)
                list(APPEND miopen_hip_cu_srcs ${MIOPEN_FILE}.hip.cu)
W
Wu Yi 已提交
77 78 79 80
            endif()
        endif()
        if(WITH_MKLDNN)
            string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
81 82
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc)
                list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
W
Wu Yi 已提交
83 84
            endif()
        endif()
85
        if(WITH_XPU)
86 87 88
            string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
                list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
89 90
            endif()
        endif()
91 92 93 94 95 96
        if(WITH_ASCEND_CL)
            string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
            if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
                list(APPEND npu_cc_srcs ${NPU_FILE}.cc)
            endif()
        endif()
W
Wu Yi 已提交
97 98
    else()
        foreach(src ${op_library_SRCS})
Y
Y_Xuan 已提交
99
            if (WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu$")
W
Wu Yi 已提交
100
                list(APPEND hip_cu_srcs ${src})
Y
Y_Xuan 已提交
101 102
            elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu.cc$")
                list(APPEND hip_cu_cc_srcs ${src})
L
liym27 已提交
103 104
            elseif(${src} MATCHES ".*_cudnn_op.cu$")
                list(APPEND cudnn_cu_srcs ${src})
W
Wu Yi 已提交
105 106 107 108
            elseif (${src} MATCHES ".*\\.cu$")
                list(APPEND cu_srcs ${src})
            elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
                list(APPEND cudnn_cu_cc_srcs ${src})
Y
Y_Xuan 已提交
109
            elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cc$")
W
Wu Yi 已提交
110
                list(APPEND miopen_hip_cc_srcs ${src})
Y
Y_Xuan 已提交
111 112
            elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cu$")
                list(APPEND miopen_hip_cu_srcs ${src})
W
Wu Yi 已提交
113 114 115 116
            elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
                list(APPEND mkldnn_cc_srcs ${src})
            elseif(${src} MATCHES ".*\\.cu.cc$")
                list(APPEND cu_cc_srcs ${src})
117
            elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
118
                list(APPEND xpu_cc_srcs ${src})
119 120
            elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$")
                list(APPEND npu_cc_srcs ${src})
W
Wu Yi 已提交
121 122 123 124 125 126 127 128 129 130 131 132 133 134
            elseif(${src} MATCHES ".*\\.cc$")
                list(APPEND cc_srcs ${src})
            else()
                message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
            endif()
        endforeach()
    endif()

    list(LENGTH cc_srcs cc_srcs_len)
    if (${cc_srcs_len} EQUAL 0)
        message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
    endif()
    if (WIN32)
    # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
P
peizhilin 已提交
135
    foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
W
Wu Yi 已提交
136 137 138 139 140
        if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
          return()
        endif()
    endforeach()
    endif(WIN32)
141 142 143 144 145 146 147 148 149 150 151 152 153

    # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
    if(WITH_UNITY_BUILD AND op_library_UNITY)
        # Generate the unity target name by the directory where source files located.
        string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET ${CMAKE_CURRENT_SOURCE_DIR})
        string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET})
        set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity")
        if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY)
            set(OP_LIBRARY ${UNITY_TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
        endif()
    else()
        set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
    endif()
W
Wu Yi 已提交
154 155 156 157 158 159

    list(LENGTH op_library_DEPS op_library_DEPS_len)
    if (${op_library_DEPS_len} GREATER 0)
        set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
    endif()
    if (WITH_GPU)
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
        # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
        if(WITH_UNITY_BUILD AND op_library_UNITY)
            # Combine the cc and cu source files.
            compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
            compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs} ${cu_srcs})
            if(TARGET ${UNITY_TARGET})
                # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
                target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources} ${unity_target_cu_sources})
            else()
                # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
                nv_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} ${unity_target_cu_sources} DEPS ${op_library_DEPS} ${op_common_deps})
            endif()
            # Add alias library to handle dependencies.
            add_library(${TARGET} ALIAS ${UNITY_TARGET})
        else()
            nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
W
Wu Yi 已提交
176
                ${op_common_deps})
177
        endif()
Y
Y_Xuan 已提交
178 179
    elseif (WITH_ROCM_PLATFORM)
        hip_library_ops(${TARGET} SRCS ${cc_srcs} ${hip_cu_cc_srcs} ${hip_cu_srcs} ${miopen_hip_cu_cc_srcs} ${miopen_hip_cu_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
W
Wu Yi 已提交
180 181
                ${op_common_deps})
    else()
182 183 184
        # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
        if(WITH_UNITY_BUILD AND op_library_UNITY)
            # Combine the cc source files.
185
            compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs})
186 187 188 189 190 191 192 193 194 195
            if(TARGET ${UNITY_TARGET})
                # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
                target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
            else()
                # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
                cc_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} DEPS ${op_library_DEPS} ${op_common_deps})
            endif()
            # Add alias library to handle dependencies.
            add_library(${TARGET} ALIAS ${UNITY_TARGET})
        else()
196
            cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} DEPS ${op_library_DEPS}
197 198
                ${op_common_deps})
        endif()
W
Wu Yi 已提交
199 200 201
    endif()

    # Define operators that don't need pybind here.
W
wawltor 已提交
202
    foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "nccl_op"
203
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
204 205
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op"
"sync_batch_norm_op" "dgc_op" "fused_fc_elementwise_layernorm_op"
206
"skip_layernorm_op" "multihead_matmul_op" "fusion_group_op" "fused_bn_activation_op" "fused_embedding_eltwise_layernorm_op" "fusion_gru_op"
Z
Zhang Ting 已提交
207
"fused_bn_add_activation_op")
W
Wu Yi 已提交
208 209 210 211 212 213 214 215
        if ("${TARGET}" STREQUAL "${manual_pybind_op}")
            set(pybind_flag 1)
        endif()
    endforeach()

    # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
    # Note that it's enough to just adding one operator to pybind in a *_op.cc file.
    # And for detail pybind information, please see generated paddle/pybind/pybind.h.
216
    set(ORIGINAL_TARGET ${TARGET})
W
Wu Yi 已提交
217 218
    file(READ ${TARGET}.cc TARGET_CONTENT)
    string(REGEX MATCH "REGISTER_OPERATOR\\(.*REGISTER_OPERATOR\\(" multi_register "${TARGET_CONTENT}")
219 220 221
    # [ \t\r\n]* is used for blank characters
    string(REGEX MATCH "REGISTER_OPERATOR\\([ \t\r\n]*[a-z0-9_]*," one_register "${multi_register}")

W
Wu Yi 已提交
222 223 224 225 226
    if (one_register STREQUAL "")
        string(REPLACE "_op" "" TARGET "${TARGET}")
    else ()
        string(REPLACE "REGISTER_OPERATOR(" "" TARGET "${one_register}")
        string(REPLACE "," "" TARGET "${TARGET}")
227 228 229
        # [ \t\r\n]+ is used for blank characters.
        # Here we use '+' instead of '*' since it is a REPLACE operation.
        string(REGEX REPLACE "[ \t\r\n]+" "" TARGET "${TARGET}")
W
Wu Yi 已提交
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244
    endif()

    # pybind USE_NO_KERNEL_OP
    # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
    string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
    string(REPLACE "_op" "" TARGET "${TARGET}")
    if (${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
        file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
        set(pybind_flag 1)
    endif()

    # pybind USE_CPU_ONLY_OP
    list(LENGTH cu_srcs cu_srcs_len)
    list(LENGTH cu_cc_srcs cu_cc_srcs_len)
    list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
245
    list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
246
    list(LENGTH npu_cc_srcs npu_cc_srcs_len)
W
Wu Yi 已提交
247 248 249
    list(LENGTH hip_cu_srcs hip_cu_srcs_len)
    list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
    if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
250
        ${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0 AND ${npu_cc_srcs_len} EQUAL 0)
W
Wu Yi 已提交
251 252 253 254 255 256 257
        file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
        set(pybind_flag 1)
    endif()

    # pybind USE_OP_DEVICE_KERNEL for CUDNN
    list(LENGTH cudnn_cu_cc_srcs cudnn_cu_cc_srcs_len)
    if (WITH_GPU AND ${cudnn_cu_cc_srcs_len} GREATER 0)
258 259 260
      if(${TARGET} STREQUAL "activation")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
      else()
W
Wu Yi 已提交
261
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
262
      endif()
W
Wu Yi 已提交
263 264
    endif()

L
liym27 已提交
265 266 267 268 269 270
    # pybind USE_OP_DEVICE_KERNEL for CUDNN
    list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
    if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
    endif()

W
Wu Yi 已提交
271
    # pybind USE_OP_DEVICE_KERNEL for MIOPEN
Y
Y_Xuan 已提交
272 273 274 275 276 277 278 279 280 281 282 283 284
    list(LENGTH miopen_hip_cu_cc_srcs miopen_hip_cu_cc_srcs_len)
    if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_cc_srcs_len} GREATER 0)
      if(${TARGET} STREQUAL "activation")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
      else()
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
      endif()
    endif()

    # pybind USE_OP_DEVICE_KERNEL for MIOPEN
    list(LENGTH miopen_hip_cu_srcs miopen_hip_cu_srcs_len)
    if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_srcs_len} GREATER 0)
       file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
W
Wu Yi 已提交
285 286
    endif()

287 288 289
    if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, XPU);\n")
    endif()
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307

    if (WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
        file(READ ${ORIGINAL_TARGET}_npu.cc TARGET_NPU_CONTENT)
        # It is different from the logic above, becareful
        string(REGEX MATCH "REGISTER_OP_NPU_KERNEL\\(.*" multi_npu_register "${TARGET_NPU_CONTENT}")
        # [ \t\r\n]* is used for blank characters
        string(REGEX MATCH "REGISTER_OP_NPU_KERNEL\\([ \t\r\n]*[a-z0-9_]*," one_npu_register "${multi_npu_register}")

        if (one_npu_register STREQUAL "")
            string(REPLACE "_op" "" NPU_TARGET "${TARGET}")
        else ()
            string(REPLACE "REGISTER_OP_NPU_KERNEL(" "" NPU_TARGET "${one_npu_register}")
            string(REPLACE "," "" NPU_TARGET "${NPU_TARGET}")
            # [ \t\r\n]+ is used for blank characters.
            # Here we use '+' instead of '*' since it is a REPLACE operation.
            string(REGEX REPLACE "[ \t\r\n]+" "" NPU_TARGET "${NPU_TARGET}")
        endif()
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${NPU_TARGET}, NPU);\n")
308
    endif()
309

W
Wu Yi 已提交
310 311 312 313 314
    # pybind USE_OP_DEVICE_KERNEL for MKLDNN
    if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
      # Append first implemented MKLDNN activation operator
      if (${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, MKLDNN);\n")
X
Xin Pan 已提交
315 316
      elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
317 318
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
M
Michał Gallus 已提交
319 320 321
      elseif(${MKLDNN_FILE} STREQUAL "transpose_mkldnn_op")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);\n")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);\n")
322
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);\n")
M
Michał Gallus 已提交
323 324 325 326
      elseif(${MKLDNN_FILE} STREQUAL "fc_mkldnn_op")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n")
W
Wu Yi 已提交
327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
      else()
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
      endif()
    endif()

    # pybind USE_OP
    if (${pybind_flag} EQUAL 0)
      # NOTE(*): activation use macro to regist the kernels, set use_op manually.
      if(${TARGET} STREQUAL "activation")
        file(APPEND ${pybind_file} "USE_OP(relu);\n")
      elseif(${TARGET} STREQUAL "fake_dequantize")
        file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
      elseif(${TARGET} STREQUAL "fake_quantize")
        file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n")
      elseif(${TARGET} STREQUAL "tensorrt_engine_op")
          message(STATUS "Pybind skips [tensorrt_engine_op], for this OP is only used in inference")
      else()
        file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
      endif()
    endif()
endfunction()


function(register_operators)
    set(options "")
    set(oneValueArgs "")
W
Wu Yi 已提交
353
    set(multiValueArgs EXCLUDES DEPS)
W
Wu Yi 已提交
354 355 356 357 358
    cmake_parse_arguments(register_operators "${options}" "${oneValueArgs}"
            "${multiValueArgs}" ${ARGN})

    file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
    string(REPLACE "_mkldnn" "" OPS "${OPS}")
359
    string(REPLACE "_xpu" "" OPS "${OPS}")
360
    string(REPLACE "_npu" "" OPS "${OPS}")
W
Wu Yi 已提交
361 362
    string(REPLACE ".cc" "" OPS "${OPS}")
    list(REMOVE_DUPLICATES OPS)
W
Wu Yi 已提交
363
    list(LENGTH register_operators_DEPS register_operators_DEPS_len)
W
Wu Yi 已提交
364 365 366 367

    foreach(src ${OPS})
        list(FIND register_operators_EXCLUDES ${src} _index)
        if (${_index} EQUAL -1)
W
Wu Yi 已提交
368
            if (${register_operators_DEPS_len} GREATER 0)
369
                op_library(${src} UNITY DEPS ${register_operators_DEPS})
W
Wu Yi 已提交
370
            else()
371
                op_library(${src} UNITY)
W
Wu Yi 已提交
372
            endif()
W
Wu Yi 已提交
373 374
        endif()
    endforeach()
375 376 377 378 379 380 381 382

    # Complete the processing of `UNITY_TARGET`.
    if(WITH_UNITY_BUILD)
        finish_unity_target(cc)
        if(WITH_GPU)
            finish_unity_target(cu)
        endif()
    endif()
W
Wu Yi 已提交
383
endfunction()