未验证 提交 e52e6d01 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

merge all phi kernel lib to several big static lib, reduce link command (#42185)

* merge all phi lib to several big static lib

* merge all phi lib to several big static lib
上级 30f65c25
...@@ -20,6 +20,9 @@ else(APPLE AND WITH_ARM) ...@@ -20,6 +20,9 @@ else(APPLE AND WITH_ARM)
cmake_minimum_required(VERSION 3.15) cmake_minimum_required(VERSION 3.15)
cmake_policy(VERSION 3.10) cmake_policy(VERSION 3.10)
endif(APPLE AND WITH_ARM) endif(APPLE AND WITH_ARM)
# Policy CMP0026 (NEW behavior) disallows reading the LOCATION property of
# build targets. The OLD behavior is selected here so that the static-library
# merge helper (generic.cmake:merge_static_libs) can query the on-disk path of
# a static library with get_property(... TARGET ... PROPERTY LOCATION).
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
# NOTE(review): OLD policy behavior is deprecated by CMake; consider switching
# the consumer to $<TARGET_FILE:...> so this override can be removed.
cmake_policy(SET CMP0026 OLD)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
......
...@@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME) ...@@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
endif() endif()
endfunction() endfunction()
# create_dummy_static_lib(<target> LIBS <lib>... [DEPS <dep>...] LIMIT <n>)
#
# Splits LIBS (after de-duplication) into consecutive groups of at most LIMIT
# libraries, merges each group into a static library named
# <target>_static_<k> (k = 1, 2, ...) through merge_static_libs, links DEPS
# (if any) to every merged library, and finally declares <target> through
# cc_library with all merged libraries as its DEPS.
#
# Arguments:
#   TARGET_NAME  name of the umbrella target to create
#   LIBS         libraries to merge
#   DEPS         optional extra libraries linked to each merged library
#   LIMIT        maximum number of libraries per merged library (integer)
#
# Example:
#   create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100)
function(create_dummy_static_lib TARGET_NAME)
  set(options "")
  # LIMIT is a single number, so it is parsed as a one-value keyword.
  set(oneValueArgs LIMIT)
  set(multiValueArgs LIBS DEPS)
  cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Without a numeric LIMIT the comparison in the loop below is malformed;
  # stop with an explicit message instead of an obscure if() syntax error.
  if(NOT "${merge_LIMIT}" MATCHES "^[0-9]+$")
    message(FATAL_ERROR
      "create_dummy_static_lib(${TARGET_NAME}): LIMIT must be a non-negative integer, got '${merge_LIMIT}'")
  endif()

  list(REMOVE_DUPLICATES merge_LIBS)

  # Start from empty lists: a function scope inherits same-named variables
  # from its caller, which would otherwise leak into the merge groups.
  set(merge_list)
  set(merged_targets)

  set(index 1)   # suffix of the next merged library
  set(offset 1)  # 1-based position of the current library in merge_LIBS
  list(LENGTH merge_LIBS libs_len)

  foreach(lib ${merge_LIBS})
    list(APPEND merge_list ${lib})
    list(LENGTH merge_list listlen)
    # Flush as soon as the group holds LIMIT libraries (not LIMIT + 1), or
    # when the last library has been reached so no remainder is dropped.
    if((NOT ${listlen} LESS ${merge_LIMIT}) OR (${offset} EQUAL ${libs_len}))
      message(STATUS "Merge and generate static library: ${TARGET_NAME}_static_${index}")
      merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list})
      if(merge_DEPS)
        # Plain signature on purpose: merge_static_libs already links this
        # target with the plain signature, and the two forms cannot be mixed.
        target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
      endif()
      set(merge_list)
      list(APPEND merged_targets ${TARGET_NAME}_static_${index})
      math(EXPR index "${index}+1")
    endif()
    math(EXPR offset "${offset}+1")
  endforeach()

  # The umbrella target only aggregates the merged libraries.
  cc_library(${TARGET_NAME} DEPS ${merged_targets})
endfunction()
function(merge_static_libs TARGET_NAME) function(merge_static_libs TARGET_NAME)
set(libs ${ARGN}) set(libs ${ARGN})
list(REMOVE_DUPLICATES libs) list(REMOVE_DUPLICATES libs)
...@@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME) ...@@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
# also help to track dependencies. # also help to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c) set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
if(APPLE) # Use OSX's libtool to merge archives # Make the generated dummy source file depended on all static input
# Make the generated dummy source file depended on all static input # libs. If input lib changes,the source file is touched
# libs. If input lib changes,the source file is touched # which causes the desired effect (relink).
# which causes the desired effect (relink). add_custom_command(OUTPUT ${target_SRCS}
add_custom_command(OUTPUT ${target_SRCS} COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS} DEPENDS ${libs})
DEPENDS ${libs})
# Generate dummy staic lib
# Generate dummy static lib generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs") target_link_libraries(${TARGET_NAME} ${libs_deps})
target_link_libraries(${TARGET_NAME} ${libs_deps})
# OSX: use 'libtool' to merge archives
if(APPLE)
foreach(lib ${libs}) foreach(lib ${libs})
# Get the file names of the libraries to be merged # Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>) set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach() endforeach()
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
) )
endif(APPLE) endif()
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)
add_custom_command(OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
add_custom_command(OUTPUT ${objlistfile} # LINUX: use "ar" to extract objects and re-add to a common lib
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>" if(LINUX)
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile} set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
DEPENDS ${lib} ${objdir} get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
WORKING_DIRECTORY ${objdir}) file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")
list(APPEND target_OBJS "${objlistfile}") foreach(lib ${libs})
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
endforeach() endforeach()
file(APPEND ${mri_file} "save\nend\n")
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})
# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'` COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME} COMMAND ${CMAKE_AR} -M < ${mri_file}
WORKING_DIRECTORY ${target_DIR}) COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
endif(LINUX) endif()
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
target_link_libraries(${TARGET_NAME} ${libs_deps})
# Windows do not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
if(WIN32)
foreach(lib ${libs}) foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>) set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach() endforeach()
# msvc will put libarary in directory of "/Release/xxxlib" by default # msvc compiler will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}> COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles} COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
) )
endif(WIN32) endif()
endfunction(merge_static_libs) endfunction()
function(check_coverage_opt TARGET_NAME SRCS) function(check_coverage_opt TARGET_NAME SRCS)
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE) if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
...@@ -1076,4 +1075,3 @@ function(math_library TARGET) ...@@ -1076,4 +1075,3 @@ function(math_library TARGET)
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
endif() endif()
endfunction() endfunction()
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api) cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper) cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)
if (WITH_DISTRIBUTE) if (WITH_DISTRIBUTE)
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper) cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
endif() endif()
if(WITH_NCCL) if(WITH_NCCL)
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE) if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api) cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
endif() endif()
endif() endif()
if(WITH_ASCEND_CL) if(WITH_ASCEND_CL)
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
if (WITH_DISTRIBUTE AND WITH_PSCORE) if (WITH_DISTRIBUTE AND WITH_PSCORE)
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api) cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
endif() endif()
endif() endif()
cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi phi_api grad_node_info) cc_library(accumulation_node SRCS accumulation_node.cc DEPS gradient_accumulator phi_api grad_node_info)
cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi phi_api autograd_meta grad_node_info accumulation_node) cc_library(tensor_utils SRCS tensor_utils.cc DEPS phi_api autograd_meta grad_node_info accumulation_node)
cc_library(hook_utils SRCS hook_utils.cc DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node) cc_library(hook_utils SRCS hook_utils.cc DEPS phi tensor_utils autograd_meta grad_node_info utils accumulation_node)
cc_library(global_utils SRCS global_utils.cc DEPS place tracer) cc_library(global_utils SRCS global_utils.cc DEPS place tracer)
cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi phi_api grad_node_info) cc_library(py_layer_node SRCS py_layer_node.cc DEPS pybind phi_api grad_node_info)
...@@ -206,11 +206,11 @@ ENDIF() ...@@ -206,11 +206,11 @@ ENDIF()
IF(WITH_XPU) IF(WITH_XPU)
cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
phi phi_utils kernel_factory infershape_utils op_utils) phi_utils kernel_factory infershape_utils op_utils)
ELSE() ELSE()
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils
phi phi_utils kernel_factory infershape_utils op_utils) phi_utils kernel_factory infershape_utils op_utils)
ENDIF() ENDIF()
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
...@@ -418,7 +418,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer) ...@@ -418,7 +418,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
cc_library(generator SRCS generator.cc DEPS enforce place) cc_library(generator SRCS generator.cc DEPS enforce place)
cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place phi var_type_traits phi phi_api_utils op_info shape_inference) cc_library(infershape_utils SRCS infershape_utils.cc DEPS lod_tensor selected_rows_utils attribute place var_type_traits phi phi_api_utils op_info shape_inference)
cc_test(infershape_utils_test SRCS infershape_utils_test.cc DEPS infershape_utils infermeta_utils meta_tensor) cc_test(infershape_utils_test SRCS infershape_utils_test.cc DEPS infershape_utils infermeta_utils meta_tensor)
# Get the current working branch # Get the current working branch
......
cc_library(imperative_flag SRCS flags.cc DEPS gflags flags) cc_library(imperative_flag SRCS flags.cc DEPS gflags flags)
cc_library(var_helper SRCS var_helper.cc DEPS tensor phi_api) cc_library(var_helper SRCS var_helper.cc DEPS tensor phi_api)
IF(WITH_XPU) IF(WITH_XPU)
cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper) cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper)
ELSE() ELSE()
cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi phi_utils var_helper) cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows_utils var_type_traits op_kernel_type data_transform nan_inf_utils phi_api phi_utils var_helper)
ENDIF() ENDIF()
cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api) cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry var_helper phi_api)
add_subdirectory(jit) add_subdirectory(jit)
......
...@@ -36,7 +36,7 @@ endif() ...@@ -36,7 +36,7 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp # fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
set(utils_modules stringpiece pretty_log string_helper) set(utils_modules stringpiece pretty_log string_helper benchmark)
add_subdirectory(api) add_subdirectory(api)
...@@ -50,9 +50,9 @@ if(WITH_ONNXRUNTIME) ...@@ -50,9 +50,9 @@ if(WITH_ONNXRUNTIME)
set(STATIC_INFERENCE_API ${STATIC_INFERENCE_API} onnxruntime_predictor) set(STATIC_INFERENCE_API ${STATIC_INFERENCE_API} onnxruntime_predictor)
endif() endif()
#TODO(wilber, T8T9): Do we still need to support windows gpu static library? #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy
if(WIN32 AND WITH_GPU) if(WIN32 AND WITH_GPU)
cc_library(paddle_inference DEPS ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules}) cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} ${utils_modules})
else() else()
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules}) create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} ${STATIC_INFERENCE_API} ${utils_modules})
endif() endif()
...@@ -84,7 +84,7 @@ set(SHARED_INFERENCE_SRCS ...@@ -84,7 +84,7 @@ set(SHARED_INFERENCE_SRCS
${PADDLE_CUSTOM_OP_SRCS}) ${PADDLE_CUSTOM_OP_SRCS})
# shared inference library deps # shared inference library deps
set(SHARED_INFERENCE_DEPS ${fluid_modules} ${phi_modules} analysis_predictor) set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor ${utils_modules})
if (WITH_CRYPTO) if (WITH_CRYPTO)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto) set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} paddle_crypto)
......
# Compiling with WITH_PYTHON=ON and WITH_TENSORRT=ON failed on windows. Temporarily add paddle_inference_api dependency to solve the problem # Compiling with WITH_PYTHON=ON and WITH_TENSORRT=ON failed on windows. Temporarily add paddle_inference_api dependency to solve the problem
if(WIN32) if(WIN32)
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api) nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost paddle_inference_api)
else() else()
nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost) nv_library(tensorrt_engine SRCS engine.cc trt_int8_calibrator.cc DEPS ${GLOB_OPERATOR_DEPS} framework_proto device_context boost)
endif() endif()
nv_library(tensorrt_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost) nv_library(tensorrt_op_teller SRCS op_teller.cc DEPS framework_proto device_context boost)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader) nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
......
...@@ -27,7 +27,7 @@ set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_contex ...@@ -27,7 +27,7 @@ set(PHI_DEPS convert_utils dense_tensor phi_context kernel_factory kernel_contex
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
cc_library(phi DEPS ${PHI_DEPS}) create_dummy_static_lib(phi LIBS ${PHI_DEPS} LIMIT 100)
set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h CACHE INTERNAL "phi/extension.h file") set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h CACHE INTERNAL "phi/extension.h file")
file(WRITE ${phi_extension_header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n") file(WRITE ${phi_extension_header_file} "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n")
......
...@@ -36,7 +36,7 @@ set(MANUAL_BUILD_KERNELS ${AUTOTUNE_KERNELS} cross_entropy_kernel adam_kernel ad ...@@ -36,7 +36,7 @@ set(MANUAL_BUILD_KERNELS ${AUTOTUNE_KERNELS} cross_entropy_kernel adam_kernel ad
matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel
put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel
softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel
triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel) triangular_solve_grad_kernel determinant_grad_kernel reduce_sum_kernel reduce_mean_kernel rnn_kernel rnn_grad_kernel warpctc_kernel warpctc_grad_kernel)
foreach(src ${AUTOTUNE_KERNELS}) foreach(src ${AUTOTUNE_KERNELS})
kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune) kernel_library(${src} DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
endforeach() endforeach()
...@@ -52,6 +52,7 @@ kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matri ...@@ -52,6 +52,7 @@ kernel_library(hierarchical_sigmoid_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matri
kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax)
kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel) kernel_library(reduce_sum_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(reduce_mean_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel)
kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse)
kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册