未验证 提交 671555ed 编写于 作者: L LoveAn 提交者: GitHub

Compiling operator libraries with Unity build (#29130)

* Compiling operator libraries with Unity Build on Windows CPU.

* Compiling operator libraries with Unity Build on Windows GPU, no_test, test=windows_ci

* Add option in windows ci script, no_test, test=windows_ci

* Optimize parallel compiling, test=develop

* remove limit of parallel compile and skip some ops in UB, test=develop

* remove changes of header file, test=develop

* remove changes of header file, test=develop

* fix test_eye_op unittest failed, test=develop

* Compiling operator libraries with Unity Build on Linux, test=develop

* set default WITH_UNITY_BUILD=OFF, test=develop

* Move unity build rules into a single file and add comment, test=develop

* optimize parallel compilation, test=develop

* fix undefined reference error on coverage ci, test=develop
上级 6f2bb20e
......@@ -146,6 +146,7 @@ option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
# PY_VERSION
if(NOT PY_VERSION)
......
# CMake file `unity_build` is used to handle Unity Build compilation.
include(unity_build)
set(PART_CUDA_KERNEL_FILES)
function(op_library TARGET)
# op_library is a function to create op library. The interface is same as
......@@ -15,7 +17,8 @@ function(op_library TARGET)
set(mkldnn_cc_srcs)
set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer common_infer_shape_functions)
set(options "")
# Option `UNITY` specifies that operator `TARGET` will be compiled with Unity Build.
set(options UNITY)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
set(pybind_flag 0)
......@@ -105,21 +108,64 @@ function(op_library TARGET)
endif()
endforeach()
endif(WIN32)
set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Generate the unity target name by the directory where source files located.
string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET ${CMAKE_CURRENT_SOURCE_DIR})
string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET})
set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity")
if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY)
set(OP_LIBRARY ${UNITY_TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
endif()
else()
set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} CACHE INTERNAL "op libs")
endif()
list(LENGTH op_library_DEPS op_library_DEPS_len)
if (${op_library_DEPS_len} GREATER 0)
set(DEPS_OPS ${TARGET} ${DEPS_OPS} PARENT_SCOPE)
endif()
if (WITH_GPU)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc and cu source files.
compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs} ${cu_srcs})
if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources} ${unity_target_cu_sources})
else()
# If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
nv_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} ${unity_target_cu_sources} DEPS ${op_library_DEPS} ${op_common_deps})
endif()
# Add alias library to handle dependencies.
add_library(${TARGET} ALIAS ${UNITY_TARGET})
else()
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
endif()
elseif (WITH_AMD_GPU)
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
else()
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc source files.
compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs})
if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
else()
# If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
cc_library(${UNITY_TARGET} SRCS ${unity_target_cc_sources} DEPS ${op_library_DEPS} ${op_common_deps})
endif()
# Add alias library to handle dependencies.
add_library(${TARGET} ALIAS ${UNITY_TARGET})
else()
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
endif()
endif()
# Define operators that don't need pybind here.
......@@ -256,10 +302,18 @@ function(register_operators)
list(FIND register_operators_EXCLUDES ${src} _index)
if (${_index} EQUAL -1)
if (${register_operators_DEPS_len} GREATER 0)
op_library(${src} DEPS ${register_operators_DEPS})
op_library(${src} UNITY DEPS ${register_operators_DEPS})
else()
op_library(${src})
op_library(${src} UNITY)
endif()
endif()
endforeach()
# Complete the processing of `UNITY_TARGET`.
if(WITH_UNITY_BUILD)
finish_unity_target(cc)
if(WITH_GPU)
finish_unity_target(cu)
endif()
endif()
endfunction()
# Preamble placed ahead of every `#include` line in a generated unity source
# file (right after the "// Generate by Unity Build" banner) to avoid
# compilation failures in the merged file.
# Each macro is defined only if it is not defined already. The bracket
# argument `[[...]]` keeps the text verbatim, newlines included.
# NOTE(review): NOMINMAX and _USE_MATH_DEFINES presumably target Windows/MSVC
# headers (min/max macros, M_PI-style constants) -- confirm.
set(UNITY_BEFORE_CODE [[
#ifndef NOMINMAX
#define NOMINMAX
#endif
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif]])
# Declare one Unity Build group for the current source directory.
#
# TYPE  Source kind of the group; it is also used as the extension of the
#       unity file (callers pass `cc` or `cu`).
# ARGN  Source files that may be merged into the same unity file. Relative
#       paths are resolved against CMAKE_CURRENT_SOURCE_DIR. The files are only
#       recorded here; they do not have to exist.
#
# Groups are numbered from 0 per directory and TYPE. The running count is kept
# in the global property `<unity_target>_<TYPE>_group_index`, and the members
# of group N in `<unity_target>_<TYPE>_group_<N>_sources`.
function(register_unity_group TYPE)
  # The unity target name is derived from the directory relative to
  # paddle/fluid, with every `/` turned into `_`.
  string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" dir_tag "${CMAKE_CURRENT_SOURCE_DIR}")
  string(REPLACE "/" "_" dir_tag "${dir_tag}")
  set(unity_target "paddle_${dir_tag}_unity")

  # Number of groups registered so far; an unset property means none yet.
  set(counter_prop "${unity_target}_${TYPE}_group_index")
  get_property(group_id GLOBAL PROPERTY "${counter_prop}")
  if("${group_id}" STREQUAL "")
    set(group_id 0)
  endif()

  # Unity Build matches sources by absolute path, so normalize every member
  # before the group is published.
  set(group_members "")
  foreach(member ${ARGN})
    if(IS_ABSOLUTE "${member}")
      list(APPEND group_members "${member}")
    else()
      list(APPEND group_members "${CMAKE_CURRENT_SOURCE_DIR}/${member}")
    endif()
  endforeach()
  set_property(GLOBAL PROPERTY "${unity_target}_${TYPE}_group_${group_id}_sources" "${group_members}")

  # Make sure the unity file exists on disk. Per the original note, nv_library
  # and cc_library would otherwise fall back to a dummy file.
  set(placeholder "${CMAKE_CURRENT_BINARY_DIR}/${unity_target}_${group_id}_${TYPE}.${TYPE}")
  if(NOT EXISTS "${placeholder}")
    file(TOUCH "${placeholder}")
  endif()

  # Publish the incremented counter for the next registration.
  math(EXPR next_group_id "${group_id} + 1")
  set_property(GLOBAL PROPERTY "${counter_prop}" ${next_group_id})
endfunction()
# Map the original sources of `TARGET` onto the unity files they belong to.
#
# TARGET  Name of the unity target whose registered groups are searched.
# TYPE    Source kind (`cc` or `cu`); selects which groups are searched and
#         names the result variable.
# ARGN    Original source files of the operator.
#
# Result: `unity_target_<TYPE>_sources` is set in the caller's scope. A source
# found in a registered group is replaced by that group's unity file (once per
# matching source, so the same unity file may appear several times). A source
# that hits no group is passed through unchanged.
#
# The `#include` lines of each unity file are only accumulated in the global
# property `<TARGET>_<TYPE>_file_<N>_sources`; nothing is written to disk here.
# Per the original note, rewriting the unity file would change its timestamp
# and defeat reuse of a retained build directory on Windows.
# TODO(Avin0323): Whether use target property `UNITY_BUILD` of CMAKE to
# combine source files.
function(compose_unity_target_sources TARGET TYPE)
  set(resolved_sources "")

  # An unset counter means no group was registered, so nothing can be merged.
  get_property(group_total GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index)
  if("${group_total}" STREQUAL "")
    set(group_total 0)
  endif()

  foreach(candidate ${ARGN})
    # Groups store absolute paths, so compare with the absolute form.
    if(IS_ABSOLUTE "${candidate}")
      set(candidate_path "${candidate}")
    else()
      set(candidate_path "${CMAKE_CURRENT_SOURCE_DIR}/${candidate}")
    endif()

    # Walk the registered groups in order and stop at the first match.
    set(absorbed FALSE)
    set(slot 0)
    while(slot LESS group_total)
      get_property(slot_members GLOBAL PROPERTY ${TARGET}_${TYPE}_group_${slot}_sources)
      if("${candidate_path}" IN_LIST slot_members)
        set(include_prop ${TARGET}_${TYPE}_file_${slot}_sources)
        get_property(has_header GLOBAL PROPERTY ${include_prop} SET)
        if(NOT has_header)
          # First hit for this unity file: start it with the banner and the
          # preamble that must precede every include.
          set_property(GLOBAL PROPERTY ${include_prop}
                       "// Generate by Unity Build" ${UNITY_BEFORE_CODE})
        endif()
        set_property(GLOBAL APPEND PROPERTY ${include_prop} "#include \"${candidate_path}\"")
        list(APPEND resolved_sources
             "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_${slot}_${TYPE}.${TYPE}")
        set(absorbed TRUE)
        break()
      endif()
      math(EXPR slot "${slot} + 1")
    endwhile()

    # No group claimed the file: compile it on its own, as originally given.
    if(NOT absorbed)
      list(APPEND resolved_sources ${candidate})
    endif()
  endforeach()

  set(unity_target_${TYPE}_sources ${resolved_sources} PARENT_SCOPE)
endfunction()
# Flush the accumulated unity file contents of the current directory to disk.
#
# TYPE  Source kind (`cc` or `cu`) whose unity files are written.
#
# For every registered group the recorded lines are joined with newlines and
# compared with what is already on disk. The file is rewritten only when the
# content differs, which protects incremental compilation speed.
function(finish_unity_target TYPE)
  # Same naming scheme as used when the groups were registered: the directory
  # relative to paddle/fluid with `/` replaced by `_`.
  string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" dir_tag "${CMAKE_CURRENT_SOURCE_DIR}")
  string(REPLACE "/" "_" dir_tag "${dir_tag}")
  set(unity_target "paddle_${dir_tag}_unity")

  get_property(group_total GLOBAL PROPERTY ${unity_target}_${TYPE}_group_index)
  if("${group_total}" STREQUAL "")
    # No group was registered for this directory and TYPE.
    return()
  endif()

  set(slot 0)
  while(slot LESS group_total)
    get_property(pending_lines GLOBAL PROPERTY ${unity_target}_${TYPE}_file_${slot}_sources)
    string(JOIN "\n" wanted_text ${pending_lines})

    set(unity_path "${CMAKE_CURRENT_BINARY_DIR}/${unity_target}_${slot}_${TYPE}.${TYPE}")
    set(current_text "")
    file(READ "${unity_path}" current_text)

    # Leave the file alone when nothing changed, so its timestamp is kept.
    if(NOT "${current_text}" STREQUAL "${wanted_text}")
      file(WRITE "${unity_path}" "${wanted_text}")
    endif()

    math(EXPR slot "${slot} + 1")
  endwhile()
endfunction()
......@@ -64,7 +64,12 @@ if(WITH_COVERAGE OR WIN32 OR WITH_NV_JETSON)
SET(OP_MKL_DEPS ${OP_MKL_DEPS} pyramid_hash_op)
endif()
register_operators(EXCLUDES py_func_op warpctc_op dgc_op lstm_op run_program_op
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators.
include(unity_build_rule.cmake)
endif()
register_operators(EXCLUDES py_func_op warpctc_op dgc_op lstm_op run_program_op eye_op recurrent_op
sync_batch_norm_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
op_library(run_program_op SRCS run_program_op.cc run_program_op.cu.cc DEPS executor_cache ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
......@@ -82,6 +87,8 @@ else()
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif()
op_library(lstm_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS} lstm_compute)
op_library(eye_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})
set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
......@@ -150,3 +157,10 @@ if(WITH_MKLDNN)
include(mkldnn/inplace_op_tests.cmake)
include(mkldnn/nhwc_op_tests.cmake)
endif()
if(WITH_UNITY_BUILD)
# When operators are compiled with Unity Build, `register_operators` causes
# the unity library to lose some symbols, so the link dependencies must be
# specified explicitly here.
target_link_libraries(paddle_operators_unity ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS} ${COMMON_OP_DEPS})
endif()
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/amp.
include(unity_build_rule.cmake)
endif()
register_operators()
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
check_finite_and_unscale_op.cc
update_loss_scaling_op.cc)
register_unity_group(cu
check_finite_and_unscale_op.cu
update_loss_scaling_op.cu)
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/controlflow.
include(unity_build_rule.cmake)
endif()
register_operators(EXCLUDES conditional_block_op DEPS naive_executor)
cc_library(conditional_block_op SRCS conditional_block_op.cc DEPS executor)
......@@ -9,6 +13,10 @@ cc_library(while_op_helper SRCS while_op_helper.cc DEPS operator op_variant)
cc_test(conditional_block_op_test SRCS conditional_block_op_test.cc DEPS conditional_block_op executor)
target_link_libraries(conditional_block_infer_op conditional_block_op)
if(WITH_UNITY_BUILD)
target_link_libraries(paddle_operators_controlflow_unity conditional_block_op)
else()
target_link_libraries(conditional_block_infer_op conditional_block_op)
endif()
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(equal_all);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
compare_all_op.cc
compare_op.cc
conditional_block_infer_op.cc
feed_op.cc
fetch_op.cc
get_places_op.cc
logical_op.cc
tensor_array_read_write_op.cc
while_op.cc)
......@@ -58,7 +58,7 @@ endif()
cc_test(rpc_server_test SRCS rpc_server_test.cc
DEPS ${RPC_DEPS} executor scope proto_desc lookup_sparse_table_read_op checkpoint_notify_op scale_op )
cc_test(varhandle_test SRCS varhandle_test.cc DEPS profiler scope)
cc_library(parameter_prefetch SRCS parameter_prefetch.cc DEPS sendrecvop_rpc memory)
cc_library(parameter_prefetch SRCS parameter_prefetch.cc DEPS sendrecvop_rpc memory node)
cc_library(parameter_send SRCS parameter_send.cc DEPS sendrecvop_rpc memory)
cc_library(parameter_recv SRCS parameter_recv.cc DEPS sendrecvop_rpc memory)
cc_library(communicator SRCS communicator.cc DEPS scope selected_rows tensor variable_helper selected_rows_functor simple_threadpool parameter_send parameter_recv generator)
......
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/elementwise.
include(unity_build_rule.cmake)
endif()
register_operators()
cc_test(test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc DEPS op_registry elementwise_add_op scope device_context enforce executor)
......
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
elementwise_add_op.cc
mkldnn/elementwise_add_mkldnn_op.cc
elementwise_div_op.cc
elementwise_floordiv_op.cc
elementwise_max_op.cc
elementwise_min_op.cc
elementwise_mod_op.cc
elementwise_mul_op.cc
mkldnn/elementwise_mul_mkldnn_op.cc
elementwise_pow_op.cc
elementwise_sub_op.cc)
register_unity_group(cu
elementwise_add_op.cu
elementwise_div_op.cu
elementwise_floordiv_op.cu
elementwise_max_op.cu
elementwise_min_op.cu
elementwise_mod_op.cu
elementwise_mul_op.cu
elementwise_pow_op.cu
elementwise_sub_op.cu)
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/fused.
include(unity_build_rule.cmake)
endif()
register_operators(EXCLUDES
fused_bn_activation_op
conv_fusion_op
......
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
fused_elemwise_activation_op.cc
fused_embedding_fc_lstm_op.cc
fused_embedding_seq_pool_op.cc
fusion_lstm_op.cc
fusion_repeated_fc_relu_op.cc
fusion_seqconv_eltadd_relu_op.cc
fusion_seqexpand_concat_fc_op.cc
fusion_seqpool_concat_op.cc
fusion_squared_mat_sub_op.cc
multi_gru_op.cc
mkldnn/multi_gru_mkldnn_op.cc
fusion_seqpool_cvm_concat_op.cc)
......@@ -50,7 +50,7 @@ math_library(cos_sim_functor)
math_library(depthwise_conv)
math_library(im2col)
math_library(sample_prob)
math_library(sampler)
math_library(sampler DEPS generator)
math_library(gru_compute DEPS activation_functions math_function)
math_library(lstm_compute DEPS activation_functions)
......
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/metrics.
include(unity_build_rule.cmake)
endif()
register_operators()
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
accuracy_op.cc
auc_op.cc
precision_recall_op.cc)
register_unity_group(cu
accuracy_op.cu
auc_op.cu)
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/optimizers.
include(unity_build_rule.cmake)
endif()
register_operators()
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
ftrl_op.cc
lars_momentum_op.cc
momentum_op.cc
sgd_op.cc)
register_unity_group(cc
adagrad_op.cc
adam_op.cc
adamax_op.cc
dgc_momentum_op.cc
proximal_gd_op.cc)
register_unity_group(cc
decayed_adagrad_op.cc
adadelta_op.cc
lamb_op.cc
dpsgd_op.cc
rmsprop_op.cc)
register_unity_group(cu
ftrl_op.cu
lars_momentum_op.cu
momentum_op.cu
sgd_op.cu)
register_unity_group(cu
adagrad_op.cu
adam_op.cu
adamax_op.cu)
register_unity_group(cu
decayed_adagrad_op.cu
adadelta_op.cu
lamb_op.cu
rmsprop_op.cu)
# The following single-file groups make better use of `/MP`, MSVC's parallel
# compilation option, when compiling with Unity Build.
register_unity_group(cu proximal_adagrad_op.cu)
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/reduce_ops.
include(unity_build_rule.cmake)
endif()
if(WITH_GPU)
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0)
register_operators(DEPS cub)
......
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
reduce_all_op.cc
reduce_any_op.cc
reduce_prod_op.cc
reduce_sum_op.cc)
register_unity_group(cu
reduce_all_op.cu
reduce_any_op.cu
reduce_prod_op.cu
reduce_prod_op.part.cu
reduce_sum_op.cu
reduce_sum_op.part.cu)
# The following single-file groups make better use of `/MP`, MSVC's parallel
# compilation option, when compiling with Unity Build.
register_unity_group(cu frobenius_norm_op.cu)
register_unity_group(cu logsumexp_op.cu)
register_unity_group(cu reduce_max_op.cu)
register_unity_group(cu reduce_mean_op.cu)
register_unity_group(cu reduce_min_op.cu)
include(operators)
if(WITH_UNITY_BUILD)
# Load Unity Build rules for operators in paddle/fluid/operators/sequence_ops.
include(unity_build_rule.cmake)
endif()
register_operators()
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
sequence_concat_op.cc
sequence_conv_op.cc
sequence_enumerate_op.cc
sequence_erase_op.cc
sequence_expand_op.cc
sequence_mask_op.cc
sequence_pad_op.cc
sequence_pool_op.cc)
register_unity_group(cc
sequence_expand_as_op.cc
sequence_reshape_op.cc
sequence_reverse_op.cc
sequence_scatter_op.cc
sequence_slice_op.cc
sequence_softmax_op.cc
sequence_topk_avg_pooling_op.cc
sequence_unpad_op.cc)
register_unity_group(cc
sequence_concat_op.cu.cc
sequence_conv_op.cu.cc)
register_unity_group(cu
sequence_enumerate_op.cu
sequence_erase_op.cu
sequence_expand_op.cu
sequence_mask_op.cu
sequence_pad_op.cu
sequence_pool_op.cu)
register_unity_group(cu
sequence_expand_as_op.cu
sequence_reshape_op.cu
sequence_reverse_op.cu
sequence_slice_op.cu
sequence_softmax_cudnn_op.cu.cc
sequence_softmax_op.cu
sequence_unpad_op.cu)
# This file records the Unity Build compilation rules.
# The source files listed in a single `register_unity_group` call are compiled
# together in one unity file.
# Generally, the combination rules in this file do not need to be modified.
# If compiling a unity file fails with redefinition errors, remove the
# offending source file from the rules below.
register_unity_group(cc
add_position_encoding_op.cc
addmm_op.cc
affine_channel_op.cc
affine_grid_op.cc
allclose_op.cc
argsort_op.cc
array_to_lod_tensor_op.cc
assert_op.cc
assign_op.cc
assign_value_op.cc
attention_lstm_op.cc
average_accumulates_op.cc
batch_fc_op.cc
bce_loss_op.cc
beam_search_op.cc
beam_search_decode_op.cc
bernoulli_op.cc
bilateral_slice_op.cc)
register_unity_group(cc
mkldnn/batch_norm_mkldnn_op.cc
bilinear_tensor_product_op.cc
bmm_op.cc
bpr_loss_op.cc
cast_op.cc
cholesky_op.cc
chunk_eval_op.cc
clip_by_norm_op.cc
clip_op.cc
coalesce_tensor_op.cc)
register_unity_group(cc
center_loss_op.cc
mkldnn/concat_mkldnn_op.cc
mkldnn/conv_mkldnn_op.cc
mkldnn/conv_transpose_mkldnn_op.cc
correlation_op.cc
cos_sim_op.cc
crf_decoding_op.cc
crop_op.cc)
register_unity_group(cc
cross_entropy_op.cc
cross_op.cc
ctc_align_op.cc
cudnn_lstm_op.cc
cumsum_op.cc
cvm_op.cc
data_norm_op.cc
deformable_conv_op.cc
deformable_conv_v1_op.cc
deformable_psroi_pooling_op.cc
delete_var_op.cc
dequantize_abs_max_op.cc
dequantize_op.cc
mkldnn/dequantize_mkldnn_op.cc)
register_unity_group(cc
dequeue_op.cc
detection_map_op.cc
dgc_clip_by_norm_op.cc
diag_embed_op.cc
diag_op.cc
diag_v2_op.cc
dot_op.cc
edit_distance_op.cc
empty_op.cc
enqueue_op.cc
erf_op.cc)
register_unity_group(cc
expand_v2_op.cc
fake_dequantize_op.cc
fc_op.cc
mkldnn/fc_mkldnn_op.cc
fill_any_like_op.cc
fill_constant_batch_size_like_op.cc
fill_constant_op.cc
fill_op.cc
fill_zeros_like_op.cc
filter_by_instag_op.cc)
register_unity_group(cc
flatten_op.cc
flip_op.cc
fsp_op.cc
gather_nd_op.cc
gather_op.cc
gather_tree_op.cc
gaussian_random_batch_size_like_op.cc
gaussian_random_op.cc
mkldnn/gaussian_random_mkldnn_op.cc
grid_sampler_op.cc
group_norm_op.cc gru_op.cc)
register_unity_group(cc
hash_op.cc
hierarchical_sigmoid_op.cc
hinge_loss_op.cc
histogram_op.cc
huber_loss_op.cc
im2sequence_op.cc
increment_op.cc
index_sample_op.cc
index_select_op.cc
interpolate_op.cc
isfinite_v2_op.cc)
# cc sources compiled together in one unity file. Each file is listed once;
# the previously duplicated `mkldnn/layer_norm_mkldnn_op.cc` entry was
# redundant, because group membership is a plain list lookup.
register_unity_group(cc
    inplace_abn_op.cc
    interpolate_v2_op.cc
    inverse_op.cc
    is_empty_op.cc
    isfinite_op.cc
    kron_op.cc
    l1_norm_op.cc
    label_smooth_op.cc
    layer_norm_op.cc
    mkldnn/layer_norm_mkldnn_op.cc
    linspace_op.cc
    load_combine_op.cc
    load_op.cc)
register_unity_group(cc
lod_array_length_op.cc
lod_rank_table_op.cc
lod_reset_op.cc
lod_tensor_to_array_op.cc
log_softmax_op.cc
lookup_table_dequant_op.cc
lrn_op.cc
mkldnn/lrn_mkldnn_op.cc
lstm_unit_op.cc
lstmp_op.cc)
register_unity_group(cc
log_loss_op.cc
lookup_table_v2_op.cc
margin_rank_loss_op.cc
masked_select_op.cc
match_matrix_tensor_op.cc
matmul_op.cc
mkldnn/matmul_mkldnn_op.cc
max_sequence_len_op.cc
maxout_op.cc
merge_lod_tensor_op.cc
merge_selected_rows_op.cc
meshgrid_op.cc)
register_unity_group(cc
concat_op.cc
conv_shift_op.cc
dequantize_log_op.cc
dropout_op.cc
expand_op.cc
fake_quantize_op.cc
gelu_op.cc
get_tensor_from_selected_rows_op.cc
lookup_table_op.cc
matmul_v2_op.cc)
register_unity_group(cc
mean_iou_op.cc
mean_op.cc
minus_op.cc
mish_op.cc
mul_op.cc
multinomial_op.cc
multiplex_op.cc
mv_op.cc
nce_op.cc
nll_loss_op.cc
norm_op.cc
one_hot_op.cc
one_hot_v2_op.cc
p_norm_op.cc
pad2d_op.cc
pad3d_op.cc
pad_constant_like_op.cc
pad_op.cc)
register_unity_group(cc
modified_huber_loss_op.cc
mkldnn/mul_mkldnn_op.cc
partial_sum_op.cc
pixel_shuffle_op.cc
pool_op.cc
pool_with_index_op.cc
positive_negative_pair_op.cc
prelu_op.cc
print_op.cc
prroi_pool_op.cc
psroi_pool_op.cc
pull_box_extended_sparse_op.cc
pull_box_sparse_op.cc
pull_sparse_op.cc
pull_sparse_v2_op.cc)
register_unity_group(cc
push_dense_op.cc
quantize_op.cc
mkldnn/quantize_mkldnn_op.cc
queue_generator_op.cc
randint_op.cc
random_crop_op.cc
randperm_op.cc
range_op.cc
rank_attention_op.cc
rank_loss_op.cc
recurrent_op.cc
reorder_lod_tensor_by_rank_op.cc
requantize_op.cc
mkldnn/requantize_mkldnn_op.cc
reshape_op.cc
reverse_op.cc)
register_unity_group(cc
rnn_memory_helper_op.cc
roi_align_op.cc
roll_op.cc
run_program_op.cc
sample_logits_op.cc
sampling_id_op.cc
save_combine_op.cc
save_op.cc
scale_op.cc
scatter_nd_add_op.cc
scatter_op.cc
seed_op.cc
segment_pool_op.cc
select_input_op.cc
select_output_op.cc)
register_unity_group(cc
roi_pool_op.cc
selu_op.cc
shape_op.cc
shard_index_op.cc
shrink_rnn_memory_op.cc
shuffle_batch_op.cc
shuffle_channel_op.cc
sigmoid_cross_entropy_with_logits_op.cc
sign_op.cc
similarity_focus_op.cc
size_op.cc
slice_op.cc
softmax_op.cc)
register_unity_group(cc
space_to_depth_op.cc
spectral_norm_op.cc
split_lod_tensor_op.cc
split_op.cc
split_selected_rows_op.cc
spp_op.cc
squared_l2_norm_op.cc
squeeze_op.cc
stack_op.cc
strided_slice_op.cc
sum_op.cc
mkldnn/sum_mkldnn_op.cc
tdm_child_op.cc
tdm_sampler_op.cc
teacher_student_sigmoid_loss_op.cc
temporal_shift_op.cc)
register_unity_group(cc
row_conv_op.cc
tensor_array_to_tensor_op.cc
tile_op.cc
top_k_v2_op.cc
trace_op.cc
transpose_op.cc
mkldnn/transpose_mkldnn_op.cc
tree_conv_op.cc
tril_triu_op.cc
truncated_gaussian_random_op.cc
unbind_op.cc
unfold_op.cc)
register_unity_group(cc
smooth_l1_loss_op.cc
uniform_random_batch_size_like_op.cc
uniform_random_op.cc
unique_op.cc
unique_with_counts_op.cc
unpool_op.cc
unsqueeze_op.cc
unstack_op.cc
var_conv_2d_op.cc
where_index_op.cc
where_op.cc)
# `.cu.cc` sources compiled together in one cc unity file. Each file is listed
# once; the previously duplicated `mul_op.cu.cc` entry was redundant, because
# group membership is a plain list lookup.
# NOTE(review): `unique_op.cu` is also registered in a `cu` group; it looks
# misplaced in this `cc` group -- confirm how `op_library` classifies it
# before removing it here.
register_unity_group(cc
    affine_grid_cudnn_op.cu.cc
    beam_search_op.cu.cc
    cudnn_lstm_op.cu.cc
    empty_op.cu.cc
    fc_op.cu.cc
    fill_constant_batch_size_like_op.cu.cc
    fill_constant_op.cu.cc
    fill_op.cu.cc
    fill_zeros_like_op.cu.cc
    flatten_op.cu.cc
    grid_sampler_cudnn_op.cu.cc
    gru_op.cu.cc
    inverse_op.cu.cc
    is_empty_op.cu.cc
    maxout_op.cu.cc
    mul_op.cu.cc
    concat_op.cu.cc
    pool_op.cu.cc
    pool_cudnn_op.cu.cc
    pool_with_index_op.cu.cc
    run_program_op.cu.cc
    softmax_op.cu.cc
    softmax_cudnn_op.cu.cc
    spp_op.cu.cc
    squeeze_op.cu.cc
    unbind_op.cu.cc
    unique_op.cu
    unpool_op.cu.cc
    unsqueeze_op.cu.cc)
register_unity_group(cu
addmm_op.cu
affine_channel_op.cu
allclose_op.cu
argsort_op.cu
assign_value_op.cu
bce_loss_op.cu
bernoulli_op.cu
bilateral_slice_op.cu)
register_unity_group(cu
bilinear_tensor_product_op.cu
bmm_op.cu
cast_op.cu
cholesky_op.cu
clip_by_norm_op.cu
clip_op.cu)
register_unity_group(cu
center_loss_op.cu
conv_op.cu
conv_transpose_cudnn_op.cu
conv_transpose_op.cu
cos_sim_op.cu
crop_op.cu)
register_unity_group(cu
cross_entropy_op.cu
cross_op.cu
ctc_align_op.cu
cumsum_op.cu
cvm_op.cu
data_norm_op.cu
deformable_conv_op.cu
deformable_conv_v1_op.cu
dequantize_abs_max_op.cu)
register_unity_group(cu
dgc_clip_by_norm_op.cu
diag_embed_op.cu
diag_op.cu
diag_v2_op.cu
edit_distance_op.cu
erf_op.cu)
register_unity_group(cu
expand_v2_op.cu
fake_dequantize_op.cu
fill_any_like_op.cu)
register_unity_group(cu
flip_op.cu
fsp_op.cu
gather_nd_op.cu
gather_op.cu
gather_tree_op.cu
gaussian_random_op.cu
grid_sampler_op.cu
group_norm_op.cu)
register_unity_group(cu
hinge_loss_op.cu
histogram_op.cu
huber_loss_op.cu
im2sequence_op.cu
increment_op.cu
index_sample_op.cu
index_select_op.cu
interpolate_op.cu
isfinite_v2_op.cu)
register_unity_group(cu
inplace_abn_op.cu
interpolate_v2_op.cu
isfinite_op.cu
kron_op.cu
l1_norm_op.cu
label_smooth_op.cu
layer_norm_op.cu
linspace_op.cu
load_combine_op.cu
load_op.cu)
register_unity_group(cu
lod_reset_op.cu
log_softmax_op.cu
lrn_op.cu
lstm_unit_op.cu)
register_unity_group(cu
log_loss_op.cu
lookup_table_v2_op.cu
margin_rank_loss_op.cu
masked_select_op.cu
merge_selected_rows_op.cu)
register_unity_group(cu
conv_shift_op.cu
dequantize_log_op.cu
dropout_op.cu
fake_quantize_op.cu
gelu_op.cu
lookup_table_op.cu)
register_unity_group(cu
mean_iou_op.cu
mean_op.cu
minus_op.cu
mish_op.cu
multinomial_op.cu
multiplex_op.cu
mv_op.cu
nll_loss_op.cu
norm_op.cu
one_hot_op.cu
p_norm_op.cu
pad2d_op.cu
pad3d_op.cu
pad_constant_like_op.cu
pad_op.cu)
register_unity_group(cu
partial_sum_op.cu
pixel_shuffle_op.cu
prelu_op.cu
prroi_pool_op.cu
pull_box_extended_sparse_op.cu
pull_box_sparse_op.cu)
register_unity_group(cu
randint_op.cu
random_crop_op.cu
randperm_op.cu
range_op.cu
reverse_op.cu)
register_unity_group(cu
roi_align_op.cu
roll_op.cu
sample_logits_op.cu
sampling_id_op.cu
save_combine_op.cu
save_op.cu
scale_op.cu
scatter_nd_add_op.cu
scatter_op.cu
seed_op.cu
segment_pool_op.cu)
register_unity_group(cu
roi_pool_op.cu
selu_op.cu
shape_op.cu
shard_index_op.cu
sign_op.cu
size_op.cu
slice_op.cu)
register_unity_group(cu
space_to_depth_op.cu
spectral_norm_op.cu
split_op.cu
split_selected_rows_op.cu
squared_l2_norm_op.cu
stack_op.cu
strided_slice_op.cu
sum_op.cu
temporal_shift_op.cu)
register_unity_group(cu
row_conv_op.cu
tile_op.cu
trace_op.cu
transpose_op.cu
tree_conv_op.cu
tril_triu_op.cu
truncated_gaussian_random_op.cu
unfold_op.cu)
register_unity_group(cu
smooth_l1_loss_op.cu
uniform_random_op.cu
unique_op.cu
unstack_op.cu
where_index_op.cu
where_op.cu)
# The following single-file groups make better use of `/MP`, MSVC's parallel
# compilation option, when compiling with Unity Build.
register_unity_group(cu activation_op.cu)
register_unity_group(cu arg_max_op.cu)
register_unity_group(cu arg_min_op.cu)
register_unity_group(cu batch_norm_op.cu)
register_unity_group(cu crop_tensor_op.cu)
register_unity_group(cu dist_op.cu)
register_unity_group(cu expand_as_op.cu)
register_unity_group(cu expand_as_v2_op.cu)
register_unity_group(cu gru_unit_op.cu)
register_unity_group(cu instance_norm_op.cu)
register_unity_group(cu kldiv_loss_op.cu)
register_unity_group(cu partial_concat_op.cu)
register_unity_group(cu softmax_with_cross_entropy_op.cu)
register_unity_group(cu squared_l2_distance_op.cu)
register_unity_group(cu top_k_op.cu)
......@@ -41,6 +41,7 @@ if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON
if not defined WITH_STATIC_LIB set WITH_STATIC_LIB=ON
if not defined WITH_CACHE set WITH_CACHE=OFF
if not defined WITH_TPCACHE set WITH_TPCACHE=ON
if not defined WITH_UNITY_BUILD set WITH_UNITY_BUILD=OFF
set INFERENCE_DEMO_INSTALL_DIR=%cache_dir:\=/%/inference_demo
rem -------set cache build work directory-----------
......@@ -227,13 +228,13 @@ echo cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=
-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DON_INFER=%ON_INFER% ^
-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^
-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^
-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT%
-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% -DWITH_UNITY_BUILD=%WITH_UNITY_BUILD%
cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^
-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DON_INFER=%ON_INFER% ^
-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^
-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^
-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT%
-DTENSORRT_ROOT=%TENSORRT_ROOT% -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% -DWITH_UNITY_BUILD=%WITH_UNITY_BUILD%
goto:eof
:cmake_error
......
......@@ -246,6 +246,7 @@ function cmake_base() {
-DWITH_LITE=${WITH_LITE:-OFF}
-DWITH_XPU=${WITH_XPU:-OFF}
-DLITE_GIT_TAG=develop
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
......@@ -279,7 +280,8 @@ EOF
-DWITH_GLOO=${gloo_flag} \
-DLITE_GIT_TAG=develop \
-DWITH_XPU=${WITH_XPU:-OFF} \
-DWITH_LITE=${WITH_LITE:-OFF};build_error=$?
-DWITH_LITE=${WITH_LITE:-OFF} \
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF};build_error=$?
if [ "$build_error" != 0 ];then
exit 7;
fi
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册