未验证 提交 4baf0dbe 编写于 作者: W wanghuancoder 提交者: GitHub

Compilation optimization (#44242)

* Compilation optimization
上级 e9b4d0be
...@@ -6,7 +6,7 @@ cc_library( ...@@ -6,7 +6,7 @@ cc_library(
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
final_dygraph_node final_dygraph_node
SRCS nodes.cc SRCS nodes.cc ${eager_manual_nodes}
DEPS ${eager_deps} ${eager_manual_nodes}) DEPS ${eager_deps})
add_dependencies(final_dygraph_node eager_final_state_codegen) add_dependencies(final_dygraph_node eager_final_state_codegen)
endif() endif()
...@@ -6,7 +6,7 @@ cc_library( ...@@ -6,7 +6,7 @@ cc_library(
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
final_dygraph_function final_dygraph_function
SRCS dygraph_functions.cc SRCS dygraph_functions.cc ${eager_manual_functions}
DEPS ${eager_deps} ${eager_manual_functions}) DEPS ${eager_deps})
add_dependencies(final_dygraph_function eager_final_state_codegen) add_dependencies(final_dygraph_function eager_final_state_codegen)
endif() endif()
cc_library(
add_n_fwd_func
SRCS add_n_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(add_n_fwd_func eager_codegen)
cc_library(
conv2d_fwd_function
SRCS conv2d_fwd_function.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(conv2d_fwd_function eager_codegen)
set(eager_manual_functions set(eager_manual_functions
conv2d_fwd_function add_n_fwd_func ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc
PARENT_SCOPE) PARENT_SCOPE)
cc_library(
add_n_node
SRCS add_n_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
conv2d_nodes
SRCS conv2d_nodes.cc
DEPS ${eager_deps} ${fluid_deps})
set(eager_manual_nodes set(eager_manual_nodes
conv2d_nodes add_n_node ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc
PARENT_SCOPE) PARENT_SCOPE)
cc_library(
fused_gate_attention_fwd_func
SRCS fused_gate_attention_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_gate_attention_fwd_func eager_codegen
copy_dygraph_forward_functions)
cc_library(
fused_feedforward_fwd_func
SRCS fused_feedforward_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_feedforward_fwd_func eager_codegen
copy_dygraph_forward_functions)
cc_library(
fused_attention_fwd_func
SRCS fused_attention_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_attention_fwd_func eager_codegen
copy_dygraph_forward_functions)
set(fluid_manual_functions set(fluid_manual_functions
fused_gate_attention_fwd_func fused_feedforward_fwd_func ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc
fused_attention_fwd_func ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc
PARENT_SCOPE) PARENT_SCOPE)
cc_library(
fused_gate_attention_node
SRCS fused_gate_attention_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
fused_feedforward_node
SRCS fused_feedforward_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
fused_attention_node
SRCS fused_attention_node.cc
DEPS ${eager_deps} ${fluid_deps})
set(fluid_manual_nodes set(fluid_manual_nodes
fused_gate_attention_node fused_feedforward_node fused_attention_node ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gate_attention_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_feedforward_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_attention_node.cc
PARENT_SCOPE) PARENT_SCOPE)
...@@ -3083,27 +3083,44 @@ static std::string ConvertCoreOpsInfosToString( ...@@ -3083,27 +3083,44 @@ static std::string ConvertCoreOpsInfosToString(
return core_ops_returns_info_init_str; return core_ops_returns_info_init_str;
} }
static std::string GenerateCoreOpsReturnsInfo() { static std::string GenerateCoreOpsArgsInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE = const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> " "std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_info = { %s };\n" "core_ops_args_info = { %s };\n";
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info = { %s };\n"
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info = { %s };\n";
std::string core_ops_args_info_init_str = std::string core_ops_args_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_info); ConvertCoreOpsInfosToString(core_ops_args_info);
std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_args_info_init_str);
return core_ops_info_str;
}
static std::string GenerateCoreOpsArgsTypeInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info = { %s };\n";
std::string core_ops_args_type_info_init_str = std::string core_ops_args_type_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_type_info); ConvertCoreOpsInfosToString(core_ops_args_type_info);
std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_args_type_info_init_str);
return core_ops_info_str;
}
static std::string GenerateCoreOpsReturnsInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info = { %s };\n";
std::string core_ops_returns_info_init_str = std::string core_ops_returns_info_init_str =
ConvertCoreOpsInfosToString(core_ops_returns_info); ConvertCoreOpsInfosToString(core_ops_returns_info);
std::string core_ops_info_str = std::string core_ops_info_str = paddle::string::Sprintf(
paddle::string::Sprintf(Core_Ops_Returns_MAP_TEMPLATE, Core_Ops_Returns_MAP_TEMPLATE, core_ops_returns_info_init_str);
core_ops_args_info_init_str,
core_ops_args_type_info_init_str,
core_ops_returns_info_init_str);
return core_ops_info_str; return core_ops_info_str;
} }
...@@ -3252,6 +3269,12 @@ static void DygraphCodeGeneration(const std::string& output_dir, ...@@ -3252,6 +3269,12 @@ static void DygraphCodeGeneration(const std::string& output_dir,
GenerateForwardDygraphFile( GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_args_info.tmp.cc", output_dir + "/forwards/dygraph_forward_functions_args_info.tmp.cc",
GenerateCoreOpsArgsInfo());
GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_args_type_info.tmp.cc",
GenerateCoreOpsArgsTypeInfo());
GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_returns_info.tmp.cc",
GenerateCoreOpsReturnsInfo()); GenerateCoreOpsReturnsInfo());
VLOG(6) << "-------- GenerateNodeCCFile -------"; VLOG(6) << "-------- GenerateNodeCCFile -------";
......
...@@ -96,6 +96,11 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): ...@@ -96,6 +96,11 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
"nodes" + str(i + 1) + ".cc")) "nodes" + str(i + 1) + ".cc"))
empty_files.append( empty_files.append(
os.path.join(forwards_dir, "dygraph_forward_functions_args_info.cc")) os.path.join(forwards_dir, "dygraph_forward_functions_args_info.cc"))
empty_files.append(
os.path.join(forwards_dir,
"dygraph_forward_functions_args_type_info.cc"))
empty_files.append(
os.path.join(forwards_dir, "dygraph_forward_functions_returns_info.cc"))
for path in empty_files: for path in empty_files:
if not os.path.exists(path): if not os.path.exists(path):
open(path, 'a').close() open(path, 'a').close()
...@@ -125,7 +130,7 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): ...@@ -125,7 +130,7 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
f.write("cc_library(dygraph_node SRCS ") f.write("cc_library(dygraph_node SRCS ")
for i in range(split_count): for i in range(split_count):
f.write("nodes" + str(i + 1) + ".cc ") f.write("nodes" + str(i + 1) + ".cc ")
f.write("DEPS ${eager_deps} ${fluid_deps} ${fluid_manual_nodes})\n") f.write("${fluid_manual_nodes} DEPS ${eager_deps} ${fluid_deps})\n")
f.write("add_dependencies(dygraph_node copy_dygraph_node)") f.write("add_dependencies(dygraph_node copy_dygraph_node)")
with open(forwards_level_cmakelist_path, "w") as f: with open(forwards_level_cmakelist_path, "w") as f:
...@@ -143,6 +148,12 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): ...@@ -143,6 +148,12 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
f.write( f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.cc\"\n" " COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.cc\"\n"
) )
f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_type_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_type_info.cc\"\n"
)
f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_returns_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_returns_info.cc\"\n"
)
f.write(" DEPENDS eager_codegen\n") f.write(" DEPENDS eager_codegen\n")
f.write(" VERBATIM)\n") f.write(" VERBATIM)\n")
...@@ -150,8 +161,10 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count): ...@@ -150,8 +161,10 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
for i in range(split_count): for i in range(split_count):
f.write("dygraph_forward_functions" + str(i + 1) + ".cc ") f.write("dygraph_forward_functions" + str(i + 1) + ".cc ")
f.write("dygraph_forward_functions_args_info.cc ") f.write("dygraph_forward_functions_args_info.cc ")
f.write("dygraph_forward_functions_args_type_info.cc ")
f.write("dygraph_forward_functions_returns_info.cc ")
f.write( f.write(
"DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${fluid_manual_functions})\n" "${fluid_manual_functions} DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})\n"
) )
f.write( f.write(
"add_dependencies(dygraph_function copy_dygraph_forward_functions)") "add_dependencies(dygraph_function copy_dygraph_forward_functions)")
......
set(INTERPRETERCORE_DEPS add_subdirectory(workqueue)
add_subdirectory(garbage_collector)
set(STANDALONE_EXECUTOR_SRCS
data_transfer.cc
new_executor_defs.cc
interpretercore_util.cc
event_manager.cc
stream_analyzer.cc
interpretercore.cc
standalone_executor.cc)
set(STANDALONE_EXECUTOR_DEPS
op_registry op_registry
device_context device_context
scope scope
...@@ -20,62 +32,33 @@ set(INTERPRETERCORE_DEPS ...@@ -20,62 +32,33 @@ set(INTERPRETERCORE_DEPS
variable_helper variable_helper
timer timer
monitor monitor
nan_inf_utils) nan_inf_utils
enforce
add_subdirectory(workqueue) scope
add_subdirectory(garbage_collector) glog
enforce
cc_library( glog
data_transfer scope
SRCS data_transfer.cc workqueue
DEPS enforce scope glog) interpretercore_event_garbage_collector
cc_library( ${DEVICE_EVENT_LIBS}
new_executor_defs glog)
SRCS new_executor_defs.cc
DEPS enforce glog scope)
cc_library(
interpretercore_util
SRCS interpretercore_util.cc
DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs data_transfer)
cc_library(
event_manager
SRCS event_manager.cc
DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs)
cc_library(
stream_analyzer
SRCS stream_analyzer.cc
DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
cc_library( set(STANDALONE_EXECUTOR_DEPS ${STANDALONE_EXECUTOR_DEPS}
interpretercore interpretercore_fast_garbage_collector)
SRCS interpretercore.cc
DEPS workqueue
${DEVICE_EVENT_LIBS}
interpretercore_util
interpretercore_event_garbage_collector
interpretercore_fast_garbage_collector
stream_analyzer
event_manager)
else()
cc_library(
interpretercore
SRCS interpretercore.cc
DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util
interpretercore_event_garbage_collector stream_analyzer event_manager)
endif() endif()
cc_library( cc_library(
standalone_executor standalone_executor
SRCS standalone_executor.cc SRCS ${STANDALONE_EXECUTOR_SRCS}
DEPS interpretercore) DEPS ${STANDALONE_EXECUTOR_DEPS})
cc_library( cc_library(
staticgraph_executor_statistics staticgraph_executor_statistics
SRCS executor_statistics.cc SRCS executor_statistics.cc
DEPS enforce glog os_info) DEPS enforce glog os_info)
# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
# skip win32 since wget is not installed by default on windows machine. # skip win32 since wget is not installed by default on windows machine.
if(WITH_GPU if(WITH_GPU
AND WITH_TESTING AND WITH_TESTING
...@@ -120,13 +103,7 @@ if(WITH_GPU ...@@ -120,13 +103,7 @@ if(WITH_GPU
cc_test( cc_test(
standalone_executor_test standalone_executor_test
SRCS standalone_executor_test.cc SRCS standalone_executor_test.cc
DEPS interpretercore DEPS standalone_executor operator op_registry executor ${OPS} ${OP_DEPS})
standalone_executor
operator
op_registry
executor
${OPS}
${OP_DEPS})
set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100) set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100)
add_dependencies(standalone_executor_test download_program) add_dependencies(standalone_executor_test download_program)
......
...@@ -5,7 +5,7 @@ cc_library( ...@@ -5,7 +5,7 @@ cc_library(
cc_library( cc_library(
var_helper var_helper
SRCS var_helper.cc SRCS var_helper.cc
DEPS tensor phi_api) DEPS tensor selected_rows)
if(WITH_XPU) if(WITH_XPU)
cc_library( cc_library(
prepared_operator prepared_operator
...@@ -20,8 +20,8 @@ if(WITH_XPU) ...@@ -20,8 +20,8 @@ if(WITH_XPU)
op_kernel_type op_kernel_type
data_transform data_transform
nan_inf_utils nan_inf_utils
phi_api scalar
phi_utils int_array
var_helper var_helper
profiler) profiler)
else() else()
...@@ -37,21 +37,16 @@ else() ...@@ -37,21 +37,16 @@ else()
op_kernel_type op_kernel_type
data_transform data_transform
nan_inf_utils nan_inf_utils
phi_api scalar
phi_utils int_array
var_helper var_helper
profiler) profiler)
endif() endif()
cc_library( cc_library(
layer layer
SRCS layer.cc SRCS layer.cc
DEPS prepared_operator DEPS prepared_operator math_function imperative_flag variable_helper
math_function op_registry var_helper)
imperative_flag
variable_helper
op_registry
var_helper
phi_api)
add_subdirectory(jit) add_subdirectory(jit)
if(WITH_GPU) if(WITH_GPU)
cc_library( cc_library(
......
...@@ -101,7 +101,7 @@ else() ...@@ -101,7 +101,7 @@ else()
cc_library(gather_scatter_kernel SRCS gather_scatter_kernel.cc gather_scatter_kernel.cu DEPS tensor) cc_library(gather_scatter_kernel SRCS gather_scatter_kernel.cc gather_scatter_kernel.cu DEPS tensor)
endif() endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_api_utils gather_scatter_kernel) set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_api_utils gather_scatter_kernel backward_infermeta)
register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op quantize_linear_op register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
......
...@@ -10,4 +10,4 @@ nv_library( ...@@ -10,4 +10,4 @@ nv_library(
nv_test( nv_test(
cudnn_helper_test cudnn_helper_test
SRCS cudnn_helper_test.cc SRCS cudnn_helper_test.cc
DEPS dynload_cuda phi) DEPS dynload_cuda)
pybind.h pybind.h
op_function.cc op_function1.cc
op_function2.cc
op_function3.cc
op_function4.cc
op_function5.cc
op_function6.cc
op_function7.cc
op_function8.cc
eager_op_function.cc eager_op_function.cc
eager_final_state_op_function.cc eager_final_state_op_function.cc
...@@ -102,13 +102,16 @@ endif() ...@@ -102,13 +102,16 @@ endif()
set(PYBIND_SRCS set(PYBIND_SRCS
pybind.cc pybind.cc
imperative.cc imperative.cc
op_function.cc
inference_api.cc inference_api.cc
ir.cc ir.cc
bind_fleet_executor.cc bind_fleet_executor.cc
reader_py.cc reader_py.cc
protobuf.cc protobuf.cc
exception.cc exception.cc
op_function_common.cc
parallel_executor.cc
tensor.cc
place.cc
const_value.cc const_value.cc
global_value_getter_setter.cc global_value_getter_setter.cc
fleet_wrapper_py.cc fleet_wrapper_py.cc
...@@ -124,13 +127,15 @@ set(PYBIND_SRCS ...@@ -124,13 +127,15 @@ set(PYBIND_SRCS
generator_py.cc generator_py.cc
communication.cc communication.cc
cuda_streams_py.cc cuda_streams_py.cc
jit.cc) jit.cc
op_function1.cc
execute_process( op_function2.cc
COMMAND op_function3.cc
"${PYTHON_EXECUTABLE}" op_function4.cc
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/generate_file_structures.py" op_function5.cc
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/") op_function6.cc
op_function7.cc
op_function8.cc)
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi) set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi)
...@@ -267,12 +272,35 @@ if(WITH_PYTHON) ...@@ -267,12 +272,35 @@ if(WITH_PYTHON)
target_link_libraries(kernel_signature_generator ${ROCM_HIPRTC_LIB}) target_link_libraries(kernel_signature_generator ${ROCM_HIPRTC_LIB})
endif() endif()
set(impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function.cc) set(op_function_output_path ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/)
set(tmp_impl_file ${impl_file}.tmp) set(impl_file1 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function1.cc)
set(tmp_impl_file1 ${impl_file1}.tmp)
set(impl_file2 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function2.cc)
set(tmp_impl_file2 ${impl_file2}.tmp)
set(impl_file3 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function3.cc)
set(tmp_impl_file3 ${impl_file3}.tmp)
set(impl_file4 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function4.cc)
set(tmp_impl_file4 ${impl_file4}.tmp)
set(impl_file5 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function5.cc)
set(tmp_impl_file5 ${impl_file5}.tmp)
set(impl_file6 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function6.cc)
set(tmp_impl_file6 ${impl_file6}.tmp)
set(impl_file7 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function7.cc)
set(tmp_impl_file7 ${impl_file7}.tmp)
set(impl_file8 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function8.cc)
set(tmp_impl_file8 ${impl_file8}.tmp)
set(CODE_GEN_SPLIT_FILE_COUNT "8")
set(eager_impl_file set(eager_impl_file
${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function.cc) ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function.cc)
set(tmp_eager_impl_file ${eager_impl_file}.tmp) set(tmp_eager_impl_file ${eager_impl_file}.tmp)
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/generate_file_structures.py"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/"
"${CODE_GEN_SPLIT_FILE_COUNT}")
set(OP_IMPL_DEPS op_function_generator) set(OP_IMPL_DEPS op_function_generator)
set(EAGER_OP_IMPL_DEPS eager_op_function_generator set(EAGER_OP_IMPL_DEPS eager_op_function_generator
eager_final_state_python_c_codegen) eager_final_state_python_c_codegen)
...@@ -292,7 +320,7 @@ if(WITH_PYTHON) ...@@ -292,7 +320,7 @@ if(WITH_PYTHON)
":retry\n" ":retry\n"
"ECHO op_function_generator run %build_times% time\n" "ECHO op_function_generator run %build_times% time\n"
"taskkill /f /im op_function_generator.exe 2>NUL\n" "taskkill /f /im op_function_generator.exe 2>NUL\n"
"${op_impl_path}/op_function_generator.exe ${tmp_impl_file}\n" "${op_impl_path}/op_function_generator.exe ${op_function_output_path} ${CODE_GEN_SPLIT_FILE_COUNT}\n"
"if %ERRORLEVEL% NEQ 0 (\n" "if %ERRORLEVEL% NEQ 0 (\n"
" set /a build_times=%build_times%+1\n" " set /a build_times=%build_times%+1\n"
" if %build_times% GEQ 10 (\n" " if %build_times% GEQ 10 (\n"
...@@ -367,12 +395,33 @@ if(WITH_PYTHON) ...@@ -367,12 +395,33 @@ if(WITH_PYTHON)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${impl_file} OUTPUT op_function
COMMAND COMMAND
${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat ${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file1}
${impl_file} ${impl_file1}
COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" COMMENT "copy_if_different ${tmp_impl_file1} to ${impl_file1}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file2}
${impl_file2}
COMMENT "copy_if_different ${tmp_impl_file2} to ${impl_file2}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file3}
${impl_file3}
COMMENT "copy_if_different ${tmp_impl_file3} to ${impl_file3}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file4}
${impl_file4}
COMMENT "copy_if_different ${tmp_impl_file4} to ${impl_file4}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file5}
${impl_file5}
COMMENT "copy_if_different ${tmp_impl_file5} to ${impl_file5}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file6}
${impl_file6}
COMMENT "copy_if_different ${tmp_impl_file6} to ${impl_file6}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file7}
${impl_file7}
COMMENT "copy_if_different ${tmp_impl_file7} to ${impl_file7}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file8}
${impl_file8}
COMMENT "copy_if_different ${tmp_impl_file8} to ${impl_file8}"
DEPENDS ${OP_IMPL_DEPS}) DEPENDS ${OP_IMPL_DEPS})
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_custom_command( add_custom_command(
...@@ -431,13 +480,35 @@ if(WITH_PYTHON) ...@@ -431,13 +480,35 @@ if(WITH_PYTHON)
list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0) list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0)
endif() endif()
add_custom_command( add_custom_command(
OUTPUT ${impl_file} OUTPUT op_function
COMMAND COMMAND
${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:."
"${CMAKE_CURRENT_BINARY_DIR}/op_function_generator" "${tmp_impl_file}" "${CMAKE_CURRENT_BINARY_DIR}/op_function_generator"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file} "${op_function_output_path}" "${CODE_GEN_SPLIT_FILE_COUNT}"
${impl_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file1}
COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}" ${impl_file1}
COMMENT "copy_if_different ${tmp_impl_file1} to ${impl_file1}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file2}
${impl_file2}
COMMENT "copy_if_different ${tmp_impl_file2} to ${impl_file2}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file3}
${impl_file3}
COMMENT "copy_if_different ${tmp_impl_file3} to ${impl_file3}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file4}
${impl_file4}
COMMENT "copy_if_different ${tmp_impl_file4} to ${impl_file4}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file5}
${impl_file5}
COMMENT "copy_if_different ${tmp_impl_file5} to ${impl_file5}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file6}
${impl_file6}
COMMENT "copy_if_different ${tmp_impl_file6} to ${impl_file6}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file7}
${impl_file7}
COMMENT "copy_if_different ${tmp_impl_file7} to ${impl_file7}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file8}
${impl_file8}
COMMENT "copy_if_different ${tmp_impl_file8} to ${impl_file8}"
DEPENDS ${OP_IMPL_DEPS} DEPENDS ${OP_IMPL_DEPS}
VERBATIM) VERBATIM)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
...@@ -454,19 +525,13 @@ if(WITH_PYTHON) ...@@ -454,19 +525,13 @@ if(WITH_PYTHON)
VERBATIM) VERBATIM)
endif() endif()
endif() endif()
add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file}) add_custom_target(op_function_generator_cmd ALL DEPENDS op_function)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_custom_target(eager_op_function_generator_cmd ALL add_custom_target(eager_op_function_generator_cmd ALL
DEPENDS ${eager_impl_file}) DEPENDS ${eager_impl_file})
endif() endif()
list(APPEND PYBIND_DEPS interpretercore standalone_executor list(APPEND PYBIND_DEPS standalone_executor staticgraph_executor_statistics)
staticgraph_executor_statistics)
cc_library(
op_function_common
SRCS op_function_common.cc
DEPS ${PYBIND_DEPS})
list(APPEND PYBIND_DEPS op_function_common)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
set(PYBIND_SRCS eager.cc ${PYBIND_SRCS}) set(PYBIND_SRCS eager.cc ${PYBIND_SRCS})
...@@ -482,7 +547,6 @@ if(WITH_PYTHON) ...@@ -482,7 +547,6 @@ if(WITH_PYTHON)
list(APPEND PYBIND_DEPS backward) list(APPEND PYBIND_DEPS backward)
list(APPEND PYBIND_DEPS grad_node_info) list(APPEND PYBIND_DEPS grad_node_info)
list(APPEND PYBIND_DEPS phi) list(APPEND PYBIND_DEPS phi)
list(APPEND PYBIND_DEPS op_function_common)
list(APPEND PYBIND_DEPS final_dygraph_function) list(APPEND PYBIND_DEPS final_dygraph_function)
list(APPEND PYBIND_DEPS final_dygraph_node) list(APPEND PYBIND_DEPS final_dygraph_node)
list(APPEND PYBIND_DEPS dygraph_function) list(APPEND PYBIND_DEPS dygraph_function)
......
...@@ -16,12 +16,16 @@ import sys ...@@ -16,12 +16,16 @@ import sys
import os import os
if __name__ == "__main__": if __name__ == "__main__":
assert len(sys.argv) == 2 assert len(sys.argv) == 3
pybind_dir = sys.argv[1] pybind_dir = sys.argv[1]
split_count = int(sys.argv[2])
empty_files = [os.path.join(pybind_dir, "eager_final_state_op_function.cc")] empty_files = [os.path.join(pybind_dir, "eager_final_state_op_function.cc")]
empty_files.append(os.path.join(pybind_dir, "eager_op_function.cc")) empty_files.append(os.path.join(pybind_dir, "eager_op_function.cc"))
empty_files.append(os.path.join(pybind_dir, "op_function.cc"))
for i in range(split_count):
empty_files.append(
os.path.join(pybind_dir, "op_function" + str(i + 1) + ".cc"))
for path in empty_files: for path in empty_files:
if not os.path.exists(path): if not os.path.exists(path):
......
...@@ -64,6 +64,7 @@ limitations under the License. */ ...@@ -64,6 +64,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
std::atomic<int> VarBaseUniqueNameID{0};
PyTypeObject *g_varbase_pytype = nullptr; PyTypeObject *g_varbase_pytype = nullptr;
namespace py = ::pybind11; namespace py = ::pybind11;
...@@ -497,7 +498,14 @@ static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src, // NOLINT ...@@ -497,7 +498,14 @@ static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src, // NOLINT
void BindImperative(py::module *m_ptr) { void BindImperative(py::module *m_ptr) {
auto &m = *m_ptr; auto &m = *m_ptr;
BindOpFunctions(&m); BindOpFunctions1(&m);
BindOpFunctions2(&m);
BindOpFunctions3(&m);
BindOpFunctions4(&m);
BindOpFunctions5(&m);
BindOpFunctions6(&m);
BindOpFunctions7(&m);
BindOpFunctions8(&m);
#ifndef _WIN32 #ifndef _WIN32
// Dygraph DataLoader signal handler // Dygraph DataLoader signal handler
......
...@@ -257,7 +257,14 @@ PyObject* MakeReturnPyObject(const std::tuple<Args...>& out) { ...@@ -257,7 +257,14 @@ PyObject* MakeReturnPyObject(const std::tuple<Args...>& out) {
return result; return result;
} }
void BindOpFunctions(pybind11::module* module); void BindOpFunctions1(pybind11::module* module);
void BindOpFunctions2(pybind11::module* module);
void BindOpFunctions3(pybind11::module* module);
void BindOpFunctions4(pybind11::module* module);
void BindOpFunctions5(pybind11::module* module);
void BindOpFunctions6(pybind11::module* module);
void BindOpFunctions7(pybind11::module* module);
void BindOpFunctions8(pybind11::module* module);
} // namespace pybind } // namespace pybind
} // namespace paddle } // namespace paddle
...@@ -422,13 +422,17 @@ std::string GenerateOpFunctionsBody( ...@@ -422,13 +422,17 @@ std::string GenerateOpFunctionsBody(
return op_function_str; return op_function_str;
} }
static std::tuple<std::vector<std::string>, std::vector<std::string>> static std::vector<
GenerateOpFunctions() { std::tuple<std::vector<std::string>, std::vector<std::string>>>
GenerateOpFunctions(int split_count) {
auto& op_info_map = paddle::framework::OpInfoMap::Instance().map(); auto& op_info_map = paddle::framework::OpInfoMap::Instance().map();
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>>
result;
std::vector<std::string> op_function_list, bind_function_list; std::vector<std::string> op_function_list, bind_function_list;
auto& all_kernels = paddle::framework::OperatorWithKernel::AllOpKernels(); auto& all_kernels = paddle::framework::OperatorWithKernel::AllOpKernels();
paddle::flat_hash_map<std::string, paddle::framework::OpInfo>
op_info_map_need_gen;
for (auto& pair : op_info_map) { for (auto& pair : op_info_map) {
auto& op_info = pair.second; auto& op_info = pair.second;
auto op_proto = op_info.proto_; auto op_proto = op_info.proto_;
...@@ -444,6 +448,22 @@ GenerateOpFunctions() { ...@@ -444,6 +448,22 @@ GenerateOpFunctions() {
continue; continue;
} }
op_info_map_need_gen.emplace(pair);
}
int cc_file_api_size = op_info_map_need_gen.size() / split_count;
if (op_info_map_need_gen.size() % split_count != 0) {
cc_file_api_size++;
}
int api_index = 0;
int file_index = 0;
for (auto& pair : op_info_map_need_gen) {
auto& op_info = pair.second;
auto op_proto = op_info.proto_;
auto& op_type = op_proto->type();
// NOTE(pangyoki): Inplace Strategy. // NOTE(pangyoki): Inplace Strategy.
// In this case, output will reuse input varbase. // In this case, output will reuse input varbase.
// Dygraph mode needs to be aligned with the in-place strategy in static // Dygraph mode needs to be aligned with the in-place strategy in static
...@@ -489,13 +509,24 @@ GenerateOpFunctions() { ...@@ -489,13 +509,24 @@ GenerateOpFunctions() {
op_function_list.emplace_back(std::move(inplace_op_function_str)); op_function_list.emplace_back(std::move(inplace_op_function_str));
bind_function_list.emplace_back(std::move(inplace_bind_function_str)); bind_function_list.emplace_back(std::move(inplace_bind_function_str));
} }
api_index++;
if (api_index / cc_file_api_size > file_index) {
file_index++;
result.push_back(std::make_tuple(op_function_list, bind_function_list));
op_function_list.clear();
bind_function_list.clear();
}
} }
return std::make_tuple(op_function_list, bind_function_list);
result.push_back(std::make_tuple(op_function_list, bind_function_list));
return result;
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (argc != 2) { if (argc != 3) {
std::cerr << "argc must be 2" << std::endl; std::cerr << "argc must be 3" << std::endl;
return -1; return -1;
} }
...@@ -513,7 +544,14 @@ int main(int argc, char* argv[]) { ...@@ -513,7 +544,14 @@ int main(int argc, char* argv[]) {
"\"paddle/fluid/pybind/op_function.h\"", "\"paddle/fluid/pybind/op_function.h\"",
"<Python.h>"}; "<Python.h>"};
std::ofstream out(argv[1], std::ios::out); std::string path = argv[1];
int split_count = atoi(argv[2]);
auto op_funcs = GenerateOpFunctions(split_count);
for (size_t i = 0; i < op_funcs.size(); i++) {
std::ofstream out(path + "op_function" + std::to_string(i + 1) + ".cc.tmp",
std::ios::out);
for (auto& header : headers) { for (auto& header : headers) {
out << "#include " + header + "\n"; out << "#include " + header + "\n";
...@@ -521,20 +559,18 @@ int main(int argc, char* argv[]) { ...@@ -521,20 +559,18 @@ int main(int argc, char* argv[]) {
out << "\n\n"; out << "\n\n";
auto op_funcs = GenerateOpFunctions();
out << "namespace paddle {\n" out << "namespace paddle {\n"
<< "namespace pybind {\n\n"; << "namespace pybind {\n\n";
out << "std::atomic<int> VarBaseUniqueNameID{0};\n"; out << "extern std::atomic<int> VarBaseUniqueNameID;\n";
out << paddle::string::join_strings(std::get<0>(op_funcs), '\n'); out << paddle::string::join_strings(std::get<0>(op_funcs[i]), '\n');
out << "\n\n"; out << "\n\n";
out << "static PyMethodDef ExtestMethods[] = {\n" out << "static PyMethodDef ExtestMethods[] = {\n"
<< paddle::string::join_strings(std::get<1>(op_funcs), '\n') << paddle::string::join_strings(std::get<1>(op_funcs[i]), '\n')
<< "\n {nullptr,nullptr,0,nullptr}" << "\n {nullptr,nullptr,0,nullptr}"
<< "};\n\n"; << "};\n\n";
out << "void BindOpFunctions(pybind11::module *module) {\n" out << "void BindOpFunctions" << i + 1 << "(pybind11::module *module) {\n"
<< " auto m = module->def_submodule(\"ops\");\n" << " auto m = module->def_submodule(\"ops\");\n"
<< " if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) {\n" << " if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) {\n"
<< " PADDLE_THROW(platform::errors::Fatal (\"Add functions to " << " PADDLE_THROW(platform::errors::Fatal (\"Add functions to "
...@@ -546,6 +582,7 @@ int main(int argc, char* argv[]) { ...@@ -546,6 +582,7 @@ int main(int argc, char* argv[]) {
<< "} // namespace paddle\n"; << "} // namespace paddle\n";
out.close(); out.close();
}
#ifdef PADDLE_WITH_ASCEND_CL #ifdef PADDLE_WITH_ASCEND_CL
ge::GEFinalize(); ge::GEFinalize();
......
此差异已折叠。
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
// Declaration of the ParallelExecutor binding entry point.
//
// Takes the pybind11 module `m` by mutable reference and returns nothing.
// The definition is not part of this header.
// NOTE(review): presumably registers the ParallelExecutor-related Python
// bindings on `m` -- confirm against the matching .cc file.
// NOTE(review): the trailing NOLINT presumably silences the lint rule about
// non-const reference parameters -- confirm before removing it.
void BindParallelExecutor(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle
此差异已折叠。
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
// Declaration of the Place binding entry point.
//
// Takes the pybind11 module `m` by mutable reference and returns nothing.
// The definition is not part of this header.
// NOTE(review): presumably registers the Place-related Python bindings on
// `m` -- confirm against the matching .cc file.
// NOTE(review): the trailing NOLINT presumably silences the lint rule about
// non-const reference parameters -- confirm before removing it.
void BindPlace(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle
此差异已折叠。
此差异已折叠。
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
// Declaration of the Tensor binding entry point.
//
// Takes the pybind11 module `m` by mutable reference and returns nothing.
// The definition is not part of this header.
// NOTE(review): presumably registers the Tensor-related Python bindings on
// `m` -- confirm against the matching .cc file.
// NOTE(review): the trailing NOLINT presumably silences the lint rule about
// non-const reference parameters -- confirm before removing it.
void BindTensor(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册