Unverified commit 4baf0dbe, authored by wanghuancoder and committed by GitHub

Compilation optimization (#44242)

* Compilation optimization
Parent e9b4d0be
......@@ -6,7 +6,7 @@ cc_library(
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
final_dygraph_node
SRCS nodes.cc
DEPS ${eager_deps} ${eager_manual_nodes})
SRCS nodes.cc ${eager_manual_nodes}
DEPS ${eager_deps})
add_dependencies(final_dygraph_node eager_final_state_codegen)
endif()
......@@ -6,7 +6,7 @@ cc_library(
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
final_dygraph_function
SRCS dygraph_functions.cc
DEPS ${eager_deps} ${eager_manual_functions})
SRCS dygraph_functions.cc ${eager_manual_functions}
DEPS ${eager_deps})
add_dependencies(final_dygraph_function eager_final_state_codegen)
endif()
cc_library(
add_n_fwd_func
SRCS add_n_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(add_n_fwd_func eager_codegen)
cc_library(
conv2d_fwd_function
SRCS conv2d_fwd_function.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(conv2d_fwd_function eager_codegen)
set(eager_manual_functions
conv2d_fwd_function add_n_fwd_func
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc
PARENT_SCOPE)
cc_library(
add_n_node
SRCS add_n_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
conv2d_nodes
SRCS conv2d_nodes.cc
DEPS ${eager_deps} ${fluid_deps})
set(eager_manual_nodes
conv2d_nodes add_n_node
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc
PARENT_SCOPE)
cc_library(
fused_gate_attention_fwd_func
SRCS fused_gate_attention_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_gate_attention_fwd_func eager_codegen
copy_dygraph_forward_functions)
cc_library(
fused_feedforward_fwd_func
SRCS fused_feedforward_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_feedforward_fwd_func eager_codegen
copy_dygraph_forward_functions)
cc_library(
fused_attention_fwd_func
SRCS fused_attention_fwd_func.cc
DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
add_dependencies(fused_attention_fwd_func eager_codegen
copy_dygraph_forward_functions)
set(fluid_manual_functions
fused_gate_attention_fwd_func fused_feedforward_fwd_func
fused_attention_fwd_func
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc
PARENT_SCOPE)
cc_library(
fused_gate_attention_node
SRCS fused_gate_attention_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
fused_feedforward_node
SRCS fused_feedforward_node.cc
DEPS ${eager_deps} ${fluid_deps})
cc_library(
fused_attention_node
SRCS fused_attention_node.cc
DEPS ${eager_deps} ${fluid_deps})
set(fluid_manual_nodes
fused_gate_attention_node fused_feedforward_node fused_attention_node
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_gate_attention_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_feedforward_node.cc
${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/fluid_manual/nodes/fused_attention_node.cc
PARENT_SCOPE)
......@@ -3083,27 +3083,44 @@ static std::string ConvertCoreOpsInfosToString(
return core_ops_returns_info_init_str;
}
static std::string GenerateCoreOpsReturnsInfo() {
static std::string GenerateCoreOpsArgsInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_info = { %s };\n"
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info = { %s };\n"
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info = { %s };\n";
"core_ops_args_info = { %s };\n";
std::string core_ops_args_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_info);
std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_args_info_init_str);
return core_ops_info_str;
}
static std::string GenerateCoreOpsArgsTypeInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_args_type_info = { %s };\n";
std::string core_ops_args_type_info_init_str =
ConvertCoreOpsInfosToString(core_ops_args_type_info);
std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_args_type_info_init_str);
return core_ops_info_str;
}
static std::string GenerateCoreOpsReturnsInfo() {
const char* Core_Ops_Returns_MAP_TEMPLATE =
"std::unordered_map<std::string, std::vector<std::string>> "
"core_ops_returns_info = { %s };\n";
std::string core_ops_returns_info_init_str =
ConvertCoreOpsInfosToString(core_ops_returns_info);
std::string core_ops_info_str =
paddle::string::Sprintf(Core_Ops_Returns_MAP_TEMPLATE,
core_ops_args_info_init_str,
core_ops_args_type_info_init_str,
core_ops_returns_info_init_str);
std::string core_ops_info_str = paddle::string::Sprintf(
Core_Ops_Returns_MAP_TEMPLATE, core_ops_returns_info_init_str);
return core_ops_info_str;
}
......@@ -3252,6 +3269,12 @@ static void DygraphCodeGeneration(const std::string& output_dir,
GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_args_info.tmp.cc",
GenerateCoreOpsArgsInfo());
GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_args_type_info.tmp.cc",
GenerateCoreOpsArgsTypeInfo());
GenerateForwardDygraphFile(
output_dir + "/forwards/dygraph_forward_functions_returns_info.tmp.cc",
GenerateCoreOpsReturnsInfo());
VLOG(6) << "-------- GenerateNodeCCFile -------";
......
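For context on the refactor above: the single GenerateCoreOpsReturnsInfo() that previously emitted all three maps is split into GenerateCoreOpsArgsInfo(), GenerateCoreOpsArgsTypeInfo(), and GenerateCoreOpsReturnsInfo(), and each map is written to its own .tmp.cc file so the three translation units can be compiled independently. A minimal, hedged sketch of the shape of one generated file (the entries below are illustrative, not taken from this commit):

// Illustrative sketch of dygraph_forward_functions_args_info.cc after the split;
// the real map entries are produced by ConvertCoreOpsInfosToString(core_ops_args_info),
// and the namespace/include details follow the rest of the generated file.
#include <string>
#include <unordered_map>
#include <vector>

std::unordered_map<std::string, std::vector<std::string>> core_ops_args_info = {
    {"matmul_v2", {"X", "Y"}},        // hypothetical entry
    {"elementwise_add", {"X", "Y"}},  // hypothetical entry
};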
......@@ -96,6 +96,11 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
"nodes" + str(i + 1) + ".cc"))
empty_files.append(
os.path.join(forwards_dir, "dygraph_forward_functions_args_info.cc"))
empty_files.append(
os.path.join(forwards_dir,
"dygraph_forward_functions_args_type_info.cc"))
empty_files.append(
os.path.join(forwards_dir, "dygraph_forward_functions_returns_info.cc"))
for path in empty_files:
if not os.path.exists(path):
open(path, 'a').close()
......@@ -125,7 +130,7 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
f.write("cc_library(dygraph_node SRCS ")
for i in range(split_count):
f.write("nodes" + str(i + 1) + ".cc ")
f.write("DEPS ${eager_deps} ${fluid_deps} ${fluid_manual_nodes})\n")
f.write("${fluid_manual_nodes} DEPS ${eager_deps} ${fluid_deps})\n")
f.write("add_dependencies(dygraph_node copy_dygraph_node)")
with open(forwards_level_cmakelist_path, "w") as f:
......@@ -143,6 +148,12 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_info.cc\"\n"
)
f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_type_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_args_type_info.cc\"\n"
)
f.write(
" COMMAND ${CMAKE_COMMAND} -E copy_if_different \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_returns_info.tmp.cc\" \"${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated/forwards/dygraph_forward_functions_returns_info.cc\"\n"
)
f.write(" DEPENDS eager_codegen\n")
f.write(" VERBATIM)\n")
......@@ -150,8 +161,10 @@ def GenerateFileStructureForIntermediateDygraph(eager_dir, split_count):
for i in range(split_count):
f.write("dygraph_forward_functions" + str(i + 1) + ".cc ")
f.write("dygraph_forward_functions_args_info.cc ")
f.write("dygraph_forward_functions_args_type_info.cc ")
f.write("dygraph_forward_functions_returns_info.cc ")
f.write(
"DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} ${fluid_manual_functions})\n"
"${fluid_manual_functions} DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})\n"
)
f.write(
"add_dependencies(dygraph_function copy_dygraph_forward_functions)")
......
set(INTERPRETERCORE_DEPS
add_subdirectory(workqueue)
add_subdirectory(garbage_collector)
set(STANDALONE_EXECUTOR_SRCS
data_transfer.cc
new_executor_defs.cc
interpretercore_util.cc
event_manager.cc
stream_analyzer.cc
interpretercore.cc
standalone_executor.cc)
set(STANDALONE_EXECUTOR_DEPS
op_registry
device_context
scope
......@@ -20,62 +32,33 @@ set(INTERPRETERCORE_DEPS
variable_helper
timer
monitor
nan_inf_utils)
add_subdirectory(workqueue)
add_subdirectory(garbage_collector)
cc_library(
data_transfer
SRCS data_transfer.cc
DEPS enforce scope glog)
cc_library(
new_executor_defs
SRCS new_executor_defs.cc
DEPS enforce glog scope)
cc_library(
interpretercore_util
SRCS interpretercore_util.cc
DEPS ${INTERPRETERCORE_DEPS} workqueue new_executor_defs data_transfer)
cc_library(
event_manager
SRCS event_manager.cc
DEPS ${DEVICE_EVENT_LIBS} glog new_executor_defs)
cc_library(
stream_analyzer
SRCS stream_analyzer.cc
DEPS ${DEVICE_EVENT_LIBS} glog device_context new_executor_defs)
nan_inf_utils
enforce
scope
glog
enforce
glog
scope
workqueue
interpretercore_event_garbage_collector
${DEVICE_EVENT_LIBS}
glog)
if(WITH_GPU OR WITH_ROCM)
cc_library(
interpretercore
SRCS interpretercore.cc
DEPS workqueue
${DEVICE_EVENT_LIBS}
interpretercore_util
interpretercore_event_garbage_collector
interpretercore_fast_garbage_collector
stream_analyzer
event_manager)
else()
cc_library(
interpretercore
SRCS interpretercore.cc
DEPS workqueue ${DEVICE_EVENT_LIBS} interpretercore_util
interpretercore_event_garbage_collector stream_analyzer event_manager)
set(STANDALONE_EXECUTOR_DEPS ${STANDALONE_EXECUTOR_DEPS}
interpretercore_fast_garbage_collector)
endif()
cc_library(
standalone_executor
SRCS standalone_executor.cc
DEPS interpretercore)
SRCS ${STANDALONE_EXECUTOR_SRCS}
DEPS ${STANDALONE_EXECUTOR_DEPS})
cc_library(
staticgraph_executor_statistics
SRCS executor_statistics.cc
DEPS enforce glog os_info)
# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
# skip win32 since wget is not installed by default on windows machine.
if(WITH_GPU
AND WITH_TESTING
......@@ -120,13 +103,7 @@ if(WITH_GPU
cc_test(
standalone_executor_test
SRCS standalone_executor_test.cc
DEPS interpretercore
standalone_executor
operator
op_registry
executor
${OPS}
${OP_DEPS})
DEPS standalone_executor operator op_registry executor ${OPS} ${OP_DEPS})
set_tests_properties(standalone_executor_test PROPERTIES TIMEOUT 100)
add_dependencies(standalone_executor_test download_program)
......
......@@ -5,7 +5,7 @@ cc_library(
cc_library(
var_helper
SRCS var_helper.cc
DEPS tensor phi_api)
DEPS tensor selected_rows)
if(WITH_XPU)
cc_library(
prepared_operator
......@@ -20,8 +20,8 @@ if(WITH_XPU)
op_kernel_type
data_transform
nan_inf_utils
phi_api
phi_utils
scalar
int_array
var_helper
profiler)
else()
......@@ -37,21 +37,16 @@ else()
op_kernel_type
data_transform
nan_inf_utils
phi_api
phi_utils
scalar
int_array
var_helper
profiler)
endif()
cc_library(
layer
SRCS layer.cc
DEPS prepared_operator
math_function
imperative_flag
variable_helper
op_registry
var_helper
phi_api)
DEPS prepared_operator math_function imperative_flag variable_helper
op_registry var_helper)
add_subdirectory(jit)
if(WITH_GPU)
cc_library(
......
......@@ -101,7 +101,7 @@ else()
cc_library(gather_scatter_kernel SRCS gather_scatter_kernel.cc gather_scatter_kernel.cu DEPS tensor)
endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_api_utils gather_scatter_kernel)
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_api_utils gather_scatter_kernel backward_infermeta)
register_operators(EXCLUDES py_layer_op py_func_op warpctc_op dgc_op load_combine_op lstm_op run_program_op eye_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op spectral_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
......
......@@ -10,4 +10,4 @@ nv_library(
nv_test(
cudnn_helper_test
SRCS cudnn_helper_test.cc
DEPS dynload_cuda phi)
DEPS dynload_cuda)
pybind.h
op_function.cc
op_function1.cc
op_function2.cc
op_function3.cc
op_function4.cc
op_function5.cc
op_function6.cc
op_function7.cc
op_function8.cc
eager_op_function.cc
eager_final_state_op_function.cc
......@@ -102,13 +102,16 @@ endif()
set(PYBIND_SRCS
pybind.cc
imperative.cc
op_function.cc
inference_api.cc
ir.cc
bind_fleet_executor.cc
reader_py.cc
protobuf.cc
exception.cc
op_function_common.cc
parallel_executor.cc
tensor.cc
place.cc
const_value.cc
global_value_getter_setter.cc
fleet_wrapper_py.cc
......@@ -124,13 +127,15 @@ set(PYBIND_SRCS
generator_py.cc
communication.cc
cuda_streams_py.cc
jit.cc)
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/generate_file_structures.py"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/")
jit.cc
op_function1.cc
op_function2.cc
op_function3.cc
op_function4.cc
op_function5.cc
op_function6.cc
op_function7.cc
op_function8.cc)
if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi)
......@@ -267,12 +272,35 @@ if(WITH_PYTHON)
target_link_libraries(kernel_signature_generator ${ROCM_HIPRTC_LIB})
endif()
set(impl_file ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function.cc)
set(tmp_impl_file ${impl_file}.tmp)
set(op_function_output_path ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/)
set(impl_file1 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function1.cc)
set(tmp_impl_file1 ${impl_file1}.tmp)
set(impl_file2 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function2.cc)
set(tmp_impl_file2 ${impl_file2}.tmp)
set(impl_file3 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function3.cc)
set(tmp_impl_file3 ${impl_file3}.tmp)
set(impl_file4 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function4.cc)
set(tmp_impl_file4 ${impl_file4}.tmp)
set(impl_file5 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function5.cc)
set(tmp_impl_file5 ${impl_file5}.tmp)
set(impl_file6 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function6.cc)
set(tmp_impl_file6 ${impl_file6}.tmp)
set(impl_file7 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function7.cc)
set(tmp_impl_file7 ${impl_file7}.tmp)
set(impl_file8 ${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/op_function8.cc)
set(tmp_impl_file8 ${impl_file8}.tmp)
set(CODE_GEN_SPLIT_FILE_COUNT "8")
set(eager_impl_file
${CMAKE_SOURCE_DIR}/paddle/fluid/pybind/eager_op_function.cc)
set(tmp_eager_impl_file ${eager_impl_file}.tmp)
execute_process(
COMMAND
"${PYTHON_EXECUTABLE}"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/generate_file_structures.py"
"${PADDLE_SOURCE_DIR}/paddle/fluid/pybind/"
"${CODE_GEN_SPLIT_FILE_COUNT}")
set(OP_IMPL_DEPS op_function_generator)
set(EAGER_OP_IMPL_DEPS eager_op_function_generator
eager_final_state_python_c_codegen)
......@@ -292,7 +320,7 @@ if(WITH_PYTHON)
":retry\n"
"ECHO op_function_generator run %build_times% time\n"
"taskkill /f /im op_function_generator.exe 2>NUL\n"
"${op_impl_path}/op_function_generator.exe ${tmp_impl_file}\n"
"${op_impl_path}/op_function_generator.exe ${op_function_output_path} ${CODE_GEN_SPLIT_FILE_COUNT}\n"
"if %ERRORLEVEL% NEQ 0 (\n"
" set /a build_times=%build_times%+1\n"
" if %build_times% GEQ 10 (\n"
......@@ -367,12 +395,33 @@ if(WITH_PYTHON)
endif()
add_custom_command(
OUTPUT ${impl_file}
OUTPUT op_function
COMMAND
${CMAKE_BINARY_DIR}/paddle/fluid/pybind/op_function_generator_retry.bat
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file}
${impl_file}
COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file1}
${impl_file1}
COMMENT "copy_if_different ${tmp_impl_file1} to ${impl_file1}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file2}
${impl_file2}
COMMENT "copy_if_different ${tmp_impl_file2} to ${impl_file2}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file3}
${impl_file3}
COMMENT "copy_if_different ${tmp_impl_file3} to ${impl_file3}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file4}
${impl_file4}
COMMENT "copy_if_different ${tmp_impl_file4} to ${impl_file4}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file5}
${impl_file5}
COMMENT "copy_if_different ${tmp_impl_file5} to ${impl_file5}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file6}
${impl_file6}
COMMENT "copy_if_different ${tmp_impl_file6} to ${impl_file6}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file7}
${impl_file7}
COMMENT "copy_if_different ${tmp_impl_file7} to ${impl_file7}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file8}
${impl_file8}
COMMENT "copy_if_different ${tmp_impl_file8} to ${impl_file8}"
DEPENDS ${OP_IMPL_DEPS})
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_custom_command(
......@@ -431,13 +480,35 @@ if(WITH_PYTHON)
list(APPEND EAGER_OP_IMPL_DEPS ${CMAKE_CURRENT_BINARY_DIR}/libdnnl.so.0)
endif()
add_custom_command(
OUTPUT ${impl_file}
OUTPUT op_function
COMMAND
${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:."
"${CMAKE_CURRENT_BINARY_DIR}/op_function_generator" "${tmp_impl_file}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file}
${impl_file}
COMMENT "copy_if_different ${tmp_impl_file} to ${impl_file}"
"${CMAKE_CURRENT_BINARY_DIR}/op_function_generator"
"${op_function_output_path}" "${CODE_GEN_SPLIT_FILE_COUNT}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file1}
${impl_file1}
COMMENT "copy_if_different ${tmp_impl_file1} to ${impl_file1}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file2}
${impl_file2}
COMMENT "copy_if_different ${tmp_impl_file2} to ${impl_file2}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file3}
${impl_file3}
COMMENT "copy_if_different ${tmp_impl_file3} to ${impl_file3}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file4}
${impl_file4}
COMMENT "copy_if_different ${tmp_impl_file4} to ${impl_file4}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file5}
${impl_file5}
COMMENT "copy_if_different ${tmp_impl_file5} to ${impl_file5}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file6}
${impl_file6}
COMMENT "copy_if_different ${tmp_impl_file6} to ${impl_file6}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file7}
${impl_file7}
COMMENT "copy_if_different ${tmp_impl_file7} to ${impl_file7}"
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_impl_file8}
${impl_file8}
COMMENT "copy_if_different ${tmp_impl_file8} to ${impl_file8}"
DEPENDS ${OP_IMPL_DEPS}
VERBATIM)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
......@@ -454,19 +525,13 @@ if(WITH_PYTHON)
VERBATIM)
endif()
endif()
add_custom_target(op_function_generator_cmd ALL DEPENDS ${impl_file})
add_custom_target(op_function_generator_cmd ALL DEPENDS op_function)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
add_custom_target(eager_op_function_generator_cmd ALL
DEPENDS ${eager_impl_file})
endif()
list(APPEND PYBIND_DEPS interpretercore standalone_executor
staticgraph_executor_statistics)
cc_library(
op_function_common
SRCS op_function_common.cc
DEPS ${PYBIND_DEPS})
list(APPEND PYBIND_DEPS op_function_common)
list(APPEND PYBIND_DEPS standalone_executor staticgraph_executor_statistics)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
set(PYBIND_SRCS eager.cc ${PYBIND_SRCS})
......@@ -482,7 +547,6 @@ if(WITH_PYTHON)
list(APPEND PYBIND_DEPS backward)
list(APPEND PYBIND_DEPS grad_node_info)
list(APPEND PYBIND_DEPS phi)
list(APPEND PYBIND_DEPS op_function_common)
list(APPEND PYBIND_DEPS final_dygraph_function)
list(APPEND PYBIND_DEPS final_dygraph_node)
list(APPEND PYBIND_DEPS dygraph_function)
......
......@@ -16,12 +16,16 @@ import sys
import os
if __name__ == "__main__":
assert len(sys.argv) == 2
assert len(sys.argv) == 3
pybind_dir = sys.argv[1]
split_count = int(sys.argv[2])
empty_files = [os.path.join(pybind_dir, "eager_final_state_op_function.cc")]
empty_files.append(os.path.join(pybind_dir, "eager_op_function.cc"))
empty_files.append(os.path.join(pybind_dir, "op_function.cc"))
for i in range(split_count):
empty_files.append(
os.path.join(pybind_dir, "op_function" + str(i + 1) + ".cc"))
for path in empty_files:
if not os.path.exists(path):
......
......@@ -64,6 +64,7 @@ limitations under the License. */
namespace paddle {
namespace pybind {
std::atomic<int> VarBaseUniqueNameID{0};
PyTypeObject *g_varbase_pytype = nullptr;
namespace py = ::pybind11;
......@@ -497,7 +498,14 @@ static void VarBaseCopy(std::shared_ptr<imperative::VarBase> &src, // NOLINT
void BindImperative(py::module *m_ptr) {
auto &m = *m_ptr;
BindOpFunctions(&m);
BindOpFunctions1(&m);
BindOpFunctions2(&m);
BindOpFunctions3(&m);
BindOpFunctions4(&m);
BindOpFunctions5(&m);
BindOpFunctions6(&m);
BindOpFunctions7(&m);
BindOpFunctions8(&m);
#ifndef _WIN32
// Dygraph DataLoader signal handler
......
......@@ -257,7 +257,14 @@ PyObject* MakeReturnPyObject(const std::tuple<Args...>& out) {
return result;
}
void BindOpFunctions(pybind11::module* module);
void BindOpFunctions1(pybind11::module* module);
void BindOpFunctions2(pybind11::module* module);
void BindOpFunctions3(pybind11::module* module);
void BindOpFunctions4(pybind11::module* module);
void BindOpFunctions5(pybind11::module* module);
void BindOpFunctions6(pybind11::module* module);
void BindOpFunctions7(pybind11::module* module);
void BindOpFunctions8(pybind11::module* module);
} // namespace pybind
} // namespace paddle
......@@ -422,13 +422,17 @@ std::string GenerateOpFunctionsBody(
return op_function_str;
}
static std::tuple<std::vector<std::string>, std::vector<std::string>>
GenerateOpFunctions() {
static std::vector<
std::tuple<std::vector<std::string>, std::vector<std::string>>>
GenerateOpFunctions(int split_count) {
auto& op_info_map = paddle::framework::OpInfoMap::Instance().map();
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>>
result;
std::vector<std::string> op_function_list, bind_function_list;
auto& all_kernels = paddle::framework::OperatorWithKernel::AllOpKernels();
paddle::flat_hash_map<std::string, paddle::framework::OpInfo>
op_info_map_need_gen;
for (auto& pair : op_info_map) {
auto& op_info = pair.second;
auto op_proto = op_info.proto_;
......@@ -444,6 +448,22 @@ GenerateOpFunctions() {
continue;
}
op_info_map_need_gen.emplace(pair);
}
int cc_file_api_size = op_info_map_need_gen.size() / split_count;
if (op_info_map_need_gen.size() % split_count != 0) {
cc_file_api_size++;
}
int api_index = 0;
int file_index = 0;
for (auto& pair : op_info_map_need_gen) {
auto& op_info = pair.second;
auto op_proto = op_info.proto_;
auto& op_type = op_proto->type();
// NOTE(pangyoki): Inplace Strategy.
// In this case, output will reuse input varbase.
// Dygraph mode needs to be aligned with the in-place strategy in static
......@@ -489,13 +509,24 @@ GenerateOpFunctions() {
op_function_list.emplace_back(std::move(inplace_op_function_str));
bind_function_list.emplace_back(std::move(inplace_bind_function_str));
}
api_index++;
if (api_index / cc_file_api_size > file_index) {
file_index++;
result.push_back(std::make_tuple(op_function_list, bind_function_list));
op_function_list.clear();
bind_function_list.clear();
}
}
return std::make_tuple(op_function_list, bind_function_list);
result.push_back(std::make_tuple(op_function_list, bind_function_list));
return result;
}
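To make the split arithmetic above concrete, here is a small self-contained sketch (not part of the commit) using illustrative numbers; it mirrors the ceiling division and bucket flushing done by GenerateOpFunctions(split_count):

#include <iostream>

int main() {
  // Illustrative numbers only: suppose 900 ops need generated bindings and
  // the build requests 8 output files (CODE_GEN_SPLIT_FILE_COUNT).
  const int num_ops = 900;
  const int split_count = 8;

  // Same ceiling division as GenerateOpFunctions() above.
  int cc_file_api_size = num_ops / split_count;
  if (num_ops % split_count != 0) {
    cc_file_api_size++;
  }
  std::cout << "ops per file (at most): " << cc_file_api_size << "\n";  // 113

  // Ops are appended in order; whenever the running count crosses a multiple
  // of cc_file_api_size, the current bucket is flushed to op_function<N>.cc
  // and a new bucket is started. The remainder forms the last file.
  int file_index = 0;
  for (int api_index = 1; api_index <= num_ops; ++api_index) {
    if (api_index / cc_file_api_size > file_index) {
      ++file_index;  // bucket flushed: op_function<file_index>.cc
    }
  }
  std::cout << "files produced: " << file_index + 1 << "\n";  // 8
  return 0;
}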
int main(int argc, char* argv[]) {
if (argc != 2) {
std::cerr << "argc must be 2" << std::endl;
if (argc != 3) {
std::cerr << "argc must be 3" << std::endl;
return -1;
}
......@@ -513,7 +544,14 @@ int main(int argc, char* argv[]) {
"\"paddle/fluid/pybind/op_function.h\"",
"<Python.h>"};
std::ofstream out(argv[1], std::ios::out);
std::string path = argv[1];
int split_count = atoi(argv[2]);
auto op_funcs = GenerateOpFunctions(split_count);
for (size_t i = 0; i < op_funcs.size(); i++) {
std::ofstream out(path + "op_function" + std::to_string(i + 1) + ".cc.tmp",
std::ios::out);
for (auto& header : headers) {
out << "#include " + header + "\n";
......@@ -521,20 +559,18 @@ int main(int argc, char* argv[]) {
out << "\n\n";
auto op_funcs = GenerateOpFunctions();
out << "namespace paddle {\n"
<< "namespace pybind {\n\n";
out << "std::atomic<int> VarBaseUniqueNameID{0};\n";
out << paddle::string::join_strings(std::get<0>(op_funcs), '\n');
out << "extern std::atomic<int> VarBaseUniqueNameID;\n";
out << paddle::string::join_strings(std::get<0>(op_funcs[i]), '\n');
out << "\n\n";
out << "static PyMethodDef ExtestMethods[] = {\n"
<< paddle::string::join_strings(std::get<1>(op_funcs), '\n')
<< paddle::string::join_strings(std::get<1>(op_funcs[i]), '\n')
<< "\n {nullptr,nullptr,0,nullptr}"
<< "};\n\n";
out << "void BindOpFunctions(pybind11::module *module) {\n"
out << "void BindOpFunctions" << i + 1 << "(pybind11::module *module) {\n"
<< " auto m = module->def_submodule(\"ops\");\n"
<< " if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) {\n"
<< " PADDLE_THROW(platform::errors::Fatal (\"Add functions to "
......@@ -546,6 +582,7 @@ int main(int argc, char* argv[]) {
<< "} // namespace paddle\n";
out.close();
}
#ifdef PADDLE_WITH_ASCEND_CL
ge::GEFinalize();
......
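Putting the emitted strings together, each generated op_function<N>.cc.tmp ends up with roughly the skeleton below. This is a hedged reconstruction from the generator's format strings in this diff; the actual generated op wrappers, method-table entries, extra headers, and the exact error message are omitted or abbreviated:

// Rough skeleton of a generated op_function<N>.cc (illustrative reconstruction).
#include "paddle/fluid/pybind/op_function.h"
#include <Python.h>
// ... further headers emitted by the generator ...

namespace paddle {
namespace pybind {

extern std::atomic<int> VarBaseUniqueNameID;

// ... generated op wrapper functions for this split ...

static PyMethodDef ExtestMethods[] = {
    // ... generated method-table entries for this split ...
    {nullptr, nullptr, 0, nullptr}};

void BindOpFunctions1(pybind11::module *module) {  // N = 1..split_count
  auto m = module->def_submodule("ops");
  if (PyModule_AddFunctions(m.ptr(), ExtestMethods) < 0) {
    // Error handling as emitted by the generator (message truncated in this diff).
    PADDLE_THROW(platform::errors::Fatal("Add functions to ..."));
  }
}

}  // namespace pybind
}  // namespace paddle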
This diff is collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
void BindParallelExecutor(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle
This diff is collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
void BindPlace(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle
This diff is collapsed.
This diff is collapsed.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "pybind11/pybind11.h"
namespace paddle {
namespace pybind {
void BindTensor(pybind11::module& m); // NOLINT
} // namespace pybind
} // namespace paddle