未验证 提交 1b5e1e81 编写于 作者: R risemeup1 提交者: GitHub

Fix c++17 bug (#54228)

* “update”

* update

* update

* update

* update

* test

* update

* test

* fix_c++17_bug

* fix coverage compile error

* test

* test

* test

* fix C++17 error

* fix c++17 error

* fix c++17 error

* test

* test

* test

* test

* fix cinn compile error

* compile to compiler

* set cinn c++14

---------
Co-authored-by: 欧拉传人's avatarhuangjiyi <947613776@qq.com>
Co-authored-by: Nhuangjiyi <43315610+huangjiyi@users.noreply.github.com>
上级 e32c4375
......@@ -295,7 +295,7 @@ message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD 17)
# (Note) On Windows, if /W[1-4] is deleted, /W1 is added by default and conflicts with -w.
# So replace /W[1-4] with /W0.
......
......@@ -38,7 +38,7 @@ if(NOT WIN32)
if(WITH_CINN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
endif()
else()
set(CMAKE_CXX_STANDARD 17)
......
......@@ -25,66 +25,35 @@ cc_library(
task_loop_thread_pool
SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc
DEPS enforce glog)
# Define the fleet_executor library. Both branches compile the same list of
# sources; they differ only in DEPS: the XPU/ROCm branch additionally links
# naive_executor and lists executor_gc_helper before op_registry.
if(WITH_XPU OR WITH_ROCM)
cc_library(
fleet_executor
SRCS fleet_executor.cc
carrier.cc
task_node.cc
runtime_graph.cc
dist_model.cc
interceptor.cc
compute_interceptor.cc
amplifier_interceptor.cc
cond_interceptor.cc
start_interceptor.cc
source_interceptor.cc
sink_interceptor.cc
message_service.cc
message_bus.cc
dist_model_tensor_wrapper.cc
DEPS naive_executor
proto_desc
standalone_executor
fleet_executor_desc_proto
interceptor_message_proto
task_loop_thread_pool
collective_helper
executor_gc_helper
op_registry
phi
glog
${BRPC_DEPS})
else()
# Every other configuration: same sources, no naive_executor dependency.
cc_library(
fleet_executor
SRCS fleet_executor.cc
carrier.cc
task_node.cc
runtime_graph.cc
dist_model.cc
interceptor.cc
compute_interceptor.cc
amplifier_interceptor.cc
cond_interceptor.cc
start_interceptor.cc
source_interceptor.cc
sink_interceptor.cc
message_service.cc
message_bus.cc
dist_model_tensor_wrapper.cc
DEPS proto_desc
standalone_executor
fleet_executor_desc_proto
interceptor_message_proto
task_loop_thread_pool
collective_helper
op_registry
executor_gc_helper
phi
glog
${BRPC_DEPS})
endif()
# fleet_executor library: a single definition with no platform condition.
# naive_executor is always part of DEPS, and ${BRPC_DEPS} is appended last.
cc_library(
  fleet_executor
  SRCS fleet_executor.cc
       carrier.cc
       task_node.cc
       runtime_graph.cc
       dist_model.cc
       interceptor.cc
       compute_interceptor.cc
       amplifier_interceptor.cc
       cond_interceptor.cc
       start_interceptor.cc
       source_interceptor.cc
       sink_interceptor.cc
       message_service.cc
       message_bus.cc
       dist_model_tensor_wrapper.cc
  DEPS naive_executor
       proto_desc
       standalone_executor
       fleet_executor_desc_proto
       interceptor_message_proto
       task_loop_thread_pool
       collective_helper
       executor_gc_helper
       op_registry
       phi
       glog
       ${BRPC_DEPS})
if(WITH_DISTRIBUTE)
set(DISTRIBUTE_COMPILE_FLAGS
"-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor"
......
......@@ -729,8 +729,7 @@ if(WITH_DISTRIBUTE)
section_worker.cc
device_worker_factory.cc
data_set.cc
DEPS fleet_executor
fleet_wrapper
DEPS fleet_wrapper
recurrent_op_helper
op_registry
device_context
......@@ -837,7 +836,6 @@ if(WITH_DISTRIBUTE)
fleet
heter_server
brpc
fleet_executor
phi)
set(DISTRIBUTE_COMPILE_FLAGS "")
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
......@@ -907,8 +905,7 @@ if(WITH_DISTRIBUTE)
graph_to_program_pass
variable_helper
timer
monitor
fleet_executor)
monitor)
endif()
elseif(WITH_PSLIB)
set(DISTRIBUTE_COMPILE_FLAGS "")
......@@ -969,7 +966,6 @@ elseif(WITH_PSLIB)
variable_helper
timer
monitor
fleet_executor
${BRPC_DEP})
else()
cc_library(
......@@ -1017,8 +1013,7 @@ else()
graph_to_program_pass
variable_helper
timer
monitor
fleet_executor)
monitor)
endif()
target_link_libraries(executor while_op_helper executor_gc_helper
......
......@@ -40,6 +40,7 @@ cc_library(
cinn_graph_symbolization
cinn
cinn_launch_context
parallel_executor
python
pybind)
......@@ -56,6 +57,7 @@ if(WITH_TESTING)
SRCS
build_cinn_pass_test.cc
DEPS
fleet_executor
build_cinn_pass
cinn_compiler
op_registry
......@@ -72,6 +74,7 @@ if(WITH_TESTING)
SRCS
cinn_zero_tensor_trick_pass_test.cc
DEPS
fleet_executor
build_cinn_pass
cinn_compiler
op_registry
......
......@@ -80,14 +80,20 @@ if(WITH_ONNXRUNTIME)
infer_io_utils
model_utils
onnxruntime
paddle2onnx)
paddle2onnx
fleet_executor)
else()
cc_library(
analysis_predictor
SRCS analysis_predictor.cc resource_manager.cc infer_context.cc
${mkldnn_quantizer_src}
DEPS ${inference_deps} zero_copy_tensor ir_pass_manager op_compatible_info
infer_io_utils model_utils)
DEPS ${inference_deps}
zero_copy_tensor
ir_pass_manager
op_compatible_info
infer_io_utils
model_utils
fleet_executor)
endif()
if(WITH_ONNXRUNTIME AND WIN32)
......
......@@ -28,10 +28,23 @@ nv_test(
test_tensorrt
SRCS test_tensorrt.cc
DEPS dynload_cuda device_context dynamic_loader)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
DEPS dynload_cuda tensorrt_engine tensorrt_plugin)
# test_tensorrt_engine is registered once; only its dependency list varies.
#   * Windows:       dynload_cuda tensorrt_engine tensorrt_plugin
#   * non-Windows:   fleet_executor is put in front and python is appended
#   * non-Windows with WITH_CINN: cinn_compiler is also linked, directly
#     after fleet_executor
# The list is assembled so that the resulting order is exactly the order the
# three separate nv_test() calls used before.
set(test_tensorrt_engine_deps dynload_cuda tensorrt_engine tensorrt_plugin)
if(NOT WIN32)
  if(WITH_CINN)
    set(test_tensorrt_engine_deps cinn_compiler ${test_tensorrt_engine_deps})
  endif()
  set(test_tensorrt_engine_deps fleet_executor ${test_tensorrt_engine_deps}
                                python)
endif()
nv_test(
  test_tensorrt_engine
  SRCS test_engine.cc test_dynamic_engine.cc
  DEPS ${test_tensorrt_engine_deps})
nv_test(
test_arg_mapping_context
SRCS test_arg_mapping_context.cc
......
......@@ -137,11 +137,35 @@ nv_test(
SRCS test_op_converter.cc
DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine
tensorrt_converter)
nv_test(
test_custom_plugin_creater
SRCS test_custom_plugin_creater.cc
DEPS paddle_framework tensorrt_converter phi custom_operator init_phi)
# test_custom_plugin_creater is registered once; only its dependency list
# varies.
#   * Windows:       the five base dependencies below
#   * non-Windows:   fleet_executor and python are appended
#   * non-Windows with WITH_CINN: cinn_compiler is inserted between
#     fleet_executor and python
# Appending in this sequence reproduces the order used by the three separate
# nv_test() calls this replaces.
set(test_custom_plugin_creater_deps paddle_framework tensorrt_converter phi
                                    custom_operator init_phi)
if(NOT WIN32)
  list(APPEND test_custom_plugin_creater_deps fleet_executor)
  if(WITH_CINN)
    list(APPEND test_custom_plugin_creater_deps cinn_compiler)
  endif()
  list(APPEND test_custom_plugin_creater_deps python)
endif()
nv_test(
  test_custom_plugin_creater
  SRCS test_custom_plugin_creater.cc
  DEPS ${test_custom_plugin_creater_deps})
if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will
......
......@@ -11,7 +11,15 @@ cc_library(
model_utils
SRCS model_utils.cc
DEPS proto_desc enforce)
cc_test_old(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)
# Unit test for infer_io_utils; fleet_executor and python are linked in
# addition to the library under test.
cc_test_old(
  infer_io_utils_tester
  SRCS io_utils_tester.cc
  DEPS infer_io_utils fleet_executor python)
if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will
......
......@@ -93,7 +93,7 @@ endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func)
register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS} processgroup_comm_utils)
op_library(generated_op UNITY SRCS generated_op1.cc generated_op2.cc generated_op3.cc generated_op4.cc DEPS ${OP_HEADER_DEPS})
op_library(run_program_op DEPS executor_cache ${OP_HEADER_DEPS})
......@@ -119,9 +119,9 @@ else()
endif()
if (WITH_GPU OR WITH_ROCM)
op_library(sync_batch_norm_op)
op_library(sync_batch_norm_op DEPS processgroup_comm_utils)
if ((NOT WIN32) AND (NOT WITH_ROCM) AND (NOT PADDLE_WITH_ARM) AND (NOT ${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 11.3) )
op_library(sparse_attention_op)
op_library(sparse_attention_op DEPS processgroup_comm_utils)
endif()
endif()
......
op_library(tensorrt_engine_op DEPS tensorrt_engine tensorrt_converter
infer_io_utils analysis_helper)
nv_test(
test_tensorrt_engine_op
SRCS tensorrt_engine_op_test.cc
DEPS tensorrt_engine_op analysis)
# Dependencies of the tensorrt_engine_op operator library.
set(tensorrt_engine_op_deps tensorrt_engine tensorrt_converter infer_io_utils
                            analysis_helper)
op_library(tensorrt_engine_op DEPS ${tensorrt_engine_op_deps})

# test_tensorrt_engine_op is registered once. The Windows and non-Windows
# variants differ only in that python is appended on non-Windows platforms.
set(test_tensorrt_engine_op_deps tensorrt_engine_op analysis fleet_executor)
if(NOT WIN32)
  list(APPEND test_tensorrt_engine_op_deps python)
endif()
nv_test(
  test_tensorrt_engine_op
  SRCS tensorrt_engine_op_test.cc
  DEPS ${test_tensorrt_engine_op_deps})
if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will
......
......@@ -3751,7 +3751,6 @@ EOF
ccache -z
cd ..
if [ "${PYTHON_EXECUTABLE}" != "" ];then
if [ "$SYSTEM" == "Darwin" ]; then
${PYTHON_EXECUTABLE} setup.py $2 --plat-name=macosx_10_9_x86_64;build_error=$?
......@@ -3767,7 +3766,7 @@ EOF
fi
# ci will collect ccache hit rate
collect_ccache_hits
if [ "$build_error" != 0 ];then
exit 7;
fi
......
cc_test_old(test_egr_ds_eager_tensor SRCS eager_tensor_test.cc DEPS
${eager_deps})
cc_test_old(test_egr_ds_auotgrad_meta SRCS autograd_meta_test.cc DEPS
${eager_deps})
# Eager data-structure tests. Both link fleet_executor and
# final_dygraph_function ahead of ${eager_deps}, with python last.
cc_test_old(
  test_egr_ds_eager_tensor
  SRCS eager_tensor_test.cc
  DEPS fleet_executor final_dygraph_function ${eager_deps} python)

cc_test_old(
  test_egr_ds_auotgrad_meta
  SRCS autograd_meta_test.cc
  DEPS fleet_executor final_dygraph_function ${eager_deps} python)
if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
if(WITH_CINN)
set(eager_deps ${eager_deps} cinn_compiler python)
endif()
cc_test_old(
test_egr_ds_grad_tensor_holder
SRCS
grad_tensor_holder_test.cc
DEPS
fleet_executor
conditional_block_op
${eager_deps}
${generated_deps})
......@@ -17,6 +35,7 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
SRCS
grad_node_info_test.cc
DEPS
fleet_executor
conditional_block_op
${eager_deps}
${generated_deps})
......@@ -25,6 +44,7 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
SRCS
accumulation_node_test.cc
DEPS
fleet_executor
conditional_block_op
${eager_deps}
${generated_deps})
......@@ -33,6 +53,7 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
SRCS
tensor_wrapper_test.cc
DEPS
fleet_executor
conditional_block_op
${eager_deps}
${generated_deps})
......
if(NOT (NOT WITH_PYTHON AND ON_INFER))
if(WITH_CINN)
set(eager_deps ${eager_deps} cinn_compiler python)
endif()
cc_library(
performance_benchmark_utils
SRCS benchmark_utils.cc
......@@ -17,6 +20,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
SRCS
benchmark_eager_cpu.cc
DEPS
fleet_executor
conditional_block_op
performance_benchmark_utils
${eager_deps}
......@@ -26,6 +30,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
SRCS
benchmark_fluid_cpu.cc
DEPS
fleet_executor
dygraph_function
performance_benchmark_utils
${eager_deps}
......@@ -36,17 +41,19 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
SRCS
benchmark_eager_cuda.cc
DEPS
fleet_executor
conditional_block_op
performance_benchmark_utils
${eager_deps}
${fluid_deps})
${fluid_deps}
fleet_executor)
cc_test_old(
test_egr_performance_benchmark_fluid_cuda
SRCS
benchmark_fluid_cuda.cc
DEPS
fleet_executor
conditional_block_op
standalone_executor
performance_benchmark_utils
dygraph_function
${eager_deps}
......
......@@ -67,7 +67,9 @@ cc_test(
save_load_combine_op_test
SRCS save_load_combine_op_test.cc
DEPS save_combine_op load_combine_op)
if(WITH_CINN)
set(CINN_DEPS cinn_compiler python)
endif()
if(WITH_GPU)
nv_test(
dropout_op_test
......@@ -81,7 +83,7 @@ if(WITH_GPU)
nv_test(
feed_forward_test
SRCS feed_forward_test.cu
DEPS elementwise_add_op matmul_op tensor phi)
DEPS fleet_executor elementwise_add_op matmul_op tensor phi ${CINN_DEPS})
elseif(WITH_ROCM)
hip_test(
dropout_op_test
......@@ -103,16 +105,33 @@ cc_test(
SRCS share_buffer_op_test.cc
DEPS lod_tensor device_context generated_static_op)
cc_test_old(
op_debug_string_test
SRCS
op_debug_string_test.cc
DEPS
executor
recurrent_op_helper
recurrent_op
elementwise_add_op
${COMMON_OP_DEPS})
# op_debug_string_test: the CINN variant links cinn_compiler (right after
# fleet_executor) and python (last) on top of the common dependencies.
if(NOT WITH_CINN)
  cc_test_old(
    op_debug_string_test
    SRCS op_debug_string_test.cc
    DEPS executor
         fleet_executor
         recurrent_op_helper
         recurrent_op
         elementwise_add_op
         ${COMMON_OP_DEPS})
else()
  cc_test_old(
    op_debug_string_test
    SRCS op_debug_string_test.cc
    DEPS executor
         fleet_executor
         cinn_compiler
         recurrent_op_helper
         recurrent_op
         elementwise_add_op
         ${COMMON_OP_DEPS}
         python)
endif()
if(WITH_GPU)
cc_test(
......
......@@ -4,6 +4,7 @@ if(WITH_TESTING)
SRCS
cinn_launch_context_test.cc
DEPS
fleet_executor
phi
lod_tensor
scope
......
if(WITH_CINN)
set(CINN_DEPS cinn_compiler python)
endif()
if(WITH_GPU OR WITH_ROCM)
# fusion_group
if(NOT APPLE AND NOT WIN32)
......@@ -30,13 +33,15 @@ if(WITH_GPU OR WITH_ROCM)
nv_test(
test_fused_layernorm_residual_dropout_bias
SRCS fused_layernorm_residual_dropout_bias_test.cu
DEPS tensor
DEPS fleet_executor
tensor
op_registry
dropout_op
generated_op
device_context
phi
memory)
memory
${CINN_DEPS})
endif()
# resnet_unit needs cudnn 8.0 above
if((NOT WITH_ROCM) AND (NOT ${CUDNN_VERSION} VERSION_LESS 8000))
......
cc_test_old(test_lite_engine_op SRCS lite_engine_op_test.cc DEPS lite_engine_op
analysis)
# Test for lite_engine_op; fleet_executor is linked ahead of the operator
# library and analysis.
cc_test_old(
  test_lite_engine_op
  SRCS lite_engine_op_test.cc
  DEPS fleet_executor lite_engine_op analysis)
......@@ -29,26 +29,54 @@ cc_test(
test_mkldnn_caching
SRCS test_mkldnn_caching.cc
DEPS ${TEST_MKLDNN_CACHING_DEPS})
cc_test_old(
test_mkldnn_op_nhwc
SRCS
test_mkldnn_op_nhwc.cc
DEPS
conditional_block_op
standalone_executor
executor
recurrent_op_helper
recurrent_op
op_registry
generated_static_op
crop_op
activation_op
generated_op
generated_static_op
phi
transpose_op
fused_transpose_op
scope
device_context
enforce)
# test_mkldnn_op_nhwc is registered once. With WITH_CINN enabled two extra
# dependencies are linked: cinn_compiler (directly after recurrent_op_helper)
# and python (last). The two helper variables are empty otherwise, so their
# unquoted expansions contribute no arguments and the dependency order matches
# the previous per-configuration definitions exactly.
set(test_mkldnn_op_nhwc_cinn_dep "")
set(test_mkldnn_op_nhwc_python_dep "")
if(WITH_CINN)
  set(test_mkldnn_op_nhwc_cinn_dep cinn_compiler)
  set(test_mkldnn_op_nhwc_python_dep python)
endif()
# NOTE(review): generated_static_op appears twice in DEPS, as it did in both
# original definitions; it is kept as-is here to leave the link line unchanged.
cc_test_old(
  test_mkldnn_op_nhwc
  SRCS
  test_mkldnn_op_nhwc.cc
  DEPS
  fleet_executor
  conditional_block_op
  standalone_executor
  executor
  recurrent_op_helper
  ${test_mkldnn_op_nhwc_cinn_dep}
  recurrent_op
  op_registry
  generated_static_op
  crop_op
  activation_op
  generated_op
  generated_static_op
  phi
  transpose_op
  fused_transpose_op
  scope
  device_context
  enforce
  ${test_mkldnn_op_nhwc_python_dep})
......@@ -36,4 +36,16 @@ set(PRIM_OP_SRCS
${PRIM_OP_PATH}cast_p_op.cc
${PRIM_OP_PATH}rsqrt_p_op.cc
${PRIM_OP_PATH}uniform_random_p_op.cc)
cc_test_old(prim_op_test SRCS prim_op_test.cc ${PRIM_OP_SRCS} DEPS op_registry)
# CINN_DEPS is assigned here only when WITH_CINN is on; otherwise
# ${CINN_DEPS} below expands to whatever value it already holds (nothing if
# it is unset).
if(WITH_CINN)
  set(CINN_DEPS cinn_compiler)
endif()

# prim_op_test compiles the test driver together with the primitive-op
# sources in ${PRIM_OP_SRCS}; python is linked regardless of WITH_CINN.
cc_test_old(
  prim_op_test
  SRCS prim_op_test.cc ${PRIM_OP_SRCS}
  DEPS fleet_executor ${CINN_DEPS} op_registry python)
......@@ -14,12 +14,15 @@ set(prim_eager_deps
set(prim_generated_deps final_dygraph_function final_dygraph_node
dygraph_function dygraph_node)
if(WITH_CINN)
set(CINN_DEPS cinn_compiler)
endif()
cc_test_old(
test_comp_static
SRCS
test_static_prim.cc
DEPS
fleet_executor
static_utils
static_prim_api
generated_op
......@@ -33,9 +36,14 @@ cc_test_old(
phi
static_global_utils
static_tensor_operants
generated_static_op)
generated_static_op
${CINN_DEPS}
python)
if(NOT (NOT WITH_PYTHON AND ON_INFER))
if(WITH_CINN)
set(prim_eager_deps ${prim_eager_deps} cinn_compiler)
endif()
cc_library(
init_env_utils
SRCS init_env_utils.cc
......@@ -46,9 +54,11 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
SRCS
test_eager_prim.cc
DEPS
fleet_executor
${prim_eager_deps}
${prim_generated_deps}
prim_utils
static_global_utils
init_env_utils)
init_env_utils
python)
endif()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册