From 950b563bf86ac1639804c34fe3d48953ab7a64bc Mon Sep 17 00:00:00 2001 From: risemeup1 <62429225+risemeup1@users.noreply.github.com> Date: Tue, 30 May 2023 21:41:05 +0800 Subject: [PATCH] update_c++17 (#53892) * update_c++17 * update_c++17 * fix windows bug * solve circular dependency * solve circular dependency * solve circular dependency * solve circular dependency * solve circular dependency * fix windows bug * fix compiler error * fix compiler error * update eigen3 * update eigen3 * update eigen3 * fix mac-py3 compiler error * update C++17 * fix mac compiler error * fix compile error * fix coverage_compiler error * fix coverage_ci_problem * fix coverage_error * fix_kunlun200 compile error * fix kunlun200 compiler error * fix compile error * fix compiler error * fix py3 failed test * fix kunlun200 compiler error * test * fix test error * fix test error * fix test error * test * test * fix mac py3 error * fix mac py3 error * fix mac py3 error * fix test error * fix test error * fix compile error * fix compile error * fix compile error * test * test * fix compiler error * test * test * debug on ci * fix compiler error * fix compiler error * test * fix cinn compiler error * test * fix rocm compile error * fix cinn and kunlun compile error * update c++14 * Update flags.cmake --- cmake/cuda.cmake | 2 +- cmake/external/pocketfft.cmake | 10 +++ cmake/flags.cmake | 6 +- paddle/fluid/distributed/common/registerer.h | 2 +- .../distributed/fleet_executor/CMakeLists.txt | 90 ++++++++++++------- .../fleet_executor/test/CMakeLists.txt | 7 +- .../distributed/ps/service/CMakeLists.txt | 2 + .../fluid/distributed/ps/table/CMakeLists.txt | 4 +- paddle/fluid/distributed/rpc/CMakeLists.txt | 2 +- paddle/fluid/distributed/test/CMakeLists.txt | 14 ++- .../fluid/eager/accumulation/CMakeLists.txt | 2 +- .../eager/custom_operator/CMakeLists.txt | 2 +- paddle/fluid/framework/CMakeLists.txt | 29 +++--- paddle/fluid/framework/details/CMakeLists.txt | 37 ++++---- ...d_multi_transformer_encoder_pass_tester.cc | 8 +- 
.../new_executor/interpreter/CMakeLists.txt | 2 +- .../framework/paddle2cinn/CMakeLists.txt | 4 +- .../analysis/ir_passes/CMakeLists.txt | 3 +- paddle/fluid/inference/api/CMakeLists.txt | 45 +++++++--- paddle/fluid/jit/CMakeLists.txt | 5 +- paddle/fluid/operators/CMakeLists.txt | 7 +- .../operators/controlflow/CMakeLists.txt | 8 +- paddle/phi/core/dense_tensor.h | 4 +- paddle/phi/kernels/funcs/reduce_functor.h | 2 +- patches/pocketfft/pocketfft_hdronly.h.patch | 42 +++++++++ .../eager/data_structure_tests/CMakeLists.txt | 40 +++++++-- .../eager/performance_tests/CMakeLists.txt | 6 ++ test/cpp/fluid/CMakeLists.txt | 12 ++- test/cpp/fluid/elementwise/CMakeLists.txt | 6 +- test/cpp/fluid/mkldnn/CMakeLists.txt | 12 ++- test/cpp/inference/analysis/CMakeLists.txt | 1 + test/cpp/inference/api/CMakeLists.txt | 43 ++++++++- 32 files changed, 338 insertions(+), 121 deletions(-) create mode 100644 patches/pocketfft/pocketfft_hdronly.h.patch diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 82c4ec14d9e..cfe3339e855 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -295,7 +295,7 @@ message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}") set(CUDA_PROPAGATE_HOST_FLAGS OFF) # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc. # So, don't set these flags here. -set(CMAKE_CUDA_STANDARD 14) +set(CMAKE_CUDA_STANDARD 17) # (Note) For windows, if delete /W[1-4], /W1 will be added defaultly and conflic with -w # So replace /W[1-4] with /W0 diff --git a/cmake/external/pocketfft.cmake b/cmake/external/pocketfft.cmake index 2d809bbcf03..f56b92d467b 100644 --- a/cmake/external/pocketfft.cmake +++ b/cmake/external/pocketfft.cmake @@ -26,12 +26,22 @@ set(POCKETFFT_INCLUDE_DIR ${POCKETFFT_PREFIX_DIR}/src) message("POCKETFFT_INCLUDE_DIR is ${POCKETFFT_INCLUDE_DIR}") include_directories(${POCKETFFT_INCLUDE_DIR}) +if(APPLE) + file(TO_NATIVE_PATH + ${PADDLE_SOURCE_DIR}/patches/pocketfft/pocketfft_hdronly.h.patch + native_dst) + set(POCKETFFT_PATCH_COMMAND + git checkout -- . 
&& git checkout ${POCKETFFT_TAG} && patch -Nd + ${POCKETFFT_INCLUDE_DIR}/extern_pocketfft < ${native_dst}) +endif() + ExternalProject_Add( extern_pocketfft ${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} GIT_REPOSITORY ${POCKETFFT_REPOSITORY} GIT_TAG ${POCKETFFT_TAG} PREFIX ${POCKETFFT_PREFIX_DIR} + PATCH_COMMAND ${POCKETFFT_PATCH_COMMAND} UPDATE_COMMAND "" CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/cmake/flags.cmake b/cmake/flags.cmake index c1dc26e101f..95dace85a52 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -35,7 +35,11 @@ endfunction() checkcompilercxx14flag() if(NOT WIN32) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + if(WITH_CINN) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") + endif() else() set(CMAKE_CXX_STANDARD 17) endif() diff --git a/paddle/fluid/distributed/common/registerer.h b/paddle/fluid/distributed/common/registerer.h index 663119a8e56..5b2d4291d82 100644 --- a/paddle/fluid/distributed/common/registerer.h +++ b/paddle/fluid/distributed/common/registerer.h @@ -116,7 +116,7 @@ inline PsCoreClassMap &global_factory_map_cpp() { return global_factory_map(); } public: \ Any NewInstance() { return Any(new name()); } \ }; \ - void register_factory_##name() { \ + static void register_factory_##name() { \ FactoryMap &map = global_factory_map_cpp()[#clazz]; \ if (map.find(#name) == map.end()) { \ map[#name] = new ObjectFactory##name(); \ diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index 70153873ced..f054c59fa27 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -25,36 +25,66 @@ cc_library( task_loop_thread_pool SRCS task_loop_thread_pool.cc task_loop_thread.cc task_loop.cc DEPS enforce glog) - -cc_library( - fleet_executor - SRCS fleet_executor.cc - carrier.cc - task_node.cc - runtime_graph.cc - 
dist_model.cc - interceptor.cc - compute_interceptor.cc - amplifier_interceptor.cc - cond_interceptor.cc - start_interceptor.cc - source_interceptor.cc - sink_interceptor.cc - message_service.cc - message_bus.cc - dist_model_tensor_wrapper.cc - DEPS proto_desc - standalone_executor - fleet_executor_desc_proto - interceptor_message_proto - task_loop_thread_pool - collective_helper - op_registry - executor_gc_helper - phi - glog - ${BRPC_DEPS}) - +if(WITH_XPU OR WITH_ROCM) + cc_library( + fleet_executor + SRCS fleet_executor.cc + carrier.cc + task_node.cc + runtime_graph.cc + dist_model.cc + interceptor.cc + compute_interceptor.cc + amplifier_interceptor.cc + cond_interceptor.cc + start_interceptor.cc + source_interceptor.cc + sink_interceptor.cc + message_service.cc + message_bus.cc + dist_model_tensor_wrapper.cc + DEPS naive_executor + proto_desc + standalone_executor + fleet_executor_desc_proto + interceptor_message_proto + task_loop_thread_pool + collective_helper + executor_gc_helper + op_registry + phi + glog + ${BRPC_DEPS}) +else() + cc_library( + fleet_executor + SRCS fleet_executor.cc + carrier.cc + task_node.cc + runtime_graph.cc + dist_model.cc + interceptor.cc + compute_interceptor.cc + amplifier_interceptor.cc + cond_interceptor.cc + start_interceptor.cc + source_interceptor.cc + sink_interceptor.cc + message_service.cc + message_bus.cc + dist_model_tensor_wrapper.cc + DEPS proto_desc + standalone_executor + fleet_executor_desc_proto + interceptor_message_proto + task_loop_thread_pool + collective_helper + op_registry + executor_gc_helper + phi + glog + ${BRPC_DEPS}) +endif() if(WITH_DISTRIBUTE) set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor" diff --git a/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt index 66baeca33f2..5a08ecb8571 100644 --- a/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt +++ 
b/paddle/fluid/distributed/fleet_executor/test/CMakeLists.txt @@ -44,12 +44,13 @@ cc_test_old( compute_interceptor_run_op_test.cc DEPS fleet_executor - ${BRPC_DEPS} - op_registry + naive_executor fill_constant_op + op_registry elementwise_add_op scope - device_context) + device_context + ${BRPC_DEPS}) if(WITH_DISTRIBUTE AND NOT WITH_PSLIB) set_source_files_properties( diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index 8510273e13f..585dd111bf7 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -41,6 +41,7 @@ brpc_library( PROTO sendrecv.proto DEPS + ps_framework_proto ${BRPC_DEPS}) #set_property(GLOBAL PROPERTY RPC_DEPS sendrecv_rpc ${BRPC_DEPS} string_helper) @@ -122,6 +123,7 @@ cc_library( scope phi ps_gpu_wrapper + fleet ${RPC_DEPS}) #cc_library( diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index 507ce1dcef7..6a2c988e609 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -36,12 +36,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") set(TABLE_SRC memory_dense_table.cc barrier_table.cc common_graph_table.cc) #set(EXTERN_DEP rocksdb) - cc_library( common_table SRCS ${TABLE_SRC} - DEPS ${TABLE_DEPS} - ${RPC_DEPS} + DEPS ${RPC_DEPS} graph_edge graph_node device_context diff --git a/paddle/fluid/distributed/rpc/CMakeLists.txt b/paddle/fluid/distributed/rpc/CMakeLists.txt index f4fb06c0d84..76c6dc00110 100644 --- a/paddle/fluid/distributed/rpc/CMakeLists.txt +++ b/paddle/fluid/distributed/rpc/CMakeLists.txt @@ -27,5 +27,5 @@ proto_library(paddle_rpc_proto SRCS rpc.proto) cc_library( paddle_rpc SRCS ${PADDLE_RPC_SRCS} - DEPS ${PADDLE_RPC_DEPS} paddle_rpc_proto) + DEPS ${PADDLE_RPC_DEPS} paddle_rpc_proto pybind) add_dependencies(paddle_rpc brpc) diff --git 
a/paddle/fluid/distributed/test/CMakeLists.txt b/paddle/fluid/distributed/test/CMakeLists.txt index a7ce9615a45..0dd44c2318e 100644 --- a/paddle/fluid/distributed/test/CMakeLists.txt +++ b/paddle/fluid/distributed/test/CMakeLists.txt @@ -74,6 +74,8 @@ cc_test_old( brpc_utils scope phi + sendrecv_rpc + ps_service ${COMMON_DEPS} ${RPC_DEPS}) @@ -117,8 +119,16 @@ cc_test_old( set_source_files_properties( feature_value_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -cc_test_old(feature_value_test SRCS feature_value_test.cc DEPS ${COMMON_DEPS} - table) + +cc_test_old( + feature_value_test + SRCS + feature_value_test.cc + DEPS + table + common_table + sendrecv_rpc + ${COMMON_DEPS}) set_source_files_properties( sparse_sgd_rule_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt index af37915bfc1..57412366184 100755 --- a/paddle/fluid/eager/accumulation/CMakeLists.txt +++ b/paddle/fluid/eager/accumulation/CMakeLists.txt @@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( accumulation_node SRCS accumulation_node.cc - DEPS gradient_accumulator phi grad_node_info) + DEPS gradient_accumulator phi grad_node_info final_dygraph_function) endif() diff --git a/paddle/fluid/eager/custom_operator/CMakeLists.txt b/paddle/fluid/eager/custom_operator/CMakeLists.txt index ea8c2a89f35..a2648d3e325 100644 --- a/paddle/fluid/eager/custom_operator/CMakeLists.txt +++ b/paddle/fluid/eager/custom_operator/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library( custom_operator_node SRCS custom_operator_node.cc - DEPS phi grad_node_info custom_operator) + DEPS phi grad_node_info custom_operator utils) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index ff74b96534e..40530c49ca7 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -695,14 +695,14 @@ endif() cc_library( 
executor_gc_helper SRCS executor_gc_helper.cc - DEPS scope + DEPS while_op_helper + recurrent_op_helper + conditional_block_op_helper + scope proto_desc operator garbage_collector - op_registry - while_op_helper - recurrent_op_helper - conditional_block_op_helper) + op_registry) if(WITH_DISTRIBUTE) if(WITH_PSLIB) cc_library( @@ -729,7 +729,10 @@ if(WITH_DISTRIBUTE) section_worker.cc device_worker_factory.cc data_set.cc - DEPS op_registry + DEPS fleet_executor + fleet_wrapper + recurrent_op_helper + op_registry device_context scope framework_proto @@ -737,7 +740,6 @@ if(WITH_DISTRIBUTE) glog fs shell - fleet_wrapper heter_wrapper ps_gpu_wrapper box_wrapper @@ -753,7 +755,6 @@ if(WITH_DISTRIBUTE) timer monitor heter_service_proto - fleet_executor ${BRPC_DEP}) set(DISTRIBUTE_COMPILE_FLAGS "") if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) @@ -806,7 +807,8 @@ if(WITH_DISTRIBUTE) heter_section_worker.cc device_worker_factory.cc data_set.cc - DEPS op_registry + DEPS recurrent_op_helper + op_registry device_context scope framework_proto @@ -883,7 +885,8 @@ if(WITH_DISTRIBUTE) section_worker.cc device_worker_factory.cc data_set.cc - DEPS op_registry + DEPS recurrent_op_helper + op_registry device_context scope framework_proto @@ -944,7 +947,8 @@ elseif(WITH_PSLIB) section_worker.cc device_worker_factory.cc data_set.cc - DEPS op_registry + DEPS recurrent_op_helper + op_registry device_context scope framework_proto @@ -992,7 +996,8 @@ else() section_worker.cc device_worker_factory.cc data_set.cc - DEPS op_registry + DEPS recurrent_op_helper + op_registry device_context scope framework_proto diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 5f304734b24..4d9a88cf223 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -75,28 +75,30 @@ if(WITH_GPU) nv_library( all_reduce_op_handle SRCS all_reduce_op_handle.cc - DEPS op_handle_base + DEPS 
variable_visitor + op_handle_base scope lod_tensor phi memory - dynload_cuda - variable_visitor) + dynload_cuda) nv_library( fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS all_reduce_op_handle + variable_visitor + op_handle_base scope lod_tensor phi memory dynload_cuda - variable_visitor place) nv_library( grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS fused_all_reduce_op_handle + op_handle_base scope lod_tensor phi @@ -104,8 +106,7 @@ if(WITH_GPU) dynload_cuda variable_visitor place - all_reduce_op_handle - fused_all_reduce_op_handle) + all_reduce_op_handle) if(WITH_DGC) nv_library( @@ -159,18 +160,20 @@ elseif(WITH_ROCM) hip_library( fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS all_reduce_op_handle + op_handle_base + variable_visitor scope lod_tensor phi memory dynload_cuda - variable_visitor place) hip_library( grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS fused_all_reduce_op_handle + op_handle_base scope lod_tensor phi @@ -178,8 +181,7 @@ elseif(WITH_ROCM) dynload_cuda variable_visitor place - all_reduce_op_handle - fused_all_reduce_op_handle) + all_reduce_op_handle) if(WITH_DISTRIBUTE) hip_library( @@ -212,7 +214,8 @@ else() cc_library( fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS all_reduce_op_handle + op_handle_base scope lod_tensor phi @@ -222,15 +225,15 @@ else() cc_library( grad_merge_all_reduce_op_handle SRCS grad_merge_all_reduce_op_handle.cc - DEPS op_handle_base + DEPS fused_all_reduce_op_handle + op_handle_base scope lod_tensor phi memory variable_visitor place - all_reduce_op_handle - fused_all_reduce_op_handle) + all_reduce_op_handle) if(WITH_DISTRIBUTE) cc_library( reduce_op_handle diff --git a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc 
b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc index 844c2124bde..0254986c14c 100644 --- a/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc +++ b/paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass_tester.cc @@ -14,7 +14,9 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/fused_multi_transformer_encoder_pass.h" // NOLINT #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_version_registry.h" - +#ifndef UNUSED +#define UNUSED __attribute__((unused)) +#endif namespace paddle { namespace framework { namespace ir { @@ -180,7 +182,7 @@ TEST(FusedMultiTransformerEncoderPass, basic) { // FFN: post LayerNorm auto* ffn_ln_scale = layers.data("ffn_ln_scale", {1024}, true); auto* ffn_ln_bias = layers.data("ffn_ln_bias", {1024}, true); - layers.layer_norm(ffn_out, ffn_ln_scale, ffn_ln_bias)[0]; + UNUSED auto res = layers.layer_norm(ffn_out, ffn_ln_scale, ffn_ln_bias)[0]; std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); @@ -355,7 +357,7 @@ TEST(MultiDevicesFusedMultiTransformerEncoderPass, basic) { // FFN: post LayerNorm auto* ffn_ln_scale = layers.data("ffn_ln_scale", {1024}, true); auto* ffn_ln_bias = layers.data("ffn_ln_bias", {1024}, true); - layers.layer_norm(ffn_out, ffn_ln_scale, ffn_ln_bias)[0]; + UNUSED auto res = layers.layer_norm(ffn_out, ffn_ln_scale, ffn_ln_bias)[0]; std::unique_ptr graph(new ir::Graph(layers.main_program())); graph->Set("__param_scope__", CreateParamScope()); diff --git a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt index 55ab3c68c0f..9455d238134 100644 --- a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt @@ -37,4 +37,4 @@ set(INTERPRETER_DEPS cc_library( interpreter SRCS ${INTERPRETER_SRCS} - 
DEPS ${INTERPRETER_DEPS}) + DEPS standalone_executor ${INTERPRETER_DEPS}) diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index 0c028857ce3..6001224c7d5 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -39,7 +39,9 @@ cc_library( cinn_cache_key cinn_graph_symbolization cinn - cinn_launch_context) + cinn_launch_context + python + pybind) if(WITH_TESTING) # cc_test_old(cinn_lib_test SRCS cinn_lib_test.cc DEPS cinn) diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt index a7a561b7b37..7c5965b4a36 100644 --- a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -7,7 +7,8 @@ if(WITH_GPU AND TENSORRT_FOUND) cc_library( tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc - DEPS subgraph_util tensorrt_op_teller infer_io_utils) + DEPS convert_to_mixed_precision subgraph_util tensorrt_op_teller + infer_io_utils) set(analysis_deps ${analysis_deps} subgraph_util tensorrt_subgraph_pass diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index 8ca1de1f63c..b841a3b1558 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -15,12 +15,25 @@ if(WITH_MKLDNN) ${mkldnn_quantizer_cfg} PARENT_SCOPE) endif() +if(WIN32) + cc_library( + analysis_config + SRCS analysis_config.cc + DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer + utf8proc) +else() + cc_library( + analysis_config + SRCS analysis_config.cc + DEPS analysis_helper + processgroup_comm_utils + ${mkldnn_quantizer_cfg} + lod_tensor + paddle_pass_builder + table_printer + utf8proc) +endif() -cc_library( - analysis_config - SRCS analysis_config.cc - DEPS ${mkldnn_quantizer_cfg} lod_tensor paddle_pass_builder table_printer - 
utf8proc) cc_library( paddle_infer_contrib SRCS paddle_infer_contrib.cc @@ -28,14 +41,14 @@ cc_library( cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) set(paddle_inference_api_deps - lod_tensor - scope reset_tensor_array analysis_config paddle_infer_contrib zero_copy_tensor trainer_desc_proto - custom_operator) + custom_operator + lod_tensor + scope) if(WITH_CRYPTO) list(APPEND paddle_inference_api_deps paddle_crypto) @@ -43,11 +56,17 @@ endif() if(WITH_CUSTOM_DEVICE) set(paddle_inference_api_deps ${paddle_inference_api_deps} phi) endif() - -cc_library( - paddle_inference_api - SRCS api.cc api_impl.cc helper.cc - DEPS ${paddle_inference_api_deps}) +if(WIN32) + cc_library( + paddle_inference_api + SRCS api.cc api_impl.cc helper.cc + DEPS executor ${paddle_inference_api_deps}) +else() + cc_library( + paddle_inference_api + SRCS api.cc api_impl.cc helper.cc + DEPS executor paddle_inference_io ${paddle_inference_api_deps}) +endif() if(WIN32) target_link_libraries(paddle_inference_api phi) diff --git a/paddle/fluid/jit/CMakeLists.txt b/paddle/fluid/jit/CMakeLists.txt index 7d55f3a52bf..7730b6d8fe0 100644 --- a/paddle/fluid/jit/CMakeLists.txt +++ b/paddle/fluid/jit/CMakeLists.txt @@ -46,4 +46,7 @@ cc_library( jit_function_schema jit_interpreter_engine jit_predictor_engine - jit_function) + jit_function + executor + recurrent_op_helper + recurrent_op) diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index aef36587ed5..f68ba66875b 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -126,7 +126,12 @@ if (WITH_GPU OR WITH_ROCM) endif() op_library(lstm_op DEPS ${OP_HEADER_DEPS}) -op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) +if (WITH_ROCM) + op_library(recurrent_op DEPS executor ${OP_HEADER_DEPS}) +else() + op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) +endif() + set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) diff --git a/paddle/fluid/operators/controlflow/CMakeLists.txt 
b/paddle/fluid/operators/controlflow/CMakeLists.txt index 7db85eebb4f..03e6527a58a 100644 --- a/paddle/fluid/operators/controlflow/CMakeLists.txt +++ b/paddle/fluid/operators/controlflow/CMakeLists.txt @@ -9,7 +9,7 @@ register_operators(EXCLUDES conditional_block_op DEPS naive_executor cc_library( conditional_block_op SRCS conditional_block_op.cc - DEPS standalone_executor executor) + DEPS executor standalone_executor) cc_library( op_variant SRCS op_variant.cc @@ -17,15 +17,15 @@ cc_library( cc_library( conditional_block_op_helper SRCS conditional_block_op_helper.cc - DEPS operator op_variant conditional_block_op) + DEPS op_variant operator conditional_block_op) cc_library( recurrent_op_helper SRCS recurrent_op_helper.cc - DEPS operator op_variant recurrent_op) + DEPS recurrent_op op_variant operator) cc_library( while_op_helper SRCS while_op_helper.cc - DEPS operator op_variant) + DEPS op_variant operator) if(WITH_UNITY_BUILD) target_link_libraries(paddle_operators_controlflow_unity conditional_block_op) diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index a6c5c358262..f4f1a7cb25c 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -268,8 +268,8 @@ class DenseTensor : public TensorBase, }; protected: - std::shared_ptr inplace_version_counter_{ - std::make_shared()}; + std::shared_ptr inplace_version_counter_ = + std::make_shared(); /* @jim19930609: This is a hack In general, it is badly designed to fuse MKLDNN-specific objects into a diff --git a/paddle/phi/kernels/funcs/reduce_functor.h b/paddle/phi/kernels/funcs/reduce_functor.h index e5e440d64d5..df36bee5f98 100644 --- a/paddle/phi/kernels/funcs/reduce_functor.h +++ b/paddle/phi/kernels/funcs/reduce_functor.h @@ -258,7 +258,7 @@ struct AMaxOrAMinGradFunctor { // reduce_all if (size == static_cast(x_numel)) { auto equal_number = mask.sum() - .reshape(Eigen::array({1})) + .reshape(Eigen::array({{1}})) .broadcast(Eigen::array({size})); dx->device(place) 
= dy->broadcast(dim).reshape(x->dimensions()) * mask / equal_number; diff --git a/patches/pocketfft/pocketfft_hdronly.h.patch b/patches/pocketfft/pocketfft_hdronly.h.patch new file mode 100644 index 00000000000..8bbdcacae1f --- /dev/null +++ b/patches/pocketfft/pocketfft_hdronly.h.patch @@ -0,0 +1,42 @@ +diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h +index d75ada6..d3c352d 100644 +--- a/pocketfft_hdronly.h ++++ b/pocketfft_hdronly.h +@@ -149,17 +149,17 @@ template<> struct VLEN { static constexpr size_t val=2; }; + #endif + #endif + +-#if __cplusplus >= 201703L +-inline void *aligned_alloc(size_t align, size_t size) +- { +- // aligned_alloc() requires that the requested size is a multiple of "align" +- void *ptr = ::aligned_alloc(align,(size+align-1)&(~(align-1))); +- if (!ptr) throw std::bad_alloc(); +- return ptr; +- } +-inline void aligned_dealloc(void *ptr) +- { free(ptr); } +-#else // portable emulation ++// #if __cplusplus >= 201703L ++// inline void *aligned_alloc(size_t align, size_t size) ++// { ++// // aligned_alloc() requires that the requested size is a multiple of "align" ++// void *ptr = ::aligned_alloc(align,(size+align-1)&(~(align-1))); ++// if (!ptr) throw std::bad_alloc(); ++// return ptr; ++// } ++// inline void aligned_dealloc(void *ptr) ++// { free(ptr); } ++// #else // portable emulation + inline void *aligned_alloc(size_t align, size_t size) + { + align = std::max(align, alignof(max_align_t)); +@@ -172,7 +172,7 @@ inline void *aligned_alloc(size_t align, size_t size) + } + inline void aligned_dealloc(void *ptr) + { if (ptr) free((reinterpret_cast(ptr))[-1]); } +-#endif ++// #endif + + template class arr + { diff --git a/test/cpp/eager/data_structure_tests/CMakeLists.txt b/test/cpp/eager/data_structure_tests/CMakeLists.txt index 0d5876477c6..57b0bb2c98b 100755 --- a/test/cpp/eager/data_structure_tests/CMakeLists.txt +++ b/test/cpp/eager/data_structure_tests/CMakeLists.txt @@ -4,12 +4,36 @@ cc_test_old(test_egr_ds_auotgrad_meta SRCS 
autograd_meta_test.cc DEPS ${eager_deps}) if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) - cc_test_old(test_egr_ds_grad_tensor_holder SRCS grad_tensor_holder_test.cc - DEPS ${eager_deps} ${generated_deps}) - cc_test_old(test_egr_ds_grad_node_info SRCS grad_node_info_test.cc DEPS - ${eager_deps} ${generated_deps}) - cc_test_old(test_egr_ds_accumulation_node SRCS accumulation_node_test.cc DEPS - ${eager_deps} ${generated_deps}) - cc_test_old(test_egr_ds_tensor_wrapper SRCS tensor_wrapper_test.cc DEPS - ${eager_deps} ${generated_deps}) + cc_test_old( + test_egr_ds_grad_tensor_holder + SRCS + grad_tensor_holder_test.cc + DEPS + conditional_block_op + ${eager_deps} + ${generated_deps}) + cc_test_old( + test_egr_ds_grad_node_info + SRCS + grad_node_info_test.cc + DEPS + conditional_block_op + ${eager_deps} + ${generated_deps}) + cc_test_old( + test_egr_ds_accumulation_node + SRCS + accumulation_node_test.cc + DEPS + conditional_block_op + ${eager_deps} + ${generated_deps}) + cc_test_old( + test_egr_ds_tensor_wrapper + SRCS + tensor_wrapper_test.cc + DEPS + conditional_block_op + ${eager_deps} + ${generated_deps}) endif() diff --git a/test/cpp/eager/performance_tests/CMakeLists.txt b/test/cpp/eager/performance_tests/CMakeLists.txt index dac2b6f2445..5c4f1af9ff0 100644 --- a/test/cpp/eager/performance_tests/CMakeLists.txt +++ b/test/cpp/eager/performance_tests/CMakeLists.txt @@ -17,6 +17,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS benchmark_eager_cpu.cc DEPS + conditional_block_op performance_benchmark_utils ${eager_deps} ${fluid_deps}) @@ -25,6 +26,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS benchmark_fluid_cpu.cc DEPS + dygraph_function performance_benchmark_utils ${eager_deps} ${fluid_deps}) @@ -34,6 +36,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS benchmark_eager_cuda.cc DEPS + conditional_block_op performance_benchmark_utils ${eager_deps} ${fluid_deps}) @@ -42,7 +45,10 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) SRCS benchmark_fluid_cuda.cc DEPS + 
conditional_block_op + standalone_executor performance_benchmark_utils + dygraph_function ${eager_deps} ${fluid_deps}) endif() diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index b7936fadaad..e40c01f628e 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -103,8 +103,16 @@ cc_test( SRCS share_buffer_op_test.cc DEPS lod_tensor device_context generated_static_op) -cc_test_old(op_debug_string_test SRCS op_debug_string_test.cc DEPS - elementwise_add_op ${COMMON_OP_DEPS}) +cc_test_old( + op_debug_string_test + SRCS + op_debug_string_test.cc + DEPS + executor + recurrent_op_helper + recurrent_op + elementwise_add_op + ${COMMON_OP_DEPS}) if(WITH_GPU) cc_test( diff --git a/test/cpp/fluid/elementwise/CMakeLists.txt b/test/cpp/fluid/elementwise/CMakeLists.txt index 25fffee90b9..304063cd81c 100644 --- a/test/cpp/fluid/elementwise/CMakeLists.txt +++ b/test/cpp/fluid/elementwise/CMakeLists.txt @@ -1,12 +1,12 @@ cc_test( test_elementwise_add_op_inplace SRCS test_elementwise_add_op_inplace.cc - DEPS op_registry elementwise_add_op scope device_context enforce executor) + DEPS executor op_registry elementwise_add_op scope device_context enforce) cc_test( test_elementwise_div_grad_grad SRCS test_elementwise_div_grad_grad.cc - DEPS op_registry elementwise_div_op scope device_context enforce executor) + DEPS executor op_registry elementwise_div_op scope device_context enforce) cc_test( test_elementwise_add_grad_grad SRCS test_elementwise_add_grad_grad.cc - DEPS op_registry elementwise_add_op scope device_context enforce executor) + DEPS executor op_registry elementwise_add_op scope device_context enforce) diff --git a/test/cpp/fluid/mkldnn/CMakeLists.txt b/test/cpp/fluid/mkldnn/CMakeLists.txt index 0784ad54a78..555a4ea27e4 100644 --- a/test/cpp/fluid/mkldnn/CMakeLists.txt +++ b/test/cpp/fluid/mkldnn/CMakeLists.txt @@ -1,14 +1,14 @@ cc_test( test_mkldnn_op_inplace SRCS test_mkldnn_op_inplace.cc - DEPS op_registry + DEPS 
executor + op_registry elementwise_add_op activation_op phi scope device_context enforce - executor generated_static_op) set(TEST_MKLDNN_CACHING_DEPS @@ -36,6 +36,11 @@ cc_test_old( SRCS test_mkldnn_op_nhwc.cc DEPS + conditional_block_op + standalone_executor + executor + recurrent_op_helper + recurrent_op op_registry pool_op shape_op @@ -48,5 +53,4 @@ cc_test_old( fused_transpose_op scope device_context - enforce - executor) + enforce) diff --git a/test/cpp/inference/analysis/CMakeLists.txt b/test/cpp/inference/analysis/CMakeLists.txt index 11e346855fe..5ac96ec73fc 100644 --- a/test/cpp/inference/analysis/CMakeLists.txt +++ b/test/cpp/inference/analysis/CMakeLists.txt @@ -60,6 +60,7 @@ if(NOT APPLE AND NOT WIN32) EXTRA_DEPS reset_tensor_array paddle_inference_shared + python ARGS --inference_model_dir=${WORD2VEC_MODEL_DIR}) elseif(WIN32) diff --git a/test/cpp/inference/api/CMakeLists.txt b/test/cpp/inference/api/CMakeLists.txt index e2facc779b1..39bbd3456ff 100644 --- a/test/cpp/inference/api/CMakeLists.txt +++ b/test/cpp/inference/api/CMakeLists.txt @@ -126,6 +126,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${filename} EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt @@ -139,6 +141,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${filename} EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt @@ -156,6 +160,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) ${filename} EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${install_dir}/mobilenet_v2_models/1 --infer_model2=${install_dir}/mobilenet_v2_models/xx @@ -164,7 +169,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) function(inference_analysis_api_test_build TARGET_NAME filename) inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS - paddle_inference_shared) + paddle_inference_shared python) endfunction() 
function(inference_analysis_api_int8_test_run TARGET_NAME test_binary @@ -224,7 +229,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) function(inference_analysis_api_test_with_fake_data_build TARGET_NAME filename) inference_analysis_test_build(${TARGET_NAME} SRCS ${filename} EXTRA_DEPS - paddle_inference_shared) + paddle_inference_shared python) endfunction() function(inference_analysis_api_test_with_fake_data_run TARGET_NAME @@ -403,6 +408,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_dam_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${DAM_SMALL_INSTALL_DIR}/model --infer_data=${DAM_SMALL_INSTALL_DIR}/data.txt) @@ -458,6 +464,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_ernie_tester.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt @@ -503,6 +511,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_transformer_compare_tester.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt @@ -514,6 +524,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_transformer_fuse_tester.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt @@ -525,6 +537,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_transformer_profile_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${TRANSFORMER_INSTALL_DIR}/model --infer_data=${TRANSFORMER_INSTALL_DIR}/data.txt @@ -543,6 +556,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_vit_ocr_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${VIT_OCR_INSTALL_DIR}/vit_ocr/model --infer_data=${VIT_OCR_INSTALL_DIR}/vit_ocr/datavit.txt) @@ -566,6 +580,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) 
analyzer_detect_functional_mkldnn_tester.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${DENSEBOX_INSTALL_DIR}/model --infer_data=${DENSEBOX_INSTALL_DIR}/detect_input_50.txt @@ -1161,6 +1177,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) lite_mul_model_test.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${LITE_MODEL_INSTALL_DIR}) inference_analysis_test( @@ -1169,6 +1187,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) lite_resnet50_test.cc EXTRA_DEPS paddle_inference_shared + python + pybind ARGS --infer_model=${RESNET50_MODEL_DIR}) @@ -1178,6 +1198,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_capi_exp_tester.cc EXTRA_DEPS ${INFERENCE_C_EXTRA_DEPS} + python ARGS --infer_model=${RESNET50_MODEL_DIR}/model) if(WIN32) @@ -1192,6 +1213,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_capi_exp_pd_config_tester.cc EXTRA_DEPS ${INFERENCE_C_EXTRA_DEPS} + python + pybind ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) if(WIN32) @@ -1207,6 +1230,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_capi_exp_pd_tensor_tester.cc EXTRA_DEPS ${INFERENCE_C_EXTRA_DEPS} + python ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) if(WIN32) @@ -1223,6 +1247,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_capi_exp_pd_threads_tester.cc EXTRA_DEPS ${INFERENCE_C_EXTRA_DEPS} + python + pybind ARGS --infer_model=${MOBILENET_INSTALL_DIR}/model) if(WIN32) @@ -1240,6 +1266,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_zerocopy_tensor_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${OCR_INSTALL_DIR}/model) @@ -1250,6 +1277,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_dist_model_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${OCR_INSTALL_DIR}/model) endif() @@ -1264,6 +1292,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_dist_model_xpu_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS 
--infer_model=${OCR_INSTALL_DIR}/model) endif() @@ -1274,6 +1303,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_paddle_tensor_tester.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${OCR_INSTALL_DIR}/model --infer_data=${OCR_INSTALL_DIR}/data.txt @@ -1302,6 +1332,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analyzer_capi_exp_ner_tester.cc EXTRA_DEPS ${INFERENCE_C_EXTRA_DEPS} + python + pybind ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model) if(WIN32) @@ -1459,6 +1491,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) xpu_runtime_config_resnet50_test.cc EXTRA_DEPS paddle_inference_shared + python ARGS --infer_model=${RESNET50_MODEL_DIR}) endif() @@ -1474,6 +1507,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) api_impl_tester.cc DEPS paddle_inference_shared + python ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR}) @@ -1497,6 +1531,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analysis_predictor_tester.cc DEPS paddle_inference_shared + python ARGS --dirname=${WORD2VEC_MODEL_DIR}) elseif(WIN32) @@ -1508,6 +1543,7 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) analysis_predictor benchmark ${inference_deps} + pybind ARGS --dirname=${WORD2VEC_MODEL_DIR}) endif() @@ -1517,7 +1553,8 @@ if(WITH_TESTING AND WITH_INFERENCE_API_TEST) cc_test( test_mkldnn_quantizer SRCS mkldnn_quantizer_tester.cc - DEPS paddle_inference_shared ARGS --dirname=${WORD2VEC_MODEL_DIR}) + DEPS paddle_inference_shared python ARGS + --dirname=${WORD2VEC_MODEL_DIR}) elseif(WIN32) cc_test( test_mkldnn_quantizer -- GitLab