From da50a0093e76dda388976e27a51f060348dd7be6 Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Fri, 26 May 2023 17:12:32 +0800 Subject: [PATCH] [PHI Decoupling]Create PHI shared lib (#53735) * create phi so * fix ci bugs * fix py3 bugs * add file * fix py3 bugs * fix windows bugs * perfect so * fix py3 bugs * delete all static target in phi * fix windows bugs * fix py3 bugs * fix ci bugs * fix windows bugs * fix bugs: gflags can't be linked by dynamic and static lib * fix bugs that can not load 3rd party * fix ci bugs * fix compile bugs * fix py3 bugs * fix conflict * fix xpu bugs * fix mac compile bugs * fix psgpu bugs * fix inference failed * deal with conflict * fix LIBRARY_PATH bug * fix windows bugs * fix onednn error * fix windows compile bugs * fix windows compile bugs * fix test_cuda_graph_static_mode_error aborted * fix windows bugs * fix mac-python3 error * fix hip compile bugs * change mode to static * change to static mode * fix ci bugs * fix py3 bugs * fix windows bugs * fix bugs * add static flag * add PADDLE_API * change position of PADDLE_API * fix windows bugs * change mode to dynamic lib * fix windows static bugs * deal with conflict * fix windows unit bug * fix coverage * deal with conflict * fix windows-inference * fix py3 bugs * fix bugs when compile type_info * fix compile bugs * fix py3 bugs * fix windows bugs * fix windows openblas * fix xpu bugs * fix enforce_test in windows * update code according comment * fix windows cmake bug * fix windows bugs * fix windows bugs * delete cinn unittest * fix cinn bugs --------- Co-authored-by: lzydev <1528794076@qq.com> --- cmake/cblas.cmake | 1 - cmake/configure.cmake | 13 + cmake/external/warpctc.cmake | 3 +- cmake/generic.cmake | 48 +-- cmake/inference_lib.cmake | 7 + cmake/operators.cmake | 7 +- cmake/phi.cmake | 31 ++ paddle/fluid/dialect/CMakeLists.txt | 2 +- .../distributed/auto_parallel/CMakeLists.txt | 3 +- .../auto_parallel/test/CMakeLists.txt | 8 +- .../distributed/collective/CMakeLists.txt | 31 +- .../distributed/fleet_executor/CMakeLists.txt | 6 +- .../distributed/ps/service/CMakeLists.txt | 11 +- .../fluid/distributed/ps/table/CMakeLists.txt | 4 +- paddle/fluid/distributed/rpc/CMakeLists.txt | 2 +- paddle/fluid/distributed/test/CMakeLists.txt | 2 +- paddle/fluid/eager/CMakeLists.txt | 15 +- .../fluid/eager/accumulation/CMakeLists.txt | 2 +- .../eager_generated/backwards/CMakeLists.txt | 2 +- .../eager_generated/forwards/CMakeLists.txt | 2 +- paddle/fluid/eager/api/utils/CMakeLists.txt | 4 +- .../eager/auto_code_generator/CMakeLists.txt | 9 + .../generator/eager_gen.py | 2 +- .../eager/custom_operator/CMakeLists.txt | 2 +- paddle/fluid/eager/pylayer/CMakeLists.txt | 2 +- paddle/fluid/framework/CMakeLists.txt | 89 ++--- paddle/fluid/framework/details/CMakeLists.txt | 58 ++- .../fluid/framework/details/build_strategy.cc | 3 +- paddle/fluid/framework/ir/CMakeLists.txt | 7 +- .../fluid/framework/ir/fuse_adamw_op_pass.cc | 2 +- .../framework/ir/fusion_group/CMakeLists.txt | 4 +- .../ir/memory_optimize_pass/CMakeLists.txt | 3 +- .../framework/new_executor/CMakeLists.txt | 2 +- .../new_executor/interpreter/CMakeLists.txt | 3 +- .../new_executor/workqueue/CMakeLists.txt | 2 +- .../framework/paddle2cinn/CMakeLists.txt | 4 +- paddle/fluid/framework/raw_tensor.h | 3 +- paddle/fluid/framework/type_info.cc | 54 +++ paddle/fluid/imperative/CMakeLists.txt | 32 +- paddle/fluid/inference/CMakeLists.txt | 30 +- paddle/fluid/inference/api/CMakeLists.txt | 4 +- .../fluid/inference/api/analysis_predictor.cc | 8 +- .../inference/api/demo_ci/CMakeLists.txt | 2 +- paddle/fluid/inference/api/demo_ci/run.sh | 1 + .../inference/api/details/CMakeLists.txt | 4 +- .../fluid/inference/capi_exp/CMakeLists.txt | 2 +- paddle/fluid/inference/goapi/test.sh | 2 +- .../inference/tensorrt/convert/CMakeLists.txt | 3 +- paddle/fluid/memory/allocation/CMakeLists.txt | 4 +- .../memory/allocation/allocator_facade.cc | 3 +- paddle/fluid/operators/CMakeLists.txt | 19 +- paddle/fluid/operators/cinn/CMakeLists.txt | 2 +- .../fluid/operators/collective/CMakeLists.txt | 5 +- .../fluid/operators/detection/CMakeLists.txt | 4 +- .../fluid/operators/generator/CMakeLists.txt | 2 +- paddle/fluid/operators/gru_op.cc | 3 +- paddle/fluid/operators/math/CMakeLists.txt | 13 +- paddle/fluid/operators/pscore/CMakeLists.txt | 4 +- .../operators/sequence_ops/CMakeLists.txt | 2 +- paddle/fluid/operators/var_conv_2d_op.cc | 2 +- paddle/fluid/platform/CMakeLists.txt | 58 +-- paddle/fluid/platform/cpu_helper.cc | 4 +- .../platform/device/custom/CMakeLists.txt | 4 +- .../fluid/platform/device/gpu/CMakeLists.txt | 10 +- .../fluid/platform/device/xpu/CMakeLists.txt | 16 +- paddle/fluid/platform/dynload/CMakeLists.txt | 18 +- paddle/fluid/platform/dynload/mklml.cc | 32 -- paddle/fluid/platform/dynload/mklml.h | 113 ------ paddle/fluid/platform/profiler.cc | 16 + paddle/fluid/platform/profiler/CMakeLists.txt | 4 +- paddle/fluid/pybind/CMakeLists.txt | 23 +- paddle/fluid/pybind/eager_utils.cc | 4 +- paddle/phi/CMakeLists.txt | 177 +++++++-- paddle/phi/api/CMakeLists.txt | 5 - paddle/phi/api/all.cc | 19 - paddle/phi/api/ext/op_meta_info.h | 50 +-- paddle/phi/api/include/tensor.h | 4 +- paddle/phi/api/lib/CMakeLists.txt | 365 ++++-------------- paddle/phi/api/lib/context_pool.cc | 11 +- paddle/phi/api/lib/op_meta_info.cc | 88 ++++- paddle/phi/api/lib/tensor.cc | 4 + paddle/phi/api/profiler/CMakeLists.txt | 14 +- paddle/phi/backends/CMakeLists.txt | 35 +- paddle/phi/backends/cpu/cpu_context.cc | 4 + paddle/phi/backends/custom/custom_context.cc | 5 + paddle/phi/backends/dynload/CMakeLists.txt | 79 ++-- paddle/phi/backends/gpu/cuda/CMakeLists.txt | 2 +- paddle/phi/backends/gpu/gpu_context.cc | 9 + paddle/phi/backends/gpu/gpu_context.h | 4 + paddle/phi/backends/onednn/onednn_context.cc | 7 + paddle/phi/backends/onednn/onednn_context.h | 7 +- paddle/phi/backends/xpu/xpu_context.cc | 3 + paddle/phi/capi/CMakeLists.txt | 12 - paddle/phi/capi/all.cc | 19 - paddle/phi/capi/lib/CMakeLists.txt | 56 +-- paddle/phi/common/CMakeLists.txt | 27 +- paddle/phi/core/CMakeLists.txt | 179 ++------- paddle/phi/core/compat/CMakeLists.txt | 25 +- paddle/phi/core/compat/op_utils.cc | 12 + paddle/phi/core/compat/op_utils.h | 10 +- paddle/phi/core/dense_tensor.cc | 25 +- paddle/phi/core/distributed/CMakeLists.txt | 26 +- .../distributed/auto_parallel/CMakeLists.txt | 23 +- .../phi/core/distributed/check/CMakeLists.txt | 12 +- .../phi/core/distributed/store/CMakeLists.txt | 14 +- .../phi/core/distributed/store/tcp_store.cc | 8 +- paddle/phi/core/enforce.cc | 18 +- paddle/phi/core/flags.h | 2 +- paddle/phi/core/lod_utils.cc | 5 +- paddle/phi/core/selected_rows.cc | 5 + paddle/phi/core/sparse_coo_tensor.cc | 17 +- paddle/phi/core/sparse_csr_tensor.cc | 30 +- paddle/phi/core/storage_properties.cc | 32 ++ paddle/phi/core/string_tensor.cc | 11 +- paddle/phi/core/tensor_array.cc | 5 + paddle/phi/core/utils/type_info.h | 4 - paddle/phi/infermeta/CMakeLists.txt | 18 +- paddle/phi/infermeta/multiary.cc | 26 +- paddle/phi/infermeta/sparse/CMakeLists.txt | 10 +- paddle/phi/infermeta/strings/CMakeLists.txt | 5 +- paddle/phi/infermeta/unary.cc | 28 +- paddle/phi/kernels/CMakeLists.txt | 187 ++------- paddle/phi/kernels/autotune/CMakeLists.txt | 16 +- paddle/phi/kernels/autotune/cache_base.h | 4 +- paddle/phi/kernels/cpu/rmsprop_kernel.cc | 4 - paddle/phi/kernels/funcs/CMakeLists.txt | 70 +--- paddle/phi/kernels/funcs/blas/CMakeLists.txt | 5 +- paddle/phi/kernels/funcs/blas/blas_impl.cu.h | 5 +- .../phi/kernels/funcs/detail/CMakeLists.txt | 2 +- paddle/phi/kernels/funcs/eigen/CMakeLists.txt | 18 +- paddle/phi/kernels/funcs/jit/CMakeLists.txt | 24 +- .../phi/kernels/funcs/jit/gen/CMakeLists.txt | 8 +- paddle/phi/kernels/funcs/jit/gen_base.h | 2 +- .../phi/kernels/funcs/jit/more/CMakeLists.txt | 4 - .../funcs/jit/more/intrinsic/CMakeLists.txt | 8 +- .../kernels/funcs/jit/more/mix/CMakeLists.txt | 8 +- .../kernels/funcs/jit/more/mkl/CMakeLists.txt | 8 +- .../kernels/funcs/jit/refer/CMakeLists.txt | 8 +- .../phi/kernels/funcs/lapack/CMakeLists.txt | 2 +- paddle/phi/kernels/funcs/math_function.h | 3 +- ...matrix_inverse.cu.cc => matrix_inverse.cu} | 0 .../cutlass/memory_efficient_attention.cu | 43 ++- .../memory_efficient_attention_backward.cu | 44 ++- paddle/phi/kernels/gpu/eigvalsh_kernel.cu | 4 + paddle/phi/kernels/gpu/gelu_funcs.h | 2 +- paddle/phi/kernels/impl/isclose_kernel_impl.h | 26 +- .../phi/kernels/impl/slice_grad_kernel_impl.h | 26 +- paddle/phi/kernels/transfer_layout_kernel.cc | 1 + paddle/scripts/paddle_build.sh | 4 + paddle/testing/CMakeLists.txt | 2 +- paddle/utils/CMakeLists.txt | 6 +- paddle/utils/string/CMakeLists.txt | 6 +- python/env_dict.py.in | 3 + .../fluid/tests/unittests/CMakeLists.txt | 8 - .../test_parallel_executor_run_cinn.py | 4 +- .../unittests/test_resnet50_with_cinn.py | 149 ------- python/setup.py.in | 12 +- setup.py | 23 +- test/CMakeLists.txt | 7 +- test/cpp/eager/CMakeLists.txt | 4 +- test/cpp/fluid/CMakeLists.txt | 10 +- test/cpp/fluid/benchmark/CMakeLists.txt | 2 +- test/cpp/fluid/cinn/CMakeLists.txt | 91 ++--- test/cpp/fluid/fused/CMakeLists.txt | 14 +- test/cpp/fluid/math/CMakeLists.txt | 10 +- test/cpp/fluid/mkldnn/CMakeLists.txt | 8 +- test/cpp/fluid/pscore/CMakeLists.txt | 12 +- test/cpp/imperative/CMakeLists.txt | 9 +- test/cpp/imperative/test_hooks.cc | 3 +- test/cpp/inference/infer_ut/CMakeLists.txt | 4 +- test/cpp/jit/CMakeLists.txt | 1 - test/cpp/new_executor/CMakeLists.txt | 3 +- test/cpp/phi/api/CMakeLists.txt | 22 +- test/cpp/phi/api/scale_api.h | 3 +- test/cpp/phi/common/CMakeLists.txt | 14 +- test/cpp/phi/core/CMakeLists.txt | 38 +- test/cpp/phi/core/test_type_info.cc | 5 + test/cpp/phi/kernels/CMakeLists.txt | 16 +- test/cpp/phi/ops/CMakeLists.txt | 2 +- test/cpp/prim/CMakeLists.txt | 12 +- tools/parallel_UT_rule.py | 1 - 181 files changed, 1355 insertions(+), 2147 deletions(-) create mode 100644 paddle/fluid/framework/type_info.cc delete mode 100644 paddle/fluid/platform/dynload/mklml.cc delete mode 100644 paddle/fluid/platform/dynload/mklml.h delete mode 100644 paddle/phi/api/all.cc delete mode 100644 paddle/phi/capi/all.cc create mode 100644 paddle/phi/core/storage_properties.cc rename paddle/phi/kernels/funcs/{matrix_inverse.cu.cc => matrix_inverse.cu} (100%) delete mode 100644 python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index 8aee8888708..b68ca023704 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -40,7 +40,6 @@ if(WITH_MKLML) add_definitions(-DLAPACK_FOUND) add_dependencies(cblas mklml) - target_link_libraries(cblas dynload_mklml) message(STATUS "Found cblas and lapack in MKLML " "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") diff --git a/cmake/configure.cmake b/cmake/configure.cmake index ad789a53e83..c5b9e896686 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -235,3 +235,16 @@ endif() if(WITH_CUDNN_FRONTEND) add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND) endif() + +set(WITH_PHI_SHARED + ON + CACHE BOOL "" FORCE) +if(WIN32 OR WITH_ROCM) + set(WITH_PHI_SHARED + OFF + CACHE BOOL "" FORCE) +endif() + +if(WITH_PHI_SHARED) + add_definitions(-DPHI_SHARED) +endif() diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 749de1b46ef..f2fc570c048 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -122,6 +122,5 @@ get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY) include_directories(${WARPCTC_INCLUDE_DIR} )# For warpctc code to include its headers. -add_library(warpctc SHARED IMPORTED GLOBAL) -set_property(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES}) +add_library(warpctc INTERFACE) add_dependencies(warpctc extern_warpctc) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 09a51306749..947d44950d5 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -364,20 +364,7 @@ function(cc_library TARGET_NAME) list(REMOVE_ITEM cc_library_DEPS warpctc) add_dependencies(${TARGET_NAME} warpctc) endif() - # Only deps libmklml.so, not link - if("${cc_library_DEPS};" MATCHES "mklml;") - list(REMOVE_ITEM cc_library_DEPS mklml) - if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml") - list(APPEND cc_library_DEPS dynload_mklml) - endif() - add_dependencies(${TARGET_NAME} mklml) - if(WIN32) - target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB}) - else() - target_link_libraries(${TARGET_NAME} - "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") - endif() - endif() + # remove link to python, see notes at: # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually if("${cc_library_DEPS};" MATCHES "python;") @@ -457,25 +444,10 @@ function(cc_test_build TARGET_NAME) endif() endif() get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) - target_link_libraries( - ${TARGET_NAME} - ${cc_test_DEPS} - ${os_dependency_modules} - paddle_gtest_main - lod_tensor - memory - gtest - gflags - glog) - add_dependencies( - ${TARGET_NAME} - ${cc_test_DEPS} - paddle_gtest_main - lod_tensor - memory - gtest - gflags - glog) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} + ${os_dependency_modules} paddle_gtest_main gtest glog) + add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main gtest + glog) common_link(${TARGET_NAME}) if(WITH_ROCM) target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB}) @@ -670,7 +642,7 @@ function(nv_test TARGET_NAME) add_executable(${TARGET_NAME} ${nv_test_SRCS}) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} - ${os_dependency_modules} paddle_gtest_main) + ${os_dependency_modules} paddle_gtest_main phi) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) @@ -774,8 +746,8 @@ function(hip_test TARGET_NAME) lod_tensor memory gtest - gflags glog + phi ${os_dependency_modules}) add_dependencies( ${TARGET_NAME} @@ -784,7 +756,7 @@ function(hip_test TARGET_NAME) lod_tensor memory gtest - gflags + phi glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) @@ -881,7 +853,7 @@ function(xpu_test TARGET_NAME) lod_tensor memory gtest - gflags + phi glog ${os_dependency_modules}) add_dependencies( @@ -891,7 +863,7 @@ function(xpu_test TARGET_NAME) lod_tensor memory gtest - gflags + phi glog) common_link(${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME}) diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 8d47dbd5e9b..3731d23b813 100755 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -269,6 +269,13 @@ else() SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + + set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*) + copy( + inference_lib_dist + SRCS ${paddle_phi_lib} + DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) + endif() copy( diff --git a/cmake/operators.cmake b/cmake/operators.cmake index 4c426d66876..a0f5d2c82ee 100644 --- a/cmake/operators.cmake +++ b/cmake/operators.cmake @@ -61,8 +61,7 @@ function(register_cu_kernel TARGET) "${multiValueArgs}" ${ARGN}) set(cu_srcs) - set(op_common_deps operator op_registry math_function layer - common_infer_shape_functions) + set(op_common_deps operator op_registry layer common_infer_shape_functions) foreach(cu_src ${register_cu_kernel_SRCS}) if(${cu_src} MATCHES ".*\\.cu$") list(APPEND cu_srcs ${cu_src}) @@ -113,7 +112,7 @@ function(register_mkldnn_kernel TARGET) "${multiValueArgs}" ${ARGN}) set(mkldnn_cc_srcs) - set(op_common_deps operator op_registry math_function layer + set(op_common_deps operator op_registry phi layer common_infer_shape_functions) foreach(mkldnn_src ${register_mkldnn_kernel_SRCS}) if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$") @@ -164,7 +163,7 @@ function(op_library TARGET) set(MIOPEN_FILE) set(mkldnn_cc_srcs) set(MKLDNN_FILE) - set(op_common_deps operator op_registry math_function layer + set(op_common_deps operator op_registry phi layer common_infer_shape_functions) # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build. diff --git a/cmake/phi.cmake b/cmake/phi.cmake index f97d5d3f8f7..8a500f93860 100644 --- a/cmake/phi.cmake +++ b/cmake/phi.cmake @@ -94,6 +94,13 @@ function(kernel_declare TARGET_LIST) continue() endif() endif() + # fusion group kernel is not supported in windows and mac + if(WIN32 OR APPLE) + string(FIND "${first_registry}" "fusion_group" pos) + if(pos GREATER 1) + continue() + endif() + endif() # some gpu kernel only can run on cuda, not support rocm, so we add this branch if(WITH_ROCM) string(FIND "${first_registry}" "cuda_only" pos) @@ -216,3 +223,27 @@ function(prune_declaration_h) endif() endforeach() endfunction() + +function(collect_srcs SRC_GROUP) + set(options) + set(oneValueArgs) + set(multiValueArgs "SRCS") + cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN}) + foreach(src ${prefix_SRCS}) + set(${SRC_GROUP} + "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${src}" + CACHE INTERNAL "") + endforeach() +endfunction() + +function(collect_generated_srcs SRC_GROUP) + set(options) + set(oneValueArgs) + set(multiValueArgs "SRCS") + cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN}) + foreach(src ${prefix_SRCS}) + set(${SRC_GROUP} + "${${SRC_GROUP}};${src}" + CACHE INTERNAL "") + endforeach() +endfunction() diff --git a/paddle/fluid/dialect/CMakeLists.txt b/paddle/fluid/dialect/CMakeLists.txt index 8130b75f637..24c18e24c23 100644 --- a/paddle/fluid/dialect/CMakeLists.txt +++ b/paddle/fluid/dialect/CMakeLists.txt @@ -49,5 +49,5 @@ file(GLOB PD_DIALECT_SRCS "*.cc") cc_library( pd_dialect SRCS ${PD_DIALECT_SRCS} ${op_source_file} - DEPS new_ir framework_proto dense_tensor phi_utils) + DEPS new_ir framework_proto phi phi_utils) target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR}) diff --git a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt index 9d9cb97d855..a0806fa1a64 100644 --- a/paddle/fluid/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/CMakeLists.txt @@ -1,7 +1,6 @@ cc_library( op_dist_attr SRCS dist_attr.cc - DEPS dist_attr process_mesh dist_mapper auto_parallel_proto proto_desc - phi_enforce) + DEPS phi auto_parallel_proto proto_desc) add_subdirectory(test) diff --git a/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt b/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt index fcc000e596b..15c0ed63052 100644 --- a/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt +++ b/paddle/fluid/distributed/auto_parallel/test/CMakeLists.txt @@ -1,19 +1,19 @@ cc_test( device_mesh_test SRCS device_mesh_test.cc - DEPS device_mesh) + DEPS phi) cc_test( process_mesh_test SRCS process_mesh_test.cc - DEPS process_mesh) + DEPS phi) cc_test( dist_attr_test SRCS dist_attr_test.cc - DEPS dist_attr proto_desc) + DEPS phi proto_desc) cc_test( dist_mapper_test SRCS dist_mapper_test.cc - DEPS dist_mapper) + DEPS phi) diff --git a/paddle/fluid/distributed/collective/CMakeLists.txt b/paddle/fluid/distributed/collective/CMakeLists.txt index ef626ea2985..215f55f2d18 100644 --- a/paddle/fluid/distributed/collective/CMakeLists.txt +++ b/paddle/fluid/distributed/collective/CMakeLists.txt @@ -1,18 +1,18 @@ cc_library( process_group SRCS process_group.cc - DEPS dense_tensor xxhash) + DEPS phi xxhash) cc_library( eager_reducer SRCS reducer.cc - DEPS eager_api process_group phi_api string_helper) + DEPS eager_api process_group phi string_helper) if(WITH_DISTRIBUTE) cc_library( process_group_gloo SRCS process_group_gloo.cc gloo_send_recv.cc - DEPS phi_api eager_api gloo_wrapper tcp_store) + DEPS phi eager_api gloo_wrapper) endif() if(WITH_NCCL OR WITH_RCCL) @@ -20,28 +20,19 @@ if(WITH_NCCL OR WITH_RCCL) process_group_nccl SRCS process_group_nccl.cc nccl_tools.cc common.cc DEPS process_group - tcp_store + phi place enforce collective_helper device_context - ${DEVICE_EVENT_LIBS} - dense_tensor - comm_static_check - nccl_dynamic_check) + ${DEVICE_EVENT_LIBS}) endif() if(WITH_XPU_BKCL) cc_library( process_group_bkcl SRCS process_group_bkcl.cc bkcl_tools.cc common.cc - DEPS process_group - tcp_store - place - enforce - collective_helper - device_context - dense_tensor) + DEPS process_group phi place enforce collective_helper device_context) endif() if(WITH_MPI) @@ -55,15 +46,7 @@ if(WITH_CUSTOM_DEVICE) cc_library( process_group_custom SRCS process_group_custom.cc custom_ccl_tools.cc common.cc - DEPS process_group - tcp_store - phi_backends - place - enforce - collective_helper - device_context - comm_static_check - dense_tensor) + DEPS process_group phi place enforce collective_helper device_context) endif() set(COMM_UTILS_DEPS process_group) diff --git a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt index 27733979c32..70153873ced 100755 --- a/paddle/fluid/distributed/fleet_executor/CMakeLists.txt +++ b/paddle/fluid/distributed/fleet_executor/CMakeLists.txt @@ -5,7 +5,7 @@ endif() proto_library(interceptor_message_proto SRCS interceptor_message.proto) if(WITH_ARM_BRPC) - set(BRPC_DEPS arm_brpc snappy gflags glog) + set(BRPC_DEPS arm_brpc snappy phi glog) elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) set(BRPC_DEPS brpc @@ -15,7 +15,7 @@ elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) zlib leveldb snappy - gflags + phi glog) else() set(BRPC_DEPS "") @@ -51,7 +51,7 @@ cc_library( collective_helper op_registry executor_gc_helper - gflags + phi glog ${BRPC_DEPS}) diff --git a/paddle/fluid/distributed/ps/service/CMakeLists.txt b/paddle/fluid/distributed/ps/service/CMakeLists.txt index 0c5e460fcbd..8510273e13f 100755 --- a/paddle/fluid/distributed/ps/service/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/service/CMakeLists.txt @@ -8,12 +8,11 @@ if(WITH_HETERPS) ssl crypto protobuf - gflags + phi glog zlib leveldb snappy - gflags glog device_context rocksdb) @@ -25,12 +24,11 @@ else() ssl crypto protobuf - gflags + phi glog zlib leveldb snappy - gflags glog device_context) @@ -122,8 +120,7 @@ cc_library( simple_threadpool simple_rpc scope - math_function - selected_rows_functor + phi ps_gpu_wrapper ${RPC_DEPS}) @@ -150,7 +147,7 @@ cc_library( #cc_library( # communicator # SRCS communicator/communicator.cc -# DEPS scope client table math_function selected_rows_functor ${RPC_DEPS}) +# DEPS scope client table phi ${RPC_DEPS}) #cc_library( # ps_service # SRCS ps_service/service.cc diff --git a/paddle/fluid/distributed/ps/table/CMakeLists.txt b/paddle/fluid/distributed/ps/table/CMakeLists.txt index 2a5c4ad25d1..507ce1dcef7 100644 --- a/paddle/fluid/distributed/ps/table/CMakeLists.txt +++ b/paddle/fluid/distributed/ps/table/CMakeLists.txt @@ -48,7 +48,7 @@ cc_library( string_helper simple_threadpool xxhash - generator) + phi) set_source_files_properties( tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) @@ -91,7 +91,7 @@ cc_library( ps_framework_proto string_helper device_context - gflags + phi glog fs afs_wrapper diff --git a/paddle/fluid/distributed/rpc/CMakeLists.txt b/paddle/fluid/distributed/rpc/CMakeLists.txt index ccac6022110..f4fb06c0d84 100644 --- a/paddle/fluid/distributed/rpc/CMakeLists.txt +++ b/paddle/fluid/distributed/rpc/CMakeLists.txt @@ -20,7 +20,7 @@ set(PADDLE_RPC_DEPS zlib leveldb snappy - gflags + phi glog pybind) proto_library(paddle_rpc_proto SRCS rpc.proto) diff --git a/paddle/fluid/distributed/test/CMakeLists.txt b/paddle/fluid/distributed/test/CMakeLists.txt index 30f14923e05..a7ce9615a45 100644 --- a/paddle/fluid/distributed/test/CMakeLists.txt +++ b/paddle/fluid/distributed/test/CMakeLists.txt @@ -73,7 +73,7 @@ cc_test_old( DEPS brpc_utils scope - math_function + phi ${COMMON_DEPS} ${RPC_DEPS}) diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index a0ff3300ffa..aa9e7c7d2eb 100755 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -1,12 +1,10 @@ set(eager_deps - phi_api - phi_dygraph_api + phi hook_utils tensor_utils utils global_utils backward - phi_tensor tracer layer autograd_meta @@ -48,27 +46,26 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) cc_library( backward SRCS backward.cc - DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune) + DEPS grad_tensor_holder utils autograd_meta grad_node_info phi) endif() cc_library( eager_nan_inf_utils SRCS nan_inf_utils.cc - DEPS phi_tensor nan_inf_utils enforce) + DEPS phi nan_inf_utils enforce) cc_library( grad_node_info SRCS grad_node_info.cc - DEPS phi_api phi_tensor) + DEPS phi) cc_library( autograd_meta SRCS autograd_meta.cc - DEPS phi_api phi_tensor) + DEPS phi) cc_library( utils SRCS utils.cc - DEPS phi_api - phi_tensor + DEPS phi global_utils layer proto_desc diff --git a/paddle/fluid/eager/accumulation/CMakeLists.txt b/paddle/fluid/eager/accumulation/CMakeLists.txt index a924a9b106d..af37915bfc1 100755 --- a/paddle/fluid/eager/accumulation/CMakeLists.txt +++ b/paddle/fluid/eager/accumulation/CMakeLists.txt @@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( accumulation_node SRCS accumulation_node.cc - DEPS gradient_accumulator phi_api grad_node_info) + DEPS gradient_accumulator phi grad_node_info) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt index 9f2b99d38d4..8537729da97 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( scale_node SRCS scale_node.cc - DEPS global_utils phi phi_api grad_node_info) + DEPS global_utils phi grad_node_info) if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt index 4c0625b4b46..5cda6ba553a 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( eager_scale SRCS scale.cc - DEPS phi_api phi autograd_meta scale_node) + DEPS phi autograd_meta scale_node) if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( diff --git a/paddle/fluid/eager/api/utils/CMakeLists.txt b/paddle/fluid/eager/api/utils/CMakeLists.txt index dbb59e5aae7..94c77b73922 100755 --- a/paddle/fluid/eager/api/utils/CMakeLists.txt +++ b/paddle/fluid/eager/api/utils/CMakeLists.txt @@ -7,7 +7,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( tensor_utils SRCS tensor_utils.cc - DEPS phi_api autograd_meta grad_node_info accumulation_node) + DEPS phi autograd_meta grad_node_info accumulation_node) cc_library( hook_utils SRCS hook_utils.cc @@ -16,7 +16,7 @@ else() cc_library( tensor_utils SRCS tensor_utils.cc - DEPS phi_api autograd_meta grad_node_info) + DEPS phi autograd_meta grad_node_info) cc_library( hook_utils SRCS hook_utils.cc diff --git a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt index 003bf273e3c..d187b1abb11 100644 --- a/paddle/fluid/eager/auto_code_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/CMakeLists.txt @@ -52,6 +52,15 @@ if(WIN32) set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") endif() + if(WITH_PHI_SHARED) + message("Copied phi.dll for Eager AutoCodeGen") + add_custom_command( + OUTPUT ${eager_generator_path}/phi.dll + COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${eager_generator_path} + DEPENDS phi) + list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll) + endif() + if(${CBLAS_PROVIDER} STREQUAL MKLML) message("Copied libiomp5md.dll for Eager AutoCodeGen") add_custom_command( diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 532eabdef43..709372dd98e 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -392,7 +392,7 @@ FORWARD_CC_FILE_TEMPLATE = """ #include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" #include "paddle/phi/core/flags.h" -DECLARE_bool(check_nan_inf); +PHI_DECLARE_bool(check_nan_inf); PHI_DECLARE_string(tensor_operants_mode); {} {} diff --git a/paddle/fluid/eager/custom_operator/CMakeLists.txt b/paddle/fluid/eager/custom_operator/CMakeLists.txt index 424194557dd..ea8c2a89f35 100644 --- a/paddle/fluid/eager/custom_operator/CMakeLists.txt +++ b/paddle/fluid/eager/custom_operator/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library( custom_operator_node SRCS custom_operator_node.cc - DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info) + DEPS phi grad_node_info custom_operator) diff --git a/paddle/fluid/eager/pylayer/CMakeLists.txt b/paddle/fluid/eager/pylayer/CMakeLists.txt index 4b0ad071117..fe7a57fe795 100644 --- a/paddle/fluid/eager/pylayer/CMakeLists.txt +++ b/paddle/fluid/eager/pylayer/CMakeLists.txt @@ -1,4 +1,4 @@ cc_library( py_layer_node SRCS py_layer_node.cc - DEPS pybind phi_api grad_node_info) + DEPS pybind phi grad_node_info) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index d4451c7c491..ff74b96534e 100755 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto cc_library( string_array SRCS string_array.cc - DEPS utf8proc phi_enforce) + DEPS utf8proc phi) cc_library( data_type @@ -130,7 +130,7 @@ cc_test( cc_library( tensor SRCS tensor_util.cc - DEPS place memory data_type device_context dense_tensor) + DEPS place memory data_type device_context phi) cc_test( tensor_test @@ -166,12 +166,12 @@ cc_test( cc_library( lod_tensor SRCS lod_tensor.cc - DEPS ddim mixed_vector place tensor framework_proto version) + DEPS phi place tensor framework_proto version) cc_test( lod_tensor_test SRCS lod_tensor_test.cc - DEPS lod_utils lod_tensor memory) + DEPS phi lod_tensor memory) if(WITH_GPU) nv_test( @@ -188,12 +188,12 @@ endif() cc_library( garbage_collector SRCS garbage_collector.cc - DEPS device_context memory gflags glog) + DEPS device_context memory phi glog) cc_library( reader SRCS reader.cc - DEPS lod_tensor ddim) + DEPS lod_tensor phi) cc_test( reader_test SRCS reader_test.cc @@ -202,13 +202,12 @@ cc_test( cc_test( threadpool_test SRCS threadpool_test.cc - DEPS threadpool) + DEPS phi) cc_library( var_type_traits SRCS var_type_traits.cc - DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor - extended_tensor) + DEPS framework_proto scope phi) if(WITH_GPU) target_link_libraries(var_type_traits dynload_cuda) endif() @@ -242,7 +241,7 @@ endif() cc_library( scope SRCS scope.cc - DEPS glog threadpool xxhash var_type_traits) + DEPS glog phi xxhash var_type_traits) cc_library( device_worker SRCS device_worker.cc @@ -273,12 +272,12 @@ if(WITH_GPU) nv_test( data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context math_function scope) + DEPS operator op_registry device_context phi scope) elseif(WITH_ROCM) hip_test( data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry device_context math_function scope) + DEPS operator op_registry device_context phi scope) endif() if(WITH_GPU) @@ -333,7 +332,7 @@ endif() cc_library( data_layout_transform SRCS data_layout_transform.cc - DEPS tensor math_function phi_data_layout_transform) + DEPS tensor phi) cc_test( data_layout_transform_test SRCS data_layout_transform_test.cc @@ -342,14 +341,13 @@ cc_test( cc_library( data_transform SRCS data_transform.cc - DEPS math_function + DEPS phi tensor framework_proto selected_rows_utils data_device_transform data_type_transform - data_layout_transform - phi_data_transform) + data_layout_transform) cc_library( attribute @@ -400,7 +398,7 @@ cc_library( cc_library( shape_inference SRCS shape_inference.cc - DEPS ddim attribute selected_rows_utils) + DEPS phi attribute selected_rows_utils) # every source file that includes "dnnl.h" must depends on mkldnn # or, the first one should depends on mkldnn @@ -433,30 +431,17 @@ if(WITH_XPU) phi_utils SRCS phi_utils.cc DEPS lod_tensor - dense_tensor selected_rows_utils - int_array - scalar place phi var_type_traits op_info - xpu_op_list - convert_utils) + xpu_op_list) else() cc_library( phi_utils SRCS phi_utils.cc - DEPS lod_tensor - dense_tensor - selected_rows_utils - int_array - scalar - place - phi - var_type_traits - op_info - convert_utils) + DEPS lod_tensor selected_rows_utils place phi var_type_traits op_info) endif() if(WITH_XPU) @@ -482,11 +467,10 @@ if(WITH_XPU) unused_var_check nan_inf_utils phi_utils - kernel_factory infershape_utils - op_utils + phi op_compat_infos - get_kerneltype_forvar_utils) + type_info) else() cc_library( operator @@ -509,11 +493,10 @@ else() unused_var_check nan_inf_utils phi_utils - kernel_factory infershape_utils - op_utils + phi op_compat_infos - get_kerneltype_forvar_utils) + type_info) endif() cc_test( @@ -543,7 +526,7 @@ cc_library( version xxhash op_dist_attr - scalar + phi op_version_proto op_version_registry) @@ -853,7 +836,7 @@ if(WITH_DISTRIBUTE) heter_server brpc fleet_executor - flags) + phi) set(DISTRIBUTE_COMPILE_FLAGS "") if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") @@ -1071,7 +1054,7 @@ if(WITH_PSCORE) executor heter_server gloo_wrapper - eigen_function + phi ${RPC_DEPS} graph_gpu_wrapper) else() @@ -1088,7 +1071,7 @@ if(WITH_PSCORE) executor heter_server gloo_wrapper - eigen_function + phi ${RPC_DEPS}) endif() else() @@ -1112,7 +1095,7 @@ cc_test( cc_library( selected_rows_utils SRCS selected_rows_utils.cc - DEPS selected_rows device_context) + DEPS phi device_context) cc_test( selected_rows_utils_test SRCS selected_rows_utils_test.cc @@ -1162,12 +1145,11 @@ cc_library( phi phi_utils op_info - shape_inference - sparse_coo_tensor) + shape_inference) cc_test( infershape_utils_test SRCS infershape_utils_test.cc - DEPS infershape_utils infermeta_utils meta_tensor) + DEPS infershape_utils phi) # Get the current working branch execute_process( @@ -1198,12 +1180,15 @@ cc_library( operator dynamic_loader string_helper - phi_tensor - op_meta_info - phi_api - tensor_api - phi_tensor_operants - operants_manager) + phi + imperative_flag + layer) + +cc_library(type_info SRCS type_info.cc) +add_dependencies(type_info framework_proto auto_parallel_proto xxhash) +if(WITH_MKLDNN) + add_dependencies(type_info mkldnn) +endif() set(FLUID_FRAMEWORK_MODULES proto_desc diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 99ebd6a370b..b660cbcef2b 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -10,15 +10,15 @@ cc_library( cc_library( scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc - DEPS op_handle_base scope lod_tensor ddim memory) + DEPS op_handle_base scope lod_tensor phi memory) cc_library( fetch_op_handle SRCS fetch_op_handle.cc - DEPS op_handle_base scope lod_tensor ddim memory) + DEPS op_handle_base scope lod_tensor phi memory) cc_library( fetch_async_op_handle SRCS fetch_async_op_handle.cc - DEPS op_handle_base scope lod_tensor ddim memory) + DEPS op_handle_base scope lod_tensor phi memory) cc_library( share_tensor_buffer_functor @@ -78,7 +78,7 @@ if(WITH_GPU) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor) @@ -88,7 +88,7 @@ if(WITH_GPU) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor @@ -99,7 +99,7 @@ if(WITH_GPU) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor @@ -114,7 +114,7 @@ if(WITH_GPU) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor @@ -126,19 +126,17 @@ if(WITH_GPU) nv_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim dynload_cuda - selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi dynload_cuda) else() nv_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim dynload_cuda - selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi dynload_cuda) endif() nv_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) + DEPS op_handle_base scope phi memory variable_visitor dynload_cuda) nv_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -154,7 +152,7 @@ elseif(WITH_ROCM) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor) @@ -164,7 +162,7 @@ elseif(WITH_ROCM) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor @@ -175,7 +173,7 @@ elseif(WITH_ROCM) DEPS op_handle_base scope lod_tensor - ddim + phi memory dynload_cuda variable_visitor @@ -187,19 +185,17 @@ elseif(WITH_ROCM) hip_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim dynload_cuda - selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi dynload_cuda) else() hip_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim dynload_cuda - selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi dynload_cuda) endif() hip_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) + DEPS op_handle_base scope phi memory variable_visitor dynload_cuda) hip_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -212,14 +208,14 @@ else() cc_library( all_reduce_op_handle SRCS all_reduce_op_handle.cc - DEPS op_handle_base scope lod_tensor ddim memory variable_visitor) + DEPS op_handle_base scope lod_tensor phi memory variable_visitor) cc_library( fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor - ddim + phi memory variable_visitor place) @@ -229,7 +225,7 @@ else() DEPS op_handle_base scope lod_tensor - ddim + phi memory variable_visitor place @@ -239,17 +235,17 @@ else() cc_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi) else() cc_library( reduce_op_handle SRCS reduce_op_handle.cc - DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) + DEPS op_handle_base variable_visitor scope phi) endif() cc_library( broadcast_op_handle SRCS broadcast_op_handle.cc - DEPS op_handle_base scope ddim memory variable_visitor) + DEPS op_handle_base scope phi memory variable_visitor) cc_library( fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc @@ -259,7 +255,7 @@ endif() cc_library( gather_op_handle SRCS gather_op_handle.cc - DEPS op_handle_base scope ddim memory variable_visitor) + DEPS op_handle_base scope phi memory variable_visitor) cc_library( eager_deletion_op_handle @@ -305,7 +301,7 @@ cc_test( DEPS var_handle op_handle_base scope - ddim + phi memory device_context broadcast_op_handle) @@ -317,7 +313,7 @@ cc_test_old( var_handle op_handle_base scope - ddim + phi memory device_context gather_op_handle) @@ -330,12 +326,12 @@ cc_library( scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_executor.cc DEPS ssa_graph_executor scope_buffered_monitor) -#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory +#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory # device_context reduce_op_handle ) cc_library( bind_threaded_ssa_graph_executor SRCS bind_threaded_ssa_graph_executor.cc - DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool + DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool device_context) cc_library( fast_threaded_ssa_graph_executor diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index b0349966bb5..69f7a49ce55 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -20,9 +20,10 @@ limitations under the License. */ #include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/ir/graph_printer.h" #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h" +#include "paddle/phi/core/flags.h" DECLARE_bool(convert_all_blocks); -DECLARE_bool(use_mkldnn); +PHI_DECLARE_bool(use_mkldnn); #ifdef PADDLE_WITH_CINN DECLARE_bool(use_cinn); #endif diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 7f22793fc0c..a18607595e1 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -32,7 +32,7 @@ cc_library( cc_library( cost_model SRCS cost_model.cc - DEPS executor graph profiler proto_desc phi_device_tracer) + DEPS executor graph profiler proto_desc phi) set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits) if(WITH_TESTING) @@ -458,9 +458,6 @@ if(WITH_MKLDNN) graph_to_program_pass conv_op conv_transpose_op - math_function - im2col - vol2col batch_norm_op generated_op activation_op @@ -468,7 +465,7 @@ if(WITH_MKLDNN) concat_and_split naive_executor device_context - eigen_function) + phi) if(WITH_GPU OR WITH_ROCM) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) endif() diff --git a/paddle/fluid/framework/ir/fuse_adamw_op_pass.cc b/paddle/fluid/framework/ir/fuse_adamw_op_pass.cc index c26032fadc2..edceedd546b 100644 --- a/paddle/fluid/framework/ir/fuse_adamw_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_adamw_op_pass.cc @@ -221,7 +221,7 @@ bool InitAndCheckAttrs(const size_t &found_adamw_count, } } - // Check whether with_decay and multi_precision are matched。 + // Check whether with_decay and multi_precision are matched if (config->with_decay != PADDLE_GET_CONST(bool, adamw_op_desc->GetAttr("with_decay")) || config->multi_precision != diff --git a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt index 5e05108b666..2357247b37d 100644 --- a/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt +++ b/paddle/fluid/framework/ir/fusion_group/CMakeLists.txt @@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM) cc_test( test_code_generator SRCS code_generator_tester.cc - DEPS code_generator phi_backends lod_tensor graph_viz_pass) + DEPS code_generator phi lod_tensor graph_viz_pass) endif() cc_library( fusion_group_pass SRCS fusion_group_pass.cc elementwise_group_detector.cc - DEPS subgraph_detector fuse_pass_base code_generator phi_backends) + DEPS subgraph_detector fuse_pass_base code_generator phi) cc_test( test_fusion_group_pass SRCS fusion_group_pass_tester.cc diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt index 1723e881cd5..ffb1606b95c 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt +++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt @@ -76,5 +76,4 @@ cc_library( cc_test( test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc - DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op - eigen_function) + DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi) diff --git a/paddle/fluid/framework/new_executor/CMakeLists.txt b/paddle/fluid/framework/new_executor/CMakeLists.txt index 33311fef61a..894275697f7 100644 --- a/paddle/fluid/framework/new_executor/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/CMakeLists.txt @@ -16,4 +16,4 @@ cc_library( cc_library( staticgraph_executor_statistics SRCS executor_statistics.cc - DEPS enforce glog phi_os_info) + DEPS enforce glog phi) diff --git a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt index 3885c29c6a9..55ab3c68c0f 100644 --- a/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/interpreter/CMakeLists.txt @@ -6,7 +6,6 @@ set(INTERPRETER_DEPS device_context global_utils op_registry - phi_tensor_utils scope framework_proto data_feed_proto @@ -31,7 +30,7 @@ set(INTERPRETER_DEPS enforce scope glog - comm_context_manager + phi ${DEVICE_EVENT_LIBS} glog) diff --git a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt index e1826df133c..b0ab1826fb4 100644 --- a/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt +++ b/paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt @@ -5,7 +5,7 @@ cc_library( cc_library( workqueue SRCS workqueue.cc - DEPS workqueue_utils enforce glog phi_os_info) + DEPS workqueue_utils enforce glog phi) cc_test( workqueue_test SRCS workqueue_test.cc diff --git a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt index f6a18330407..a415c7d5832 100644 --- a/paddle/fluid/framework/paddle2cinn/CMakeLists.txt +++ b/paddle/fluid/framework/paddle2cinn/CMakeLists.txt @@ -5,7 +5,7 @@ pass_library( cinn_subgraph_detector subgraph_detector cinn_compiler - errors + phi enforce) pass_library(cinn_zero_tensor_trick_pass base) @@ -17,7 +17,7 @@ cc_library( cc_library( transform_type SRCS transform_type.cc - DEPS errors enforce cinn) + DEPS phi enforce cinn) cc_library( cinn_cache_key SRCS cinn_cache_key.cc diff --git a/paddle/fluid/framework/raw_tensor.h b/paddle/fluid/framework/raw_tensor.h index 60ccd6a5bae..d5130e21de2 100644 --- a/paddle/fluid/framework/raw_tensor.h +++ b/paddle/fluid/framework/raw_tensor.h @@ -16,6 +16,7 @@ limitations under the License. */ #include +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/extended_tensor.h" #include "paddle/utils/any.h" @@ -52,7 +53,7 @@ class RawTensor : public phi::ExtendedTensor, T& Get() const { PADDLE_ENFORCE_EQ(data_.empty(), false, - platform::errors::PreconditionNotMet( + phi::errors::PreconditionNotMet( "The data in RawTensor is empty. Please set data " "before using it.")); diff --git a/paddle/fluid/framework/type_info.cc b/paddle/fluid/framework/type_info.cc new file mode 100644 index 00000000000..b24e7fa53a3 --- /dev/null +++ b/paddle/fluid/framework/type_info.cc @@ -0,0 +1,54 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/raw_tensor.h" +#include "paddle/fluid/framework/string_array.h" +#include "paddle/fluid/prim/utils/static/desc_tensor.h" + +namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType( + paddle::framework::RawTensor::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(paddle::framework::Vocab::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(paddle::framework::Strings::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType( + paddle::framework::FeedList::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(egr::VariableCompatTensor::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(paddle::prim::DescTensor::name()); + +} // namespace phi diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index f6fe845b30c..2894b450756 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,11 +1,11 @@ cc_library( imperative_flag SRCS flags.cc - DEPS gflags flags) + DEPS phi) cc_library( var_helper SRCS var_helper.cc - DEPS tensor selected_rows extended_tensor) + DEPS tensor phi) if(WITH_XPU) cc_library( prepared_operator @@ -20,8 +20,7 @@ if(WITH_XPU) op_kernel_type data_transform nan_inf_utils - scalar - int_array + phi var_helper profiler place) @@ -38,8 +37,7 @@ else() op_kernel_type data_transform nan_inf_utils - scalar - int_array + phi var_helper profiler place) @@ -47,14 +45,14 @@ endif() cc_library( layer SRCS layer.cc - DEPS prepared_operator math_function imperative_flag variable_helper - op_registry var_helper) + DEPS prepared_operator phi imperative_flag variable_helper op_registry + var_helper) add_subdirectory(jit) if(WITH_GPU) cc_library( layout_autotune SRCS layout_autotune.cc - DEPS op_info phi_backends) + DEPS op_info phi) else() cc_library( layout_autotune @@ -80,15 +78,15 @@ cc_library( cc_library( basic_engine SRCS basic_engine.cc - DEPS layer gradient_accumulator switch_autotune) + DEPS layer gradient_accumulator phi) cc_library( engine SRCS basic_engine.cc partial_grad_engine.cc - DEPS layer gradient_accumulator switch_autotune) + DEPS layer gradient_accumulator phi) cc_library( imperative_profiler SRCS profiler.cc - DEPS flags) + DEPS phi) if(NOT WIN32) if(WITH_NCCL OR WITH_RCCL) cc_library( @@ -174,12 +172,4 @@ endif() cc_library( gradient_accumulator SRCS gradient_accumulator.cc - DEPS blas - operator - lod_tensor - selected_rows_utils - selected_rows_functor - var_type_traits - layer - math_function - phi_tensor) + DEPS operator lod_tensor selected_rows_utils var_type_traits layer phi) diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index 237a7608160..037025405fc 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -32,14 +32,8 @@ endif() # fluid_modules exclude API-interface of inference/api and inference/capi_exp get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) -get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) -get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) set(utils_modules pretty_log string_helper benchmark) -if(WITH_CUSTOM_DEVICE) - set(fluid_modules ${fluid_modules} phi_capi) -endif() - add_subdirectory(api) # Create static inference library if needed @@ -51,7 +45,6 @@ set(STATIC_INFERENCE_API reset_tensor_array analysis_config paddle_pass_builder - phi ${mkldnn_quantizer_cfg}) set(OP_LIST @@ -64,16 +57,14 @@ set(KERNEL_LIST #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy if(WIN32 AND WITH_GPU) - cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} + cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API} ${utils_modules}) else() # message("${fluid_modules}") - # message("PHI_MODULES ${phi_modules}") - # message("${phi_kernels}") # message("${STATIC_INFERENCE_API}") # message("${utils_modules}") - create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} - ${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules}) + create_static_lib(paddle_inference ${fluid_modules} ${STATIC_INFERENCE_API} + ${utils_modules}) endif() if(NOT APPLE) @@ -103,7 +94,7 @@ set(SHARED_INFERENCE_SRCS # shared inference library deps list(REMOVE_ITEM fluid_modules standalone_executor interpretercore_garbage_collector) -set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor +set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor ${utils_modules}) if(WITH_CRYPTO) @@ -124,12 +115,6 @@ if(WITH_ONNXRUNTIME) ${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc) endif() -#export all symbols for paddle/phi/api/include/api.h on paddle_inference_shared, only for UNIX -if(UNIX) - set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS} - $) -endif() - # Create shared inference library cc_library( paddle_inference_shared SHARED @@ -141,12 +126,15 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules}) if(WIN32) set_property(TARGET paddle_inference_shared PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) - target_link_libraries(paddle_inference_shared gflags) + target_link_libraries(paddle_inference_shared phi) endif() set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME paddle_inference) -if(NOT APPLE AND NOT WIN32) +if(NOT APPLE + AND NOT WIN32 + AND NOT WITH_TESTING + AND NOT WITH_INFERENCE_API_TEST) # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac. set(LINK_FLAGS "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map") diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index b681e56d3b9..8ca1de1f63c 100755 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -41,7 +41,7 @@ if(WITH_CRYPTO) list(APPEND paddle_inference_api_deps paddle_crypto) endif() if(WITH_CUSTOM_DEVICE) - set(paddle_inference_api_deps ${paddle_inference_api_deps} phi_capi) + set(paddle_inference_api_deps ${paddle_inference_api_deps} phi) endif() cc_library( @@ -50,7 +50,7 @@ cc_library( DEPS ${paddle_inference_api_deps}) if(WIN32) - target_link_libraries(paddle_inference_api gflags) + target_link_libraries(paddle_inference_api phi) endif() set(inference_deps ${analysis_deps} paddle_inference_api analysis diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 18c036a1ebe..831fa36535d 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -72,7 +72,7 @@ #endif #ifdef PADDLE_WITH_MKLML -#include "paddle/fluid/platform/dynload/mklml.h" +#include "paddle/phi/backends/dynload/mklml.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -1121,7 +1121,7 @@ bool AnalysisPredictor::Run(const std::vector &inputs, // Frees unused memory allocated by the Intel® MKL Memory Allocator to // avoid memory leak. See: // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers - platform::dynload::MKL_Free_Buffers(); + phi::dynload::MKL_Free_Buffers(); #endif return true; } @@ -1185,7 +1185,7 @@ bool AnalysisPredictor::Run(const std::vector &inputs, // Frees unused memory allocated by the Intel® MKL Memory Allocator to // avoid memory leak. See: // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers - platform::dynload::MKL_Free_Buffers(); + phi::dynload::MKL_Free_Buffers(); #endif return true; } @@ -2100,7 +2100,7 @@ bool AnalysisPredictor::ZeroCopyRun() { // Frees unused memory allocated by the Intel® MKL Memory Allocator to // avoid memory leak. See: // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers - platform::dynload::MKL_Free_Buffers(); + phi::dynload::MKL_Free_Buffers(); #endif return true; } diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index 7a58b386ad6..e2c4b007c52 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -199,7 +199,7 @@ if(NOT WIN32) ${MATH_LIB} ${MKLDNN_LIB} glog - gflags + phi protobuf xxhash cryptopp diff --git a/paddle/fluid/inference/api/demo_ci/run.sh b/paddle/fluid/inference/api/demo_ci/run.sh index f11319d7665..50112b20f29 100755 --- a/paddle/fluid/inference/api/demo_ci/run.sh +++ b/paddle/fluid/inference/api/demo_ci/run.sh @@ -29,6 +29,7 @@ WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform cd `dirname $0` current_dir=`pwd` + if [ $2 == ON ]; then # You can export yourself if move the install path MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib diff --git a/paddle/fluid/inference/api/details/CMakeLists.txt b/paddle/fluid/inference/api/details/CMakeLists.txt index 5d2357d362e..105ff16747d 100644 --- a/paddle/fluid/inference/api/details/CMakeLists.txt +++ b/paddle/fluid/inference/api/details/CMakeLists.txt @@ -25,7 +25,7 @@ if(WITH_ONNXRUNTIME) cc_library( zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc - DEPS onnxruntime phi_enforce) + DEPS onnxruntime phi) else() cc_library( zero_copy_tensor @@ -34,7 +34,7 @@ else() cc_library( zero_copy_tensor_dummy SRCS zero_copy_tensor_dummy.cc - DEPS phi_enforce) + DEPS phi) endif() cc_test( diff --git a/paddle/fluid/inference/capi_exp/CMakeLists.txt b/paddle/fluid/inference/capi_exp/CMakeLists.txt index 26d76c280bd..30bafbf488a 100644 --- a/paddle/fluid/inference/capi_exp/CMakeLists.txt +++ b/paddle/fluid/inference/capi_exp/CMakeLists.txt @@ -39,7 +39,7 @@ if(APPLE) utf8proc cryptopp protobuf - gflags + phi cblas) endif() diff --git a/paddle/fluid/inference/goapi/test.sh b/paddle/fluid/inference/goapi/test.sh index cff9fd4aa7c..fbde661d177 100644 --- a/paddle/fluid/inference/goapi/test.sh +++ b/paddle/fluid/inference/goapi/test.sh @@ -23,7 +23,7 @@ fi # 2. set LD_LIBRARY_PATH export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/ - +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_ROOT}/build/paddle/phi/ # 3. go test go clean -testcache go test -v ./... diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index a52d6b1c39d..1437ef5f31a 100755 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -141,8 +141,7 @@ nv_test( nv_test( test_custom_plugin_creater SRCS test_custom_plugin_creater.cc - DEPS paddle_framework tensorrt_converter op_meta_info custom_operator - init_phi) + DEPS paddle_framework tensorrt_converter phi custom_operator init_phi) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 4dc408241f4..bb1d9e2e897 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -1,6 +1,6 @@ include(ExternalProject) -set(ALLOCATOR_DEPS place stats profiler phi_backends device_context) +set(ALLOCATOR_DEPS place stats profiler phi device_context) set(ALLOCATOR_SRCS allocator.cc cpu_allocator.cc @@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM) endif() if(WITH_GPU) - list(APPEND ALLOCATOR_DEPS phi_backends) + list(APPEND ALLOCATOR_DEPS phi) endif() if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2) diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 931372a0d9a..251ec771728 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -124,7 +124,7 @@ class CUDAGraphAllocator : underlying_allocator_(allocator) {} public: - ~CUDAGraphAllocator() { VLOG(10) << "CUDAGraphAllocator destructed"; } + ~CUDAGraphAllocator() {} static std::shared_ptr Create( const std::shared_ptr& allocator) { @@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) { if (ref_cnt == 0) { cuda_graph_map_.erase(id); cuda_graph_ref_cnt_.erase(ref_cnt_iter); - VLOG(10) << "Remove memory pool of CUDA Graph with memory ID " << id; } else { VLOG(10) << "Decrease memory pool ID " << id << " reference count to be " << ref_cnt; diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index b12ec19b9b9..aef36587ed5 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD) include(unity_build_rule.cmake) endif() -set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api get_expected_kernel_func) +set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func) register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) @@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM) endif() endif() -op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute) +op_library(lstm_op DEPS ${OP_HEADER_DEPS}) op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) @@ -136,17 +136,16 @@ if (WITH_DGC) endif() cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator) -cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute cudnn_workspace_helper) +cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows_utils lapack_function -lod_tensor maxouting unpooling pooling lod_rank_table context_project -sequence_pooling executor generator static_prim_api) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_utils +lod_tensor unpooling lod_rank_table context_project executor static_prim_api) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc_functor matrix_inverse matrix_solve) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} cos_sim_functor memory concat_and_split sampler sample_prob tree2col) +set(COMMON_OP_DEPS ${COMMON_OP_DEPS} beam_search) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions) -set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils) if(WITH_NCCL OR WITH_RCCL) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl) @@ -189,7 +188,7 @@ endif() copy_if_different(${pybind_file} ${pybind_file_final}) if (WITH_CUSTOM_DEVICE) -cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi_api) +cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi type_info) endif() if(NOT "${OP_LIST}" STREQUAL "") diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt index b700b2798fc..d1a77af60aa 100644 --- a/paddle/fluid/operators/cinn/CMakeLists.txt +++ b/paddle/fluid/operators/cinn/CMakeLists.txt @@ -7,7 +7,7 @@ cc_library( cc_library( cinn_launch_context SRCS cinn_launch_context.cc - DEPS ddim + DEPS phi lod_tensor scope proto_desc diff --git a/paddle/fluid/operators/collective/CMakeLists.txt b/paddle/fluid/operators/collective/CMakeLists.txt index 8d523f90ace..cef1390ed23 100644 --- a/paddle/fluid/operators/collective/CMakeLists.txt +++ b/paddle/fluid/operators/collective/CMakeLists.txt @@ -18,7 +18,7 @@ foreach(src ${OPS}) endforeach() if(WITH_GLOO) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper comm_context_manager) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi) endif() register_operators( @@ -31,8 +31,7 @@ register_operators( ${COLLECTIVE_DEPS}) if(WITH_NCCL OR WITH_RCCL) - set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper - comm_context_manager nccl_comm_context) + set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi) op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) endif() diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 1bca2068f83..554c701b11e 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(generate_proposal_labels_op SRCS generate_proposal_labels_op.cc) -detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc) -detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc) +detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi) +detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc box_decoder_and_assign_op.cu) diff --git a/paddle/fluid/operators/generator/CMakeLists.txt b/paddle/fluid/operators/generator/CMakeLists.txt index 43e8c158da0..124a4f21133 100644 --- a/paddle/fluid/operators/generator/CMakeLists.txt +++ b/paddle/fluid/operators/generator/CMakeLists.txt @@ -289,7 +289,7 @@ file(APPEND ${op_utils_header} # Automatically generate the registration code of all arg map functions # and compile the corresponding target to avoid frequent code conflicts # when writing to same file -register_op_utils(op_compat_infos DEPS op_utils) +register_op_utils(op_compat_infos DEPS phi) copy_if_different(${op_utils_header} ${op_utils_header_final}) diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 921076a4a14..6c3294ac5e2 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -17,11 +17,12 @@ limitations under the License. */ #include #include +#include "paddle/phi/core/flags.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_kernel.h" -DECLARE_int32(paddle_num_threads); +PHI_DECLARE_int32(paddle_num_threads); namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index c439ace9714..af14333b9d1 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -6,21 +6,20 @@ if(WITH_XPU) endif() # please add new math_library in alphabetical order -math_library(concat_and_split DEPS concat_and_split_functor) -math_library(context_project DEPS im2col math_function) +math_library(concat_and_split DEPS phi) +math_library(context_project DEPS phi) math_library(cos_sim_functor) math_library(depthwise_conv) math_library(sample_prob) -math_library(sampler DEPS generator) +math_library(sampler DEPS phi) -# math_library(math_function DEPS blas dense_tensor tensor) if(WITH_XPU) - math_library(beam_search DEPS math_function beam_search_xpu) + math_library(beam_search DEPS phi beam_search_xpu) else() - math_library(beam_search DEPS math_function) + math_library(beam_search DEPS phi) endif() math_library(unpooling) math_library(prelu) math_library(bert_encoder_functor) -math_library(tree2col DEPS math_function) +math_library(tree2col DEPS phi) diff --git a/paddle/fluid/operators/pscore/CMakeLists.txt b/paddle/fluid/operators/pscore/CMakeLists.txt index d2aa95c2fd3..5a397699951 100755 --- a/paddle/fluid/operators/pscore/CMakeLists.txt +++ b/paddle/fluid/operators/pscore/CMakeLists.txt @@ -20,7 +20,7 @@ if(WITH_ARM_BRPC) framework_proto sendrecv_rpc arm_brpc - gflags + phi glog snappy device_context) @@ -42,7 +42,7 @@ else() ssl crypto protobuf - gflags + phi glog zlib snappy diff --git a/paddle/fluid/operators/sequence_ops/CMakeLists.txt b/paddle/fluid/operators/sequence_ops/CMakeLists.txt index 06281b6f376..1bd10f19e03 100644 --- a/paddle/fluid/operators/sequence_ops/CMakeLists.txt +++ b/paddle/fluid/operators/sequence_ops/CMakeLists.txt @@ -6,5 +6,5 @@ endif() register_operators() if(WITH_UNITY_BUILD) - target_link_libraries(paddle_operators_sequence_ops_unity sequence_pooling) + target_link_libraries(paddle_operators_sequence_ops_unity phi) endif() diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 112aefbe7f2..31c1f61ed4b 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include -#include "paddle/fluid/platform/dynload/mklml.h" +#include "paddle/phi/backends/dynload/mklml.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 0907b05622d..e2efc315ca5 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -6,9 +6,9 @@ cc_library( cc_test( errors_test SRCS errors_test.cc - DEPS errors enforce) + DEPS phi enforce) -set(enforce_deps flags errors flags phi_enforce) +set(enforce_deps phi) if(WITH_GPU) set(enforce_deps ${enforce_deps} external_error_proto) endif() @@ -26,20 +26,20 @@ cc_test( cc_test( cpu_info_test SRCS cpu_info_test.cc - DEPS phi_backends) + DEPS phi) cc_test( os_info_test SRCS os_info_test.cc - DEPS phi_os_info) + DEPS phi) cc_library( place SRCS place.cc - DEPS enforce phi_place) + DEPS enforce phi) cc_test( place_test SRCS place_test.cc - DEPS place glog gflags) + DEPS place glog phi) if(WITH_MKLDNN) set(MKLDNN_CTX_DEPS mkldnn) @@ -104,7 +104,7 @@ endif() cc_library( init SRCS init.cc - DEPS device_context custom_kernel context_pool memcpy) + DEPS device_context phi memcpy) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies @@ -117,7 +117,6 @@ cc_library( xxhash ${STREAM_CALLBACK_DEPS} place - phi_place eigen3 cpu_helper framework_proto @@ -126,12 +125,8 @@ cc_library( ${MKLDNN_CTX_DEPS} ${dgc_deps} dlpack - cudnn_workspace_helper - ${XPU_CTX_DEPS} - phi_backends - phi_device_context - generator - phi_enforce) + phi + ${XPU_CTX_DEPS}) cc_library( collective_helper @@ -189,12 +184,12 @@ if(WITH_GPU) cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context - allocator phi_backends) + allocator phi) else() nv_library( cuda_graph_with_memory_pool SRCS cuda_graph_with_memory_pool.cc - DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi_backends) + DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi) endif() nv_test( device_context_test @@ -245,7 +240,7 @@ cc_test( cc_library( lodtensor_printer SRCS lodtensor_printer.cc - DEPS ddim + DEPS phi place tensor scope @@ -263,41 +258,30 @@ if(WITH_GPU) nv_library( profiler SRCS profiler.cc profiler.cu - DEPS phi_os_info - phi_device_tracer + DEPS phi gpu_info enforce dynload_cuda new_profiler stats op_proto_maker - shape_inference - phi_profiler) + shape_inference) elseif(WITH_ROCM) hip_library( profiler SRCS profiler.cc profiler.cu - DEPS phi_os_info - phi_device_tracer + DEPS phi gpu_info enforce new_profiler stats op_proto_maker - shape_inference - phi_profiler) + shape_inference) else() cc_library( profiler SRCS profiler.cc - DEPS phi_os_info - phi_device_tracer - enforce - new_profiler - stats - op_proto_maker - shape_inference - phi_profiler) + DEPS phi enforce new_profiler stats op_proto_maker shape_inference) endif() cc_test( @@ -333,7 +317,7 @@ if(WITH_GPU) nv_test( test_limit_gpu_memory SRCS test_limit_gpu_memory.cu - DEPS gpu_info flags) + DEPS gpu_info phi) nv_library( cuda_device_guard SRCS cuda_device_guard.cc @@ -348,7 +332,7 @@ if(WITH_ROCM) hip_test( test_limit_gpu_memory SRCS test_limit_gpu_memory.cu - DEPS gpu_info flags) + DEPS gpu_info phi) hip_library( cuda_device_guard SRCS cuda_device_guard.cc @@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32) cc_test( device_code_test SRCS device_code_test.cc - DEPS phi_backends lod_tensor) + DEPS phi lod_tensor) endif() endif() @@ -382,4 +366,4 @@ cc_library( cc_test( init_phi_test SRCS init_phi_test.cc - DEPS phi_tensor init_phi) + DEPS phi init_phi) diff --git a/paddle/fluid/platform/cpu_helper.cc b/paddle/fluid/platform/cpu_helper.cc index 8a9501c0dc7..af1640cfd9a 100644 --- a/paddle/fluid/platform/cpu_helper.cc +++ b/paddle/fluid/platform/cpu_helper.cc @@ -17,7 +17,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MKLML #include -#include "paddle/fluid/platform/dynload/mklml.h" +#include "paddle/phi/backends/dynload/mklml.h" #endif #ifdef PADDLE_USE_OPENBLAS @@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) { openblas_set_num_threads(real_num_threads); #elif defined(PADDLE_WITH_MKLML) int real_num_threads = num_threads > 1 ? num_threads : 1; - platform::dynload::MKL_Set_Num_Threads(real_num_threads); + phi::dynload::MKL_Set_Num_Threads(real_num_threads); omp_set_num_threads(real_num_threads); #elif defined(PADDLE_USE_REFERENCE_CBLAS) // cblas not support multi-thread diff --git a/paddle/fluid/platform/device/custom/CMakeLists.txt b/paddle/fluid/platform/device/custom/CMakeLists.txt index 3846111489f..8e081781e29 100644 --- a/paddle/fluid/platform/device/custom/CMakeLists.txt +++ b/paddle/fluid/platform/device/custom/CMakeLists.txt @@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE) cc_library( custom_device_resource_pool SRCS custom_device_resource_pool.cc - DEPS gflags glog enforce monitor) + DEPS phi glog enforce monitor) cc_test( custom_device_test SRCS custom_device_test.cc - DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator) + DEPS phi gradient_accumulator) endif() diff --git a/paddle/fluid/platform/device/gpu/CMakeLists.txt b/paddle/fluid/platform/device/gpu/CMakeLists.txt index a6f6bc232e6..897f8d3732b 100644 --- a/paddle/fluid/platform/device/gpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/gpu/CMakeLists.txt @@ -3,13 +3,7 @@ if(WITH_GPU) nv_library( gpu_info SRCS gpu_info.cc - DEPS phi_backends - gflags - glog - enforce - monitor - dynload_cuda - malloc) + DEPS phi glog enforce monitor dynload_cuda malloc) nv_test(cuda_helper_test SRCS cuda_helper_test.cu) nv_test( @@ -21,7 +15,7 @@ elseif(WITH_ROCM) hip_library( gpu_info SRCS gpu_info.cc - DEPS phi_backends gflags glog enforce monitor dynload_cuda) + DEPS phi glog enforce monitor dynload_cuda) hip_test(cuda_helper_test SRCS cuda_helper_test.cu) hip_test( diff --git a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt index 70a1c3fc3b0..2f09e25de27 100644 --- a/paddle/fluid/platform/device/xpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt @@ -14,23 +14,11 @@ set(XPU_CTX_DEPS cc_library( xpu_info SRCS xpu_info.cc - DEPS gflags - glog - enforce - xpulib - device_context - place - phi_backends) + DEPS glog enforce xpulib device_context place phi) cc_library( xpu_op_list SRCS xpu_op_list.cc - DEPS gflags - glog - enforce - xpulib - device_context - op_kernel_type - phi_backends) + DEPS glog enforce xpulib device_context op_kernel_type phi) cc_library( xpu_resource_pool SRCS xpu_resource_pool.cc diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 3cbbc32b400..976223be354 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( dynamic_loader SRCS dynamic_loader.cc - DEPS glog gflags enforce phi_dynamic_loader) + DEPS glog enforce phi) list( APPEND @@ -57,26 +57,20 @@ if(WITH_ROCM) hip_library( dynload_cuda SRCS ${HIP_SRCS} - DEPS dynamic_loader phi_dynload_cuda) + DEPS dynamic_loader phi) cc_library( dynload_warpctc SRCS warpctc.cc - DEPS dynamic_loader warpctc phi_dynload_warpctc) + DEPS dynamic_loader warpctc phi) else() nv_library( dynload_cuda SRCS ${CUDA_SRCS} - DEPS dynamic_loader phi_dynload_cuda) + DEPS dynamic_loader phi) cc_library( dynload_warpctc SRCS warpctc.cc - DEPS dynamic_loader warpctc phi_dynload_warpctc) -endif() -if(WITH_MKLML) - cc_library( - dynload_mklml - SRCS mklml.cc - DEPS dynamic_loader mklml phi_dynload_mklml) + DEPS dynamic_loader warpctc phi) endif() # TODO(TJ): add iomp, mkldnn? @@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL) cc_library( dynload_mklrt SRCS mklrt.cc - DEPS dynamic_loader phi_dynload_mklrt) + DEPS dynamic_loader phi) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) endif() diff --git a/paddle/fluid/platform/dynload/mklml.cc b/paddle/fluid/platform/dynload/mklml.cc deleted file mode 100644 index ff475b2312c..00000000000 --- a/paddle/fluid/platform/dynload/mklml.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/platform/dynload/mklml.h" - -namespace paddle { -namespace platform { -namespace dynload { - -#define DEFINE_WRAP(__name) DynLoad__##__name __name - -MKLML_ROUTINE_EACH(DEFINE_WRAP); - -#if !defined(_WIN32) -DEFINE_WRAP(mkl_scsrmm); -DEFINE_WRAP(mkl_dcsrmm); -#endif - -} // namespace dynload -} // namespace platform -} // namespace paddle diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h deleted file mode 100644 index 78cae9a0821..00000000000 --- a/paddle/fluid/platform/dynload/mklml.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include - -#include // NOLINT - -#include "paddle/phi/backends/dynload/mklml.h" - -namespace paddle { -namespace platform { -namespace dynload { - -/** - * The following macro definition can generate structs - * (for each function) to dynamic load mklml routine - * via operator overloading. - */ -#define DYNAMIC_LOAD_MKLML_WRAP(__name) \ - using DynLoad__##__name = phi::dynload::DynLoad__##__name; \ - extern DynLoad__##__name __name - -#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \ - DYNAMIC_LOAD_MKLML_WRAP(__name) - -#define MKLML_ROUTINE_EACH(__macro) \ - __macro(cblas_sgemm); \ - __macro(cblas_dgemm); \ - __macro(cblas_cgemm); \ - __macro(cblas_zgemm); \ - __macro(cblas_saxpy); \ - __macro(cblas_daxpy); \ - __macro(cblas_caxpy); \ - __macro(cblas_zaxpy); \ - __macro(cblas_scopy); \ - __macro(cblas_dcopy); \ - __macro(cblas_ccopy); \ - __macro(cblas_zcopy); \ - __macro(cblas_sgemv); \ - __macro(cblas_dgemv); \ - __macro(cblas_cgemv); \ - __macro(cblas_zgemv); \ - __macro(cblas_strsm); \ - __macro(cblas_dtrsm); \ - __macro(cblas_ctrsm); \ - __macro(cblas_ztrsm); \ - __macro(cblas_sgemm_alloc); \ - __macro(cblas_dgemm_alloc); \ - __macro(cblas_sgemm_pack); \ - __macro(cblas_dgemm_pack); \ - __macro(cblas_sgemm_compute); \ - __macro(cblas_dgemm_compute); \ - __macro(cblas_sgemm_free); \ - __macro(cblas_dgemm_free); \ - __macro(cblas_sgemm_batch); \ - __macro(cblas_dgemm_batch); \ - __macro(cblas_cgemm_batch); \ - __macro(cblas_zgemm_batch); \ - __macro(cblas_sdot); \ - __macro(cblas_ddot); \ - __macro(cblas_sasum); \ - __macro(cblas_dasum); \ - __macro(cblas_isamax); \ - __macro(cblas_idamax); \ - __macro(cblas_sscal); \ - __macro(cblas_dscal); \ - __macro(vsAdd); \ - __macro(vdAdd); \ - __macro(vsSub); \ - __macro(vdSub); \ - __macro(vsMul); \ - __macro(vdMul); \ - __macro(vsDiv); \ - __macro(vdDiv); \ - __macro(vsExp); \ - __macro(vdExp); \ - __macro(vsSqr); \ - __macro(vdSqr); \ - __macro(vsPowx); \ - __macro(vdPowx); \ - __macro(vsInv); \ - __macro(vdInv); \ - __macro(vmsErf); \ - __macro(vmdErf); \ - __macro(MKL_Free_Buffers); \ - __macro(MKL_Set_Num_Threads); \ - __macro(MKL_Get_Max_Threads); - -MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP); - -#if !defined(_WIN32) -DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm); -DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm); -#endif - -#undef DYNAMIC_LOAD_MKLML_WRAP - -} // namespace dynload -} // namespace platform -} // namespace paddle diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index cd8e8ea350f..2c65023988d 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, DEFINE_bool(enable_record_memory, false, "enable memory recorder"); +#if defined(_WIN32) && defined(PHI_SHARED) +phi::ProfilerState phi::ProfilerHelper::g_state = phi::ProfilerState::kDisabled; +bool phi::ProfilerHelper::g_enable_nvprof_hook = false; +thread_local uint64_t phi::ProfilerHelper::g_thread_id; +uint32_t phi::ProfilerHelper::g_next_thread_id = 0; +std::mutex phi::ProfilerHelper::g_all_event_lists_mutex; +std::list>> + phi::ProfilerHelper::g_all_event_lists; +thread_local std::shared_ptr> + phi::ProfilerHelper::g_event_list; +std::list>> + phi::ProfilerHelper::g_all_mem_event_lists; +thread_local std::shared_ptr> + phi::ProfilerHelper::g_mem_event_list; +std::mutex phi::ProfilerHelper::g_all_mem_event_lists_mutex; +#endif namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/profiler/CMakeLists.txt b/paddle/fluid/platform/profiler/CMakeLists.txt index 66f07791ad0..df5b9818d69 100644 --- a/paddle/fluid/platform/profiler/CMakeLists.txt +++ b/paddle/fluid/platform/profiler/CMakeLists.txt @@ -1,7 +1,7 @@ cc_library( host_tracer SRCS host_tracer.cc - DEPS framework_proto enforce ddim var_type_traits) + DEPS framework_proto enforce phi var_type_traits) cc_library( cuda_tracer SRCS cuda_tracer.cc cupti_data_process.cc @@ -28,7 +28,7 @@ cc_library( cc_library( cpu_utilization SRCS cpu_utilization.cc - DEPS phi_backends phi_os_info enforce glog) + DEPS phi enforce glog) cc_library( new_profiler SRCS profiler.cc diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index bb0c614ba03..382b9d24aaa 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -28,7 +28,6 @@ set(PYBIND_DEPS gloo_wrapper infer_io_utils heter_wrapper - generator op_version_registry ps_gpu_wrapper custom_operator @@ -37,16 +36,13 @@ set(PYBIND_DEPS fleet_executor global_utils phi_utils - tcp_store - comm_context_manager + phi new_profiler - auto_parallel jit_layer jit_property prim_utils - operants_manager - phi_tensor_operants - static_tensor_operants) + static_tensor_operants + type_info) if(WITH_PSCORE) set(PYBIND_DEPS ${PYBIND_DEPS} ps_service) @@ -65,7 +61,7 @@ if(WITH_RPC) zlib leveldb snappy - gflags + phi glog) endif() if(WITH_GPU OR WITH_ROCM) @@ -148,7 +144,6 @@ set(PYBIND_SRCS auto_parallel_py.cc) if(WITH_CUSTOM_DEVICE) - set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi) set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry) endif() @@ -334,6 +329,14 @@ if(WITH_PYTHON) ")\n" "exit /b 0") + if(WITH_PHI_SHARED) + add_custom_command( + OUTPUT ${op_impl_path}/phi.dll + COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${op_impl_path} + DEPENDS phi) + list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll) + endif() + if(${CBLAS_PROVIDER} STREQUAL MKLML) add_custom_command( OUTPUT ${op_impl_path}/libiomp5md.dll @@ -481,10 +484,8 @@ if(WITH_PYTHON) list(APPEND PYBIND_DEPS python) list(APPEND PYBIND_DEPS custom_operator) list(APPEND PYBIND_DEPS custom_operator_node) - list(APPEND PYBIND_DEPS tensor_api) list(APPEND PYBIND_DEPS eager_tensor_operants) list(APPEND PYBIND_DEPS pybind_util) - list(APPEND PYBIND_DEPS flags) endif() # On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so, diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index a78831efc3b..3f49622bd04 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -38,7 +38,9 @@ limitations under the License. */ #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" -DECLARE_bool(check_nan_inf); +#include "paddle/phi/core/flags.h" + +PHI_DECLARE_bool(check_nan_inf); namespace paddle { namespace pybind { diff --git a/paddle/phi/CMakeLists.txt b/paddle/phi/CMakeLists.txt index f14f4ee9880..2395e024090 100644 --- a/paddle/phi/CMakeLists.txt +++ b/paddle/phi/CMakeLists.txt @@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h) # phi auto cmake utils include(phi) +set(common_srcs CACHE INTERNAL "" FORCE) +set(api_srcs CACHE INTERNAL "" FORCE) +set(capi_srcs CACHE INTERNAL "" FORCE) +set(core_srcs CACHE INTERNAL "" FORCE) +set(backends_srcs CACHE INTERNAL "" FORCE) +set(kernels_srcs CACHE INTERNAL "" FORCE) +set(infermeta_srcs CACHE INTERNAL "" FORCE) +#set(excluded_srcs CACHE INTERNAL "" FORCE) + # paddle experimental common components add_subdirectory(common) @@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE) add_subdirectory(capi) endif() -# make an unity target for compile deps set(PHI_DEPS - convert_utils - dense_tensor - phi_backends - kernel_factory - kernel_context - arg_map_context - infermeta - lod_utils - sparse_csr_tensor - sparse_coo_tensor - string_tensor - api_scalar - api_int_array - extended_tensor - dist_attr - dist_mapper) - -get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) -set(PHI_DEPS ${PHI_DEPS} ${phi_kernels}) - -cc_library(phi DEPS ${PHI_DEPS}) + phi_profiler_proto + auto_parallel_proto + gflags + glog + warpctc + warprnnt + eigen3 + xxhash + cblas + utf8proc) + +if(WITH_GPU) + list(APPEND PHI_DEPS external_error_proto) +endif() + +if(WITH_ASCEND_CL) + list(APPEND PHI_DEPS npu_hccl) +endif() + +if(WITH_FLASHATTN) + list(APPEND PHI_DEPS flashattn) +endif() + +if(WITH_XBYAK) + list(APPEND PHI_DEPS xbyak) +endif() + +if(WITH_MKLDNN) + list(APPEND PHI_DEPS mkldnn) +endif() + +if(WITH_GLOO) + list(APPEND PHI_DEPS gloo) +endif() + +if(WITH_CUDNN_FRONTEND) + list(APPEND PHI_DEPS cudnn-frontend) +endif() + +if(WITH_POCKETFFT) + list(APPEND PHI_DEPS pocketfft) +endif() + +if(WITH_MKLML) + list(APPEND PHI_DEPS pocketfft dynload_mklml) +endif() + +if(WITH_XPU) + list(APPEND PHI_DEPS xpulib) +endif() + +set(PHI_SRCS + ${common_srcs} + ${api_srcs} + ${core_srcs} + ${backends_srcs} + ${kernels_srcs} + ${infermeta_srcs} + ${capi_srcs}) + +if(WITH_PHI_SHARED) + set(PHI_BUILD_TYPE + SHARED + CACHE INTERNAL "" FORCE) +else() + set(PHI_BUILD_TYPE + STATIC + CACHE INTERNAL "" FORCE) +endif() + +if(WITH_GPU) + add_definitions(-DCUDA_REAL_ARCHS=${NVCC_FLAGS_EXTRA_real_archs} + )# for backends/gpu/gpu_resources.cc + nv_library( + phi ${PHI_BUILD_TYPE} + SRCS ${PHI_SRCS} + DEPS ${PHI_DEPS}) +elseif(WITH_ROCM) + hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS}) + target_link_libraries(phi ${PHI_DEPS}) +elseif(WITH_XPU_KP) + xpu_library( + phi ${PHI_BUILD_TYPE} + SRCS ${PHI_SRCS} + DEPS ${PHI_DEPS}) +else() + cc_library( + phi ${PHI_BUILD_TYPE} + SRCS ${PHI_SRCS} + DEPS ${PHI_DEPS}) +endif() + +if(WIN32) + target_link_libraries(phi shlwapi.lib) +endif() + +if(WIN32) + if(WITH_PHI_SHARED) + set_property(TARGET phi PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) + set(PHI_NAME + phi.dll + CACHE INTERNAL "" FORCE) + else() + set(PHI_NAME + phi.lib + CACHE INTERNAL "" FORCE) + endif() +elseif(APPLE) + if(WITH_PHI_SHARED) + set(PHI_NAME + libphi.dylib + CACHE INTERNAL "" FORCE) + else() + set(PHI_NAME + libphi.a + CACHE INTERNAL "" FORCE) + endif() +else() + if(WITH_PHI_SHARED) + set(PHI_NAME + libphi.so + CACHE INTERNAL "" FORCE) + else() + set(PHI_NAME + libphi.a + CACHE INTERNAL "" FORCE) + endif() +endif() + +set(PHI_LIB + "${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}" + CACHE FILEPATH "PHI Library" FORCE) + +if(MKL_FOUND AND WITH_ONEMKL) + target_include_directories(phi PRIVATE ${MKL_INCLUDE}) +endif() + +add_dependencies(phi extern_lapack) +if(WITH_CUTLASS) + add_dependencies(phi cutlass_codegen) + add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION" + )# for memory_efficient_attention.h +endif() +if(WITH_FLASHATTN) + add_dependencies(phi flashattn) +endif() set(phi_extension_header_file ${CMAKE_CURRENT_SOURCE_DIR}/extension.h diff --git a/paddle/phi/api/CMakeLists.txt b/paddle/phi/api/CMakeLists.txt index 854c2d2fbfc..1827dfbeb7f 100644 --- a/paddle/phi/api/CMakeLists.txt +++ b/paddle/phi/api/CMakeLists.txt @@ -1,7 +1,2 @@ add_subdirectory(profiler) add_subdirectory(lib) -cc_library( - phi_api - SRCS all.cc - DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api - strings_api) diff --git a/paddle/phi/api/all.cc b/paddle/phi/api/all.cc deleted file mode 100644 index 20f3a492f71..00000000000 --- a/paddle/phi/api/all.cc +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/api/all.h" - -namespace paddle { -namespace experimental {} // namespace experimental -} // namespace paddle diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index 21a433df4b8..73a784a6eb9 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext { void EmplaceBackOutput(Tensor&& output); void EmplaceBackOutputs(const std::vector& outputs); void EmplaceBackAttr(paddle::any attr); - void EmplaceBackAttrs(const std::vector& attrs) { - attrs_ = std::move(attrs); - } + void EmplaceBackAttrs(const std::vector& attrs); const std::pair& InputRangeAt(size_t idx) const; const std::pair& OutputRangeAt(size_t idx) const; @@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext { paddle::optional OptionalInputAt(size_t idx); paddle::optional> OptionalInputsBetween(size_t start, size_t end); - const std::vector& Attrs() const { return attrs_; } - const std::vector>& InputRange() { - return input_range_; - } - const std::vector>& OutputRange() { - return output_range_; - } + const std::vector& Attrs() const; + const std::vector>& InputRange(); + const std::vector>& OutputRange(); Tensor* MutableOutputAt(size_t idx); std::vector MutableOutputBetween(size_t start, size_t end); std::vector OutputsBetween(size_t start, size_t end); @@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo { //////////////// Op Meta Info Helper ///////////////// class OpMetaInfoHelper { public: - static const std::string& GetOpName(const paddle::OpMetaInfo& info) { - return info.name_; - } + static const std::string& GetOpName(const paddle::OpMetaInfo& info); static const std::vector& GetInputs( - const paddle::OpMetaInfo& info) { - return info.inputs_; - } + const paddle::OpMetaInfo& info); static const std::vector& GetOutputs( - const paddle::OpMetaInfo& info) { - return info.outputs_; - } + const paddle::OpMetaInfo& info); static const std::vector& GetAttrs( - const paddle::OpMetaInfo& info) { - return info.attrs_; - } + const paddle::OpMetaInfo& info); static const std::unordered_map& GetInplaceMap( - const paddle::OpMetaInfo& info) { - return info.inplace_map_; - } + const paddle::OpMetaInfo& info); static const std::unordered_map& - GetInplaceReverseMap(const paddle::OpMetaInfo& info) { - return info.inplace_reverse_map_; - } - static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) { - return info.kernel_fn_; - } - static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) { - return info.infer_shape_fn_; - } - static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) { - return info.infer_dtype_fn_; - } + GetInplaceReverseMap(const paddle::OpMetaInfo& info); + static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info); + static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info); + static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info); }; //////////////// Op Meta Info Map ///////////////// diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index dd1c1637acf..b68db1f45fa 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -410,7 +410,7 @@ class PADDLE_API Tensor final { * * @return const std::string& */ - const std::string& name() const { return name_; } + const std::string& name() const; /** * @brief Set name of Tensor. @@ -419,7 +419,7 @@ class PADDLE_API Tensor final { * * @param const std::string& name */ - void set_name(const std::string& name) { name_ = name; } + void set_name(const std::string& name); /* Part 5: Data Transform methods */ /* Alert!!!!: All copy method can only deep copy impl, autograd info only be diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index 03b75ee6760..1bf3883b083 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -1,38 +1,3 @@ -if(WITH_GPU) - nv_library( - phi_tensor_raw - SRCS tensor.cc - DEPS tensor_base - dense_tensor - phi_enforce - context_pool - tensor_api - int_array - scalar) -elseif(WITH_ROCM) - hip_library( - phi_tensor_raw - SRCS tensor.cc - DEPS tensor_base - dense_tensor - phi_enforce - context_pool - tensor_api - int_array - scalar) -else() - cc_library( - phi_tensor_raw - SRCS tensor.cc - DEPS tensor_base - dense_tensor - phi_enforce - context_pool - tensor_api - int_array - scalar) -endif() - set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py) # forward api file @@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND) find_package(PythonInterp REQUIRED) endif() +execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml) + # generate forward api -add_custom_command( - OUTPUT ${api_header_file} ${api_source_file} - COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} ${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp} - --api_source_path ${api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} - ${api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} - ${api_source_file} - COMMENT "copy_if_different ${api_header_file} ${api_source_file}" - DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file} - ${api_gen_base} - VERBATIM) + --api_source_path ${api_source_file_tmp}) # generate backward api -add_custom_command( - OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp} - ${bw_api_source_file_tmp} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path - ${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp} - ${bw_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} - ${bw_api_source_file} - COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}" - DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base} - ${legacy_bw_api_yaml_file} - VERBATIM) + ${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp}) # generate fused_op api -add_custom_command( - OUTPUT ${fused_api_header_file} ${fused_api_source_file} - COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file} --is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp} - --api_source_path ${fused_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_header_file_tmp} - ${fused_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_source_file_tmp} - ${fused_api_source_file} - COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}" - DEPENDS ${fused_api_yaml_file} ${api_gen_file} ${api_gen_base} - VERBATIM) + --api_source_path ${fused_api_source_file_tmp}) # generate fused_op backward api -add_custom_command( - OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file} - ${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path ${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path ${fused_bw_api_header_file_tmp} --backward_source_path - ${fused_bw_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp} - ${fused_bw_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp} - ${fused_bw_api_source_file} - COMMENT - "copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}" - DEPENDS ${fused_bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base} - VERBATIM) + ${fused_bw_api_source_file_tmp}) # generate sparse api -add_custom_command( - OUTPUT ${sparse_api_header_file} ${sparse_api_source_file} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path ${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp} - --api_source_path ${sparse_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp} - ${sparse_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp} - ${sparse_api_source_file} - COMMENT - "copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}" - DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base} - ${api_gen_file} - VERBATIM) + --api_source_path ${sparse_api_source_file_tmp}) # generate backward sparse api -add_custom_command( - OUTPUT ${sparse_bw_api_header_file} ${sparse_bw_api_source_file} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path ${sparse_bw_api_yaml_file} --api_header_path ${sparse_bw_api_header_file_tmp} --api_source_path - ${sparse_bw_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp} - ${sparse_bw_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp} - ${sparse_bw_api_source_file} - COMMENT - "copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}" - DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base} - ${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file} - VERBATIM) + ${sparse_bw_api_source_file_tmp}) # generate strings api -add_custom_command( - OUTPUT ${strings_api_header_file} ${strings_api_source_file} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path ${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp} - --api_source_path ${strings_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp} - ${strings_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp} - ${strings_api_source_file} - COMMENT - "copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}" - DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base} - ${api_gen_file} - VERBATIM) + --api_source_path ${strings_api_source_file_tmp}) # generate dygraph(intermediate) api -add_custom_command( - OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file} ${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file} --dygraph_api_header_path ${dygraph_api_header_file_tmp} - --dygraph_api_source_path ${dygraph_api_source_file_tmp} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp} - ${dygraph_api_header_file} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp} - ${dygraph_api_source_file} - DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${sparse_api_yaml_file} - ${im_api_gen_file} ${api_gen_base} ${api_gen_file} - VERBATIM) + --dygraph_api_source_path ${dygraph_api_source_file_tmp}) # generate wrapped infermeta -add_custom_command( - OUTPUT ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file} +execute_process( COMMAND ${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path ${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path ${wrapped_infermeta_header_file} --wrapped_infermeta_source_path - ${wrapped_infermeta_source_file} - DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${wrapped_infermeta_gen_file} - ${api_gen_base} - VERBATIM) + ${wrapped_infermeta_source_file}) # generate tensor and tensor operants file message("create or copy auto-geneated tensor files") -execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml) execute_process( WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator COMMAND @@ -324,154 +209,70 @@ if(${_result}) message(FATAL_ERROR "tensor codegen failed, exiting.") endif() -set(generated_tensor_files - "${operants_base_file}" "${tensor_api_source_file}" - "${phi_tensor_operants_header_file}" "${phi_tensor_operants_source_file}" - "${operants_manager_header_file}" "${operants_manager_source_file}") +set(generated_files + "${operants_base_file}" + "${tensor_api_source_file}" + "${phi_tensor_operants_header_file}" + "${phi_tensor_operants_source_file}" + "${operants_manager_header_file}" + "${operants_manager_source_file}" + "${wrapped_infermeta_source_file}" + "${api_source_file}" + "${api_header_file}" + "${bw_api_source_file}" + "${bw_api_header_file}" + "${fused_api_source_file}" + "${fused_api_header_file}" + "${fused_bw_api_source_file}" + "${fused_bw_api_header_file}" + "${sparse_api_source_file}" + "${sparse_api_header_file}" + "${sparse_bw_api_source_file}" + "${sparse_bw_api_header_file}" + "${dygraph_api_source_file}" + "${dygraph_api_header_file}" + "${strings_api_source_file}" + "${strings_api_header_file}") -foreach(generated_tensor_file ${generated_tensor_files}) - if(EXISTS "${generated_tensor_file}.tmp" AND EXISTS - "${generated_tensor_file}") - execute_process( - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${generated_tensor_file}.tmp" "${generated_tensor_file}") - message( - "copy if different ${generated_tensor_file}.tmp ${generated_tensor_file}") - elseif(EXISTS "${generated_tensor_file}.tmp") - execute_process( - COMMAND ${CMAKE_COMMAND} -E copy "${generated_tensor_file}.tmp" - "${generated_tensor_file}") - message("copy ${generated_tensor_file}.tmp ${generated_tensor_file}") +foreach(generated_file ${generated_files}) + if(EXISTS "${generated_file}.tmp" AND EXISTS "${generated_file}") + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${generated_file}.tmp" "${generated_file}") + message("copy if different ${generated_file}.tmp ${generated_file}") + elseif(EXISTS "${generated_file}.tmp") + execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_file}.tmp" + "${generated_file}") + message("copy ${generated_file}.tmp ${generated_file}") endif() endforeach() -cc_library( - op_meta_info - SRCS op_meta_info.cc - DEPS phi_tensor_raw) -cc_library( - wrapped_infermeta - SRCS ${wrapped_infermeta_source_file} - DEPS phi) -cc_library( - context_pool - SRCS context_pool.cc - DEPS phi_backends phi_enforce place init phi_device_context) -cc_library( - api_tensor_utils - SRCS tensor_utils.cc - DEPS phi_tensor_raw) - -cc_library( - kernel_dispatch - SRCS kernel_dispatch.cc - DEPS phi_tensor_raw phi_backends kernel_factory context_pool) -cc_library( - api_gen_utils - SRCS api_gen_utils.cc - DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor - infermeta_utils) -cc_library( - phi_data_transform - SRCS data_transform.cc - DEPS phi_tensor_raw phi tensor) -cc_library( - api_custom_impl - SRCS api_custom_impl.cc - DEPS phi_tensor_raw - phi - kernel_dispatch - api_gen_utils - backward_infermeta - phi_data_transform - phi_profiler) -cc_library( - phi_function_api - SRCS ${api_source_file} ${fused_api_source_file} - DEPS phi_tensor_raw - phi - kernel_dispatch - api_gen_utils - phi_data_transform - api_custom_impl - api_tensor_utils - phi_profiler) -cc_library( - phi_bw_function_api - SRCS ${bw_api_source_file} ${fused_bw_api_source_file} - DEPS phi_tensor_raw - phi - kernel_dispatch - api_gen_utils - backward_infermeta - sparse_backward_infermeta - phi_data_transform - phi_function_api - api_custom_impl - global_utils - phi_profiler) -cc_library( - sparse_api - SRCS ${sparse_api_source_file} - DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler) -cc_library( - sparse_bw_api - SRCS ${sparse_bw_api_source_file} - DEPS phi_tensor_raw - phi - kernel_dispatch - api_gen_utils - sparse_api - sparse_backward_infermeta - phi_profiler) -cc_library( - phi_dygraph_api - SRCS ${dygraph_api_source_file} - DEPS phi_tensor_raw - phi - kernel_dispatch - api_gen_utils - phi_data_transform - phi_function_api - sparse_api - phi_profiler) -cc_library( - strings_api - SRCS ${strings_api_source_file} - DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler) -cc_library( - phi_tensor - SRCS tensor_method.cc - DEPS phi_tensor_raw - phi_function_api - api_gen_utils - kernel_dispatch - infermeta - sparse_infermeta - sparse_api - strings_api) -cc_library( - tensor_copy - SRCS tensor_copy.cc - DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils) -cc_library( - api_scalar - SRCS scalar.cc - DEPS tensor_copy) -cc_library( - api_int_array - SRCS int_array.cc - DEPS tensor_copy) - -cc_library( - phi_tensor_operants - SRCS ${phi_tensor_operants_source_file} - DEPS phi_function_api) -cc_library( - operants_manager - SRCS ${operants_manager_source_file} - DEPS phi_enforce) -cc_library( - tensor_api - SRCS ${tensor_api_source_file} - DEPS operants_manager) +collect_srcs( + api_srcs + SRCS + tensor.cc + op_meta_info.cc + context_pool.cc + tensor_utils.cc + kernel_dispatch.cc + api_gen_utils.cc + data_transform.cc + api_custom_impl.cc + tensor_method.cc + tensor_copy.cc + scalar.cc + int_array.cc) +collect_generated_srcs( + api_srcs + SRCS + ${wrapped_infermeta_source_file} + ${api_source_file} + ${bw_api_source_file} + ${fused_api_source_file} + ${fused_bw_api_source_file} + ${sparse_api_source_file} + ${sparse_bw_api_source_file} + ${dygraph_api_source_file} + ${strings_api_source_file} + ${phi_tensor_operants_source_file} + ${operants_manager_source_file} + ${tensor_api_source_file}) diff --git a/paddle/phi/api/lib/context_pool.cc b/paddle/phi/api/lib/context_pool.cc index 6b5b7790e74..292bd8a7e47 100644 --- a/paddle/phi/api/lib/context_pool.cc +++ b/paddle/phi/api/lib/context_pool.cc @@ -65,11 +65,12 @@ PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) { - PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU, - phi::errors::InvalidArgument( - "GetCurrentCUDAStream only supports GPUPlace input. " - "However, your input is place=%s", - place)); + PADDLE_ENFORCE_EQ(place.GetType(), + phi::AllocationType::GPU, + phi::errors::InvalidArgument( + "GetCurrentCUDAStream only supports GPUPlace input. " + "However, your input is place=%s", + place)); auto& pool = paddle::experimental::DeviceContextPool::Instance(); const phi::GPUContext* dev_ctx = diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc index e1221969cf2..90335269536 100644 --- a/paddle/phi/api/lib/op_meta_info.cc +++ b/paddle/phi/api/lib/op_meta_info.cc @@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) { << " has value of type: " << attrs_[attrs_.size() - 1].type().name(); } +void CustomOpKernelContext::EmplaceBackAttrs( + const std::vector& attrs) { + attrs_ = std::move(attrs); +} + const Tensor& CustomOpKernelContext::InputAt(size_t idx) const { return inputs_.at(idx); } @@ -132,6 +137,10 @@ std::vector CustomOpKernelContext::InputsBetween(size_t start, return rlt; } +const std::vector& CustomOpKernelContext::Attrs() const { + return attrs_; +} + Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) { return inputs_.at(idx); } @@ -193,6 +202,16 @@ const std::pair& CustomOpKernelContext::OutputRangeAt( return output_range_.at(idx); } +const std::vector>& +CustomOpKernelContext::InputRange() { + return input_range_; +} + +const std::vector>& +CustomOpKernelContext::OutputRange() { + return output_range_; +} + void CustomOpKernelContext::ConstructInplaceIndex( const std::vector& inputs, const std::vector& outputs, @@ -208,8 +227,9 @@ void CustomOpKernelContext::ConstructInplaceIndex( continue; } auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input)); - PADDLE_ENFORCE( - out_iter != outputs.end(), + PADDLE_ENFORCE_NE( + out_iter, + outputs.end(), phi::errors::NotFound("Can't find the mapped value of %s, please check " "the input of `Inplace` again and make " "sure you registered your op accurately. ", @@ -253,8 +273,9 @@ void CustomOpKernelContext::AssignInplaceOutputs() { size_t out_start_idx = output_range_[pair.second].first; size_t out_end_idx = output_range_[pair.second].second; size_t assign_tensor_size = in_end_idx - in_start_idx; - PADDLE_ENFORCE( - assign_tensor_size == out_end_idx - out_start_idx, + PADDLE_ENFORCE_EQ( + assign_tensor_size, + out_end_idx - out_start_idx, phi::errors::OutOfRange("When assigning inplaced tensor, Input vector " "size %d mismatch output vector size %d", in_end_idx - in_start_idx, @@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) { return *this; } +//////////////// Op Meta Info Helper ///////////////// +const std::string& OpMetaInfoHelper::GetOpName(const paddle::OpMetaInfo& info) { + return info.name_; +} +const std::vector& OpMetaInfoHelper::GetInputs( + const paddle::OpMetaInfo& info) { + return info.inputs_; +} +const std::vector& OpMetaInfoHelper::GetOutputs( + const paddle::OpMetaInfo& info) { + return info.outputs_; +} +const std::vector& OpMetaInfoHelper::GetAttrs( + const paddle::OpMetaInfo& info) { + return info.attrs_; +} +const std::unordered_map& +OpMetaInfoHelper::GetInplaceMap(const paddle::OpMetaInfo& info) { + return info.inplace_map_; +} +const std::unordered_map& +OpMetaInfoHelper::GetInplaceReverseMap(const paddle::OpMetaInfo& info) { + return info.inplace_reverse_map_; +} +const KernelFunc& OpMetaInfoHelper::GetKernelFn( + const paddle::OpMetaInfo& info) { + return info.kernel_fn_; +} +const InferShapeFunc& OpMetaInfoHelper::GetInferShapeFn( + const paddle::OpMetaInfo& info) { + return info.infer_shape_fn_; +} +const InferDtypeFunc& OpMetaInfoHelper::GetInferDtypeFn( + const paddle::OpMetaInfo& info) { + return info.infer_dtype_fn_; +} + //////////////// Op Meta Info Map ///////////////// std::vector& OpMetaInfoMap::operator[](const std::string& name) { @@ -414,21 +472,23 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap( const std::vector& outputs = OpMetaInfoHelper::GetOutputs(*info_ptr_); for (const auto& pair : inplace_map) { - PADDLE_ENFORCE( - std::find(inputs.begin(), inputs.end(), pair.first) != inputs.cend(), + PADDLE_ENFORCE_NE( + std::find(inputs.begin(), inputs.end(), pair.first), + inputs.cend(), phi::errors::PreconditionNotMet( "The register of operator %s's `SetInplaceMap` failed. " "Please make sure: 1. Call `Inputs` and `Outputs` before " "`SetInplaceMap`; 2. The keys of inplace_map are inside `Inputs`", name_)); - PADDLE_ENFORCE(std::find(outputs.begin(), outputs.end(), pair.second) != - outputs.cend(), - phi::errors::PreconditionNotMet( - "The register of operator %s's `SetInplaceMap` failed. " - "Please make sure: 1. Call `Inputs` and `Outputs` " - "before `SetInplaceMap`; 2. The values of inplace_map " - "are inside `Outputs`", - name_)); + PADDLE_ENFORCE_NE( + std::find(outputs.begin(), outputs.end(), pair.second), + outputs.cend(), + phi::errors::PreconditionNotMet( + "The register of operator %s's `SetInplaceMap` failed. " + "Please make sure: 1. Call `Inputs` and `Outputs` " + "before `SetInplaceMap`; 2. The values of inplace_map " + "are inside `Outputs`", + name_)); } info_ptr_->SetInplaceMap( std::forward>(inplace_map)); diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 634c37933cb..e9c68367b16 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const { } #endif +const std::string &Tensor::name() const { return name_; } + +void Tensor::set_name(const std::string &name) { name_ = name; } + /* Part 5: Status utils methods */ bool Tensor::defined() const { return impl_ != nullptr; } diff --git a/paddle/phi/api/profiler/CMakeLists.txt b/paddle/phi/api/profiler/CMakeLists.txt index 14e3ace536a..ec569fe9fbc 100644 --- a/paddle/phi/api/profiler/CMakeLists.txt +++ b/paddle/phi/api/profiler/CMakeLists.txt @@ -26,16 +26,4 @@ if(WITH_PYTHON AND EXISTS ${PADDLE_BINARY_DIR}) endif() endif() -if(WITH_GPU OR WITH_ROCM) - set(GPU_CTX_DEPS dynload_cuda dynamic_loader) -endif() - -cc_library( - phi_device_tracer - SRCS device_tracer.cc - DEPS phi_profiler_proto ${GPU_CTX_DEPS}) - -cc_library( - phi_profiler - SRCS profiler.cc - DEPS phi_os_info phi_device_tracer phi_enforce) +collect_srcs(api_srcs SRCS device_tracer.cc profiler.cc) diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index 828437c8f2a..3ec479398a2 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -2,17 +2,6 @@ add_subdirectory(dynload) add_subdirectory(gpu) set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc) -set(BACKENDS_DEPS - enforce - place - flags - eigen3 - phi_device_context - generator - phi_os_info) -if(WITH_XBYAK) - list(APPEND BACKENDS_DEPS xbyak) -endif() if(NOT APPLE AND NOT WIN32) list(APPEND BACKENDS_SRCS device_code.cc) @@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM) gpu/gpu_resources.cc) if(WITH_GPU) list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc) - set_source_files_properties( - gpu/gpu_resources.cc - PROPERTIES COMPILE_FLAGS - "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"") - endif() if(WITH_ROCM) list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc) endif() - list(APPEND BACKENDS_DEPS phi_dynload_cuda) endif() if(WITH_XPU) @@ -45,7 +28,6 @@ if(WITH_MKLDNN) list(APPEND BACKENDS_SRCS onednn/onednn_context.cc) list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc) list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc) - list(APPEND BACKENDS_DEPS mkldnn) endif() list( @@ -55,26 +37,25 @@ list( device_guard.cc stream.cc event.cc - device_base.cc device_manager.cc context_pool.cc) +if(WITH_GPU + OR WITH_ROCM + OR WITH_CUSTOM_DEVICE) + list(APPEND BACKENDS_SRCS device_base.cc) +endif() + if(WITH_CUSTOM_DEVICE) list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc custom/custom_device_op_list.cc) endif() -add_library(phi_backends "${BACKENDS_SRCS}") -target_link_libraries(phi_backends ${BACKENDS_DEPS}) - -# for inference library -get_property(phi_modules GLOBAL PROPERTY PHI_MODULES) -set(phi_modules ${phi_modules} phi_backends) -set_property(GLOBAL PROPERTY PHI_MODULES "${phi_modules}") +collect_srcs(backends_srcs SRCS ${BACKENDS_SRCS}) if(WITH_CUSTOM_DEVICE) cc_test( capi_test SRCS custom/capi_test.cc - DEPS phi_capi) + DEPS phi) endif() diff --git a/paddle/phi/backends/cpu/cpu_context.cc b/paddle/phi/backends/cpu/cpu_context.cc index d42189e00ee..4538a96dc99 100644 --- a/paddle/phi/backends/cpu/cpu_context.cc +++ b/paddle/phi/backends/cpu/cpu_context.cc @@ -24,6 +24,10 @@ namespace phi { +template <> +const TypeInfo TypeInfoTraits::kType = + RegisterStaticType(CPUContext::name()); + struct CPUContext::Impl { Impl() : place_(CPUPlace()) {} diff --git a/paddle/phi/backends/custom/custom_context.cc b/paddle/phi/backends/custom/custom_context.cc index ddba0baea7e..d382c766cfd 100644 --- a/paddle/phi/backends/custom/custom_context.cc +++ b/paddle/phi/backends/custom/custom_context.cc @@ -19,6 +19,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(CustomContext::name()); + struct CustomContext::Impl { explicit Impl(const CustomPlace& place) : place_(place) {} diff --git a/paddle/phi/backends/dynload/CMakeLists.txt b/paddle/phi/backends/dynload/CMakeLists.txt index 2d4e84beb69..568c54cb342 100644 --- a/paddle/phi/backends/dynload/CMakeLists.txt +++ b/paddle/phi/backends/dynload/CMakeLists.txt @@ -1,8 +1,8 @@ -cc_library( - phi_dynamic_loader - SRCS dynamic_loader.cc port.cc - DEPS enforce glog gflags) - +set(DYNLOAD_COMMON_SRCS dynamic_loader.cc port.cc warpctc.cc warprnnt.cc + lapack.cc) +if(WITH_ASCEND_CL) + list(REMOVE_ITEM DYNLOAD_COMMON_SRCS warprnnt.cc) +endif() list( APPEND CUDA_SRCS @@ -60,66 +60,39 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) if(CUPTI_FOUND) list(APPEND CUDA_SRCS cupti.cc) endif() -if(WITH_ROCM) - hip_library( - phi_dynload_cuda - SRCS ${HIP_SRCS} - DEPS phi_dynamic_loader) - cc_library( - phi_dynload_warpctc - SRCS warpctc.cc - DEPS phi_dynamic_loader warpctc) - cc_library( - phi_dynload_warprnnt - SRCS warprnnt.cc - DEPS phi_dynamic_loader warprnnt) -else() - nv_library( - phi_dynload_cuda - SRCS ${CUDA_SRCS} - DEPS phi_dynamic_loader) - cc_library( - phi_dynload_warpctc - SRCS warpctc.cc - DEPS phi_dynamic_loader warpctc) - cc_library( - phi_dynload_warprnnt - SRCS warprnnt.cc - DEPS phi_dynamic_loader warprnnt) -endif() + if(WITH_MKLML) - cc_library( - phi_dynload_mklml - SRCS mklml.cc - DEPS phi_dynamic_loader mklml) + # Only deps libmklml.so, not link + add_library(dynload_mklml STATIC mklml.cc) + add_dependencies(dynload_mklml mklml) + if(WIN32) + target_link_libraries(dynload_mklml ${MKLML_IOMP_LIB}) + else() + target_link_libraries(dynload_mklml + "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") + endif() endif() if(WITH_FLASHATTN) - cc_library( - phi_dynload_flashattn - SRCS flashattn.cc - DEPS phi_dynamic_loader flashattn) + list(APPEND DYNLOAD_COMMON_SRCS flashattn.cc) endif() -cc_library( - phi_dynload_lapack - SRCS lapack.cc - DEPS phi_dynamic_loader) -add_dependencies(phi_dynload_lapack extern_lapack) -# TODO(TJ): add iomp, mkldnn? - if(MKL_FOUND AND WITH_ONEMKL) message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}") - cc_library( - phi_dynload_mklrt - SRCS mklrt.cc - DEPS phi_dynamic_loader) - target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE}) + list(APPEND DYNLOAD_COMMON_SRCS mklrt.cc) +endif() + +if(WITH_ROCM) + collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${HIP_SRCS}) +elseif(WITH_GPU) + collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${CUDA_SRCS}) +else() + collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS}) endif() if(WITH_CUDNN_FRONTEND) nv_test( cudnn_frontend_test SRCS cudnn_frontend_test.cc - DEPS phi_dynload_cuda cudnn-frontend) + DEPS phi cudnn-frontend) endif() diff --git a/paddle/phi/backends/gpu/cuda/CMakeLists.txt b/paddle/phi/backends/gpu/cuda/CMakeLists.txt index 7768cdd1161..6f138d4a0dd 100644 --- a/paddle/phi/backends/gpu/cuda/CMakeLists.txt +++ b/paddle/phi/backends/gpu/cuda/CMakeLists.txt @@ -1 +1 @@ -cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc) +collect_srcs(backends_srcs SRCS cudnn_workspace_helper.cc) diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index 5c9c010d365..5ab7019e601 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -59,6 +59,15 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo TypeInfoTraits::kType = + RegisterStaticType(GPUContext::name()); + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(GPUPinnedContext::name()); + namespace internal { class EigenGpuStreamDevice : public Eigen::StreamInterface { diff --git a/paddle/phi/backends/gpu/gpu_context.h b/paddle/phi/backends/gpu/gpu_context.h index 0b72f8b30c0..ef7df28d9a9 100644 --- a/paddle/phi/backends/gpu/gpu_context.h +++ b/paddle/phi/backends/gpu/gpu_context.h @@ -15,6 +15,8 @@ limitations under the License. */ #pragma once +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + #include #include #include @@ -305,3 +307,5 @@ class GPUPinnedContext }; #endif } // namespace phi + +#endif diff --git a/paddle/phi/backends/onednn/onednn_context.cc b/paddle/phi/backends/onednn/onednn_context.cc index 5095b5c234b..9ad73795da4 100644 --- a/paddle/phi/backends/onednn/onednn_context.cc +++ b/paddle/phi/backends/onednn/onednn_context.cc @@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) { } } +OneDNNContextThreadLocals::Body& OneDNNContextThreadLocals::fetch() { + thread_local Body b; + return b; +} + struct OneDNNContext::Impl { Impl() : p_blobmap_() { p_blobmap_.reset(new BlobMap()); @@ -462,5 +467,7 @@ const std::vector& OneDNNContext::GetOutputsName( return impl_->GetOutputsName(output); } +const char* OneDNNContext::name() { return "OneDNNContext"; } + } // namespace phi #endif diff --git a/paddle/phi/backends/onednn/onednn_context.h b/paddle/phi/backends/onednn/onednn_context.h index 79eaa05948c..8262a8bb290 100644 --- a/paddle/phi/backends/onednn/onednn_context.h +++ b/paddle/phi/backends/onednn/onednn_context.h @@ -76,10 +76,7 @@ class OneDNNContextThreadLocals { static constexpr size_t kMKLDNNSessionID_Default = 0; // mkldnn session id for cache clearing mode static constexpr size_t kMKLDNNSessionID_CacheClearing = -1; - static Body& fetch() { - thread_local Body b; - return b; - } + static Body& fetch(); }; class OneDNNContext : public CPUContext { @@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext { const std::vector& GetOutputsName( const std::string& output) const; - static const char* name() { return "OneDNNContext"; } + static const char* name(); private: struct Impl; diff --git a/paddle/phi/backends/xpu/xpu_context.cc b/paddle/phi/backends/xpu/xpu_context.cc index 44f247ff259..0c554270b51 100644 --- a/paddle/phi/backends/xpu/xpu_context.cc +++ b/paddle/phi/backends/xpu/xpu_context.cc @@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api; namespace phi { +template <> +const TypeInfo TypeInfoTraits::kType = + RegisterStaticType(XPUContext::name()); struct XPUContext::Impl { void SetL3Cache(int l3_size = 14155776) { const int MAX_XPU_NUM = 16; diff --git a/paddle/phi/capi/CMakeLists.txt b/paddle/phi/capi/CMakeLists.txt index c00c38cfa3a..3ea7a4199b2 100644 --- a/paddle/phi/capi/CMakeLists.txt +++ b/paddle/phi/capi/CMakeLists.txt @@ -1,13 +1 @@ add_subdirectory(lib) -cc_library( - phi_capi - SRCS all.cc - DEPS phi_c_data_type - phi_c_device_context - phi_c_int_array - phi_c_kernel_context - phi_c_kernel_factory - phi_c_kernel_registry - phi_c_place - phi_c_scalar - phi_c_tensor) diff --git a/paddle/phi/capi/all.cc b/paddle/phi/capi/all.cc deleted file mode 100644 index 3d9c9315b31..00000000000 --- a/paddle/phi/capi/all.cc +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/capi/all.h" - -namespace paddle { -namespace capi {} // namespace capi -} // namespace paddle diff --git a/paddle/phi/capi/lib/CMakeLists.txt b/paddle/phi/capi/lib/CMakeLists.txt index 60afb74a6d4..8cf3c9caf8e 100644 --- a/paddle/phi/capi/lib/CMakeLists.txt +++ b/paddle/phi/capi/lib/CMakeLists.txt @@ -1,44 +1,12 @@ -cc_library( - phi_c_data_type - SRCS c_data_type.cc - DEPS dense_tensor) - -cc_library( - phi_c_device_context - SRCS c_device_context.cc - DEPS phi_backends) - -cc_library( - phi_c_int_array - SRCS c_int_array.cc - DEPS int_array) - -cc_library( - phi_c_kernel_context - SRCS c_kernel_context.cc - DEPS kernel_context) - -cc_library( - phi_c_kernel_factory - SRCS c_kernel_factory.cc - DEPS kernel_factory) - -cc_library( - phi_c_kernel_registry - SRCS c_kernel_registry.cc - DEPS dense_tensor) - -cc_library( - phi_c_place - SRCS c_place.cc - DEPS phi_place) - -cc_library( - phi_c_scalar - SRCS c_scalar.cc - DEPS scalar) - -cc_library( - phi_c_tensor - SRCS c_tensor.cc - DEPS dense_tensor) +collect_srcs( + capi_srcs + SRCS + c_data_type.cc + c_device_context.cc + c_int_array.cc + c_kernel_context.cc + c_kernel_factory.cc + c_kernel_registry.cc + c_place.cc + c_scalar.cc + c_tensor.cc) diff --git a/paddle/phi/common/CMakeLists.txt b/paddle/phi/common/CMakeLists.txt index 67f6fa9729c..5fe96a2a682 100644 --- a/paddle/phi/common/CMakeLists.txt +++ b/paddle/phi/common/CMakeLists.txt @@ -1,26 +1 @@ -if(WITH_GPU) - nv_library( - phi_place - SRCS place.cc - DEPS phi_backends) -elseif(WITH_ROCM) - hip_library( - phi_place - SRCS place.cc - DEPS phi_backends) -else() - cc_library(phi_place SRCS place.cc) -endif() - -cc_library( - scalar - SRCS scalar.cc - DEPS phi_enforce phi_tensor_utils) -cc_library( - int_array - SRCS int_array.cc - DEPS phi_enforce phi_tensor_utils) -cc_library( - memory_utils - SRCS memory_utils.cc - DEPS phi_enforce phi_place) +collect_srcs(common_srcs SRCS place.cc scalar.cc int_array.cc memory_utils.cc) diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index 9cb2cec158a..3500d880907 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -6,150 +6,35 @@ if(WITH_GPU) proto_library(external_error_proto SRCS external_error.proto) endif() -cc_library( - flags - SRCS flags.cc - DEPS gflags) - -cc_library(errors SRCS errors.cc) -set(phi_enforce_deps errors flags) -if(WITH_GPU) - set(phi_enforce_deps ${phi_enforce_deps} external_error_proto) -endif() -cc_library( - phi_enforce - SRCS enforce.cc - DEPS ${phi_enforce_deps}) - -cc_library( - phi_os_info - SRCS os_info.cc - DEPS phi_enforce) - -if(WITH_XPU) - cc_library( - kernel_factory - SRCS kernel_factory.cc - DEPS phi_enforce convert_utils phi_backends) -else() - cc_library( - kernel_factory - SRCS kernel_factory.cc - DEPS phi_enforce convert_utils) -endif() -cc_library( - kernel_context - SRCS kernel_context.cc - DEPS phi_enforce phi_backends) - -cc_library( - ddim - SRCS ddim.cc - DEPS phi_enforce) -cc_library( - tensor_base - SRCS tensor_base.cc allocator.cc - DEPS phi_enforce) -cc_library( - tensor_meta - SRCS tensor_meta.cc - DEPS phi_enforce) -cc_library( - lod_utils - SRCS lod_utils.cc - DEPS phi_enforce) -cc_library( - threadpool - SRCS threadpool.cc - DEPS phi_enforce) - -cc_library( - dense_tensor - SRCS dense_tensor.cc dense_tensor_impl.cc - DEPS convert_utils tensor_meta tensor_base ddim) - -target_link_libraries(dense_tensor memory_utils) - -cc_library( - sparse_coo_tensor - SRCS sparse_coo_tensor.cc - DEPS tensor_meta tensor_base) -cc_library( - sparse_csr_tensor - SRCS sparse_csr_tensor.cc - DEPS dense_tensor tensor_base) -cc_library( - string_tensor - SRCS string_tensor.cc - DEPS convert_utils tensor_meta tensor_base) - -cc_library( - tensor_array - SRCS tensor_array.cc - DEPS dense_tensor tensor_base) - -cc_library( - extended_tensor - SRCS extended_tensor.cc - DEPS tensor_base) - -cc_library( - meta_tensor - SRCS meta_tensor.cc - DEPS tensor_base tensor_meta dense_tensor) -cc_library( - infermeta_utils - SRCS infermeta_utils.cc - DEPS meta_tensor) - -cc_library( - selected_rows - SRCS selected_rows_impl.cc selected_rows.cc - DEPS tensor_base dense_tensor phi_enforce ddim) -cc_library( - phi_device_context - SRCS device_context.cc - DEPS dense_tensor selected_rows) - -cc_library( - custom_kernel - SRCS custom_kernel.cc - DEPS kernel_factory) - -cc_library( - mixed_vector - SRCS mixed_vector.cc - DEPS phi_backends place memory) - -cc_library( - generator - SRCS generator.cc - DEPS enforce place) - -# Will remove once we implemented MKLDNN_Tensor -if(WITH_MKLDNN) - add_dependencies(dense_tensor mkldnn) - add_dependencies(tensor_base mkldnn) -endif() - -if(WITH_GPU) - nv_library( - phi_tensor_utils - SRCS tensor_utils.cc - DEPS phi_backends dense_tensor selected_rows memcpy memory_utils) -elseif(WITH_ROCM) - hip_library( - phi_tensor_utils - SRCS tensor_utils.cc - DEPS phi_backends dense_tensor selected_rows memcpy memory_utils) -elseif(WITH_XPU_KP) - xpu_library( - phi_tensor_utils - SRCS tensor_utils.cc - DEPS phi_backends dense_tensor selected_rows memcpy memory_utils) -else() - cc_library( - phi_tensor_utils - SRCS tensor_utils.cc - DEPS dense_tensor selected_rows memcpy phi_backends memory_utils) -endif() +collect_srcs( + core_srcs + SRCS + flags.cc + errors.cc + enforce.cc + os_info.cc + kernel_context.cc + ddim.cc + tensor_base.cc + allocator.cc + tensor_meta.cc + lod_utils.cc + threadpool.cc + dense_tensor.cc + dense_tensor_impl.cc + sparse_coo_tensor.cc + sparse_csr_tensor.cc + string_tensor.cc + tensor_array.cc + extended_tensor.cc + meta_tensor.cc + infermeta_utils.cc + selected_rows_impl.cc + selected_rows.cc + device_context.cc + custom_kernel.cc + mixed_vector.cc + generator.cc + kernel_factory.cc + tensor_utils.cc + storage_properties.cc) diff --git a/paddle/phi/core/compat/CMakeLists.txt b/paddle/phi/core/compat/CMakeLists.txt index 3234f1004f0..4df1ac8e932 100644 --- a/paddle/phi/core/compat/CMakeLists.txt +++ b/paddle/phi/core/compat/CMakeLists.txt @@ -1,23 +1,2 @@ -cc_library( - arg_map_context - SRCS arg_map_context.cc - DEPS phi_enforce) -cc_library( - op_utils - SRCS op_utils.cc - DEPS arg_map_context enforce) -cc_library( - get_kerneltype_forvar_utils - SRCS get_kerneltype_forvar_utils.cc - DEPS enforce) - -set(convert_utils_deps data_type place op_utils phi_backends) - -if(WITH_MKLDNN) - set(convert_utils_deps ${convert_utils_deps} mkldnn) -endif() - -cc_library( - convert_utils - SRCS convert_utils.cc - DEPS ${convert_utils_deps}) +collect_srcs(core_srcs SRCS arg_map_context.cc op_utils.cc + get_kerneltype_forvar_utils.cc convert_utils.cc) diff --git a/paddle/phi/core/compat/op_utils.cc b/paddle/phi/core/compat/op_utils.cc index 086cf6da5f1..11c887785f1 100644 --- a/paddle/phi/core/compat/op_utils.cc +++ b/paddle/phi/core/compat/op_utils.cc @@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() { return g_op_utils_map; } +BaseKernelNameRegistrar::BaseKernelNameRegistrar(const char* op_type, + const char* base_kernel_name) { + OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name); + OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name); +} + +ArgumentMappingFnRegistrar::ArgumentMappingFnRegistrar( + const char* op_type, ArgumentMappingFn arg_mapping_fn) { + OpUtilsMap::Instance().InsertArgumentMappingFn(op_type, + std::move(arg_mapping_fn)); +} + } // namespace phi diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index f3e594eae11..cfa64714966 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -210,18 +210,12 @@ class OpUtilsMap { }; struct BaseKernelNameRegistrar { - BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name) { - OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name); - OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name); - } + BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name); }; struct ArgumentMappingFnRegistrar { ArgumentMappingFnRegistrar(const char* op_type, - ArgumentMappingFn arg_mapping_fn) { - OpUtilsMap::Instance().InsertArgumentMappingFn(op_type, - std::move(arg_mapping_fn)); - } + ArgumentMappingFn arg_mapping_fn); }; #define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \ diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index 2c8f36f6c34..4346cc6f32b 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -42,6 +42,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(DenseTensor::name()); + DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta) : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {} @@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, if (fake_alloc) { bytes = 0; } else { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( valid(), + true, phi::errors::PreconditionNotMet("The meta data must be valid when " "call the mutable data function.")); if (requested_size) { @@ -169,8 +175,9 @@ const T* DenseTensor::data() const { template T* DenseTensor::data() { T* ret = static_cast(data()); - PADDLE_ENFORCE( - (dtype() == phi::CppTypeToDataType::Type()), + PADDLE_ENFORCE_EQ( + dtype(), + phi::CppTypeToDataType::Type(), phi::errors::InvalidArgument( "The type of data we are trying to retrieve (%s) does not match the " "type of data (%s) currently contained in the container.", @@ -200,16 +207,18 @@ const void* DenseTensor::data() const { } void DenseTensor::set_meta(DenseTensorMeta&& meta) { - PADDLE_ENFORCE(!meta_.valid(), - phi::errors::InvalidArgument( - "Only when the original attribute of Tensor is " - "incomplete, can it be reset.")); + PADDLE_ENFORCE_EQ(meta_.valid(), + false, + phi::errors::InvalidArgument( + "Only when the original attribute of Tensor is " + "incomplete, can it be reset.")); meta_ = std::move(meta); } void DenseTensor::set_meta(const DenseTensorMeta& meta) { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( meta.valid(), + true, phi::errors::InvalidArgument( "Input meta is invalid, please check the meta attribute.")); meta_.dims = meta.dims; diff --git a/paddle/phi/core/distributed/CMakeLists.txt b/paddle/phi/core/distributed/CMakeLists.txt index b68a6890485..e759b7d9c8d 100644 --- a/paddle/phi/core/distributed/CMakeLists.txt +++ b/paddle/phi/core/distributed/CMakeLists.txt @@ -2,32 +2,14 @@ add_subdirectory(check) add_subdirectory(store) add_subdirectory(auto_parallel) -set(COMM_CONTEXT_MANAGER_DEPS tcp_store) +set(DISTRIBUTED_COMMON_SRCS comm_context_manager.cc) if(WITH_NCCL OR WITH_RCCL) - cc_library( - nccl_comm_context - SRCS nccl_comm_context.cc - DEPS dense_tensor comm_static_check nccl_dynamic_check) - - list(APPEND COMM_CONTEXT_MANAGER_DEPS nccl_comm_context) + list(APPEND DISTRIBUTED_COMMON_SRCS nccl_comm_context.cc) endif() if(WITH_GLOO) - cc_library( - gloo_utils - SRCS gloo_utils.cc - DEPS gloo dense_tensor enforce tcp_store) - - cc_library( - gloo_comm_context - SRCS gloo_comm_context.cc - DEPS gloo_utils comm_static_check) - - list(APPEND COMM_CONTEXT_MANAGER_DEPS gloo_comm_context gloo_store) + list(APPEND DISTRIBUTED_COMMON_SRCS gloo_utils.cc gloo_comm_context.cc) endif() -cc_library( - comm_context_manager - SRCS comm_context_manager.cc - DEPS ${COMM_CONTEXT_MANAGER_DEPS}) +collect_srcs(core_srcs SRCS ${DISTRIBUTED_COMMON_SRCS}) diff --git a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt index 2c4728c5a4c..d6e52ca8044 100644 --- a/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt +++ b/paddle/phi/core/distributed/auto_parallel/CMakeLists.txt @@ -1,23 +1,4 @@ proto_library(auto_parallel_proto SRCS auto_parallel.proto) -cc_library( - device_mesh - SRCS device_mesh.cc - DEPS auto_parallel_proto phi_enforce) - -cc_library( - process_mesh - SRCS process_mesh.cc - DEPS auto_parallel_proto phi_enforce) - -cc_library( - dist_attr - SRCS dist_attr.cc - DEPS process_mesh auto_parallel_proto proto_desc phi_enforce) - -cc_library( - dist_mapper - SRCS dist_mapper.cc - DEPS device_mesh auto_parallel_proto phi_enforce) - -cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper) +collect_srcs(core_srcs SRCS device_mesh.cc process_mesh.cc dist_attr.cc + dist_mapper.cc) diff --git a/paddle/phi/core/distributed/check/CMakeLists.txt b/paddle/phi/core/distributed/check/CMakeLists.txt index 76f4977263d..1721a4a4602 100644 --- a/paddle/phi/core/distributed/check/CMakeLists.txt +++ b/paddle/phi/core/distributed/check/CMakeLists.txt @@ -1,11 +1,7 @@ -cc_library( - comm_static_check - SRCS static_check.cc - DEPS place dense_tensor enforce) +set(CHECK_COMMON_SRCS static_check.cc) if(WITH_NCCL OR WITH_RCCL) - cc_library( - nccl_dynamic_check - SRCS nccl_dynamic_check.cc - DEPS dense_tensor) + list(APPEND CHECK_COMMON_SRCS nccl_dynamic_check.cc) endif() + +collect_srcs(core_srcs SRCS ${CHECK_COMMON_SRCS}) diff --git a/paddle/phi/core/distributed/store/CMakeLists.txt b/paddle/phi/core/distributed/store/CMakeLists.txt index d6b35eb342b..8eaa76eac1c 100644 --- a/paddle/phi/core/distributed/store/CMakeLists.txt +++ b/paddle/phi/core/distributed/store/CMakeLists.txt @@ -1,18 +1,14 @@ -cc_library( - tcp_store - SRCS tcp_store.cc tcp_utils.cc socket.cpp store.cc - DEPS enforce glog) +set(STORE_COMMON_SRCS tcp_store.cc tcp_utils.cc socket.cpp store.cc) if(WITH_GLOO) - cc_library( - gloo_store - SRCS gloo_store.cc - DEPS gloo) + list(APPEND STORE_COMMON_SRCS gloo_store.cc) endif() +collect_srcs(core_srcs SRCS ${STORE_COMMON_SRCS}) + if(NOT WIN32) cc_test( test_c_tcp_store SRCS test_tcp_store.cc - DEPS tcp_store) + DEPS phi) endif() diff --git a/paddle/phi/core/distributed/store/tcp_store.cc b/paddle/phi/core/distributed/store/tcp_store.cc index 98b1ad3f850..baae37148f7 100644 --- a/paddle/phi/core/distributed/store/tcp_store.cc +++ b/paddle/phi/core/distributed/store/tcp_store.cc @@ -139,8 +139,9 @@ void MasterDaemon::StopByControlFd() { #else void MasterDaemon::InitControlFd() { ghStopEvent_ = CreateEvent(NULL, TRUE, FALSE, NULL); - PADDLE_ENFORCE(ghStopEvent_, - phi::errors::Fatal("failed to cread control pipe")); + PADDLE_ENFORCE_NE(ghStopEvent_, + nullptr, + phi::errors::Fatal("failed to cread control pipe")); } void MasterDaemon::CloseControlFd() { CloseHandle(ghStopEvent_); } void MasterDaemon::StopByControlFd() { SetEvent(ghStopEvent_); } @@ -422,8 +423,9 @@ void TCPStore::wait(const std::string& key) { VLOG(3) << "TCPStore wait."; _client->send_command_for_key(Command::WAIT, _key_prefix + key); reply = _client->receive_value(); - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( reply == ReplyType::STOP_WAIT, + true, phi::errors::InvalidArgument("Stop_waiting response is expected")); } diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc index 9630c6c96d4..1291571141c 100644 --- a/paddle/phi/core/enforce.cc +++ b/paddle/phi/core/enforce.cc @@ -280,13 +280,19 @@ std::string GetExternalErrorMsg(T status) { if (std::string::npos != last_slash_idx) { strModule.erase(last_slash_idx, std::string::npos); } - if (compare_path.compare("avx.so") == 0) { + // TODO(lizhiyu02): I don't know what the 'compare_path.compare("avx.so") + // == 0' means, while + // 'compare_path.find("dist-packages") != std::string::npos' means that + // after using 'pip install paddle'. + if (compare_path.compare("avx.so") == 0 || + strModule.find("dist-packages") != std::string::npos) { filePath = strModule + "/../include/third_party/externalError/data/externalErrorMsg.pb"; } else { + // Just for unittest filePath = strModule + - "/../../third_party/externalError/data/externalErrorMsg.pb"; + "/../third_party/externalError/data/externalErrorMsg.pb"; } } #else @@ -303,14 +309,14 @@ std::string GetExternalErrorMsg(T status) { if (std::string::npos != last_slash_idx) { strModule.erase(last_slash_idx, std::string::npos); } - if (compare_path.compare("avx.pyd") == 0) { + if (strModule.find("dist-packages") != std::string::npos) { filePath = strModule + "\\..\\include\\third_" "party\\externalerror\\data\\externalErrorMsg.pb"; } else { - filePath = - strModule + - "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb"; + filePath = strModule + + "\\..\\..\\third_party" + "\\externalerror\\data\\externalErrorMsg.pb"; } #endif std::ifstream fin(filePath, std::ios::in | std::ios::binary); diff --git a/paddle/phi/core/flags.h b/paddle/phi/core/flags.h index 0112be93b7f..e8711c73f30 100644 --- a/paddle/phi/core/flags.h +++ b/paddle/phi/core/flags.h @@ -24,7 +24,7 @@ #include "paddle/utils/variant.h" -#if defined(_WIN32) && defined(BUILD_PHI_SHARED) +#if defined(_WIN32) #define PHI_EXPORT_FLAG __declspec(dllexport) #define PHI_IMPORT_FLAG __declspec(dllimport) #else diff --git a/paddle/phi/core/lod_utils.cc b/paddle/phi/core/lod_utils.cc index d775ad1a18f..dac1059182c 100644 --- a/paddle/phi/core/lod_utils.cc +++ b/paddle/phi/core/lod_utils.cc @@ -32,8 +32,9 @@ LoD ToAbsOffset(const LoD &in) { } void AppendLoD(LoD *lod, const LoD &lod_length) { - PADDLE_ENFORCE( - lod->empty() || lod->size() == lod_length.size(), + PADDLE_ENFORCE_EQ( + (lod->empty() || lod->size() == lod_length.size()), + true, phi::errors::InvalidArgument( "The input LoD length should be equal to the appended LoD size, but " "received input LoD length is %d, actual LoD size is %d.", diff --git a/paddle/phi/core/selected_rows.cc b/paddle/phi/core/selected_rows.cc index ec2d0d61fae..3ececdfc0bb 100644 --- a/paddle/phi/core/selected_rows.cc +++ b/paddle/phi/core/selected_rows.cc @@ -16,6 +16,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(SelectedRows::name()); + SelectedRows::SelectedRows(const std::vector& rows, const int64_t& height) : impl_(std::make_shared(rows, height)) {} diff --git a/paddle/phi/core/sparse_coo_tensor.cc b/paddle/phi/core/sparse_coo_tensor.cc index b7b0d06de8a..d76064b5a3d 100644 --- a/paddle/phi/core/sparse_coo_tensor.cc +++ b/paddle/phi/core/sparse_coo_tensor.cc @@ -16,6 +16,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(SparseCooTensor::name()); + SparseCooTensor::SparseCooTensor() { DenseTensor non_zero_indices, non_zero_elements; this->SetMember(non_zero_indices, non_zero_elements, {1}, true); @@ -155,16 +160,18 @@ int32_t SparseCooTensor::dense_dim() const { } void SparseCooTensor::set_meta(SparseTensorMeta&& meta) { - PADDLE_ENFORCE(!meta_.valid(), - phi::errors::InvalidArgument( - "Only when the original attribute of Tensor is " - "incomplete, can it be reset.")); + PADDLE_ENFORCE_EQ(meta_.valid(), + false, + phi::errors::InvalidArgument( + "Only when the original attribute of Tensor is " + "incomplete, can it be reset.")); meta_ = std::move(meta); } void SparseCooTensor::set_meta(const SparseTensorMeta& meta) { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( meta.valid(), + true, phi::errors::InvalidArgument( "Input meta is invalid, please check the meta attribute.")); meta_.dims = meta.dims; diff --git a/paddle/phi/core/sparse_csr_tensor.cc b/paddle/phi/core/sparse_csr_tensor.cc index 32680106a96..156a324f8b6 100644 --- a/paddle/phi/core/sparse_csr_tensor.cc +++ b/paddle/phi/core/sparse_csr_tensor.cc @@ -16,6 +16,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(SparseCsrTensor::name()); + SparseCsrTensor::SparseCsrTensor() { DenseTensor crows, cols, values; this->non_zero_crows_ = crows; @@ -26,8 +31,9 @@ SparseCsrTensor::SparseCsrTensor() { inline void check_shape(const DDim& dims) { bool valid = dims.size() == 2 || dims.size() == 3; - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( valid, + true, phi::errors::InvalidArgument("the SparseCsrTensor only support 2-D or " "3-D Tensor, but get %d-D Tensor", dims.size())); @@ -96,10 +102,12 @@ void SparseCsrTensor::set_layout(const DataLayout layout) { void SparseCsrTensor::Resize(const DDim& dense_dims, const int64_t non_zero_num) { - PADDLE_ENFORCE(this->initialized(), - phi::errors::InvalidArgument( - "the SparseCsrTensor must be initialized when call Resize " - "function.")); + PADDLE_ENFORCE_EQ( + this->initialized(), + true, + phi::errors::InvalidArgument( + "the SparseCsrTensor must be initialized when call Resize " + "function.")); check_shape(dense_dims); int64_t crows_size = dense_dims[0] + 1; @@ -139,16 +147,18 @@ void SparseCsrTensor::SetMember(const DenseTensor& non_zero_crows, } void SparseCsrTensor::set_meta(SparseTensorMeta&& meta) { - PADDLE_ENFORCE(!meta_.valid(), - phi::errors::InvalidArgument( - "Only when the original attribute of Tensor is " - "incomplete, can it be reset.")); + PADDLE_ENFORCE_EQ(meta_.valid(), + false, + phi::errors::InvalidArgument( + "Only when the original attribute of Tensor is " + "incomplete, can it be reset.")); meta_ = std::move(meta); } void SparseCsrTensor::set_meta(const SparseTensorMeta& meta) { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( meta.valid(), + true, phi::errors::InvalidArgument( "Input meta is invalid, please check the meta attribute.")); meta_.dims = meta.dims; diff --git a/paddle/phi/core/storage_properties.cc b/paddle/phi/core/storage_properties.cc new file mode 100644 index 00000000000..f05a3572f5e --- /dev/null +++ b/paddle/phi/core/storage_properties.cc @@ -0,0 +1,32 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/core/storage_properties.h" + +namespace phi { + +#ifdef PADDLE_WITH_MKLDNN +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(OneDNNStorageProperties::name()); + +#endif + +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(NPUStorageProperties::name()); + +} // namespace phi diff --git a/paddle/phi/core/string_tensor.cc b/paddle/phi/core/string_tensor.cc index e82915a38ab..428c890c1f2 100644 --- a/paddle/phi/core/string_tensor.cc +++ b/paddle/phi/core/string_tensor.cc @@ -21,6 +21,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(StringTensor::name()); + StringTensor::StringTensor() { meta_.offset = 0; } StringTensor::StringTensor(Allocator* a, const StringTensorMeta& meta) @@ -91,8 +96,9 @@ dtype::pstring* StringTensor::data() { } void StringTensor::set_meta(const StringTensorMeta& meta) { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( meta.valid(), + true, phi::errors::InvalidArgument( "Input meta is invalid, please check the meta attribute.")); meta_.dims = meta.dims; @@ -143,8 +149,9 @@ void* StringTensor::AllocateFrom(Allocator* allocator, if (fake_alloc) { bytes = 0; } else { - PADDLE_ENFORCE( + PADDLE_ENFORCE_EQ( valid(), + true, errors::PreconditionNotMet("The meta data must be valid when call the " "mutable data function.")); if (requested_size) { diff --git a/paddle/phi/core/tensor_array.cc b/paddle/phi/core/tensor_array.cc index e774bd0da44..f30b17251cd 100644 --- a/paddle/phi/core/tensor_array.cc +++ b/paddle/phi/core/tensor_array.cc @@ -16,6 +16,11 @@ limitations under the License. */ namespace phi { +template <> +const TypeInfo + TypeInfoTraits::kType = + RegisterStaticType(TensorArray::name()); + TensorArray::TensorArray(const std::vector& vec) { tensors_ = vec; } diff --git a/paddle/phi/core/utils/type_info.h b/paddle/phi/core/utils/type_info.h index 33a4e09933a..1b3d0f8683b 100644 --- a/paddle/phi/core/utils/type_info.h +++ b/paddle/phi/core/utils/type_info.h @@ -52,8 +52,4 @@ class TypeInfoTraits { template TypeInfo RegisterStaticType(const std::string& type); -template -const TypeInfo TypeInfoTraits::kType = - RegisterStaticType(DerivedT::name()); - } // namespace phi diff --git a/paddle/phi/infermeta/CMakeLists.txt b/paddle/phi/infermeta/CMakeLists.txt index fe3c8abfbd3..f53f655b244 100644 --- a/paddle/phi/infermeta/CMakeLists.txt +++ b/paddle/phi/infermeta/CMakeLists.txt @@ -1,10 +1,12 @@ -cc_library( - infermeta - SRCS nullary.cc unary.cc binary.cc ternary.cc multiary.cc fusion.cc - DEPS convert_utils meta_tensor infermeta_utils xxhash) -cc_library( - backward_infermeta - SRCS backward.cc - DEPS meta_tensor convert_utils) add_subdirectory(strings) add_subdirectory(sparse) +collect_srcs( + infermeta_srcs + SRCS + nullary.cc + unary.cc + binary.cc + ternary.cc + multiary.cc + fusion.cc + backward.cc) diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 832680b7f59..efe2e1c65bd 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1668,9 +1668,10 @@ static void Interpolate2DInferShapeCheck( MetaConfig config) { auto dim_x = x.dims(); - PADDLE_ENFORCE( - "bilinear" == interp_method || "nearest" == interp_method || - "bicubic" == interp_method, + PADDLE_ENFORCE_EQ( + ("bilinear" == interp_method || "nearest" == interp_method || + "bicubic" == interp_method), + true, phi::errors::InvalidArgument( "Interpolation method can only be \"bilinear\" or \"nearest\" when " "Input(X) dimension is 4, but got method = %s.", @@ -1818,12 +1819,14 @@ static void Interpolate3DInferShapeCheck( MetaConfig config) { auto dim_x = x.dims(); - PADDLE_ENFORCE("nearest" == interp_method || "trilinear" == interp_method, - phi::errors::InvalidArgument( - "Interpolation method can only be \"trilinear\" or " - "\"nearest\" when Input(X) " - "dimension is 5, but got method = %s .", - interp_method)); + PADDLE_ENFORCE_EQ( + ("nearest" == interp_method || "trilinear" == interp_method), + true, + phi::errors::InvalidArgument( + "Interpolation method can only be \"trilinear\" or " + "\"nearest\" when Input(X) " + "dimension is 5, but got method = %s .", + interp_method)); const DataLayout data_layout = phi::StringToDataLayout(data_layout_str); for (int i = 0; i < dim_x.size(); ++i) { @@ -1972,8 +1975,9 @@ void InterpolateInferMeta( MetaTensor* output, MetaConfig config) { auto dim_x = x.dims(); // NCHW format - PADDLE_ENFORCE( - dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5, + PADDLE_ENFORCE_EQ( + (dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5), + true, phi::errors::Unimplemented( "Input(X) dimension must be 3, 4 or 5, but got dimension = %d .", dim_x.size())); diff --git a/paddle/phi/infermeta/sparse/CMakeLists.txt b/paddle/phi/infermeta/sparse/CMakeLists.txt index 8717ef2cf6f..f48ae8c33d7 100644 --- a/paddle/phi/infermeta/sparse/CMakeLists.txt +++ b/paddle/phi/infermeta/sparse/CMakeLists.txt @@ -1,9 +1 @@ -cc_library( - sparse_infermeta - SRCS unary.cc binary.cc multiary.cc - DEPS convert_utils infermeta_utils) - -cc_library( - sparse_backward_infermeta - SRCS backward.cc - DEPS meta_tensor convert_utils) +collect_srcs(infermeta_srcs SRCS unary.cc binary.cc multiary.cc backward.cc) diff --git a/paddle/phi/infermeta/strings/CMakeLists.txt b/paddle/phi/infermeta/strings/CMakeLists.txt index c2f891fe712..c6ed4a715a2 100644 --- a/paddle/phi/infermeta/strings/CMakeLists.txt +++ b/paddle/phi/infermeta/strings/CMakeLists.txt @@ -1,4 +1 @@ -cc_library( - string_infermeta - SRCS nullary.cc unary.cc - DEPS convert_utils infermeta_utils) +collect_srcs(infermeta_srcs SRCS nullary.cc unary.cc) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 67ad639f648..92cf654aee8 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -2088,10 +2088,12 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, auto x_dims = x.dims(); - PADDLE_ENFORCE(x_dims.size() == 4 || x_dims.size() == 5, - errors::InvalidArgument("Pooling intput should be 4-D or " - "5-D tensor but received %dD-Tensor", - x_dims.size())); + PADDLE_ENFORCE_EQ( + (x_dims.size() == 4 || x_dims.size() == 5), + true, + errors::InvalidArgument("Pooling intput should be 4-D or " + "5-D tensor but received %dD-Tensor", + x_dims.size())); if (global_pooling) { kernel_size_.resize(static_cast(x_dims.size()) - 2); @@ -4430,15 +4432,15 @@ void TransposeInferMeta(const MetaTensor& x, // Note: x_rank > axis_size when fuse squeeze2 + transpose2, else x_rank == // axis_size - PADDLE_ENFORCE_GE( - x_rank, - axis_size, - errors::InvalidArgument("The input tensor's dimension " - "should be equal to the axis's size. " - "But received input tensor's dimension is %d, " - "axis's size is %d", - x_rank, - axis_size)); + PADDLE_ENFORCE_GE(x_rank, + axis_size, + errors::InvalidArgument( + "The input tensor's dimension " + "should be equal to or greater than the axis's size. " + "But received input tensor's dimension is %d, " + "axis's size is %d", + x_rank, + axis_size)); std::vector formated_axis = axis; std::vector count(axis_size, 0); diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index d7f9849ad94..347eadc4d4f 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -19,84 +19,6 @@ add_subdirectory(funcs) # kernel autotune add_subdirectory(autotune) -# phi depends all phi kernel targets -set_property(GLOBAL PROPERTY PHI_KERNELS "") - -# [ 1. Common kernel compilation dependencies ] -set(COMMON_KERNEL_DEPS - dense_tensor - string_tensor - sparse_coo_tensor - sparse_csr_tensor - tensor_array - int_array - scalar - kernel_context - kernel_factory - arg_map_context - convert_utils - lod_utils - custom_kernel - string_infermeta - phi_tensor_utils) -set(COMMON_KERNEL_DEPS - ${COMMON_KERNEL_DEPS} - eigen_function - blas - math_function - im2col - vol2col - concat_and_split_functor - selected_rows_functor) -# remove this dep after removing fluid deps on tensor creation -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} lod_utils) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils - sparse_infermeta) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune) - -set(COMMON_KERNEL_DEPS - ${COMMON_KERNEL_DEPS} - threadpool - jit_kernel_helper - softmax - cross_entropy - matrix_bit_code - lapack_function - lstm_compute - gru_compute - deformable_conv_functor - matrix_reduce - segment_pooling - pooling - maxouting - matrix_inverse - matrix_solve - phi_dynload_warpctc - phi_dynload_warprnnt - sequence_padding - sequence_pooling - sequence_scale - fft - phi_data_layout_transform - gpc - utf8proc - gather_scatter_functor) - -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group) - -if(WITH_FLASHATTN) - set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_dynload_flashattn) -endif() - -if(WITH_NCCL OR WITH_RCCL) - set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} nccl_comm_context) -endif() -if(WITH_GLOO) - set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} gloo_comm_context) -endif() -if(WITH_CUDNN_FRONTEND) - set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cudnn-frontend) -endif() copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) file(GLOB kernel_h "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h") @@ -105,8 +27,8 @@ file(GLOB kernel_primitive_h "primitive/*.h") # fusion ops would be included here file( - GLOB - kernel_cu + GLOB kernel_cu + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "gpu/*.cu" "gpu/*.cu.cc" "gpudnn/*.cu" @@ -118,6 +40,10 @@ file( "strings/gpu/*.cu" "fusion/gpu/*.cu") +if(APPLE OR WIN32) + list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu") +endif() + if(DEFINED REDUCE_INFERENCE_LIB_SIZE) list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$") list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$") @@ -146,22 +72,19 @@ if(WITH_CUTLASS) ) endif() - file(GLOB cutlass_cu "fusion/cutlass/conv2d/generated/*.cu" - "fusion/cutlass/conv2d/*.cu" "fusion/cutlass/*.cu" - "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu") - add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION") + file( + GLOB cutlass_cu + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "fusion/cutlass/conv2d/generated/*.cu" "fusion/cutlass/conv2d/*.cu" + "fusion/cutlass/*.cu" + "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu") list(APPEND kernel_cu ${cutlass_cu}) endif() -if(APPLE OR WIN32) - list(REMOVE_ITEM kernel_cu - "${CMAKE_CURRENT_SOURCE_DIR}/fusion/gpu/fusion_group_kernel.cu") -endif() - if(WITH_MKLDNN) file( - GLOB - kernel_cc + GLOB kernel_cc + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc" "cpu/*.cc" "legacy/*.cc" @@ -171,6 +94,8 @@ if(WITH_MKLDNN) "selected_rows/cpu/*.cc" "sparse/*.cc" "sparse/cpu/*.cc" + "legacy/*.cc" + "legacy/cpu/*.cc" "strings/*.cc" "strings/cpu/*.cc" "onednn/*.cc" @@ -179,8 +104,8 @@ if(WITH_MKLDNN) "fusion/cpu/*.cc") else() file( - GLOB - kernel_cc + GLOB kernel_cc + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc" "cpu/*.cc" "legacy/*.cc" @@ -189,6 +114,8 @@ else() "selected_rows/cpu/*.cc" "sparse/*.cc" "sparse/cpu/*.cc" + "legacy/*.cc" + "legacy/cpu/*.cc" "strings/*.cc" "strings/cpu/*.cc" "fusion/*.cc" @@ -200,32 +127,17 @@ if(DEFINED REDUCE_INFERENCE_LIB_SIZE) endif() file( - GLOB - kernel_xpu - "xpu/*.cc" - "legacy/xpu/*.cc" - "selected_rows/xpu/*.cc" - "fusion/xpu/*.cc" + GLOB kernel_xpu + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "xpu/*.cc" "legacy/xpu/*.cc" "selected_rows/xpu/*.cc" "fusion/xpu/*.cc" "sparse/xpu/*.cc") -if(WITH_MKLDNN) - set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} get_kerneltype_forvar_utils) -endif() - if(WITH_GPU OR WITH_ROCM) - if(WITH_GPU) - add_library(phi_gpu ${kernel_cu} ${kernel_cc}) - if(WITH_CUTLASS) - add_dependencies(phi_gpu cutlass_codegen) - endif() - elseif(WITH_ROCM) - hip_add_library(phi_gpu STATIC ${kernel_cu} ${kernel_cc}) - endif() + collect_srcs(kernels_srcs SRCS ${kernel_cu}) kernel_declare("${kernel_cu}") - kernel_declare("${kernel_cc}") - target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS}) - set(ADD_PHI_KERNELS ${ADD_PHI_KERNELS} phi_gpu) -elseif(WITH_XPU) +endif() + +if(WITH_XPU) if(WITH_XPU_KP) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/) @@ -237,52 +149,23 @@ elseif(WITH_XPU) file(RENAME ${kernel} "${CMAKE_CURRENT_BINARY_DIR}/kps/${name}.kps") endforeach() file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.kps") - file( - GLOB kernel_cc_relative - RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" - "*.cc" - "cpu/*.cc" - "legacy/*.cc" - "legacy/cpu/*.cc" - "selected_rows/*.cc" - "selected_rows/cpu/*.cc" - "sparse/*.cc" - "sparse/cpu/*.cc" - "strings/*.cc" - "strings/cpu/*.cc" - "fusion/*.cc" - "fusion/cpu/*.cc") - foreach(kernel ${kernel_cc_relative}) + + foreach(kernel ${kernel_cc}) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${kernel} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${kernel}) endforeach() file(GLOB_RECURSE kernel_xpu_cc "${CMAKE_CURRENT_BINARY_DIR}/*.cc") - xpu_add_library( - phi_xpu - STATIC - ${kernel_xpu} - ${kernel_xpu_kps} - ${kernel_xpu_cc} - DEPENDS - ${COMMON_KERNEL_DEPS}) - kernel_declare("${kernel_xpu_cc}") - else() - add_library(phi_xpu ${kernel_xpu} ${kernel_cc}) - kernel_declare("${kernel_cc}") + + set(kernel_cc ${kernel_xpu_cc}) + collect_generated_srcs(kernels_srcs SRCS ${kernel_xpu_kps}) endif() + collect_srcs(kernels_srcs SRCS ${kernel_xpu}) kernel_declare("${kernel_xpu}") kernel_declare("${kernel_xpu_kps}") - - target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS}) - set(ADD_PHI_KERNELS ${ADD_PHI_KERNELS} phi_xpu) -else() - add_library(phi_cpu ${kernel_cc}) - target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS}) - kernel_declare("${kernel_cc}") - set(ADD_PHI_KERNELS phi_cpu) endif() -set_property(GLOBAL PROPERTY PHI_KERNELS ${ADD_PHI_KERNELS}) +collect_srcs(kernels_srcs SRCS ${kernel_cc}) +kernel_declare("${kernel_cc}") if(NOT "${KERNEL_LIST}" STREQUAL "") prune_declaration_h() diff --git a/paddle/phi/kernels/autotune/CMakeLists.txt b/paddle/phi/kernels/autotune/CMakeLists.txt index aa05fcd74cc..456e6770a70 100644 --- a/paddle/phi/kernels/autotune/CMakeLists.txt +++ b/paddle/phi/kernels/autotune/CMakeLists.txt @@ -1,15 +1 @@ -if(WITH_CUDNN_FRONTEND) - cc_library( - cache - SRCS cache.cc - DEPS cudnn-frontend phi_enforce) -else() - cc_library( - cache - SRCS cache.cc - DEPS phi_enforce) -endif() -cc_library( - switch_autotune - SRCS switch_autotune.cc - DEPS cache flags) +collect_srcs(kernels_srcs SRCS cache.cc switch_autotune.cc) diff --git a/paddle/phi/kernels/autotune/cache_base.h b/paddle/phi/kernels/autotune/cache_base.h index 798898f4dd7..68463e900c3 100644 --- a/paddle/phi/kernels/autotune/cache_base.h +++ b/paddle/phi/kernels/autotune/cache_base.h @@ -18,11 +18,11 @@ #include #include -#include "gflags/gflags.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/errors.h" +#include "paddle/phi/core/flags.h" -DECLARE_int32(search_cache_max_number); +PHI_DECLARE_int32(search_cache_max_number); inline void HashCombine(std::size_t* seed UNUSED) {} diff --git a/paddle/phi/kernels/cpu/rmsprop_kernel.cc b/paddle/phi/kernels/cpu/rmsprop_kernel.cc index f72f912e5be..fd2b4b43c5d 100644 --- a/paddle/phi/kernels/cpu/rmsprop_kernel.cc +++ b/paddle/phi/kernels/cpu/rmsprop_kernel.cc @@ -105,10 +105,6 @@ struct RmsFunctor { } }; -template struct RmsFunctor; -template struct RmsFunctor; -template struct RmsFunctor; - } // namespace phi PD_REGISTER_KERNEL( rmsprop, CPU, ALL_LAYOUT, phi::RmspropDenseKernel, float, double) {} diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt index bd1774d756c..999625cf3df 100644 --- a/paddle/phi/kernels/funcs/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/CMakeLists.txt @@ -4,67 +4,15 @@ add_subdirectory(lapack) add_subdirectory(detail) add_subdirectory(jit) -math_library(deformable_conv_functor DEPS dense_tensor) -math_library(concat_and_split_functor DEPS dense_tensor) -math_library(fc_functor DEPS blas jit_kernel_helper) -math_library(gpc DEPS phi_enforce) -math_library(gru_compute DEPS activation_functions math_function) -math_library(lstm_compute DEPS activation_functions) -math_library(math_function DEPS blas dense_tensor) -math_library(matrix_reduce DEPS dense_tensor) -math_library(matrix_inverse DEPS dense_tensor eigen3 blas) -math_library(pooling DEPS dense_tensor) -math_library(segment_pooling) -math_library(sequence2batch) -math_library(matrix_solve DEPS dense_tensor eigen3 blas math_function) -math_library(cross_entropy) -math_library(im2col) -math_library(vol2col) -math_library(softmax DEPS math_function) -math_library(maxouting) -math_library(matrix_bit_code) -math_library(sequence_scale) -math_library(sequence_padding DEPS lod_utils) -math_library(sequence_pooling DEPS math_function jit_kernel_helper) - -cc_library( - phi_data_layout_transform - SRCS data_layout_transform.cc - DEPS tensor blas) - +file( + GLOB func_cc_srcs + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cc") if(WITH_GPU OR WITH_ROCM) - if(MKL_FOUND AND WITH_ONEMKL) - math_library(fft spectral_op.cu DEPS dynload_cuda dynload_mklrt - dense_tensor) - target_include_directories(fft PRIVATE ${MKL_INCLUDE}) - else() - math_library(fft spectral_op.cu DEPS dynload_cuda dense_tensor pocketfft) - endif() -else() - if(MKL_FOUND AND WITH_ONEMKL) - mathp_library(fft DEPS dynload_mklrt dense_tensor) - target_include_directories(fft PRIVATE ${MKL_INCLUDE}) - else() - math_library(fft DEPS dense_tensor pocketfft) - endif() + file( + GLOB func_cu_srcs + RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" + "*.cu") endif() -if(WITH_MKLDNN) - math_library(selected_rows_functor DEPS selected_rows_utils math_function - blas mixed_vector) -else() - math_library(selected_rows_functor DEPS selected_rows_utils math_function - blas mixed_vector) -endif() - -if(WITH_ROCM) - hip_library( - gather_scatter_functor - SRCS gather_scatter_functor.cc gather_scatter_functor.cu - DEPS tensor) -else() - cc_library( - gather_scatter_functor - SRCS gather_scatter_functor.cc gather_scatter_functor.cu - DEPS tensor) -endif() +collect_srcs(kernels_srcs SRCS ${func_cc_srcs} ${func_cu_srcs}) diff --git a/paddle/phi/kernels/funcs/blas/CMakeLists.txt b/paddle/phi/kernels/funcs/blas/CMakeLists.txt index 6f08472efab..4a0feb20bd6 100644 --- a/paddle/phi/kernels/funcs/blas/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/blas/CMakeLists.txt @@ -1,4 +1 @@ -cc_library( - blas - SRCS blas.cc - DEPS cblas framework_proto phi_backends) +collect_srcs(kernels_srcs SRCS blas.cc) diff --git a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h index c08903e7d37..2f0f3f7cd70 100644 --- a/paddle/phi/kernels/funcs/blas/blas_impl.cu.h +++ b/paddle/phi/kernels/funcs/blas/blas_impl.cu.h @@ -19,10 +19,11 @@ #include "paddle/phi/backends/dynload/cublas.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/flags.h" #include "paddle/phi/kernels/funcs/math_function.h" -DECLARE_bool(enable_cublas_tensor_op_math); -DECLARE_bool(gemm_use_half_precision_compute_type); +PHI_DECLARE_bool(enable_cublas_tensor_op_math); +PHI_DECLARE_bool(gemm_use_half_precision_compute_type); namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/detail/CMakeLists.txt b/paddle/phi/kernels/funcs/detail/CMakeLists.txt index 0df1c060f90..15c5ba0ac78 100644 --- a/paddle/phi/kernels/funcs/detail/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/detail/CMakeLists.txt @@ -1 +1 @@ -cc_library(activation_functions SRCS avx_functions.cc) +collect_srcs(kernels_srcs SRCS avx_functions.cc) diff --git a/paddle/phi/kernels/funcs/eigen/CMakeLists.txt b/paddle/phi/kernels/funcs/eigen/CMakeLists.txt index de771f12fbf..30d6dc6013c 100644 --- a/paddle/phi/kernels/funcs/eigen/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/eigen/CMakeLists.txt @@ -6,19 +6,5 @@ file( GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu") -if(WITH_GPU) - nv_library( - eigen_function - SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} - DEPS eigen3) -elseif(WITH_ROCM) - hip_library( - eigen_function - SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} - DEPS eigen3) -else() - cc_library( - eigen_function - SRCS ${EIGEN_CC_SOURCES} - DEPS eigen3) -endif() + +collect_srcs(kernels_srcs SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES}) diff --git a/paddle/phi/kernels/funcs/jit/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/CMakeLists.txt index cb9dc6a3757..fd44ca30810 100644 --- a/paddle/phi/kernels/funcs/jit/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/CMakeLists.txt @@ -9,17 +9,13 @@ file(APPEND ${jit_file} "\#include \"paddle/phi/kernels/funcs/jit/helper.h\"\n") file(APPEND ${jit_file} "\#include \"paddle/phi/kernels/funcs/jit/registry.h\"\n\n") -set(JIT_KERNEL_DEPS device_context cblas gflags enforce place xxhash) - file( GLOB jit_kernel_cc_srcs RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") list(REMOVE_ITEM jit_kernel_cc_srcs test.cc benchmark.cc) -cc_library( - jit_kernel_base - SRCS ${jit_kernel_cc_srcs} - DEPS ${JIT_KERNEL_DEPS}) + +collect_srcs(kernels_srcs SRCS ${jit_kernel_cc_srcs}) copy_if_different(${jit_file} ${jit_file_final}) @@ -30,14 +26,11 @@ if(WITH_XBYAK) add_subdirectory(gen) endif() -cc_library( - jit_kernel_helper INTERFACE - SRCS ${jit_kernel_cc_srcs} - DEPS jit_kernel_base ${JIT_KERNEL_DEPS}) cc_test( jit_kernel_test SRCS test.cc - DEPS jit_kernel_helper) + DEPS phi) + if(NOT WIN32) set(cuda_less12_and_gcc_greater12 false) if(DEFINED CMAKE_CUDA_COMPILER_VERSION) @@ -47,14 +40,7 @@ if(NOT WIN32) endif() endif() if(NOT cuda_less12_and_gcc_greater12) - cc_binary( - jit_kernel_benchmark - SRCS - benchmark.cc - DEPS - jit_kernel_helper - phi_device_tracer - tensor) + cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS phi) endif() endif() if(WITH_TESTING AND TEST jit_kernel_test) diff --git a/paddle/phi/kernels/funcs/jit/gen/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/gen/CMakeLists.txt index e2b9b51590f..fc16fc4740e 100644 --- a/paddle/phi/kernels/funcs/jit/gen/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/gen/CMakeLists.txt @@ -3,13 +3,7 @@ file( RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -cc_library( - jit_kernel_jitcode - SRCS ${jitcode_cc_srcs} - DEPS jit_kernel_base xbyak) -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} xbyak jit_kernel_jitcode - PARENT_SCOPE) +collect_srcs(kernels_srcs SRCS ${jitcode_cc_srcs}) function(USE_JITKERNEL_GEN TARGET) file(APPEND ${jit_file} "USE_JITKERNEL_GEN(${TARGET});\n") diff --git a/paddle/phi/kernels/funcs/jit/gen_base.h b/paddle/phi/kernels/funcs/jit/gen_base.h index c72c0c52792..dfad19eff34 100644 --- a/paddle/phi/kernels/funcs/jit/gen_base.h +++ b/paddle/phi/kernels/funcs/jit/gen_base.h @@ -33,7 +33,7 @@ namespace jit { class GenBase : public Kernel { public: - virtual ~GenBase() = default; + virtual ~GenBase() {} virtual std::string name() const = 0; virtual size_t getSize() const = 0; virtual const unsigned char* getCodeInternal() const = 0; diff --git a/paddle/phi/kernels/funcs/jit/more/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/more/CMakeLists.txt index 0851ca065b5..ad536a05d12 100644 --- a/paddle/phi/kernels/funcs/jit/more/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/more/CMakeLists.txt @@ -12,7 +12,3 @@ endif() # mix should be last add_subdirectory(mix) - -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} - PARENT_SCOPE) diff --git a/paddle/phi/kernels/funcs/jit/more/intrinsic/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/more/intrinsic/CMakeLists.txt index c6222c9b29b..dbf94d7483e 100644 --- a/paddle/phi/kernels/funcs/jit/more/intrinsic/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/more/intrinsic/CMakeLists.txt @@ -2,14 +2,8 @@ file( GLOB jit_kernel_cc_intrinsic RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -cc_library( - jit_kernel_intrinsic - SRCS ${jit_kernel_cc_intrinsic} - DEPS jit_kernel_base) -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} jit_kernel_intrinsic - PARENT_SCOPE) +collect_srcs(kernels_srcs SRCS ${jit_kernel_cc_intrinsic}) # use mkl kernels by name and type use_jitkernel_more(kCRFDecoding, intrinsic) diff --git a/paddle/phi/kernels/funcs/jit/more/mix/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/more/mix/CMakeLists.txt index 2fa8557c1d8..21b74179f73 100644 --- a/paddle/phi/kernels/funcs/jit/more/mix/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/more/mix/CMakeLists.txt @@ -2,14 +2,8 @@ file( GLOB jit_kernel_mix_cc RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc") -cc_library( - jit_kernel_mix - SRCS ${jit_kernel_mix_cc} - DEPS jit_kernel_base) -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} jit_kernel_mix - PARENT_SCOPE) +collect_srcs(kernels_srcs SRCS ${jit_kernel_mix_cc}) use_jitkernel_more(kVSigmoid, mix) use_jitkernel_more(kVTanh, mix) diff --git a/paddle/phi/kernels/funcs/jit/more/mkl/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/more/mkl/CMakeLists.txt index 7f6df06f87a..0c5d21002d1 100644 --- a/paddle/phi/kernels/funcs/jit/more/mkl/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/more/mkl/CMakeLists.txt @@ -1,10 +1,4 @@ -cc_library( - jit_kernel_mkl - SRCS mkl.cc - DEPS jit_kernel_base dynload_mklml) -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl - PARENT_SCOPE) +collect_srcs(kernels_srcs SRCS mkl.cc) # use mkl kernels by name and type use_jitkernel_more(kMatMul, mkl) diff --git a/paddle/phi/kernels/funcs/jit/refer/CMakeLists.txt b/paddle/phi/kernels/funcs/jit/refer/CMakeLists.txt index 632dc98eb71..825ce47a601 100644 --- a/paddle/phi/kernels/funcs/jit/refer/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/jit/refer/CMakeLists.txt @@ -1,10 +1,4 @@ -cc_library( - jit_kernel_refer - SRCS refer.cc - DEPS jit_kernel_base) -set(JIT_KERNEL_DEPS - ${JIT_KERNEL_DEPS} jit_kernel_refer - PARENT_SCOPE) +collect_srcs(kernels_srcs SRCS refer.cc) function(USE_JITKERNEL_REFER TARGET) file(APPEND ${jit_file} "USE_JITKERNEL_REFER(${TARGET});\n") diff --git a/paddle/phi/kernels/funcs/lapack/CMakeLists.txt b/paddle/phi/kernels/funcs/lapack/CMakeLists.txt index 1a53470b2e6..3321eddf41c 100644 --- a/paddle/phi/kernels/funcs/lapack/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/lapack/CMakeLists.txt @@ -1 +1 @@ -math_library(lapack_function DEPS phi_dynload_lapack) +collect_srcs(kernels_srcs SRCS lapack_function.cc) diff --git a/paddle/phi/kernels/funcs/math_function.h b/paddle/phi/kernels/funcs/math_function.h index e2824cf4e26..b42714e80db 100644 --- a/paddle/phi/kernels/funcs/math_function.h +++ b/paddle/phi/kernels/funcs/math_function.h @@ -25,6 +25,7 @@ limitations under the License. */ namespace phi { namespace funcs { +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) template void BatchTranspose(T* output, const T* input, @@ -32,7 +33,7 @@ void BatchTranspose(T* output, int64_t m, int64_t n, const phi::GPUContext* dev_ctx); - +#endif template struct TransposeNormal { // for dims >= 7 situation diff --git a/paddle/phi/kernels/funcs/matrix_inverse.cu.cc b/paddle/phi/kernels/funcs/matrix_inverse.cu similarity index 100% rename from paddle/phi/kernels/funcs/matrix_inverse.cu.cc rename to paddle/phi/kernels/funcs/matrix_inverse.cu diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu index 36578a361d7..8e564ff7dfc 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention.cu @@ -12,17 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" -#include "paddle/fluid/memory/malloc.h" -#include "paddle/fluid/platform/errors.h" +#include "glog/logging.h" + #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/errors.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" +#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h" namespace phi { namespace fusion { namespace cutlass_internal { +using gemm_kernel_utils::getMaximumSharedMemoryPerBlockKb; + template void MemoryEfficientAttentionForwardKernel( const Context& ctx, @@ -124,9 +128,9 @@ void MemoryEfficientAttentionForwardKernel( VLOG(3) << "kAlignLSE" << kAlignLSE; typename KernelType::Params p; - p.query_ptr = SafeGetTensorPtr(query); - p.key_ptr = SafeGetTensorPtr(key); - p.value_ptr = SafeGetTensorPtr(value); + p.query_ptr = phi::SafeGetTensorPtr(query); + p.key_ptr = phi::SafeGetTensorPtr(key); + p.value_ptr = phi::SafeGetTensorPtr(value); p.logsumexp_ptr = is_test ? nullptr : logsumexp->data(); VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr; @@ -134,19 +138,19 @@ void MemoryEfficientAttentionForwardKernel( if (KernelType::kNeedsOutputAccumulatorBuffer) { out_accum.Resize(output->dims()); p.output_accum_ptr = - SafeAllocTensor( + phi::SafeAllocTensor( ctx, &out_accum); VLOG(3) << "output_accum_ptr " << p.output_accum_ptr; } else { p.output_accum_ptr = nullptr; } - p.output_ptr = - SafeAllocTensor(ctx, output); + p.output_ptr = phi::SafeAllocTensor( + ctx, output); VLOG(3) << "output_ptr " << p.output_ptr; if (cu_seqlens_q) { - p.seqstart_q_ptr = SafeGetTensorPtr(cu_seqlens_q); - p.seqstart_k_ptr = SafeGetTensorPtr(cu_seqlens_k); + p.seqstart_q_ptr = phi::SafeGetTensorPtr(cu_seqlens_q); + p.seqstart_k_ptr = phi::SafeGetTensorPtr(cu_seqlens_k); VLOG(3) << "seqstart_q_ptr " << p.seqstart_q_ptr; } else { p.seqstart_q_ptr = nullptr; @@ -164,7 +168,7 @@ void MemoryEfficientAttentionForwardKernel( cu_seqlens_q ? cu_seqlens_q.get().dims()[0] - 1 : q_dims[0]); p.causal = causal; if (causal_diagonal) { - p.causal_diagonal_ptr = SafeGetTensorPtr(causal_diagonal); + p.causal_diagonal_ptr = phi::SafeGetTensorPtr(causal_diagonal); } else { p.causal_diagonal_ptr = nullptr; } @@ -172,7 +176,7 @@ void MemoryEfficientAttentionForwardKernel( p.seqlen_k_ptr = nullptr; if (seqlen_k) { - p.seqlen_k_ptr = SafeGetTensorPtr(seqlen_k); + p.seqlen_k_ptr = phi::SafeGetTensorPtr(seqlen_k); } else { p.seqlen_k_ptr = nullptr; } @@ -197,7 +201,7 @@ void MemoryEfficientAttentionForwardKernel( PD_MEA_CHECK_OVERFLOW(p.o_strideM, DimStride(output->dims(), 1)); if (bias) { - p.attn_bias_ptr = SafeGetTensorPtr(bias); + p.attn_bias_ptr = phi::SafeGetTensorPtr(bias); PD_MEA_CHECK_OVERFLOW( p.bias_strideB, GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims)); @@ -215,7 +219,8 @@ void MemoryEfficientAttentionForwardKernel( seed_dims[0] = 2; seed_and_offset->Resize(seed_dims); ctx.template HostAlloc(seed_and_offset); - int64_t* seed_and_offset_ptr = SafeGetTensorPtr(seed_and_offset); + int64_t* seed_and_offset_ptr = + phi::SafeGetTensorPtr(seed_and_offset); auto gen = ctx.GetGenerator(); uint64_t inc = query.dims()[0] * query.dims()[2] * 32; @@ -254,10 +259,10 @@ void MemoryEfficientAttentionForwardKernel( ctx.stream()>>>(p); }; dispatch_cutlass_forward(ctx, launchKernel); - PADDLE_ENFORCE_EQ(kernel_launched, - true, - paddle::platform::errors::InvalidArgument( - "the kernel should not be launched")); + PADDLE_ENFORCE_EQ( + kernel_launched, + true, + phi::errors::InvalidArgument("the kernel should not be launched")); } } // namespace cutlass_internal diff --git a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_backward.cu b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_backward.cu index 00d09cf00a8..2e16f9db347 100644 --- a/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_backward.cu +++ b/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_backward.cu @@ -15,16 +15,16 @@ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/errors.h" #include "paddle/phi/api/include/tensor_operants.h" +#include "paddle/phi/common/memory_utils.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" -#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h" - #include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/cum_kernel.h" #include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/funcs/get_pad_lse.cu.h" +#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" +#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h" #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/kernels/reduce_sum_kernel.h" #include "paddle/phi/kernels/reshape_kernel.h" @@ -34,6 +34,8 @@ namespace phi { namespace fusion { namespace cutlass_internal { +using gemm_kernel_utils::getMaximumSharedMemoryPerBlockKb; + template void MemoryEfficientAttentionBackwardKernel( const Context& ctx, @@ -387,9 +389,9 @@ void MemoryEfficientAttentionBackwardKernel( VLOG(3) << "delta has been set" << delta.data(); typename KernelType::Params p; - p.query_ptr = SafeGetTensorPtr(query); - p.key_ptr = SafeGetTensorPtr(key); - p.value_ptr = SafeGetTensorPtr(value); + p.query_ptr = phi::SafeGetTensorPtr(query); + p.key_ptr = phi::SafeGetTensorPtr(key); + p.value_ptr = phi::SafeGetTensorPtr(value); bool force_pad_inf = (compute_capacity == 75); const std::string data_format = "NCHW"; @@ -400,14 +402,14 @@ void MemoryEfficientAttentionBackwardKernel( 32, data_format, force_pad_inf); - p.logsumexp_ptr = SafeGetTensorPtr(padded_lse); + p.logsumexp_ptr = phi::SafeGetTensorPtr(padded_lse); VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr; - p.output_ptr = SafeGetTensorPtr(output); - p.grad_output_ptr = SafeGetTensorPtr(output_grad); - p.grad_query_ptr = SafeAllocTensor(ctx, query_grad); - p.grad_key_ptr = SafeAllocTensor(ctx, key_grad); - p.grad_value_ptr = SafeAllocTensor(ctx, value_grad); - p.delta_ptr = SafeGetTensorPtr(delta); + p.output_ptr = phi::SafeGetTensorPtr(output); + p.grad_output_ptr = phi::SafeGetTensorPtr(output_grad); + p.grad_query_ptr = phi::SafeAllocTensor(ctx, query_grad); + p.grad_key_ptr = phi::SafeAllocTensor(ctx, key_grad); + p.grad_value_ptr = phi::SafeAllocTensor(ctx, value_grad); + p.delta_ptr = phi::SafeGetTensorPtr(delta); PD_MEA_CHECK_OVERFLOW(p.head_dim, q_dims[3]); PD_MEA_CHECK_OVERFLOW(p.head_dim_value, v_dims[3]); @@ -427,8 +429,8 @@ void MemoryEfficientAttentionBackwardKernel( VLOG(3) << "p.scale" << p.scale; if (cu_seqlens_q) { - p.cu_seqlens_q_ptr = SafeGetTensorPtr(cu_seqlens_q); - p.cu_seqlens_k_ptr = SafeGetTensorPtr(cu_seqlens_k); + p.cu_seqlens_q_ptr = phi::SafeGetTensorPtr(cu_seqlens_q); + p.cu_seqlens_k_ptr = phi::SafeGetTensorPtr(cu_seqlens_k); VLOG(3) << "p.cu_seqlens_q_ptr" << p.cu_seqlens_q_ptr; } @@ -483,7 +485,7 @@ void MemoryEfficientAttentionBackwardKernel( PD_MEA_CHECK_OVERFLOW(p.delta_strideB, DimStride(delta.dims(), 0)); if (bias) { - p.bias_ptr = SafeGetTensorPtr(bias); + p.bias_ptr = phi::SafeGetTensorPtr(bias); PD_MEA_CHECK_OVERFLOW( p.bias_strideB, GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims)); @@ -491,7 +493,8 @@ void MemoryEfficientAttentionBackwardKernel( PD_MEA_CHECK_OVERFLOW(p.bias_strideM, k_dims[1]); VLOG(3) << "p.bias_ptr" << p.bias_ptr; if (bias_grad) { - p.grad_bias_ptr = SafeAllocTensor(ctx, bias_grad); + p.grad_bias_ptr = + phi::SafeAllocTensor(ctx, bias_grad); PD_MEA_CHECK_OVERFLOW(p.gB_strideB, q_dims[2] * q_dims[1] * k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.gB_strideH, q_dims[1] * k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.gB_strideM, k_dims[1]); @@ -504,7 +507,8 @@ void MemoryEfficientAttentionBackwardKernel( p.grad_bias_ptr = nullptr; } if (dropout_p != 0) { - int64_t* seed_and_offset_ptr = SafeGetTensorPtr(seed_and_offset); + int64_t* seed_and_offset_ptr = + phi::SafeGetTensorPtr(seed_and_offset); p.seed = (uint64_t)seed_and_offset_ptr[0]; p.offset = (uint64_t)seed_and_offset_ptr[1]; p.dropout_prob = dropout_p; @@ -514,9 +518,9 @@ void MemoryEfficientAttentionBackwardKernel( } int64_t size_bytes = p.workspace_size(); - paddle::memory::AllocationPtr temp_workspace{nullptr}; + phi::Allocator::AllocationPtr temp_workspace{nullptr}; VLOG(3) << "size_bytes " << size_bytes; - temp_workspace = paddle::memory::Alloc( + temp_workspace = phi::memory_utils::Alloc( ctx.GetPlace(), size_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); diff --git a/paddle/phi/kernels/gpu/eigvalsh_kernel.cu b/paddle/phi/kernels/gpu/eigvalsh_kernel.cu index a075dad6cdd..9671cc9f3e8 100644 --- a/paddle/phi/kernels/gpu/eigvalsh_kernel.cu +++ b/paddle/phi/kernels/gpu/eigvalsh_kernel.cu @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#ifndef PADDLE_WITH_HIP + #include "paddle/phi/kernels/eigvalsh_kernel.h" #include "paddle/phi/backends/gpu/gpu_context.h" @@ -29,3 +31,5 @@ PD_REGISTER_KERNEL(eigvalsh, // cuda_only phi::dtype::complex) { kernel->InputAt(1).SetDataType(phi::dtype::ToReal(kernel_key.dtype())); } + +#endif // not PADDLE_WITH_HIP diff --git a/paddle/phi/kernels/gpu/gelu_funcs.h b/paddle/phi/kernels/gpu/gelu_funcs.h index a8f685c0ab0..06e789cc1b8 100644 --- a/paddle/phi/kernels/gpu/gelu_funcs.h +++ b/paddle/phi/kernels/gpu/gelu_funcs.h @@ -22,7 +22,7 @@ #include "paddle/phi/core/flags.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" -DECLARE_bool(use_fast_math); +PHI_DECLARE_bool(use_fast_math); namespace phi { diff --git a/paddle/phi/kernels/impl/isclose_kernel_impl.h b/paddle/phi/kernels/impl/isclose_kernel_impl.h index 4d9d0cd7b86..de59cb0c32c 100644 --- a/paddle/phi/kernels/impl/isclose_kernel_impl.h +++ b/paddle/phi/kernels/impl/isclose_kernel_impl.h @@ -52,19 +52,6 @@ struct GetTensorValue { } }; -template -struct GetTensorValue { - T operator()(const phi::GPUContext& dev_ctx, - const DenseTensor& tensor) const { - const T* data = tensor.data(); - T value; - const auto gpu_place = dev_ctx.GetPlace(); - memory_utils::Copy( - phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream()); - return value; - } -}; - template struct IscloseFunctor { void operator()(const phi::CPUContext& ctx, @@ -127,6 +114,19 @@ __global__ void IscloseCUDAKernel(const T* in_data, } } +template +struct GetTensorValue { + T operator()(const phi::GPUContext& dev_ctx, + const DenseTensor& tensor) const { + const T* data = tensor.data(); + T value; + const auto gpu_place = dev_ctx.GetPlace(); + memory_utils::Copy( + phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream()); + return value; + } +}; + template struct IscloseFunctor { void operator()(const phi::GPUContext& dev_ctx, diff --git a/paddle/phi/kernels/impl/slice_grad_kernel_impl.h b/paddle/phi/kernels/impl/slice_grad_kernel_impl.h index 33ec8f8a841..ac2769e041e 100644 --- a/paddle/phi/kernels/impl/slice_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/slice_grad_kernel_impl.h @@ -30,7 +30,7 @@ void LaunchEigenPadding( const DDim& in_dims, const DenseTensor* d_out, const DDim& out_dims, - const Eigen::array, D>& paddings) { + const std::array, D>& paddings) { auto& place = *context.eigen_device(); auto d_in_t = EigenTensor::From( *d_input, in_dims); @@ -40,7 +40,7 @@ void LaunchEigenPadding( if (d_input->numel() <= Eigen::NumTraits::highest()) { // similar to tf.pad: // if element number less than INT_MAX, change the type of index to int - Eigen::array, D> paddings_32bit; + std::array, D> paddings_32bit; for (size_t i = 0; i < D; i++) { paddings_32bit[i] = std::make_pair(paddings[i].first, paddings[i].second); } @@ -63,7 +63,7 @@ void EigenPaddingCompute( const DDim& in_dims, const DenseTensor* d_out, const DDim& out_dims, - const Eigen::array, D>& paddings) { + const std::array, D>& paddings) { if (D <= 3) { // if dimension less than 3, cannot reduce dimension LaunchEigenPadding( @@ -97,7 +97,7 @@ void EigenPaddingCompute( // only last dimension need padding, // reshape the dimension of tensor in 2: [preceding, padding] std::vector in_tore_shape(2, 1), out_tore_shape(2, 1); - Eigen::array, 2> reshaped_padding; + std::array, 2> reshaped_padding; // first dimension is the accumulate of preceding dimension for (int i = 0; i < pad_dim; i++) { @@ -119,18 +119,18 @@ void EigenPaddingCompute( reshaped_padding[1].first = paddings[pad_dim].first; reshaped_padding[1].second = paddings[pad_dim].second; - LaunchEigenPadding(context, - d_input, - reshaped_in_dims, - d_out, - reshaped_out_dims, - reshaped_padding); + LaunchEigenPadding(context, + d_input, + reshaped_in_dims, + d_out, + reshaped_out_dims, + reshaped_padding); } else if (pad_dim == 0) { // only first dimension need padding, // reshape the dimension of tensor in 2: [padding, succeeding] // similar to (D - 1) std::vector in_tore_shape(2, 1), out_tore_shape(2, 1); - Eigen::array, 2> reshaped_padding; + std::array, 2> reshaped_padding; // first dimension is the padding dimension in_tore_shape[0] = in_dims[pad_dim]; @@ -163,7 +163,7 @@ void EigenPaddingCompute( // reshape the dimension of tensor in 3: // [preceding, padding, succeeding] std::vector in_tore_shape(3, 1), out_tore_shape(3, 1); - Eigen::array, 3> reshaped_padding; + std::array, 3> reshaped_padding; // first dimension is the accumulate of preceding dimension for (int i = 0; i < pad_dim; i++) { @@ -261,7 +261,7 @@ void SliceGradCompute(const Context& ctx, offsets[axis] = start; } - Eigen::array, D> paddings; + std::array, D> paddings; for (size_t i = 0; i < paddings.size(); ++i) { paddings[i].first = offsets[i]; paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i]; diff --git a/paddle/phi/kernels/transfer_layout_kernel.cc b/paddle/phi/kernels/transfer_layout_kernel.cc index 3dec79b13b5..84b978436e1 100644 --- a/paddle/phi/kernels/transfer_layout_kernel.cc +++ b/paddle/phi/kernels/transfer_layout_kernel.cc @@ -112,6 +112,7 @@ void TransferLayoutGeneral(const Context& dev_ctx, } } #endif + PD_VISIT_ALL_TYPES(x.dtype(), "CastDataLayout", ([&] { CastDataLayout(dev_ctx, x, axis, out); })); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 697b74c39a4..2dd6ddd550a 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -3568,6 +3568,7 @@ function run_setup_mac(){ if [ -d "/Library/Frameworks/Python.framework/Versions/3.7" ]; then export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.7/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib export PATH=/Library/Frameworks/Python.framework/Versions/3.7/bin/:${PATH} #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 @@ -3581,6 +3582,7 @@ function run_setup_mac(){ if [ -d "/Library/Frameworks/Python.framework/Versions/3.8" ]; then export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.8/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib export PATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/:${PATH} #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 @@ -3594,6 +3596,7 @@ function run_setup_mac(){ if [ -d "/Library/Frameworks/Python.framework/Versions/3.9" ]; then export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.9/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.9/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib export PATH=/Library/Frameworks/Python.framework/Versions/3.9/bin/:${PATH} #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.9/bin/python3 @@ -3607,6 +3610,7 @@ function run_setup_mac(){ if [ -d "/Library/Frameworks/Python.framework/Versions/3.10" ]; then export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.10/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.10/lib/ + export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib export PATH=/Library/Frameworks/Python.framework/Versions/3.10/bin/:${PATH} #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.10/bin/python3 diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index 2c345473193..4c5f3049f23 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -4,7 +4,7 @@ if(WITH_TESTING) set(paddle_gtest_main_deps device_context gtest - gflags + phi init memory phi_utils diff --git a/paddle/utils/CMakeLists.txt b/paddle/utils/CMakeLists.txt index e8de5c30bbb..e7d5282c2f2 100644 --- a/paddle/utils/CMakeLists.txt +++ b/paddle/utils/CMakeLists.txt @@ -3,11 +3,11 @@ add_subdirectory(string) cc_test( array_ref_test SRCS array_ref_test.cc - DEPS gtest gflags) + DEPS gtest phi) cc_test( small_vector_test SRCS small_vector_test.cc - DEPS gtest gflags) + DEPS gtest phi) cc_test( variant_test SRCS variant_test.cc @@ -17,5 +17,5 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) cc_library( pybind_util SRCS pybind.cc - DEPS phi_tensor_raw flags) + DEPS phi) endif() diff --git a/paddle/utils/string/CMakeLists.txt b/paddle/utils/string/CMakeLists.txt index 89b95385eb1..ddfc8f96b2e 100644 --- a/paddle/utils/string/CMakeLists.txt +++ b/paddle/utils/string/CMakeLists.txt @@ -1,15 +1,15 @@ cc_library( pretty_log SRCS pretty_log.cc - DEPS flags) + DEPS phi) cc_library( string_helper SRCS string_helper.cc - DEPS flags) + DEPS phi) cc_test( stringprintf_test SRCS printf_test.cc - DEPS gflags) + DEPS phi) cc_test(to_string_test SRCS to_string_test.cc) cc_test(split_test SRCS split_test.cc) cc_test( diff --git a/python/env_dict.py.in b/python/env_dict.py.in index d8ae5f9144b..f72ea07c602 100644 --- a/python/env_dict.py.in +++ b/python/env_dict.py.in @@ -10,6 +10,9 @@ env_dict={ 'CUDA_VERSION':'@CUDA_VERSION@', 'WITH_PSLI':'@WITH_PSLI@', 'FLUID_CORE_NAME':'@FLUID_CORE_NAME@', + 'PHI_LIB':'@PHI_LIB@', + 'PHI_NAME':'@PHI_NAME@', + 'WITH_PHI_SHARED':'@WITH_PHI_SHARED@', 'WARPCTC_LIBRARIES':'@WARPCTC_LIBRARIES@', 'WARPRNNT_LIBRARIES':'@WARPRNNT_LIBRARIES@', 'FLASHATTN_LIBRARIES':'@FLASHATTN_LIBRARIES@', diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index d4c50707cbe..b7bd5d5fa0e 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1134,14 +1134,6 @@ foreach(TEST_CINN_OPS ${TEST_CINN_OPS}) endforeach() if(WITH_CINN AND WITH_TESTING) - set_tests_properties( - test_resnet50_with_cinn - PROPERTIES - LABELS - "RUN_TYPE=CINN" - ENVIRONMENT - FLAGS_allow_cinn_ops="conv2d;conv2d_grad;elementwise_add;elementwise_add_grad;relu;relu_grad;sum" - ) set_tests_properties( test_parallel_executor_run_cinn PROPERTIES diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py index c598073f434..2ca34842f0b 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_run_cinn.py @@ -123,9 +123,9 @@ class TestParallelExecutorRunCinn(unittest.TestCase): shutil.rmtree(self.tmpdir) def test_run_with_cinn(self): - cinn_losses = train(self.tmpdir, "paddle") + cinn_losses = np.array(train(self.tmpdir, "paddle")).flatten() set_cinn_flag(False) - pd_losses = train(self.tmpdir, "cinn") + pd_losses = np.array(train(self.tmpdir, "cinn")).flatten() np.testing.assert_allclose( cinn_losses, pd_losses, rtol=1e-05, atol=1e-05 ) diff --git a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py b/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py deleted file mode 100644 index d262319eee8..00000000000 --- a/python/paddle/fluid/tests/unittests/test_resnet50_with_cinn.py +++ /dev/null @@ -1,149 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import unittest - -import numpy as np - -import paddle -from paddle.fluid import core - -paddle.enable_static() - -logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO -) -logger = logging.getLogger(__name__) - - -def set_cinn_flag(val): - cinn_compiled = False - try: - paddle.set_flags({'FLAGS_use_cinn': val}) - cinn_compiled = True - except ValueError: - logger.warning("The used paddle is not compiled with CINN.") - return cinn_compiled - - -@unittest.skipIf(not set_cinn_flag(True), "Paddle is not compiled with CINN.") -class TestResnet50Accuracy(unittest.TestCase): - def reader(self, limit): - for _ in range(limit): - yield { - 'image': np.random.randint( - 0, 256, size=[32, 3, 224, 224] - ).astype('float32'), - 'label': np.random.randint(0, 1000, size=[32]).astype('int64'), - } - - def generate_random_data(self, loop_num=10): - feed = [] - data = self.reader(loop_num) - for _ in range(loop_num): - feed.append(next(data)) - return feed - - def build_program(self, main_program, startup_program): - with paddle.static.program_guard(main_program, startup_program): - image = paddle.static.data( - name='image', shape=[32, 3, 224, 224], dtype='float32' - ) - label = paddle.static.data(name='label', shape=[32], dtype='int64') - - # TODO: stop_gradient slower training speed, need fix - image.stop_gradient = False - - model = paddle.vision.models.resnet50() - prediction = model(image) - - loss = paddle.nn.functional.cross_entropy( - input=prediction, label=label - ) - loss = paddle.mean(loss) - adam = paddle.optimizer.Adam(learning_rate=0.001) - adam.minimize(loss) - return loss - - def train(self, place, iters, feed, use_cinn=False, seed=1234): - np.random.seed(seed) - paddle.seed(seed) - if paddle.is_compiled_with_cuda(): - paddle.set_flags({'FLAGS_cudnn_deterministic': 1}) - set_cinn_flag(use_cinn) - - startup_program = paddle.static.Program() - main_program = paddle.static.Program() - - loss = self.build_program(main_program, startup_program) - exe = paddle.static.Executor(place) - - compiled_prog = paddle.static.CompiledProgram(main_program) - loss_vals = [] - scope = paddle.static.Scope() - - with paddle.static.scope_guard(scope): - exe.run(startup_program) - for step in range(iters): - loss_v = exe.run( - compiled_prog, - feed=feed[step], - fetch_list=[loss], - return_numpy=True, - ) - loss_vals.append(loss_v[0]) - return loss_vals - - def test_check_resnet50_accuracy(self): - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - - loop_num = 10 - feed = self.generate_random_data(loop_num) - - loss_c = self.train(place, loop_num, feed, use_cinn=True) - loss_p = self.train(place, loop_num, feed, use_cinn=False) - print("Losses of CINN:") - print(loss_c) - print("Losses of Paddle") - print(loss_p) - np.testing.assert_allclose(loss_c, loss_p, rtol=1e-05, atol=1e-05) - - def test_check_resnet50_accuracy_with_composite(self): - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - - loop_num = 10 - feed = self.generate_random_data(loop_num) - core._set_prim_backward_enabled(True) - core._add_skip_comp_ops("batch_norm") - loss_c = self.train(place, loop_num, feed, use_cinn=True) - core._set_prim_backward_enabled(False) - loss_p = self.train(place, loop_num, feed, use_cinn=True) - print("Losses of Composite + CINN:") - print(loss_c) - print("Losses of CINN: ") - print(loss_p) - np.testing.assert_allclose(loss_c, loss_p, rtol=1e-05, atol=1e-05) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index 89acf5fe09d..9a6517a7d55 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -561,7 +561,11 @@ package_dir={ libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' package_data['paddle.libs']= [] -package_data['paddle.libs']=[ +if('${WITH_PHI_SHARED}' == 'ON'): + package_data['paddle.libs'] = [('libphi' if os.name != 'nt' else 'phi') + ext_name] + shutil.copy('${PHI_LIB}', libs_path) + +package_data['paddle.libs']+=[ ('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name, ('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_name, ] @@ -722,8 +726,14 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': if "@APPLE@" == "1": commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'] commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so') + if('${WITH_PHI_SHARED}' == 'ON'): + # change rpath of phi.ext for loading 3rd party libb + commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}") else: commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'] + if('${WITH_PHI_SHARED}' == 'ON'): + # change rpath of phi.ext for loading 3rd party lib + commands.append("patchelf --set-rpath '$ORIGIN' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}") # The sw_64 not suppot patchelf, so we just disable that. if platform.machine() != 'sw_64' and platform.machine() != 'mips64': for command in commands: diff --git a/setup.py b/setup.py index e10308e1dff..f8858321ae6 100644 --- a/setup.py +++ b/setup.py @@ -966,7 +966,14 @@ def get_package_data_and_package_dir(): # put all thirdparty libraries in paddle.libs libs_path = paddle_binary_dir + '/python/paddle/libs' package_data['paddle.libs'] = [] - package_data['paddle.libs'] = [ + + if env_dict.get("WITH_PHI_SHARED") == "ON": + package_data['paddle.libs'] = [ + ('libphi' if os.name != 'nt' else 'phi') + ext_suffix + ] + shutil.copy(env_dict.get("PHI_LIB"), libs_path) + + package_data['paddle.libs'] += [ ('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_suffix, ('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_suffix, ] @@ -1204,6 +1211,13 @@ def get_package_data_and_package_dir(): + env_dict.get("FLUID_CORE_NAME") + '.so' ) + if env_dict.get("WITH_PHI_SHARED") == "ON": + commands.append( + "install_name_tool -add_rpath '@loader_path' " + + env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/libs/' + + env_dict.get("PHI_NAME") + ) else: commands = [ "patchelf --set-rpath '$ORIGIN/../libs/' " @@ -1212,6 +1226,13 @@ def get_package_data_and_package_dir(): + env_dict.get("FLUID_CORE_NAME") + '.so' ] + if env_dict.get("WITH_PHI_SHARED") == "ON": + commands.append( + "patchelf --set-rpath '$ORIGIN' " + + env_dict.get("PADDLE_BINARY_DIR") + + '/python/paddle/libs/' + + env_dict.get("PHI_NAME") + ) # The sw_64 not suppot patchelf, so we just disable that. if platform.machine() != 'sw_64' and platform.machine() != 'mips64': for command in commands: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 258ea9025dd..975446b6002 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -168,6 +168,7 @@ if(${len} GREATER_EQUAL 1) add_executable(${test_name} ${test_src}) target_link_libraries(${test_name} paddle_gtest_main_new) target_link_libraries(${test_name} $) + target_link_libraries(${test_name} $) add_dependencies(${test_name} ${paddle_lib} paddle_gtest_main_new) if(WITH_GPU) target_link_libraries(${test_name} ${CUDA_CUDART_LIBRARY} @@ -177,8 +178,10 @@ if(${len} GREATER_EQUAL 1) target_link_libraries(${test_name} ${ROCM_HIPRTC_LIB}) endif() if(APPLE) - target_link_libraries(${test_name} - "-Wl,-rpath,$") + target_link_libraries( + ${test_name} + "-Wl,-rpath,$ -Wl,-rpath,$" + ) endif() if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) target_link_libraries(${test_name} ${PYTHON_LIBRARIES}) diff --git a/test/cpp/eager/CMakeLists.txt b/test/cpp/eager/CMakeLists.txt index d8d3a6304cf..7747a42d250 100644 --- a/test/cpp/eager/CMakeLists.txt +++ b/test/cpp/eager/CMakeLists.txt @@ -1,12 +1,10 @@ set(eager_deps - phi_api - phi_dygraph_api + phi hook_utils tensor_utils utils global_utils backward - phi_tensor tracer layer autograd_meta diff --git a/test/cpp/fluid/CMakeLists.txt b/test/cpp/fluid/CMakeLists.txt index 590816d1b5e..b7936fadaad 100644 --- a/test/cpp/fluid/CMakeLists.txt +++ b/test/cpp/fluid/CMakeLists.txt @@ -42,7 +42,7 @@ cc_test( test_common_infer_shape_functions SRCS test_common_infer_shape_functions.cc DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op - elementwise_add_op softmax generated_static_op) + elementwise_add_op phi generated_static_op) cc_test( gather_test SRCS gather_test.cc @@ -54,7 +54,7 @@ cc_test( cc_test( scatter_test SRCS scatter_test.cc - DEPS tensor math_function) + DEPS tensor phi) cc_test( beam_search_decode_op_test SRCS beam_search_decode_op_test.cc @@ -72,7 +72,7 @@ if(WITH_GPU) nv_test( dropout_op_test SRCS dropout_op_test.cc - DEPS dropout_op tensor generator) + DEPS dropout_op tensor phi) nv_test( test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc @@ -81,12 +81,12 @@ if(WITH_GPU) nv_test( feed_forward_test SRCS feed_forward_test.cu - DEPS elementwise_add_op matmul_op tensor generator) + DEPS elementwise_add_op matmul_op tensor phi) elseif(WITH_ROCM) hip_test( dropout_op_test SRCS dropout_op_test.cc - DEPS dropout_op tensor generator) + DEPS dropout_op tensor phi) hip_test( test_leaky_relu_grad_grad_functor SRCS test_leaky_relu_grad_grad_functor.cc diff --git a/test/cpp/fluid/benchmark/CMakeLists.txt b/test/cpp/fluid/benchmark/CMakeLists.txt index a5a799d71da..9111dfe2ff3 100644 --- a/test/cpp/fluid/benchmark/CMakeLists.txt +++ b/test/cpp/fluid/benchmark/CMakeLists.txt @@ -11,7 +11,7 @@ cc_test( scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} - eigen_function) + phi) if(WITH_ONNXRUNTIME AND WIN32) # Copy onnxruntime for some c++ test in Windows, since the test will diff --git a/test/cpp/fluid/cinn/CMakeLists.txt b/test/cpp/fluid/cinn/CMakeLists.txt index f396d1c58cc..2553457a9e1 100644 --- a/test/cpp/fluid/cinn/CMakeLists.txt +++ b/test/cpp/fluid/cinn/CMakeLists.txt @@ -1,46 +1,49 @@ -cc_test_old( - cinn_launch_context_test - SRCS - cinn_launch_context_test.cc - DEPS - ddim - lod_tensor - scope - proto_desc - graph - cinn_launch_context - cinn_instruction_run_op - cinn) -target_link_libraries(cinn_launch_context_test ${PYTHON_LIBRARIES}) -set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN") +if(WITH_TESTING) + cc_test_old( + cinn_launch_context_test + SRCS + cinn_launch_context_test.cc + DEPS + phi + lod_tensor + scope + proto_desc + graph + cinn_launch_context + cinn_instruction_run_op + cinn) + target_link_libraries(cinn_launch_context_test ${PYTHON_LIBRARIES}) + set_tests_properties(cinn_launch_context_test PROPERTIES LABELS + "RUN_TYPE=CINN") -set(CINN_RUN_ENVIRONMENT - "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda" -) -# cc_test_old( -# cinn_launch_op_test -# SRCS -# cinn_launch_op_test.cc -# DEPS -# cinn_compiler -# cinn_launch_op -# cinn_instruction_run_op -# elementwise_add_op -# gflags) -# set_tests_properties( -# cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT -# "${CINN_RUN_ENVIRONMENT}") + set(CINN_RUN_ENVIRONMENT + "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda" + ) + # cc_test_old( + # cinn_launch_op_test + # SRCS + # cinn_launch_op_test.cc + # DEPS + # cinn_compiler + # cinn_launch_op + # cinn_instruction_run_op + # elementwise_add_op + # gflags) + # set_tests_properties( + # cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT + # "${CINN_RUN_ENVIRONMENT}") -cc_test_old( - cinn_instruction_run_op_test - SRCS - cinn_instruction_run_op_test.cc - DEPS - cinn_compiler - cinn_launch_op - cinn_instruction_run_op - elementwise_add_op) -target_link_libraries(cinn_instruction_run_op_test ${PYTHON_LIBRARIES}) -set_tests_properties( - cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT - "${CINN_RUN_ENVIRONMENT}") + cc_test_old( + cinn_instruction_run_op_test + SRCS + cinn_instruction_run_op_test.cc + DEPS + cinn_compiler + cinn_launch_op + cinn_instruction_run_op + elementwise_add_op) + target_link_libraries(cinn_instruction_run_op_test ${PYTHON_LIBRARIES}) + set_tests_properties( + cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT + "${CINN_RUN_ENVIRONMENT}") +endif() diff --git a/test/cpp/fluid/fused/CMakeLists.txt b/test/cpp/fluid/fused/CMakeLists.txt index ff239f2f0c6..6529e13c90c 100644 --- a/test/cpp/fluid/fused/CMakeLists.txt +++ b/test/cpp/fluid/fused/CMakeLists.txt @@ -15,7 +15,7 @@ if(WITH_GPU OR WITH_ROCM) dropout_op generated_op device_context - generator + phi memory) nv_test( test_fused_dropout_act_bias @@ -25,7 +25,7 @@ if(WITH_GPU OR WITH_ROCM) dropout_op generated_op device_context - generator + phi memory) nv_test( test_fused_layernorm_residual_dropout_bias @@ -35,7 +35,7 @@ if(WITH_GPU OR WITH_ROCM) dropout_op generated_op device_context - generator + phi memory) endif() # resnet_unit needs cudnn 8.0 above @@ -44,15 +44,11 @@ if(WITH_GPU OR WITH_ROCM) test_cudnn_norm_conv SRCS cudnn_norm_conv_test.cc DEPS conv_op - blas - im2col - vol2col depthwise_conv - eigen_function tensor op_registry device_context - generator + phi memory) cc_test( test_cudnn_bn_add_relu @@ -62,7 +58,7 @@ if(WITH_GPU OR WITH_ROCM) tensor op_registry device_context - generator + phi memory) endif() endif() diff --git a/test/cpp/fluid/math/CMakeLists.txt b/test/cpp/fluid/math/CMakeLists.txt index cbe53b0828c..1edc2f25e68 100644 --- a/test/cpp/fluid/math/CMakeLists.txt +++ b/test/cpp/fluid/math/CMakeLists.txt @@ -1,15 +1,15 @@ cc_test( selected_rows_functor_test SRCS selected_rows_functor_test.cc - DEPS allocator selected_rows_functor) + DEPS allocator phi) cc_test( im2col_test SRCS im2col_test.cc - DEPS im2col) + DEPS phi) cc_test( vol2col_test SRCS vol2col_test.cc - DEPS vol2col) + DEPS phi) cc_test( beam_search_test SRCS beam_search_test.cc @@ -18,13 +18,13 @@ if(WITH_GPU) nv_test( selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc - DEPS selected_rows_functor math_function) + DEPS phi) endif() if(WITH_ROCM) hip_test( selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu.cc - DEPS selected_rows_functor math_function) + DEPS phi) endif() cc_test( concat_test diff --git a/test/cpp/fluid/mkldnn/CMakeLists.txt b/test/cpp/fluid/mkldnn/CMakeLists.txt index dae56ea5eb6..d08e30b346a 100644 --- a/test/cpp/fluid/mkldnn/CMakeLists.txt +++ b/test/cpp/fluid/mkldnn/CMakeLists.txt @@ -4,7 +4,7 @@ cc_test( DEPS op_registry elementwise_add_op activation_op - softmax + phi scope device_context enforce @@ -17,9 +17,7 @@ set(TEST_MKLDNN_CACHING_DEPS elementwise_add_op activation_op conv_op - im2col - vol2col - softmax + phi scope device_context enforce @@ -44,7 +42,7 @@ cc_test_old( crop_op activation_op generated_op - pooling + phi transpose_op fused_transpose_op scope diff --git a/test/cpp/fluid/pscore/CMakeLists.txt b/test/cpp/fluid/pscore/CMakeLists.txt index c195c6c7975..c19df6b4696 100644 --- a/test/cpp/fluid/pscore/CMakeLists.txt +++ b/test/cpp/fluid/pscore/CMakeLists.txt @@ -68,7 +68,7 @@ cc_test_old( scope proto_desc generated_op - eigen_function) + phi) set_source_files_properties( send_and_recv_op_cpu_test.cc PROPERTIES COMPILE_FLAGS @@ -85,7 +85,7 @@ cc_test_old( send_and_recv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} - eigen_function) + phi) set_source_files_properties( send_and_recv_op_gpu_test.cc PROPERTIES COMPILE_FLAGS @@ -102,7 +102,7 @@ cc_test_old( send_and_recv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} - eigen_function) + phi) set_source_files_properties( heter_listen_and_server_test.cc PROPERTIES COMPILE_FLAGS @@ -119,10 +119,10 @@ cc_test_old( heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} - eigen_function) + phi) #set_source_files_properties(heter_cloud_comm_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) -#cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc generated_static_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function) +#cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc generated_static_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} phi) set_source_files_properties( switch_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) @@ -138,4 +138,4 @@ cc_binary( heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} - eigen_function) + phi) diff --git a/test/cpp/imperative/CMakeLists.txt b/test/cpp/imperative/CMakeLists.txt index acecb4fe010..c0a103603ce 100644 --- a/test/cpp/imperative/CMakeLists.txt +++ b/test/cpp/imperative/CMakeLists.txt @@ -33,14 +33,7 @@ endif() cc_test( test_gradient_accmulator SRCS test_gradient_accmulator.cc - DEPS memcpy - selected_rows_utils - selected_rows_functor - gradient_accumulator - math_function - phi_tensor - phi_api - phi_utils) + DEPS memcpy selected_rows_utils gradient_accumulator phi phi_utils) cc_test( test_layer SRCS test_layer.cc diff --git a/test/cpp/imperative/test_hooks.cc b/test/cpp/imperative/test_hooks.cc index 005ac3d3168..8f28ebaf294 100644 --- a/test/cpp/imperative/test_hooks.cc +++ b/test/cpp/imperative/test_hooks.cc @@ -24,6 +24,7 @@ #include "paddle/fluid/imperative/hooks.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); @@ -35,7 +36,7 @@ namespace platform = paddle::platform; namespace framework = paddle::framework; namespace memory = paddle::memory; -DECLARE_bool(sort_sum_gradient); +PHI_DECLARE_bool(sort_sum_gradient); namespace paddle { namespace imperative { diff --git a/test/cpp/inference/infer_ut/CMakeLists.txt b/test/cpp/inference/infer_ut/CMakeLists.txt index e27cf7ffe1d..a1e39ca142d 100644 --- a/test/cpp/inference/infer_ut/CMakeLists.txt +++ b/test/cpp/inference/infer_ut/CMakeLists.txt @@ -224,7 +224,7 @@ if(NOT WIN32) ${MATH_LIB} ${MKLDNN_LIB} glog - gflags + phi protobuf xxhash cryptopp @@ -235,7 +235,7 @@ else() ${MATH_LIB} ${MKLDNN_LIB} glog - gflags_static + phi libprotobuf xxhash cryptopp-static diff --git a/test/cpp/jit/CMakeLists.txt b/test/cpp/jit/CMakeLists.txt index c5490c7aa4b..708f48bbf49 100644 --- a/test/cpp/jit/CMakeLists.txt +++ b/test/cpp/jit/CMakeLists.txt @@ -8,7 +8,6 @@ if(WITH_TESTING AND NOT WIN32) WORKING_DIRECTORY "${CC_TESTS_DIR}") set(JIT_DEPS phi - phi_api elementwise_add_op matmul_v2_op activation_op diff --git a/test/cpp/new_executor/CMakeLists.txt b/test/cpp/new_executor/CMakeLists.txt index 11e4e9a84e1..30af210725c 100644 --- a/test/cpp/new_executor/CMakeLists.txt +++ b/test/cpp/new_executor/CMakeLists.txt @@ -37,8 +37,7 @@ if(WITH_GPU fetch_v2_op) # All deps of the operators above, part of GLOB_OPERATOR_DEPS. - set(OP_DEPS generator softmax selected_rows_functor jit_kernel_helper - concat_and_split cross_entropy) + set(OP_DEPS phi concat_and_split cross_entropy) cc_test(standalone_executor_test SRCS standalone_executor_test.cc) # add_dependencies(standalone_executor_test download_program) diff --git a/test/cpp/phi/api/CMakeLists.txt b/test/cpp/phi/api/CMakeLists.txt index c2898a2fde2..fd06e6d460d 100644 --- a/test/cpp/phi/api/CMakeLists.txt +++ b/test/cpp/phi/api/CMakeLists.txt @@ -1,48 +1,48 @@ -set(COMMON_API_TEST_DEPS phi_tensor phi_api api_tensor_utils) +set(COMMON_API_TEST_DEPS phi) if(WITH_GPU) nv_test( test_phi_tensor SRCS test_phi_tensor.cc - DEPS glog selected_rows ${COMMON_API_TEST_DEPS}) + DEPS glog ${COMMON_API_TEST_DEPS}) nv_test( test_allocator SRCS test_allocator.cu - DEPS place device_context context_pool) + DEPS place device_context phi) nv_test( test_cuda_stream SRCS test_cuda_stream.cu - DEPS context_pool) + DEPS phi) nv_test( test_from_blob SRCS test_from_blob.cc - DEPS phi_backends ${COMMON_API_TEST_DEPS}) + DEPS ${COMMON_API_TEST_DEPS}) elseif(WITH_ROCM) hip_test( test_phi_tensor SRCS test_phi_tensor.cc - DEPS glog selected_rows ${COMMON_API_TEST_DEPS}) + DEPS glog ${COMMON_API_TEST_DEPS}) hip_test( test_allocator SRCS test_allocator.cu - DEPS place device_context context_pool) + DEPS place device_context phi) hip_test( test_cuda_stream SRCS test_cuda_stream.cu - DEPS context_pool) + DEPS phi) hip_test( test_from_blob SRCS test_from_blob.cc - DEPS phi_backends ${COMMON_API_TEST_DEPS}) + DEPS ${COMMON_API_TEST_DEPS}) else() cc_test( test_phi_tensor SRCS test_phi_tensor.cc - DEPS glog selected_rows ${COMMON_API_TEST_DEPS}) + DEPS glog ${COMMON_API_TEST_DEPS}) cc_test( test_from_blob SRCS test_from_blob.cc - DEPS phi_backends ${COMMON_API_TEST_DEPS}) + DEPS ${COMMON_API_TEST_DEPS}) endif() cc_test( diff --git a/test/cpp/phi/api/scale_api.h b/test/cpp/phi/api/scale_api.h index c8ab3c7e985..571ab0defbc 100644 --- a/test/cpp/phi/api/scale_api.h +++ b/test/cpp/phi/api/scale_api.h @@ -21,12 +21,13 @@ #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/flags.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/meta_tensor.h" #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/scale_kernel.h" -DECLARE_int32(low_precision_op_list); +PHI_DECLARE_int32(low_precision_op_list); namespace paddle { namespace experimental { diff --git a/test/cpp/phi/common/CMakeLists.txt b/test/cpp/phi/common/CMakeLists.txt index ed9eaf7fef0..b40e7e9f5a4 100644 --- a/test/cpp/phi/common/CMakeLists.txt +++ b/test/cpp/phi/common/CMakeLists.txt @@ -13,32 +13,32 @@ cc_test( cc_test( phi_test_place SRCS test_place.cc - DEPS phi_place) + DEPS phi) cc_test( phi_test_int_array SRCS test_int_array.cc - DEPS int_array api_int_array phi phi_api) + DEPS phi) cc_test( phi_test_scalar_cpu SRCS test_scalar.cc - DEPS scalar api_scalar) + DEPS phi) if(WITH_GPU) nv_test( phi_test_scalar SRCS test_scalar.cu - DEPS scalar api_scalar) + DEPS phi) nv_test( transform_test SRCS transform_test.cu - DEPS memory place phi_backends) + DEPS memory place phi) endif() if(WITH_ROCM) hip_test( phi_test_scalar SRCS test_scalar.cu - DEPS scalar api_scalar) + DEPS phi) hip_test( transform_test SRCS transform_test.cu - DEPS memory place phi_backends) + DEPS memory place phi) endif() diff --git a/test/cpp/phi/core/CMakeLists.txt b/test/cpp/phi/core/CMakeLists.txt index 0fa3cca9e2a..7b8bb1ff8b2 100644 --- a/test/cpp/phi/core/CMakeLists.txt +++ b/test/cpp/phi/core/CMakeLists.txt @@ -1,59 +1,51 @@ cc_test( test_custom_kernel SRCS test_custom_kernel.cc - DEPS custom_kernel scalar) + DEPS phi) cc_test( test_dense_tensor SRCS test_dense_tensor.cc - DEPS dense_tensor) + DEPS phi) cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc) cc_test(test_type_info SRCS test_type_info.cc) cc_test( test_kernel_factory SRCS test_kernel_factory.cc - DEPS kernel_factory phi) + DEPS phi) cc_test( test_sparse_coo_tensor SRCS test_sparse_coo_tensor.cc - DEPS dense_tensor sparse_coo_tensor) + DEPS phi) cc_test( test_sparse_csr_tensor SRCS test_sparse_csr_tensor.cc - DEPS dense_tensor sparse_csr_tensor) + DEPS phi) cc_test( test_op_utils SRCS test_op_utils.cc DEPS op_compat_infos) -cc_test_old( - test_meta_fn_utils - SRCS - test_meta_fn_utils.cc - DEPS - dense_tensor - wrapped_infermeta - infermeta - infermeta_utils) +cc_test_old(test_meta_fn_utils SRCS test_meta_fn_utils.cc DEPS phi) cc_test( test_ddim SRCS test_ddim.cc - DEPS ddim) + DEPS phi) if(WITH_GPU) nv_test( test_dim SRCS test_dim.cu - DEPS ddim) + DEPS phi) elseif(WITH_ROCM) hip_test( test_dim SRCS test_dim.cu - DEPS ddim) + DEPS phi) endif() cc_test( selected_rows_test SRCS test_selected_rows.cc - DEPS selected_rows) + DEPS phi) if(WITH_TESTING AND TEST selected_rows_test) set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120) endif() @@ -63,27 +55,27 @@ endif() cc_test( test_string_tensor SRCS test_string_tensor.cc - DEPS string_tensor) + DEPS phi) cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc) cc_test( test_tensor_array SRCS test_tensor_array.cc - DEPS tensor_array) + DEPS phi) if(WITH_GPU) nv_test( test_mixed_vector SRCS test_mixed_vector.cc test_mixed_vector.cu - DEPS mixed_vector place memory phi_backends tensor) + DEPS place memory phi tensor) elseif(WITH_ROCM) hip_test( test_mixed_vector SRCS test_mixed_vector.cc test_mixed_vector.cu - DEPS mixed_vector place memory phi_backends tensor) + DEPS place memory phi tensor) else() cc_test( test_mixed_vector SRCS test_mixed_vector.cc - DEPS mixed_vector place memory phi_backends tensor) + DEPS place memory phi tensor) endif() diff --git a/test/cpp/phi/core/test_type_info.cc b/test/cpp/phi/core/test_type_info.cc index 56980ebbe21..40e89f6203d 100644 --- a/test/cpp/phi/core/test_type_info.cc +++ b/test/cpp/phi/core/test_type_info.cc @@ -17,6 +17,11 @@ limitations under the License. */ #include "paddle/phi/core/utils/type_registry.h" namespace phi { + +template +const TypeInfo TypeInfoTraits::kType = + RegisterStaticType(DerivedT::name()); + namespace tests { template diff --git a/test/cpp/phi/kernels/CMakeLists.txt b/test/cpp/phi/kernels/CMakeLists.txt index 3e7f394f186..a4906b3d1a8 100644 --- a/test/cpp/phi/kernels/CMakeLists.txt +++ b/test/cpp/phi/kernels/CMakeLists.txt @@ -1,12 +1,12 @@ cc_test( test_math_function SRCS test_math_function.cc - DEPS math_function) + DEPS phi) if(WITH_GPU) nv_test( test_math_function_gpu SRCS test_math_function.cu - DEPS math_function) + DEPS phi) nv_test( test_broadcast_gpu SRCS test_ternary_broadcast.cu @@ -16,13 +16,13 @@ if(WITH_ROCM) hip_test( test_math_function_gpu SRCS test_math_function.cu - DEPS math_function) + DEPS phi) endif() cc_test( test_cpu_vec SRCS test_cpu_vec.cc - DEPS blas phi_backends) + DEPS phi) # For String Kernels cc_test( @@ -94,19 +94,19 @@ endif() cc_test( test_cache SRCS test_cache.cc - DEPS gtest cache) + DEPS gtest phi) cc_test( strided_memcpy_test SRCS strided_memcpy_test.cc - DEPS phi_backends memory) + DEPS phi memory) cc_test( sequence_padding_test SRCS sequence_padding_test.cc - DEPS sequence_padding) + DEPS phi) cc_test( sequence_pooling_test SRCS sequence_pooling_test.cc - DEPS sequence_pooling) + DEPS phi) diff --git a/test/cpp/phi/ops/CMakeLists.txt b/test/cpp/phi/ops/CMakeLists.txt index 634af80f05a..4e6cf31f75c 100644 --- a/test/cpp/phi/ops/CMakeLists.txt +++ b/test/cpp/phi/ops/CMakeLists.txt @@ -1,4 +1,4 @@ cc_test( test_op_signature SRCS test_op_signature.cc - DEPS op_utils) + DEPS phi) diff --git a/test/cpp/prim/CMakeLists.txt b/test/cpp/prim/CMakeLists.txt index 92845d5bd81..947e446ca93 100644 --- a/test/cpp/prim/CMakeLists.txt +++ b/test/cpp/prim/CMakeLists.txt @@ -1,12 +1,10 @@ set(prim_eager_deps - phi_api - phi_dygraph_api + phi hook_utils tensor_utils utils global_utils backward - phi_tensor tracer layer autograd_meta @@ -33,20 +31,16 @@ cc_test_old( elementwise_pow_op fill_constant_op activation_op - phi_api - phi_dygraph_api + phi static_global_utils static_tensor_operants - tensor_api - operants_manager generated_static_op) if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( init_env_utils SRCS init_env_utils.cc - DEPS operants_manager tensor_api eager_tensor_operants - static_tensor_operants) + DEPS phi eager_tensor_operants static_tensor_operants) cc_test_old( test_comp_eager diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index ec9130ff6d9..71f63175c45 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -2221,7 +2221,6 @@ CPU_PARALLEL_JOB = [ 'test_egr_ds_grad_tensor_holder', 'test_egr_ds_auotgrad_meta', 'test_egr_ds_accumulation_node', - 'test_resnet50_with_cinn', 'test_parallel_dygraph_sync_batch_norm', 'test_monitor', 'test_mkldnn_quantizer', -- GitLab