未验证 提交 da50a009 编写于 作者: Y YuanRisheng 提交者: GitHub

[PHI Decoupling]Create PHI shared lib (#53735)

* create phi so

* fix ci bugs

* fix py3 bugs

* add file

* fix py3 bugs

* fix windows bugs

* perfect so

* fix py3 bugs

* delete all static target in phi

* fix windows bugs

* fix py3 bugs

* fix ci bugs

* fix windows bugs

* fix bugs: gflags can't be linked by dynamic and static lib

* fix bugs that can not load 3rd party

* fix ci bugs

* fix compile bugs

* fix py3 bugs

* fix conflict

* fix xpu bugs

* fix mac compile bugs

* fix psgpu bugs

* fix inference failed

* deal with conflict

* fix LIBRARY_PATH bug

* fix windows bugs

* fix onednn error

* fix windows compile bugs

* fix windows compile bugs

* fix test_cuda_graph_static_mode_error aborted

* fix windows bugs

* fix mac-python3 error

* fix hip compile bugs

* change mode to static

* change to static mode

* fix ci bugs

* fix py3 bugs

* fix windows bugs

* fix bugs

* add static flag

* add PADDLE_API

* change position of PADDLE_API

* fix windows bugs

* change mode to dynamic lib

* fix windows static bugs

* deal with conflict

* fix windows unit bug

* fix coverage

* deal with conflict

* fix windows-inference

* fix py3 bugs

* fix bugs when compile type_info

* fix compile bugs

* fix py3 bugs

* fix windows bugs

* fix windows openblas

* fix xpu bugs

* fix enforce_test in windows

* update code according comment

* fix windows cmake bug

* fix windows bugs

* fix windows bugs

* delete cinn unittest

* fix cinn bugs

---------
Co-authored-by: HappyHeavyRain's avatarlzydev <1528794076@qq.com>
上级 7aabdfd9
...@@ -40,7 +40,6 @@ if(WITH_MKLML) ...@@ -40,7 +40,6 @@ if(WITH_MKLML)
add_definitions(-DLAPACK_FOUND) add_definitions(-DLAPACK_FOUND)
add_dependencies(cblas mklml) add_dependencies(cblas mklml)
target_link_libraries(cblas dynload_mklml)
message(STATUS "Found cblas and lapack in MKLML " message(STATUS "Found cblas and lapack in MKLML "
"(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
......
...@@ -235,3 +235,16 @@ endif() ...@@ -235,3 +235,16 @@ endif()
if(WITH_CUDNN_FRONTEND) if(WITH_CUDNN_FRONTEND)
add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND) add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif() endif()
set(WITH_PHI_SHARED
ON
CACHE BOOL "" FORCE)
if(WIN32 OR WITH_ROCM)
set(WITH_PHI_SHARED
OFF
CACHE BOOL "" FORCE)
endif()
if(WITH_PHI_SHARED)
add_definitions(-DPHI_SHARED)
endif()
...@@ -122,6 +122,5 @@ get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY) ...@@ -122,6 +122,5 @@ get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
include_directories(${WARPCTC_INCLUDE_DIR} include_directories(${WARPCTC_INCLUDE_DIR}
)# For warpctc code to include its headers. )# For warpctc code to include its headers.
add_library(warpctc SHARED IMPORTED GLOBAL) add_library(warpctc INTERFACE)
set_property(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
add_dependencies(warpctc extern_warpctc) add_dependencies(warpctc extern_warpctc)
...@@ -364,20 +364,7 @@ function(cc_library TARGET_NAME) ...@@ -364,20 +364,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc) list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc) add_dependencies(${TARGET_NAME} warpctc)
endif() endif()
# Only deps libmklml.so, not link
if("${cc_library_DEPS};" MATCHES "mklml;")
list(REMOVE_ITEM cc_library_DEPS mklml)
if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml")
list(APPEND cc_library_DEPS dynload_mklml)
endif()
add_dependencies(${TARGET_NAME} mklml)
if(WIN32)
target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
else()
target_link_libraries(${TARGET_NAME}
"-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
endif()
# remove link to python, see notes at: # remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
if("${cc_library_DEPS};" MATCHES "python;") if("${cc_library_DEPS};" MATCHES "python;")
...@@ -457,25 +444,10 @@ function(cc_test_build TARGET_NAME) ...@@ -457,25 +444,10 @@ function(cc_test_build TARGET_NAME)
endif() endif()
endif() endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries( target_link_libraries(${TARGET_NAME} ${cc_test_DEPS}
${TARGET_NAME} ${os_dependency_modules} paddle_gtest_main gtest glog)
${cc_test_DEPS} add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main gtest
${os_dependency_modules} glog)
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
add_dependencies(
${TARGET_NAME}
${cc_test_DEPS}
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
if(WITH_ROCM) if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB}) target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
...@@ -670,7 +642,7 @@ function(nv_test TARGET_NAME) ...@@ -670,7 +642,7 @@ function(nv_test TARGET_NAME)
add_executable(${TARGET_NAME} ${nv_test_SRCS}) add_executable(${TARGET_NAME} ${nv_test_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} target_link_libraries(${TARGET_NAME} ${nv_test_DEPS}
${os_dependency_modules} paddle_gtest_main) ${os_dependency_modules} paddle_gtest_main phi)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main) add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
...@@ -774,8 +746,8 @@ function(hip_test TARGET_NAME) ...@@ -774,8 +746,8 @@ function(hip_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags
glog glog
phi
${os_dependency_modules}) ${os_dependency_modules})
add_dependencies( add_dependencies(
${TARGET_NAME} ${TARGET_NAME}
...@@ -784,7 +756,7 @@ function(hip_test TARGET_NAME) ...@@ -784,7 +756,7 @@ function(hip_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog) glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
...@@ -881,7 +853,7 @@ function(xpu_test TARGET_NAME) ...@@ -881,7 +853,7 @@ function(xpu_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog glog
${os_dependency_modules}) ${os_dependency_modules})
add_dependencies( add_dependencies(
...@@ -891,7 +863,7 @@ function(xpu_test TARGET_NAME) ...@@ -891,7 +863,7 @@ function(xpu_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog) glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
......
...@@ -269,6 +269,13 @@ else() ...@@ -269,6 +269,13 @@ else()
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
copy(
inference_lib_dist
SRCS ${paddle_phi_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif() endif()
copy( copy(
......
...@@ -61,8 +61,7 @@ function(register_cu_kernel TARGET) ...@@ -61,8 +61,7 @@ function(register_cu_kernel TARGET)
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
set(cu_srcs) set(cu_srcs)
set(op_common_deps operator op_registry math_function layer set(op_common_deps operator op_registry layer common_infer_shape_functions)
common_infer_shape_functions)
foreach(cu_src ${register_cu_kernel_SRCS}) foreach(cu_src ${register_cu_kernel_SRCS})
if(${cu_src} MATCHES ".*\\.cu$") if(${cu_src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${cu_src}) list(APPEND cu_srcs ${cu_src})
...@@ -113,7 +112,7 @@ function(register_mkldnn_kernel TARGET) ...@@ -113,7 +112,7 @@ function(register_mkldnn_kernel TARGET)
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
set(mkldnn_cc_srcs) set(mkldnn_cc_srcs)
set(op_common_deps operator op_registry math_function layer set(op_common_deps operator op_registry phi layer
common_infer_shape_functions) common_infer_shape_functions)
foreach(mkldnn_src ${register_mkldnn_kernel_SRCS}) foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$") if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
...@@ -164,7 +163,7 @@ function(op_library TARGET) ...@@ -164,7 +163,7 @@ function(op_library TARGET)
set(MIOPEN_FILE) set(MIOPEN_FILE)
set(mkldnn_cc_srcs) set(mkldnn_cc_srcs)
set(MKLDNN_FILE) set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer set(op_common_deps operator op_registry phi layer
common_infer_shape_functions) common_infer_shape_functions)
# Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build. # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
......
...@@ -94,6 +94,13 @@ function(kernel_declare TARGET_LIST) ...@@ -94,6 +94,13 @@ function(kernel_declare TARGET_LIST)
continue() continue()
endif() endif()
endif() endif()
# fusion group kernel is not supported in windows and mac
if(WIN32 OR APPLE)
string(FIND "${first_registry}" "fusion_group" pos)
if(pos GREATER 1)
continue()
endif()
endif()
# some gpu kernel only can run on cuda, not support rocm, so we add this branch # some gpu kernel only can run on cuda, not support rocm, so we add this branch
if(WITH_ROCM) if(WITH_ROCM)
string(FIND "${first_registry}" "cuda_only" pos) string(FIND "${first_registry}" "cuda_only" pos)
...@@ -216,3 +223,27 @@ function(prune_declaration_h) ...@@ -216,3 +223,27 @@ function(prune_declaration_h)
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
function(collect_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(src ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${src}"
CACHE INTERNAL "")
endforeach()
endfunction()
function(collect_generated_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(src ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${src}"
CACHE INTERNAL "")
endforeach()
endfunction()
...@@ -49,5 +49,5 @@ file(GLOB PD_DIALECT_SRCS "*.cc") ...@@ -49,5 +49,5 @@ file(GLOB PD_DIALECT_SRCS "*.cc")
cc_library( cc_library(
pd_dialect pd_dialect
SRCS ${PD_DIALECT_SRCS} ${op_source_file} SRCS ${PD_DIALECT_SRCS} ${op_source_file}
DEPS new_ir framework_proto dense_tensor phi_utils) DEPS new_ir framework_proto phi phi_utils)
target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR}) target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR})
cc_library( cc_library(
op_dist_attr op_dist_attr
SRCS dist_attr.cc SRCS dist_attr.cc
DEPS dist_attr process_mesh dist_mapper auto_parallel_proto proto_desc DEPS phi auto_parallel_proto proto_desc)
phi_enforce)
add_subdirectory(test) add_subdirectory(test)
cc_test( cc_test(
device_mesh_test device_mesh_test
SRCS device_mesh_test.cc SRCS device_mesh_test.cc
DEPS device_mesh) DEPS phi)
cc_test( cc_test(
process_mesh_test process_mesh_test
SRCS process_mesh_test.cc SRCS process_mesh_test.cc
DEPS process_mesh) DEPS phi)
cc_test( cc_test(
dist_attr_test dist_attr_test
SRCS dist_attr_test.cc SRCS dist_attr_test.cc
DEPS dist_attr proto_desc) DEPS phi proto_desc)
cc_test( cc_test(
dist_mapper_test dist_mapper_test
SRCS dist_mapper_test.cc SRCS dist_mapper_test.cc
DEPS dist_mapper) DEPS phi)
cc_library( cc_library(
process_group process_group
SRCS process_group.cc SRCS process_group.cc
DEPS dense_tensor xxhash) DEPS phi xxhash)
cc_library( cc_library(
eager_reducer eager_reducer
SRCS reducer.cc SRCS reducer.cc
DEPS eager_api process_group phi_api string_helper) DEPS eager_api process_group phi string_helper)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
cc_library( cc_library(
process_group_gloo process_group_gloo
SRCS process_group_gloo.cc gloo_send_recv.cc SRCS process_group_gloo.cc gloo_send_recv.cc
DEPS phi_api eager_api gloo_wrapper tcp_store) DEPS phi eager_api gloo_wrapper)
endif() endif()
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
...@@ -20,28 +20,19 @@ if(WITH_NCCL OR WITH_RCCL) ...@@ -20,28 +20,19 @@ if(WITH_NCCL OR WITH_RCCL)
process_group_nccl process_group_nccl
SRCS process_group_nccl.cc nccl_tools.cc common.cc SRCS process_group_nccl.cc nccl_tools.cc common.cc
DEPS process_group DEPS process_group
tcp_store phi
place place
enforce enforce
collective_helper collective_helper
device_context device_context
${DEVICE_EVENT_LIBS} ${DEVICE_EVENT_LIBS})
dense_tensor
comm_static_check
nccl_dynamic_check)
endif() endif()
if(WITH_XPU_BKCL) if(WITH_XPU_BKCL)
cc_library( cc_library(
process_group_bkcl process_group_bkcl
SRCS process_group_bkcl.cc bkcl_tools.cc common.cc SRCS process_group_bkcl.cc bkcl_tools.cc common.cc
DEPS process_group DEPS process_group phi place enforce collective_helper device_context)
tcp_store
place
enforce
collective_helper
device_context
dense_tensor)
endif() endif()
if(WITH_MPI) if(WITH_MPI)
...@@ -55,15 +46,7 @@ if(WITH_CUSTOM_DEVICE) ...@@ -55,15 +46,7 @@ if(WITH_CUSTOM_DEVICE)
cc_library( cc_library(
process_group_custom process_group_custom
SRCS process_group_custom.cc custom_ccl_tools.cc common.cc SRCS process_group_custom.cc custom_ccl_tools.cc common.cc
DEPS process_group DEPS process_group phi place enforce collective_helper device_context)
tcp_store
phi_backends
place
enforce
collective_helper
device_context
comm_static_check
dense_tensor)
endif() endif()
set(COMM_UTILS_DEPS process_group) set(COMM_UTILS_DEPS process_group)
......
...@@ -5,7 +5,7 @@ endif() ...@@ -5,7 +5,7 @@ endif()
proto_library(interceptor_message_proto SRCS interceptor_message.proto) proto_library(interceptor_message_proto SRCS interceptor_message.proto)
if(WITH_ARM_BRPC) if(WITH_ARM_BRPC)
set(BRPC_DEPS arm_brpc snappy gflags glog) set(BRPC_DEPS arm_brpc snappy phi glog)
elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
set(BRPC_DEPS set(BRPC_DEPS
brpc brpc
...@@ -15,7 +15,7 @@ elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB) ...@@ -15,7 +15,7 @@ elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
zlib zlib
leveldb leveldb
snappy snappy
gflags phi
glog) glog)
else() else()
set(BRPC_DEPS "") set(BRPC_DEPS "")
...@@ -51,7 +51,7 @@ cc_library( ...@@ -51,7 +51,7 @@ cc_library(
collective_helper collective_helper
op_registry op_registry
executor_gc_helper executor_gc_helper
gflags phi
glog glog
${BRPC_DEPS}) ${BRPC_DEPS})
......
...@@ -8,12 +8,11 @@ if(WITH_HETERPS) ...@@ -8,12 +8,11 @@ if(WITH_HETERPS)
ssl ssl
crypto crypto
protobuf protobuf
gflags phi
glog glog
zlib zlib
leveldb leveldb
snappy snappy
gflags
glog glog
device_context device_context
rocksdb) rocksdb)
...@@ -25,12 +24,11 @@ else() ...@@ -25,12 +24,11 @@ else()
ssl ssl
crypto crypto
protobuf protobuf
gflags phi
glog glog
zlib zlib
leveldb leveldb
snappy snappy
gflags
glog glog
device_context) device_context)
...@@ -122,8 +120,7 @@ cc_library( ...@@ -122,8 +120,7 @@ cc_library(
simple_threadpool simple_threadpool
simple_rpc simple_rpc
scope scope
math_function phi
selected_rows_functor
ps_gpu_wrapper ps_gpu_wrapper
${RPC_DEPS}) ${RPC_DEPS})
...@@ -150,7 +147,7 @@ cc_library( ...@@ -150,7 +147,7 @@ cc_library(
#cc_library( #cc_library(
# communicator # communicator
# SRCS communicator/communicator.cc # SRCS communicator/communicator.cc
# DEPS scope client table math_function selected_rows_functor ${RPC_DEPS}) # DEPS scope client table phi ${RPC_DEPS})
#cc_library( #cc_library(
# ps_service # ps_service
# SRCS ps_service/service.cc # SRCS ps_service/service.cc
......
...@@ -48,7 +48,7 @@ cc_library( ...@@ -48,7 +48,7 @@ cc_library(
string_helper string_helper
simple_threadpool simple_threadpool
xxhash xxhash
generator) phi)
set_source_files_properties( set_source_files_properties(
tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
...@@ -91,7 +91,7 @@ cc_library( ...@@ -91,7 +91,7 @@ cc_library(
ps_framework_proto ps_framework_proto
string_helper string_helper
device_context device_context
gflags phi
glog glog
fs fs
afs_wrapper afs_wrapper
......
...@@ -20,7 +20,7 @@ set(PADDLE_RPC_DEPS ...@@ -20,7 +20,7 @@ set(PADDLE_RPC_DEPS
zlib zlib
leveldb leveldb
snappy snappy
gflags phi
glog glog
pybind) pybind)
proto_library(paddle_rpc_proto SRCS rpc.proto) proto_library(paddle_rpc_proto SRCS rpc.proto)
......
...@@ -73,7 +73,7 @@ cc_test_old( ...@@ -73,7 +73,7 @@ cc_test_old(
DEPS DEPS
brpc_utils brpc_utils
scope scope
math_function phi
${COMMON_DEPS} ${COMMON_DEPS}
${RPC_DEPS}) ${RPC_DEPS})
......
set(eager_deps set(eager_deps
phi_api phi
phi_dygraph_api
hook_utils hook_utils
tensor_utils tensor_utils
utils utils
global_utils global_utils
backward backward
phi_tensor
tracer tracer
layer layer
autograd_meta autograd_meta
...@@ -48,27 +46,26 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) ...@@ -48,27 +46,26 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_library( cc_library(
backward backward
SRCS backward.cc SRCS backward.cc
DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune) DEPS grad_tensor_holder utils autograd_meta grad_node_info phi)
endif() endif()
cc_library( cc_library(
eager_nan_inf_utils eager_nan_inf_utils
SRCS nan_inf_utils.cc SRCS nan_inf_utils.cc
DEPS phi_tensor nan_inf_utils enforce) DEPS phi nan_inf_utils enforce)
cc_library( cc_library(
grad_node_info grad_node_info
SRCS grad_node_info.cc SRCS grad_node_info.cc
DEPS phi_api phi_tensor) DEPS phi)
cc_library( cc_library(
autograd_meta autograd_meta
SRCS autograd_meta.cc SRCS autograd_meta.cc
DEPS phi_api phi_tensor) DEPS phi)
cc_library( cc_library(
utils utils
SRCS utils.cc SRCS utils.cc
DEPS phi_api DEPS phi
phi_tensor
global_utils global_utils
layer layer
proto_desc proto_desc
......
...@@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) ...@@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
accumulation_node accumulation_node
SRCS accumulation_node.cc SRCS accumulation_node.cc
DEPS gradient_accumulator phi_api grad_node_info) DEPS gradient_accumulator phi grad_node_info)
endif() endif()
cc_library( cc_library(
scale_node scale_node
SRCS scale_node.cc SRCS scale_node.cc
DEPS global_utils phi phi_api grad_node_info) DEPS global_utils phi grad_node_info)
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
......
cc_library( cc_library(
eager_scale eager_scale
SRCS scale.cc SRCS scale.cc
DEPS phi_api phi autograd_meta scale_node) DEPS phi autograd_meta scale_node)
if(NOT (NOT WITH_PYTHON AND ON_INFER)) if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
......
...@@ -7,7 +7,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) ...@@ -7,7 +7,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library( cc_library(
tensor_utils tensor_utils
SRCS tensor_utils.cc SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info accumulation_node) DEPS phi autograd_meta grad_node_info accumulation_node)
cc_library( cc_library(
hook_utils hook_utils
SRCS hook_utils.cc SRCS hook_utils.cc
...@@ -16,7 +16,7 @@ else() ...@@ -16,7 +16,7 @@ else()
cc_library( cc_library(
tensor_utils tensor_utils
SRCS tensor_utils.cc SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info) DEPS phi autograd_meta grad_node_info)
cc_library( cc_library(
hook_utils hook_utils
SRCS hook_utils.cc SRCS hook_utils.cc
......
...@@ -52,6 +52,15 @@ if(WIN32) ...@@ -52,6 +52,15 @@ if(WIN32)
set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}") set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
endif() endif()
if(WITH_PHI_SHARED)
message("Copied phi.dll for Eager AutoCodeGen")
add_custom_command(
OUTPUT ${eager_generator_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${eager_generator_path}
DEPENDS phi)
list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll)
endif()
if(${CBLAS_PROVIDER} STREQUAL MKLML) if(${CBLAS_PROVIDER} STREQUAL MKLML)
message("Copied libiomp5md.dll for Eager AutoCodeGen") message("Copied libiomp5md.dll for Eager AutoCodeGen")
add_custom_command( add_custom_command(
......
...@@ -392,7 +392,7 @@ FORWARD_CC_FILE_TEMPLATE = """ ...@@ -392,7 +392,7 @@ FORWARD_CC_FILE_TEMPLATE = """
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" #include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/phi/core/flags.h" #include "paddle/phi/core/flags.h"
DECLARE_bool(check_nan_inf); PHI_DECLARE_bool(check_nan_inf);
PHI_DECLARE_string(tensor_operants_mode); PHI_DECLARE_string(tensor_operants_mode);
{} {}
{} {}
......
cc_library( cc_library(
custom_operator_node custom_operator_node
SRCS custom_operator_node.cc SRCS custom_operator_node.cc
DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info) DEPS phi grad_node_info custom_operator)
cc_library( cc_library(
py_layer_node py_layer_node
SRCS py_layer_node.cc SRCS py_layer_node.cc
DEPS pybind phi_api grad_node_info) DEPS pybind phi grad_node_info)
...@@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto ...@@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
cc_library( cc_library(
string_array string_array
SRCS string_array.cc SRCS string_array.cc
DEPS utf8proc phi_enforce) DEPS utf8proc phi)
cc_library( cc_library(
data_type data_type
...@@ -130,7 +130,7 @@ cc_test( ...@@ -130,7 +130,7 @@ cc_test(
cc_library( cc_library(
tensor tensor
SRCS tensor_util.cc SRCS tensor_util.cc
DEPS place memory data_type device_context dense_tensor) DEPS place memory data_type device_context phi)
cc_test( cc_test(
tensor_test tensor_test
...@@ -166,12 +166,12 @@ cc_test( ...@@ -166,12 +166,12 @@ cc_test(
cc_library( cc_library(
lod_tensor lod_tensor
SRCS lod_tensor.cc SRCS lod_tensor.cc
DEPS ddim mixed_vector place tensor framework_proto version) DEPS phi place tensor framework_proto version)
cc_test( cc_test(
lod_tensor_test lod_tensor_test
SRCS lod_tensor_test.cc SRCS lod_tensor_test.cc
DEPS lod_utils lod_tensor memory) DEPS phi lod_tensor memory)
if(WITH_GPU) if(WITH_GPU)
nv_test( nv_test(
...@@ -188,12 +188,12 @@ endif() ...@@ -188,12 +188,12 @@ endif()
cc_library( cc_library(
garbage_collector garbage_collector
SRCS garbage_collector.cc SRCS garbage_collector.cc
DEPS device_context memory gflags glog) DEPS device_context memory phi glog)
cc_library( cc_library(
reader reader
SRCS reader.cc SRCS reader.cc
DEPS lod_tensor ddim) DEPS lod_tensor phi)
cc_test( cc_test(
reader_test reader_test
SRCS reader_test.cc SRCS reader_test.cc
...@@ -202,13 +202,12 @@ cc_test( ...@@ -202,13 +202,12 @@ cc_test(
cc_test( cc_test(
threadpool_test threadpool_test
SRCS threadpool_test.cc SRCS threadpool_test.cc
DEPS threadpool) DEPS phi)
cc_library( cc_library(
var_type_traits var_type_traits
SRCS var_type_traits.cc SRCS var_type_traits.cc
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor DEPS framework_proto scope phi)
extended_tensor)
if(WITH_GPU) if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda) target_link_libraries(var_type_traits dynload_cuda)
endif() endif()
...@@ -242,7 +241,7 @@ endif() ...@@ -242,7 +241,7 @@ endif()
cc_library( cc_library(
scope scope
SRCS scope.cc SRCS scope.cc
DEPS glog threadpool xxhash var_type_traits) DEPS glog phi xxhash var_type_traits)
cc_library( cc_library(
device_worker device_worker
SRCS device_worker.cc SRCS device_worker.cc
...@@ -273,12 +272,12 @@ if(WITH_GPU) ...@@ -273,12 +272,12 @@ if(WITH_GPU)
nv_test( nv_test(
data_device_transform_test data_device_transform_test
SRCS data_device_transform_test.cu SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function scope) DEPS operator op_registry device_context phi scope)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_test( hip_test(
data_device_transform_test data_device_transform_test
SRCS data_device_transform_test.cu SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function scope) DEPS operator op_registry device_context phi scope)
endif() endif()
if(WITH_GPU) if(WITH_GPU)
...@@ -333,7 +332,7 @@ endif() ...@@ -333,7 +332,7 @@ endif()
cc_library( cc_library(
data_layout_transform data_layout_transform
SRCS data_layout_transform.cc SRCS data_layout_transform.cc
DEPS tensor math_function phi_data_layout_transform) DEPS tensor phi)
cc_test( cc_test(
data_layout_transform_test data_layout_transform_test
SRCS data_layout_transform_test.cc SRCS data_layout_transform_test.cc
...@@ -342,14 +341,13 @@ cc_test( ...@@ -342,14 +341,13 @@ cc_test(
cc_library( cc_library(
data_transform data_transform
SRCS data_transform.cc SRCS data_transform.cc
DEPS math_function DEPS phi
tensor tensor
framework_proto framework_proto
selected_rows_utils selected_rows_utils
data_device_transform data_device_transform
data_type_transform data_type_transform
data_layout_transform data_layout_transform)
phi_data_transform)
cc_library( cc_library(
attribute attribute
...@@ -400,7 +398,7 @@ cc_library( ...@@ -400,7 +398,7 @@ cc_library(
cc_library( cc_library(
shape_inference shape_inference
SRCS shape_inference.cc SRCS shape_inference.cc
DEPS ddim attribute selected_rows_utils) DEPS phi attribute selected_rows_utils)
# every source file that includes "dnnl.h" must depends on mkldnn # every source file that includes "dnnl.h" must depends on mkldnn
# or, the first one should depends on mkldnn # or, the first one should depends on mkldnn
...@@ -433,30 +431,17 @@ if(WITH_XPU) ...@@ -433,30 +431,17 @@ if(WITH_XPU)
phi_utils phi_utils
SRCS phi_utils.cc SRCS phi_utils.cc
DEPS lod_tensor DEPS lod_tensor
dense_tensor
selected_rows_utils selected_rows_utils
int_array
scalar
place place
phi phi
var_type_traits var_type_traits
op_info op_info
xpu_op_list xpu_op_list)
convert_utils)
else() else()
cc_library( cc_library(
phi_utils phi_utils
SRCS phi_utils.cc SRCS phi_utils.cc
DEPS lod_tensor DEPS lod_tensor selected_rows_utils place phi var_type_traits op_info)
dense_tensor
selected_rows_utils
int_array
scalar
place
phi
var_type_traits
op_info
convert_utils)
endif() endif()
if(WITH_XPU) if(WITH_XPU)
...@@ -482,11 +467,10 @@ if(WITH_XPU) ...@@ -482,11 +467,10 @@ if(WITH_XPU)
unused_var_check unused_var_check
nan_inf_utils nan_inf_utils
phi_utils phi_utils
kernel_factory
infershape_utils infershape_utils
op_utils phi
op_compat_infos op_compat_infos
get_kerneltype_forvar_utils) type_info)
else() else()
cc_library( cc_library(
operator operator
...@@ -509,11 +493,10 @@ else() ...@@ -509,11 +493,10 @@ else()
unused_var_check unused_var_check
nan_inf_utils nan_inf_utils
phi_utils phi_utils
kernel_factory
infershape_utils infershape_utils
op_utils phi
op_compat_infos op_compat_infos
get_kerneltype_forvar_utils) type_info)
endif() endif()
cc_test( cc_test(
...@@ -543,7 +526,7 @@ cc_library( ...@@ -543,7 +526,7 @@ cc_library(
version version
xxhash xxhash
op_dist_attr op_dist_attr
scalar phi
op_version_proto op_version_proto
op_version_registry) op_version_registry)
...@@ -853,7 +836,7 @@ if(WITH_DISTRIBUTE) ...@@ -853,7 +836,7 @@ if(WITH_DISTRIBUTE)
heter_server heter_server
brpc brpc
fleet_executor fleet_executor
flags) phi)
set(DISTRIBUTE_COMPILE_FLAGS "") set(DISTRIBUTE_COMPILE_FLAGS "")
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new") set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
...@@ -1071,7 +1054,7 @@ if(WITH_PSCORE) ...@@ -1071,7 +1054,7 @@ if(WITH_PSCORE)
executor executor
heter_server heter_server
gloo_wrapper gloo_wrapper
eigen_function phi
${RPC_DEPS} ${RPC_DEPS}
graph_gpu_wrapper) graph_gpu_wrapper)
else() else()
...@@ -1088,7 +1071,7 @@ if(WITH_PSCORE) ...@@ -1088,7 +1071,7 @@ if(WITH_PSCORE)
executor executor
heter_server heter_server
gloo_wrapper gloo_wrapper
eigen_function phi
${RPC_DEPS}) ${RPC_DEPS})
endif() endif()
else() else()
...@@ -1112,7 +1095,7 @@ cc_test( ...@@ -1112,7 +1095,7 @@ cc_test(
cc_library( cc_library(
selected_rows_utils selected_rows_utils
SRCS selected_rows_utils.cc SRCS selected_rows_utils.cc
DEPS selected_rows device_context) DEPS phi device_context)
cc_test( cc_test(
selected_rows_utils_test selected_rows_utils_test
SRCS selected_rows_utils_test.cc SRCS selected_rows_utils_test.cc
...@@ -1162,12 +1145,11 @@ cc_library( ...@@ -1162,12 +1145,11 @@ cc_library(
phi phi
phi_utils phi_utils
op_info op_info
shape_inference shape_inference)
sparse_coo_tensor)
cc_test( cc_test(
infershape_utils_test infershape_utils_test
SRCS infershape_utils_test.cc SRCS infershape_utils_test.cc
DEPS infershape_utils infermeta_utils meta_tensor) DEPS infershape_utils phi)
# Get the current working branch # Get the current working branch
execute_process( execute_process(
...@@ -1198,12 +1180,15 @@ cc_library( ...@@ -1198,12 +1180,15 @@ cc_library(
operator operator
dynamic_loader dynamic_loader
string_helper string_helper
phi_tensor phi
op_meta_info imperative_flag
phi_api layer)
tensor_api
phi_tensor_operants cc_library(type_info SRCS type_info.cc)
operants_manager) add_dependencies(type_info framework_proto auto_parallel_proto xxhash)
if(WITH_MKLDNN)
add_dependencies(type_info mkldnn)
endif()
set(FLUID_FRAMEWORK_MODULES set(FLUID_FRAMEWORK_MODULES
proto_desc proto_desc
......
...@@ -10,15 +10,15 @@ cc_library( ...@@ -10,15 +10,15 @@ cc_library(
cc_library( cc_library(
scale_loss_grad_op_handle scale_loss_grad_op_handle
SRCS scale_loss_grad_op_handle.cc SRCS scale_loss_grad_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory) DEPS op_handle_base scope lod_tensor phi memory)
cc_library( cc_library(
fetch_op_handle fetch_op_handle
SRCS fetch_op_handle.cc SRCS fetch_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory) DEPS op_handle_base scope lod_tensor phi memory)
cc_library( cc_library(
fetch_async_op_handle fetch_async_op_handle
SRCS fetch_async_op_handle.cc SRCS fetch_async_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory) DEPS op_handle_base scope lod_tensor phi memory)
cc_library( cc_library(
share_tensor_buffer_functor share_tensor_buffer_functor
...@@ -78,7 +78,7 @@ if(WITH_GPU) ...@@ -78,7 +78,7 @@ if(WITH_GPU)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor) variable_visitor)
...@@ -88,7 +88,7 @@ if(WITH_GPU) ...@@ -88,7 +88,7 @@ if(WITH_GPU)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor variable_visitor
...@@ -99,7 +99,7 @@ if(WITH_GPU) ...@@ -99,7 +99,7 @@ if(WITH_GPU)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor variable_visitor
...@@ -114,7 +114,7 @@ if(WITH_GPU) ...@@ -114,7 +114,7 @@ if(WITH_GPU)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor variable_visitor
...@@ -126,19 +126,17 @@ if(WITH_GPU) ...@@ -126,19 +126,17 @@ if(WITH_GPU)
nv_library( nv_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda DEPS op_handle_base variable_visitor scope phi dynload_cuda)
selected_rows_functor)
else() else()
nv_library( nv_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda DEPS op_handle_base variable_visitor scope phi dynload_cuda)
selected_rows_functor)
endif() endif()
nv_library( nv_library(
broadcast_op_handle broadcast_op_handle
SRCS broadcast_op_handle.cc SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
nv_library( nv_library(
fused_broadcast_op_handle fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc SRCS fused_broadcast_op_handle.cc
...@@ -154,7 +152,7 @@ elseif(WITH_ROCM) ...@@ -154,7 +152,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor) variable_visitor)
...@@ -164,7 +162,7 @@ elseif(WITH_ROCM) ...@@ -164,7 +162,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor variable_visitor
...@@ -175,7 +173,7 @@ elseif(WITH_ROCM) ...@@ -175,7 +173,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
dynload_cuda dynload_cuda
variable_visitor variable_visitor
...@@ -187,19 +185,17 @@ elseif(WITH_ROCM) ...@@ -187,19 +185,17 @@ elseif(WITH_ROCM)
hip_library( hip_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda DEPS op_handle_base variable_visitor scope phi dynload_cuda)
selected_rows_functor)
else() else()
hip_library( hip_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda DEPS op_handle_base variable_visitor scope phi dynload_cuda)
selected_rows_functor)
endif() endif()
hip_library( hip_library(
broadcast_op_handle broadcast_op_handle
SRCS broadcast_op_handle.cc SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda) DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
hip_library( hip_library(
fused_broadcast_op_handle fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc SRCS fused_broadcast_op_handle.cc
...@@ -212,14 +208,14 @@ else() ...@@ -212,14 +208,14 @@ else()
cc_library( cc_library(
all_reduce_op_handle all_reduce_op_handle
SRCS all_reduce_op_handle.cc SRCS all_reduce_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory variable_visitor) DEPS op_handle_base scope lod_tensor phi memory variable_visitor)
cc_library( cc_library(
fused_all_reduce_op_handle fused_all_reduce_op_handle
SRCS fused_all_reduce_op_handle.cc SRCS fused_all_reduce_op_handle.cc
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
variable_visitor variable_visitor
place) place)
...@@ -229,7 +225,7 @@ else() ...@@ -229,7 +225,7 @@ else()
DEPS op_handle_base DEPS op_handle_base
scope scope
lod_tensor lod_tensor
ddim phi
memory memory
variable_visitor variable_visitor
place place
...@@ -239,17 +235,17 @@ else() ...@@ -239,17 +235,17 @@ else()
cc_library( cc_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) DEPS op_handle_base variable_visitor scope phi)
else() else()
cc_library( cc_library(
reduce_op_handle reduce_op_handle
SRCS reduce_op_handle.cc SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim selected_rows_functor) DEPS op_handle_base variable_visitor scope phi)
endif() endif()
cc_library( cc_library(
broadcast_op_handle broadcast_op_handle
SRCS broadcast_op_handle.cc SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor) DEPS op_handle_base scope phi memory variable_visitor)
cc_library( cc_library(
fused_broadcast_op_handle fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc SRCS fused_broadcast_op_handle.cc
...@@ -259,7 +255,7 @@ endif() ...@@ -259,7 +255,7 @@ endif()
cc_library( cc_library(
gather_op_handle gather_op_handle
SRCS gather_op_handle.cc SRCS gather_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor) DEPS op_handle_base scope phi memory variable_visitor)
cc_library( cc_library(
eager_deletion_op_handle eager_deletion_op_handle
...@@ -305,7 +301,7 @@ cc_test( ...@@ -305,7 +301,7 @@ cc_test(
DEPS var_handle DEPS var_handle
op_handle_base op_handle_base
scope scope
ddim phi
memory memory
device_context device_context
broadcast_op_handle) broadcast_op_handle)
...@@ -317,7 +313,7 @@ cc_test_old( ...@@ -317,7 +313,7 @@ cc_test_old(
var_handle var_handle
op_handle_base op_handle_base
scope scope
ddim phi
memory memory
device_context device_context
gather_op_handle) gather_op_handle)
...@@ -330,12 +326,12 @@ cc_library( ...@@ -330,12 +326,12 @@ cc_library(
scope_buffered_ssa_graph_executor scope_buffered_ssa_graph_executor
SRCS scope_buffered_ssa_graph_executor.cc SRCS scope_buffered_ssa_graph_executor.cc
DEPS ssa_graph_executor scope_buffered_monitor) DEPS ssa_graph_executor scope_buffered_monitor)
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory #cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory
# device_context reduce_op_handle ) # device_context reduce_op_handle )
cc_library( cc_library(
bind_threaded_ssa_graph_executor bind_threaded_ssa_graph_executor
SRCS bind_threaded_ssa_graph_executor.cc SRCS bind_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool
device_context) device_context)
cc_library( cc_library(
fast_threaded_ssa_graph_executor fast_threaded_ssa_graph_executor
......
...@@ -20,9 +20,10 @@ limitations under the License. */ ...@@ -20,9 +20,10 @@ limitations under the License. */
#include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/ir/graph_printer.h" #include "paddle/fluid/framework/ir/graph_printer.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h" #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
#include "paddle/phi/core/flags.h"
DECLARE_bool(convert_all_blocks); DECLARE_bool(convert_all_blocks);
DECLARE_bool(use_mkldnn); PHI_DECLARE_bool(use_mkldnn);
#ifdef PADDLE_WITH_CINN #ifdef PADDLE_WITH_CINN
DECLARE_bool(use_cinn); DECLARE_bool(use_cinn);
#endif #endif
......
...@@ -32,7 +32,7 @@ cc_library( ...@@ -32,7 +32,7 @@ cc_library(
cc_library( cc_library(
cost_model cost_model
SRCS cost_model.cc SRCS cost_model.cc
DEPS executor graph profiler proto_desc phi_device_tracer) DEPS executor graph profiler proto_desc phi)
set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits) set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits)
if(WITH_TESTING) if(WITH_TESTING)
...@@ -458,9 +458,6 @@ if(WITH_MKLDNN) ...@@ -458,9 +458,6 @@ if(WITH_MKLDNN)
graph_to_program_pass graph_to_program_pass
conv_op conv_op
conv_transpose_op conv_transpose_op
math_function
im2col
vol2col
batch_norm_op batch_norm_op
generated_op generated_op
activation_op activation_op
...@@ -468,7 +465,7 @@ if(WITH_MKLDNN) ...@@ -468,7 +465,7 @@ if(WITH_MKLDNN)
concat_and_split concat_and_split
naive_executor naive_executor
device_context device_context
eigen_function) phi)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv) set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv)
endif() endif()
......
...@@ -221,7 +221,7 @@ bool InitAndCheckAttrs(const size_t &found_adamw_count, ...@@ -221,7 +221,7 @@ bool InitAndCheckAttrs(const size_t &found_adamw_count,
} }
} }
// Check whether with_decay and multi_precision are matched // Check whether with_decay and multi_precision are matched
if (config->with_decay != if (config->with_decay !=
PADDLE_GET_CONST(bool, adamw_op_desc->GetAttr("with_decay")) || PADDLE_GET_CONST(bool, adamw_op_desc->GetAttr("with_decay")) ||
config->multi_precision != config->multi_precision !=
......
...@@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM)
cc_test( cc_test(
test_code_generator test_code_generator
SRCS code_generator_tester.cc SRCS code_generator_tester.cc
DEPS code_generator phi_backends lod_tensor graph_viz_pass) DEPS code_generator phi lod_tensor graph_viz_pass)
endif() endif()
cc_library( cc_library(
fusion_group_pass fusion_group_pass
SRCS fusion_group_pass.cc elementwise_group_detector.cc SRCS fusion_group_pass.cc elementwise_group_detector.cc
DEPS subgraph_detector fuse_pass_base code_generator phi_backends) DEPS subgraph_detector fuse_pass_base code_generator phi)
cc_test( cc_test(
test_fusion_group_pass test_fusion_group_pass
SRCS fusion_group_pass_tester.cc SRCS fusion_group_pass_tester.cc
......
...@@ -76,5 +76,4 @@ cc_library( ...@@ -76,5 +76,4 @@ cc_library(
cc_test( cc_test(
test_reference_count_pass_last_lived_ops test_reference_count_pass_last_lived_ops
SRCS test_reference_count_pass_last_lived_ops.cc SRCS test_reference_count_pass_last_lived_ops.cc
DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi)
eigen_function)
...@@ -16,4 +16,4 @@ cc_library( ...@@ -16,4 +16,4 @@ cc_library(
cc_library( cc_library(
staticgraph_executor_statistics staticgraph_executor_statistics
SRCS executor_statistics.cc SRCS executor_statistics.cc
DEPS enforce glog phi_os_info) DEPS enforce glog phi)
...@@ -6,7 +6,6 @@ set(INTERPRETER_DEPS ...@@ -6,7 +6,6 @@ set(INTERPRETER_DEPS
device_context device_context
global_utils global_utils
op_registry op_registry
phi_tensor_utils
scope scope
framework_proto framework_proto
data_feed_proto data_feed_proto
...@@ -31,7 +30,7 @@ set(INTERPRETER_DEPS ...@@ -31,7 +30,7 @@ set(INTERPRETER_DEPS
enforce enforce
scope scope
glog glog
comm_context_manager phi
${DEVICE_EVENT_LIBS} ${DEVICE_EVENT_LIBS}
glog) glog)
......
...@@ -5,7 +5,7 @@ cc_library( ...@@ -5,7 +5,7 @@ cc_library(
cc_library( cc_library(
workqueue workqueue
SRCS workqueue.cc SRCS workqueue.cc
DEPS workqueue_utils enforce glog phi_os_info) DEPS workqueue_utils enforce glog phi)
cc_test( cc_test(
workqueue_test workqueue_test
SRCS workqueue_test.cc SRCS workqueue_test.cc
......
...@@ -5,7 +5,7 @@ pass_library( ...@@ -5,7 +5,7 @@ pass_library(
cinn_subgraph_detector cinn_subgraph_detector
subgraph_detector subgraph_detector
cinn_compiler cinn_compiler
errors phi
enforce) enforce)
pass_library(cinn_zero_tensor_trick_pass base) pass_library(cinn_zero_tensor_trick_pass base)
...@@ -17,7 +17,7 @@ cc_library( ...@@ -17,7 +17,7 @@ cc_library(
cc_library( cc_library(
transform_type transform_type
SRCS transform_type.cc SRCS transform_type.cc
DEPS errors enforce cinn) DEPS phi enforce cinn)
cc_library( cc_library(
cinn_cache_key cinn_cache_key
SRCS cinn_cache_key.cc SRCS cinn_cache_key.cc
......
...@@ -16,6 +16,7 @@ limitations under the License. */ ...@@ -16,6 +16,7 @@ limitations under the License. */
#include <unordered_map> #include <unordered_map>
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/extended_tensor.h" #include "paddle/phi/core/extended_tensor.h"
#include "paddle/utils/any.h" #include "paddle/utils/any.h"
...@@ -52,7 +53,7 @@ class RawTensor : public phi::ExtendedTensor, ...@@ -52,7 +53,7 @@ class RawTensor : public phi::ExtendedTensor,
T& Get() const { T& Get() const {
PADDLE_ENFORCE_EQ(data_.empty(), PADDLE_ENFORCE_EQ(data_.empty(),
false, false,
platform::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The data in RawTensor is empty. Please set data " "The data in RawTensor is empty. Please set data "
"before using it.")); "before using it."));
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"
namespace phi {

// Registers runtime type information for paddle-side tensor types that
// derive from phi::TensorBase. Each explicit specialization initializes the
// static kType member of TypeInfoTraits with a TypeInfo handle obtained from
// RegisterStaticType, keyed by the type's name() string. This lets phi code
// (e.g. TensorBase::IsType<T>() dispatch) recognize these fluid/eager types
// without phi depending on their definitions at compile time.
//
// NOTE(review): these definitions live in a fluid-side .cc (not in the phi
// library) — presumably so the phi shared lib does not link against fluid;
// confirm against the build setup if moving this file.

// paddle::framework::RawTensor
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::RawTensor>::kType =
RegisterStaticType<phi::TensorBase>(
paddle::framework::RawTensor::name());

// paddle::framework::Vocab
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::Vocab>::kType =
RegisterStaticType<phi::TensorBase>(paddle::framework::Vocab::name());

// paddle::framework::Strings
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>::kType =
RegisterStaticType<phi::TensorBase>(paddle::framework::Strings::name());

// paddle::framework::FeedList
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>::kType =
RegisterStaticType<phi::TensorBase>(
paddle::framework::FeedList::name());

// egr::VariableCompatTensor (eager-mode compatibility tensor)
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>::kType =
RegisterStaticType<phi::TensorBase>(egr::VariableCompatTensor::name());

// paddle::prim::DescTensor (static-graph prim tensor wrapper)
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>::kType =
RegisterStaticType<phi::TensorBase>(paddle::prim::DescTensor::name());
}  // namespace phi
cc_library( cc_library(
imperative_flag imperative_flag
SRCS flags.cc SRCS flags.cc
DEPS gflags flags) DEPS phi)
cc_library( cc_library(
var_helper var_helper
SRCS var_helper.cc SRCS var_helper.cc
DEPS tensor selected_rows extended_tensor) DEPS tensor phi)
if(WITH_XPU) if(WITH_XPU)
cc_library( cc_library(
prepared_operator prepared_operator
...@@ -20,8 +20,7 @@ if(WITH_XPU) ...@@ -20,8 +20,7 @@ if(WITH_XPU)
op_kernel_type op_kernel_type
data_transform data_transform
nan_inf_utils nan_inf_utils
scalar phi
int_array
var_helper var_helper
profiler profiler
place) place)
...@@ -38,8 +37,7 @@ else() ...@@ -38,8 +37,7 @@ else()
op_kernel_type op_kernel_type
data_transform data_transform
nan_inf_utils nan_inf_utils
scalar phi
int_array
var_helper var_helper
profiler profiler
place) place)
...@@ -47,14 +45,14 @@ endif() ...@@ -47,14 +45,14 @@ endif()
cc_library( cc_library(
layer layer
SRCS layer.cc SRCS layer.cc
DEPS prepared_operator math_function imperative_flag variable_helper DEPS prepared_operator phi imperative_flag variable_helper op_registry
op_registry var_helper) var_helper)
add_subdirectory(jit) add_subdirectory(jit)
if(WITH_GPU) if(WITH_GPU)
cc_library( cc_library(
layout_autotune layout_autotune
SRCS layout_autotune.cc SRCS layout_autotune.cc
DEPS op_info phi_backends) DEPS op_info phi)
else() else()
cc_library( cc_library(
layout_autotune layout_autotune
...@@ -80,15 +78,15 @@ cc_library( ...@@ -80,15 +78,15 @@ cc_library(
cc_library( cc_library(
basic_engine basic_engine
SRCS basic_engine.cc SRCS basic_engine.cc
DEPS layer gradient_accumulator switch_autotune) DEPS layer gradient_accumulator phi)
cc_library( cc_library(
engine engine
SRCS basic_engine.cc partial_grad_engine.cc SRCS basic_engine.cc partial_grad_engine.cc
DEPS layer gradient_accumulator switch_autotune) DEPS layer gradient_accumulator phi)
cc_library( cc_library(
imperative_profiler imperative_profiler
SRCS profiler.cc SRCS profiler.cc
DEPS flags) DEPS phi)
if(NOT WIN32) if(NOT WIN32)
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
cc_library( cc_library(
...@@ -174,12 +172,4 @@ endif() ...@@ -174,12 +172,4 @@ endif()
cc_library( cc_library(
gradient_accumulator gradient_accumulator
SRCS gradient_accumulator.cc SRCS gradient_accumulator.cc
DEPS blas DEPS operator lod_tensor selected_rows_utils var_type_traits layer phi)
operator
lod_tensor
selected_rows_utils
selected_rows_functor
var_type_traits
layer
math_function
phi_tensor)
...@@ -32,14 +32,8 @@ endif() ...@@ -32,14 +32,8 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp # fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(utils_modules pretty_log string_helper benchmark) set(utils_modules pretty_log string_helper benchmark)
if(WITH_CUSTOM_DEVICE)
set(fluid_modules ${fluid_modules} phi_capi)
endif()
add_subdirectory(api) add_subdirectory(api)
# Create static inference library if needed # Create static inference library if needed
...@@ -51,7 +45,6 @@ set(STATIC_INFERENCE_API ...@@ -51,7 +45,6 @@ set(STATIC_INFERENCE_API
reset_tensor_array reset_tensor_array
analysis_config analysis_config
paddle_pass_builder paddle_pass_builder
phi
${mkldnn_quantizer_cfg}) ${mkldnn_quantizer_cfg})
set(OP_LIST set(OP_LIST
...@@ -64,16 +57,14 @@ set(KERNEL_LIST ...@@ -64,16 +57,14 @@ set(KERNEL_LIST
#windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy
if(WIN32 AND WITH_GPU) if(WIN32 AND WITH_GPU)
cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API} cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API}
${utils_modules}) ${utils_modules})
else() else()
# message("${fluid_modules}") # message("${fluid_modules}")
# message("PHI_MODULES ${phi_modules}")
# message("${phi_kernels}")
# message("${STATIC_INFERENCE_API}") # message("${STATIC_INFERENCE_API}")
# message("${utils_modules}") # message("${utils_modules}")
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules} create_static_lib(paddle_inference ${fluid_modules} ${STATIC_INFERENCE_API}
${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules}) ${utils_modules})
endif() endif()
if(NOT APPLE) if(NOT APPLE)
...@@ -103,7 +94,7 @@ set(SHARED_INFERENCE_SRCS ...@@ -103,7 +94,7 @@ set(SHARED_INFERENCE_SRCS
# shared inference library deps # shared inference library deps
list(REMOVE_ITEM fluid_modules standalone_executor list(REMOVE_ITEM fluid_modules standalone_executor
interpretercore_garbage_collector) interpretercore_garbage_collector)
set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor
${utils_modules}) ${utils_modules})
if(WITH_CRYPTO) if(WITH_CRYPTO)
...@@ -124,12 +115,6 @@ if(WITH_ONNXRUNTIME) ...@@ -124,12 +115,6 @@ if(WITH_ONNXRUNTIME)
${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc) ${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc)
endif() endif()
#export all symbols for paddle/phi/api/include/api.h on paddle_inference_shared, only for UNIX
if(UNIX)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS}
$<TARGET_OBJECTS:phi_function_api>)
endif()
# Create shared inference library # Create shared inference library
cc_library( cc_library(
paddle_inference_shared SHARED paddle_inference_shared SHARED
...@@ -141,12 +126,15 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules}) ...@@ -141,12 +126,15 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules})
if(WIN32) if(WIN32)
set_property(TARGET paddle_inference_shared set_property(TARGET paddle_inference_shared
PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON) PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
target_link_libraries(paddle_inference_shared gflags) target_link_libraries(paddle_inference_shared phi)
endif() endif()
set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME
paddle_inference) paddle_inference)
if(NOT APPLE AND NOT WIN32) if(NOT APPLE
AND NOT WIN32
AND NOT WITH_TESTING
AND NOT WITH_INFERENCE_API_TEST)
# TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac. # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
set(LINK_FLAGS set(LINK_FLAGS
"-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map") "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
......
...@@ -41,7 +41,7 @@ if(WITH_CRYPTO) ...@@ -41,7 +41,7 @@ if(WITH_CRYPTO)
list(APPEND paddle_inference_api_deps paddle_crypto) list(APPEND paddle_inference_api_deps paddle_crypto)
endif() endif()
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
set(paddle_inference_api_deps ${paddle_inference_api_deps} phi_capi) set(paddle_inference_api_deps ${paddle_inference_api_deps} phi)
endif() endif()
cc_library( cc_library(
...@@ -50,7 +50,7 @@ cc_library( ...@@ -50,7 +50,7 @@ cc_library(
DEPS ${paddle_inference_api_deps}) DEPS ${paddle_inference_api_deps})
if(WIN32) if(WIN32)
target_link_libraries(paddle_inference_api gflags) target_link_libraries(paddle_inference_api phi)
endif() endif()
set(inference_deps ${analysis_deps} paddle_inference_api analysis set(inference_deps ${analysis_deps} paddle_inference_api analysis
......
...@@ -72,7 +72,7 @@ ...@@ -72,7 +72,7 @@
#endif #endif
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
#endif #endif
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
...@@ -1121,7 +1121,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -1121,7 +1121,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// Frees unused memory allocated by the Intel® MKL Memory Allocator to // Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See: // avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers(); phi::dynload::MKL_Free_Buffers();
#endif #endif
return true; return true;
} }
...@@ -1185,7 +1185,7 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs, ...@@ -1185,7 +1185,7 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
// Frees unused memory allocated by the Intel® MKL Memory Allocator to // Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See: // avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers(); phi::dynload::MKL_Free_Buffers();
#endif #endif
return true; return true;
} }
...@@ -2100,7 +2100,7 @@ bool AnalysisPredictor::ZeroCopyRun() { ...@@ -2100,7 +2100,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
// Frees unused memory allocated by the Intel® MKL Memory Allocator to // Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See: // avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers(); phi::dynload::MKL_Free_Buffers();
#endif #endif
return true; return true;
} }
......
...@@ -199,7 +199,7 @@ if(NOT WIN32) ...@@ -199,7 +199,7 @@ if(NOT WIN32)
${MATH_LIB} ${MATH_LIB}
${MKLDNN_LIB} ${MKLDNN_LIB}
glog glog
gflags phi
protobuf protobuf
xxhash xxhash
cryptopp cryptopp
......
...@@ -29,6 +29,7 @@ WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform ...@@ -29,6 +29,7 @@ WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
cd `dirname $0` cd `dirname $0`
current_dir=`pwd` current_dir=`pwd`
if [ $2 == ON ]; then if [ $2 == ON ]; then
# You can export yourself if move the install path # You can export yourself if move the install path
MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
......
...@@ -25,7 +25,7 @@ if(WITH_ONNXRUNTIME) ...@@ -25,7 +25,7 @@ if(WITH_ONNXRUNTIME)
cc_library( cc_library(
zero_copy_tensor_dummy zero_copy_tensor_dummy
SRCS zero_copy_tensor_dummy.cc SRCS zero_copy_tensor_dummy.cc
DEPS onnxruntime phi_enforce) DEPS onnxruntime phi)
else() else()
cc_library( cc_library(
zero_copy_tensor zero_copy_tensor
...@@ -34,7 +34,7 @@ else() ...@@ -34,7 +34,7 @@ else()
cc_library( cc_library(
zero_copy_tensor_dummy zero_copy_tensor_dummy
SRCS zero_copy_tensor_dummy.cc SRCS zero_copy_tensor_dummy.cc
DEPS phi_enforce) DEPS phi)
endif() endif()
cc_test( cc_test(
......
...@@ -39,7 +39,7 @@ if(APPLE) ...@@ -39,7 +39,7 @@ if(APPLE)
utf8proc utf8proc
cryptopp cryptopp
protobuf protobuf
gflags phi
cblas) cblas)
endif() endif()
......
...@@ -23,7 +23,7 @@ fi ...@@ -23,7 +23,7 @@ fi
# 2. set LD_LIBRARY_PATH # 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_ROOT}/build/paddle/phi/
# 3. go test # 3. go test
go clean -testcache go clean -testcache
go test -v ./... go test -v ./...
...@@ -141,8 +141,7 @@ nv_test( ...@@ -141,8 +141,7 @@ nv_test(
nv_test( nv_test(
test_custom_plugin_creater test_custom_plugin_creater
SRCS test_custom_plugin_creater.cc SRCS test_custom_plugin_creater.cc
DEPS paddle_framework tensorrt_converter op_meta_info custom_operator DEPS paddle_framework tensorrt_converter phi custom_operator init_phi)
init_phi)
if(WITH_ONNXRUNTIME AND WIN32) if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will # Copy onnxruntime for some c++ test in Windows, since the test will
......
include(ExternalProject) include(ExternalProject)
set(ALLOCATOR_DEPS place stats profiler phi_backends device_context) set(ALLOCATOR_DEPS place stats profiler phi device_context)
set(ALLOCATOR_SRCS set(ALLOCATOR_SRCS
allocator.cc allocator.cc
cpu_allocator.cc cpu_allocator.cc
...@@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM)
endif() endif()
if(WITH_GPU) if(WITH_GPU)
list(APPEND ALLOCATOR_DEPS phi_backends) list(APPEND ALLOCATOR_DEPS phi)
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2) if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2)
......
...@@ -124,7 +124,7 @@ class CUDAGraphAllocator ...@@ -124,7 +124,7 @@ class CUDAGraphAllocator
: underlying_allocator_(allocator) {} : underlying_allocator_(allocator) {}
public: public:
~CUDAGraphAllocator() { VLOG(10) << "CUDAGraphAllocator destructed"; } ~CUDAGraphAllocator() {}
static std::shared_ptr<Allocator> Create( static std::shared_ptr<Allocator> Create(
const std::shared_ptr<Allocator>& allocator) { const std::shared_ptr<Allocator>& allocator) {
...@@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) { ...@@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) {
if (ref_cnt == 0) { if (ref_cnt == 0) {
cuda_graph_map_.erase(id); cuda_graph_map_.erase(id);
cuda_graph_ref_cnt_.erase(ref_cnt_iter); cuda_graph_ref_cnt_.erase(ref_cnt_iter);
VLOG(10) << "Remove memory pool of CUDA Graph with memory ID " << id;
} else { } else {
VLOG(10) << "Decrease memory pool ID " << id << " reference count to be " VLOG(10) << "Decrease memory pool ID " << id << " reference count to be "
<< ref_cnt; << ref_cnt;
......
...@@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD) ...@@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD)
include(unity_build_rule.cmake) include(unity_build_rule.cmake)
endif() endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api get_expected_kernel_func) set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func)
register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
...@@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM) ...@@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM)
endif() endif()
endif() endif()
op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute) op_library(lstm_op DEPS ${OP_HEADER_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) op_library(recurrent_op DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
...@@ -136,17 +136,16 @@ if (WITH_DGC) ...@@ -136,17 +136,16 @@ if (WITH_DGC)
endif() endif()
cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator) cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator)
cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute cudnn_workspace_helper) cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows_utils lapack_function set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi)
lod_tensor maxouting unpooling pooling lod_rank_table context_project set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_utils
sequence_pooling executor generator static_prim_api) lod_tensor unpooling lod_rank_table context_project executor static_prim_api)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} cos_sim_functor memory concat_and_split sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc_functor matrix_inverse matrix_solve) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} beam_search)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils)
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl)
...@@ -189,7 +188,7 @@ endif() ...@@ -189,7 +188,7 @@ endif()
copy_if_different(${pybind_file} ${pybind_file_final}) copy_if_different(${pybind_file} ${pybind_file_final})
if (WITH_CUSTOM_DEVICE) if (WITH_CUSTOM_DEVICE)
cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi_api) cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi type_info)
endif() endif()
if(NOT "${OP_LIST}" STREQUAL "") if(NOT "${OP_LIST}" STREQUAL "")
......
...@@ -7,7 +7,7 @@ cc_library( ...@@ -7,7 +7,7 @@ cc_library(
cc_library( cc_library(
cinn_launch_context cinn_launch_context
SRCS cinn_launch_context.cc SRCS cinn_launch_context.cc
DEPS ddim DEPS phi
lod_tensor lod_tensor
scope scope
proto_desc proto_desc
......
...@@ -18,7 +18,7 @@ foreach(src ${OPS}) ...@@ -18,7 +18,7 @@ foreach(src ${OPS})
endforeach() endforeach()
if(WITH_GLOO) if(WITH_GLOO)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper comm_context_manager) set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi)
endif() endif()
register_operators( register_operators(
...@@ -31,8 +31,7 @@ register_operators( ...@@ -31,8 +31,7 @@ register_operators(
${COLLECTIVE_DEPS}) ${COLLECTIVE_DEPS})
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi)
comm_context_manager nccl_comm_context)
op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
endif() endif()
......
...@@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc ...@@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
detection_library(generate_proposal_labels_op SRCS detection_library(generate_proposal_labels_op SRCS
generate_proposal_labels_op.cc) generate_proposal_labels_op.cc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc) detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi)
detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
box_decoder_and_assign_op.cu) box_decoder_and_assign_op.cu)
......
...@@ -289,7 +289,7 @@ file(APPEND ${op_utils_header} ...@@ -289,7 +289,7 @@ file(APPEND ${op_utils_header}
# Automatically generate the registration code of all arg map functions # Automatically generate the registration code of all arg map functions
# and compile the corresponding target to avoid frequent code conflicts # and compile the corresponding target to avoid frequent code conflicts
# when writing to same file # when writing to same file
register_op_utils(op_compat_infos DEPS op_utils) register_op_utils(op_compat_infos DEPS phi)
copy_if_different(${op_utils_header} ${op_utils_header_final}) copy_if_different(${op_utils_header} ${op_utils_header_final})
......
...@@ -17,11 +17,12 @@ limitations under the License. */ ...@@ -17,11 +17,12 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include "paddle/phi/core/flags.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h"
#include "paddle/phi/kernels/funcs/detail/gru_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_kernel.h"
DECLARE_int32(paddle_num_threads); PHI_DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
...@@ -6,21 +6,20 @@ if(WITH_XPU) ...@@ -6,21 +6,20 @@ if(WITH_XPU)
endif() endif()
# please add new math_library in alphabetical order # please add new math_library in alphabetical order
math_library(concat_and_split DEPS concat_and_split_functor) math_library(concat_and_split DEPS phi)
math_library(context_project DEPS im2col math_function) math_library(context_project DEPS phi)
math_library(cos_sim_functor) math_library(cos_sim_functor)
math_library(depthwise_conv) math_library(depthwise_conv)
math_library(sample_prob) math_library(sample_prob)
math_library(sampler DEPS generator) math_library(sampler DEPS phi)
# math_library(math_function DEPS blas dense_tensor tensor)
if(WITH_XPU) if(WITH_XPU)
math_library(beam_search DEPS math_function beam_search_xpu) math_library(beam_search DEPS phi beam_search_xpu)
else() else()
math_library(beam_search DEPS math_function) math_library(beam_search DEPS phi)
endif() endif()
math_library(unpooling) math_library(unpooling)
math_library(prelu) math_library(prelu)
math_library(bert_encoder_functor) math_library(bert_encoder_functor)
math_library(tree2col DEPS math_function) math_library(tree2col DEPS phi)
...@@ -20,7 +20,7 @@ if(WITH_ARM_BRPC) ...@@ -20,7 +20,7 @@ if(WITH_ARM_BRPC)
framework_proto framework_proto
sendrecv_rpc sendrecv_rpc
arm_brpc arm_brpc
gflags phi
glog glog
snappy snappy
device_context) device_context)
...@@ -42,7 +42,7 @@ else() ...@@ -42,7 +42,7 @@ else()
ssl ssl
crypto crypto
protobuf protobuf
gflags phi
glog glog
zlib zlib
snappy snappy
......
...@@ -6,5 +6,5 @@ endif() ...@@ -6,5 +6,5 @@ endif()
register_operators() register_operators()
if(WITH_UNITY_BUILD) if(WITH_UNITY_BUILD)
target_link_libraries(paddle_operators_sequence_ops_unity sequence_pooling) target_link_libraries(paddle_operators_sequence_ops_unity phi)
endif() endif()
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -6,9 +6,9 @@ cc_library( ...@@ -6,9 +6,9 @@ cc_library(
cc_test( cc_test(
errors_test errors_test
SRCS errors_test.cc SRCS errors_test.cc
DEPS errors enforce) DEPS phi enforce)
set(enforce_deps flags errors flags phi_enforce) set(enforce_deps phi)
if(WITH_GPU) if(WITH_GPU)
set(enforce_deps ${enforce_deps} external_error_proto) set(enforce_deps ${enforce_deps} external_error_proto)
endif() endif()
...@@ -26,20 +26,20 @@ cc_test( ...@@ -26,20 +26,20 @@ cc_test(
cc_test( cc_test(
cpu_info_test cpu_info_test
SRCS cpu_info_test.cc SRCS cpu_info_test.cc
DEPS phi_backends) DEPS phi)
cc_test( cc_test(
os_info_test os_info_test
SRCS os_info_test.cc SRCS os_info_test.cc
DEPS phi_os_info) DEPS phi)
cc_library( cc_library(
place place
SRCS place.cc SRCS place.cc
DEPS enforce phi_place) DEPS enforce phi)
cc_test( cc_test(
place_test place_test
SRCS place_test.cc SRCS place_test.cc
DEPS place glog gflags) DEPS place glog phi)
if(WITH_MKLDNN) if(WITH_MKLDNN)
set(MKLDNN_CTX_DEPS mkldnn) set(MKLDNN_CTX_DEPS mkldnn)
...@@ -104,7 +104,7 @@ endif() ...@@ -104,7 +104,7 @@ endif()
cc_library( cc_library(
init init
SRCS init.cc SRCS init.cc
DEPS device_context custom_kernel context_pool memcpy) DEPS device_context phi memcpy)
# memcpy depends on device_context, here add deps individually for # memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies # avoiding cycle dependencies
...@@ -117,7 +117,6 @@ cc_library( ...@@ -117,7 +117,6 @@ cc_library(
xxhash xxhash
${STREAM_CALLBACK_DEPS} ${STREAM_CALLBACK_DEPS}
place place
phi_place
eigen3 eigen3
cpu_helper cpu_helper
framework_proto framework_proto
...@@ -126,12 +125,8 @@ cc_library( ...@@ -126,12 +125,8 @@ cc_library(
${MKLDNN_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} ${dgc_deps}
dlpack dlpack
cudnn_workspace_helper phi
${XPU_CTX_DEPS} ${XPU_CTX_DEPS})
phi_backends
phi_device_context
generator
phi_enforce)
cc_library( cc_library(
collective_helper collective_helper
...@@ -189,12 +184,12 @@ if(WITH_GPU) ...@@ -189,12 +184,12 @@ if(WITH_GPU)
cuda_graph_with_memory_pool cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context
allocator phi_backends) allocator phi)
else() else()
nv_library( nv_library(
cuda_graph_with_memory_pool cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi_backends) DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi)
endif() endif()
nv_test( nv_test(
device_context_test device_context_test
...@@ -245,7 +240,7 @@ cc_test( ...@@ -245,7 +240,7 @@ cc_test(
cc_library( cc_library(
lodtensor_printer lodtensor_printer
SRCS lodtensor_printer.cc SRCS lodtensor_printer.cc
DEPS ddim DEPS phi
place place
tensor tensor
scope scope
...@@ -263,41 +258,30 @@ if(WITH_GPU) ...@@ -263,41 +258,30 @@ if(WITH_GPU)
nv_library( nv_library(
profiler profiler
SRCS profiler.cc profiler.cu SRCS profiler.cc profiler.cu
DEPS phi_os_info DEPS phi
phi_device_tracer
gpu_info gpu_info
enforce enforce
dynload_cuda dynload_cuda
new_profiler new_profiler
stats stats
op_proto_maker op_proto_maker
shape_inference shape_inference)
phi_profiler)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_library( hip_library(
profiler profiler
SRCS profiler.cc profiler.cu SRCS profiler.cc profiler.cu
DEPS phi_os_info DEPS phi
phi_device_tracer
gpu_info gpu_info
enforce enforce
new_profiler new_profiler
stats stats
op_proto_maker op_proto_maker
shape_inference shape_inference)
phi_profiler)
else() else()
cc_library( cc_library(
profiler profiler
SRCS profiler.cc SRCS profiler.cc
DEPS phi_os_info DEPS phi enforce new_profiler stats op_proto_maker shape_inference)
phi_device_tracer
enforce
new_profiler
stats
op_proto_maker
shape_inference
phi_profiler)
endif() endif()
cc_test( cc_test(
...@@ -333,7 +317,7 @@ if(WITH_GPU) ...@@ -333,7 +317,7 @@ if(WITH_GPU)
nv_test( nv_test(
test_limit_gpu_memory test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags) DEPS gpu_info phi)
nv_library( nv_library(
cuda_device_guard cuda_device_guard
SRCS cuda_device_guard.cc SRCS cuda_device_guard.cc
...@@ -348,7 +332,7 @@ if(WITH_ROCM) ...@@ -348,7 +332,7 @@ if(WITH_ROCM)
hip_test( hip_test(
test_limit_gpu_memory test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags) DEPS gpu_info phi)
hip_library( hip_library(
cuda_device_guard cuda_device_guard
SRCS cuda_device_guard.cc SRCS cuda_device_guard.cc
...@@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32) ...@@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32)
cc_test( cc_test(
device_code_test device_code_test
SRCS device_code_test.cc SRCS device_code_test.cc
DEPS phi_backends lod_tensor) DEPS phi lod_tensor)
endif() endif()
endif() endif()
...@@ -382,4 +366,4 @@ cc_library( ...@@ -382,4 +366,4 @@ cc_library(
cc_test( cc_test(
init_phi_test init_phi_test
SRCS init_phi_test.cc SRCS init_phi_test.cc
DEPS phi_tensor init_phi) DEPS phi init_phi)
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include <omp.h> #include <omp.h>
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
#endif #endif
#ifdef PADDLE_USE_OPENBLAS #ifdef PADDLE_USE_OPENBLAS
...@@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) { ...@@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) {
openblas_set_num_threads(real_num_threads); openblas_set_num_threads(real_num_threads);
#elif defined(PADDLE_WITH_MKLML) #elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1; int real_num_threads = num_threads > 1 ? num_threads : 1;
platform::dynload::MKL_Set_Num_Threads(real_num_threads); phi::dynload::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(real_num_threads); omp_set_num_threads(real_num_threads);
#elif defined(PADDLE_USE_REFERENCE_CBLAS) #elif defined(PADDLE_USE_REFERENCE_CBLAS)
// cblas not support multi-thread // cblas not support multi-thread
......
...@@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE) ...@@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE)
cc_library( cc_library(
custom_device_resource_pool custom_device_resource_pool
SRCS custom_device_resource_pool.cc SRCS custom_device_resource_pool.cc
DEPS gflags glog enforce monitor) DEPS phi glog enforce monitor)
cc_test( cc_test(
custom_device_test custom_device_test
SRCS custom_device_test.cc SRCS custom_device_test.cc
DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator) DEPS phi gradient_accumulator)
endif() endif()
...@@ -3,13 +3,7 @@ if(WITH_GPU) ...@@ -3,13 +3,7 @@ if(WITH_GPU)
nv_library( nv_library(
gpu_info gpu_info
SRCS gpu_info.cc SRCS gpu_info.cc
DEPS phi_backends DEPS phi glog enforce monitor dynload_cuda malloc)
gflags
glog
enforce
monitor
dynload_cuda
malloc)
nv_test(cuda_helper_test SRCS cuda_helper_test.cu) nv_test(cuda_helper_test SRCS cuda_helper_test.cu)
nv_test( nv_test(
...@@ -21,7 +15,7 @@ elseif(WITH_ROCM) ...@@ -21,7 +15,7 @@ elseif(WITH_ROCM)
hip_library( hip_library(
gpu_info gpu_info
SRCS gpu_info.cc SRCS gpu_info.cc
DEPS phi_backends gflags glog enforce monitor dynload_cuda) DEPS phi glog enforce monitor dynload_cuda)
hip_test(cuda_helper_test SRCS cuda_helper_test.cu) hip_test(cuda_helper_test SRCS cuda_helper_test.cu)
hip_test( hip_test(
......
...@@ -14,23 +14,11 @@ set(XPU_CTX_DEPS ...@@ -14,23 +14,11 @@ set(XPU_CTX_DEPS
cc_library( cc_library(
xpu_info xpu_info
SRCS xpu_info.cc SRCS xpu_info.cc
DEPS gflags DEPS glog enforce xpulib device_context place phi)
glog
enforce
xpulib
device_context
place
phi_backends)
cc_library( cc_library(
xpu_op_list xpu_op_list
SRCS xpu_op_list.cc SRCS xpu_op_list.cc
DEPS gflags DEPS glog enforce xpulib device_context op_kernel_type phi)
glog
enforce
xpulib
device_context
op_kernel_type
phi_backends)
cc_library( cc_library(
xpu_resource_pool xpu_resource_pool
SRCS xpu_resource_pool.cc SRCS xpu_resource_pool.cc
......
cc_library( cc_library(
dynamic_loader dynamic_loader
SRCS dynamic_loader.cc SRCS dynamic_loader.cc
DEPS glog gflags enforce phi_dynamic_loader) DEPS glog enforce phi)
list( list(
APPEND APPEND
...@@ -57,26 +57,20 @@ if(WITH_ROCM) ...@@ -57,26 +57,20 @@ if(WITH_ROCM)
hip_library( hip_library(
dynload_cuda dynload_cuda
SRCS ${HIP_SRCS} SRCS ${HIP_SRCS}
DEPS dynamic_loader phi_dynload_cuda) DEPS dynamic_loader phi)
cc_library( cc_library(
dynload_warpctc dynload_warpctc
SRCS warpctc.cc SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc) DEPS dynamic_loader warpctc phi)
else() else()
nv_library( nv_library(
dynload_cuda dynload_cuda
SRCS ${CUDA_SRCS} SRCS ${CUDA_SRCS}
DEPS dynamic_loader phi_dynload_cuda) DEPS dynamic_loader phi)
cc_library( cc_library(
dynload_warpctc dynload_warpctc
SRCS warpctc.cc SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc) DEPS dynamic_loader warpctc phi)
endif()
if(WITH_MKLML)
cc_library(
dynload_mklml
SRCS mklml.cc
DEPS dynamic_loader mklml phi_dynload_mklml)
endif() endif()
# TODO(TJ): add iomp, mkldnn? # TODO(TJ): add iomp, mkldnn?
...@@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL) ...@@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL)
cc_library( cc_library(
dynload_mklrt dynload_mklrt
SRCS mklrt.cc SRCS mklrt.cc
DEPS dynamic_loader phi_dynload_mklrt) DEPS dynamic_loader phi)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif() endif()
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mkl.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/mklml.h"
namespace paddle {
namespace platform {
namespace dynload {
/**
 * The following macro definition can generate structs
 * (for each function) to dynamic load mklml routine
 * via operator overloading.
 *
 * This header is a compatibility shim: the actual dynamic-load
 * machinery lives in phi (paddle/phi/backends/dynload/mklml.h).
 * Each wrapper below simply aliases phi::dynload::DynLoad__<name>
 * into paddle::platform::dynload and declares the corresponding
 * extern instance, so legacy fluid code keeps compiling unchanged.
 */
// Alias phi's dynload struct for `__name` and declare its extern instance.
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
// Indirection kept so the macro name matches the platform-layer convention.
#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
DYNAMIC_LOAD_MKLML_WRAP(__name)
// List of MKLML entry points re-exported here: BLAS level 1-3 routines
// (gemm/axpy/copy/gemv/trsm and packed/batch gemm variants), vector math
// (vsAdd ... vmdErf), and MKL threading/buffer controls.
#define MKLML_ROUTINE_EACH(__macro) \
__macro(cblas_sgemm); \
__macro(cblas_dgemm); \
__macro(cblas_cgemm); \
__macro(cblas_zgemm); \
__macro(cblas_saxpy); \
__macro(cblas_daxpy); \
__macro(cblas_caxpy); \
__macro(cblas_zaxpy); \
__macro(cblas_scopy); \
__macro(cblas_dcopy); \
__macro(cblas_ccopy); \
__macro(cblas_zcopy); \
__macro(cblas_sgemv); \
__macro(cblas_dgemv); \
__macro(cblas_cgemv); \
__macro(cblas_zgemv); \
__macro(cblas_strsm); \
__macro(cblas_dtrsm); \
__macro(cblas_ctrsm); \
__macro(cblas_ztrsm); \
__macro(cblas_sgemm_alloc); \
__macro(cblas_dgemm_alloc); \
__macro(cblas_sgemm_pack); \
__macro(cblas_dgemm_pack); \
__macro(cblas_sgemm_compute); \
__macro(cblas_dgemm_compute); \
__macro(cblas_sgemm_free); \
__macro(cblas_dgemm_free); \
__macro(cblas_sgemm_batch); \
__macro(cblas_dgemm_batch); \
__macro(cblas_cgemm_batch); \
__macro(cblas_zgemm_batch); \
__macro(cblas_sdot); \
__macro(cblas_ddot); \
__macro(cblas_sasum); \
__macro(cblas_dasum); \
__macro(cblas_isamax); \
__macro(cblas_idamax); \
__macro(cblas_sscal); \
__macro(cblas_dscal); \
__macro(vsAdd); \
__macro(vdAdd); \
__macro(vsSub); \
__macro(vdSub); \
__macro(vsMul); \
__macro(vdMul); \
__macro(vsDiv); \
__macro(vdDiv); \
__macro(vsExp); \
__macro(vdExp); \
__macro(vsSqr); \
__macro(vdSqr); \
__macro(vsPowx); \
__macro(vdPowx); \
__macro(vsInv); \
__macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Free_Buffers); \
__macro(MKL_Set_Num_Threads); \
__macro(MKL_Get_Max_Threads);
MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
// Sparse CSR matrix-matrix products are not available in the Windows MKLML
// distribution, so they are only wrapped on non-Windows platforms.
#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm);
#endif
#undef DYNAMIC_LOAD_MKLML_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
...@@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, ...@@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler,
DEFINE_bool(enable_record_memory, false, "enable memory recorder"); DEFINE_bool(enable_record_memory, false, "enable memory recorder");
#if defined(_WIN32) && defined(PHI_SHARED)
// Out-of-class definitions for phi::ProfilerHelper's static data members.
// NOTE(review): these are provided here only for Windows builds where phi is
// a shared library (PHI_SHARED) — presumably because the static /
// thread_local members cannot be reliably exported across the phi DLL
// boundary and must be instantiated in this module instead; confirm against
// phi's profiler headers.
phi::ProfilerState phi::ProfilerHelper::g_state = phi::ProfilerState::kDisabled;
bool phi::ProfilerHelper::g_enable_nvprof_hook = false;
// g_thread_id is thread_local; g_next_thread_id is the shared counter.
thread_local uint64_t phi::ProfilerHelper::g_thread_id;
uint32_t phi::ProfilerHelper::g_next_thread_id = 0;
// Mutex plus the global list of per-thread Event lists.
std::mutex phi::ProfilerHelper::g_all_event_lists_mutex;
std::list<std::shared_ptr<phi::EventList<phi::Event>>>
phi::ProfilerHelper::g_all_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::Event>>
phi::ProfilerHelper::g_event_list;
// Mutex plus the global list of per-thread MemEvent lists.
std::list<std::shared_ptr<phi::EventList<phi::MemEvent>>>
phi::ProfilerHelper::g_all_mem_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::MemEvent>>
phi::ProfilerHelper::g_mem_event_list;
std::mutex phi::ProfilerHelper::g_all_mem_event_lists_mutex;
#endif
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
cc_library( cc_library(
host_tracer host_tracer
SRCS host_tracer.cc SRCS host_tracer.cc
DEPS framework_proto enforce ddim var_type_traits) DEPS framework_proto enforce phi var_type_traits)
cc_library( cc_library(
cuda_tracer cuda_tracer
SRCS cuda_tracer.cc cupti_data_process.cc SRCS cuda_tracer.cc cupti_data_process.cc
...@@ -28,7 +28,7 @@ cc_library( ...@@ -28,7 +28,7 @@ cc_library(
cc_library( cc_library(
cpu_utilization cpu_utilization
SRCS cpu_utilization.cc SRCS cpu_utilization.cc
DEPS phi_backends phi_os_info enforce glog) DEPS phi enforce glog)
cc_library( cc_library(
new_profiler new_profiler
SRCS profiler.cc SRCS profiler.cc
......
...@@ -28,7 +28,6 @@ set(PYBIND_DEPS ...@@ -28,7 +28,6 @@ set(PYBIND_DEPS
gloo_wrapper gloo_wrapper
infer_io_utils infer_io_utils
heter_wrapper heter_wrapper
generator
op_version_registry op_version_registry
ps_gpu_wrapper ps_gpu_wrapper
custom_operator custom_operator
...@@ -37,16 +36,13 @@ set(PYBIND_DEPS ...@@ -37,16 +36,13 @@ set(PYBIND_DEPS
fleet_executor fleet_executor
global_utils global_utils
phi_utils phi_utils
tcp_store phi
comm_context_manager
new_profiler new_profiler
auto_parallel
jit_layer jit_layer
jit_property jit_property
prim_utils prim_utils
operants_manager static_tensor_operants
phi_tensor_operants type_info)
static_tensor_operants)
if(WITH_PSCORE) if(WITH_PSCORE)
set(PYBIND_DEPS ${PYBIND_DEPS} ps_service) set(PYBIND_DEPS ${PYBIND_DEPS} ps_service)
...@@ -65,7 +61,7 @@ if(WITH_RPC) ...@@ -65,7 +61,7 @@ if(WITH_RPC)
zlib zlib
leveldb leveldb
snappy snappy
gflags phi
glog) glog)
endif() endif()
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
...@@ -148,7 +144,6 @@ set(PYBIND_SRCS ...@@ -148,7 +144,6 @@ set(PYBIND_SRCS
auto_parallel_py.cc) auto_parallel_py.cc)
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi)
set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry) set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
endif() endif()
...@@ -334,6 +329,14 @@ if(WITH_PYTHON) ...@@ -334,6 +329,14 @@ if(WITH_PYTHON)
")\n" ")\n"
"exit /b 0") "exit /b 0")
if(WITH_PHI_SHARED)
  # Copy the phi DLL next to the compiled custom-op artifacts so tests that
  # load them can resolve phi at runtime.
  add_custom_command(
    OUTPUT ${op_impl_path}/phi.dll
    COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${op_impl_path}
    DEPENDS phi)
  # Depend on the copied phi.dll so the custom command above is actually
  # triggered.  (Previously this appended libiomp5md.dll — a copy-paste from
  # the MKLML block below — which left the phi.dll copy rule dangling and
  # referenced an output that only exists when CBLAS_PROVIDER is MKLML.)
  list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/phi.dll)
endif()
if(${CBLAS_PROVIDER} STREQUAL MKLML) if(${CBLAS_PROVIDER} STREQUAL MKLML)
add_custom_command( add_custom_command(
OUTPUT ${op_impl_path}/libiomp5md.dll OUTPUT ${op_impl_path}/libiomp5md.dll
...@@ -481,10 +484,8 @@ if(WITH_PYTHON) ...@@ -481,10 +484,8 @@ if(WITH_PYTHON)
list(APPEND PYBIND_DEPS python) list(APPEND PYBIND_DEPS python)
list(APPEND PYBIND_DEPS custom_operator) list(APPEND PYBIND_DEPS custom_operator)
list(APPEND PYBIND_DEPS custom_operator_node) list(APPEND PYBIND_DEPS custom_operator_node)
list(APPEND PYBIND_DEPS tensor_api)
list(APPEND PYBIND_DEPS eager_tensor_operants) list(APPEND PYBIND_DEPS eager_tensor_operants)
list(APPEND PYBIND_DEPS pybind_util) list(APPEND PYBIND_DEPS pybind_util)
list(APPEND PYBIND_DEPS flags)
endif() endif()
# On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so, # On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so,
......
...@@ -38,7 +38,9 @@ limitations under the License. */ ...@@ -38,7 +38,9 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
DECLARE_bool(check_nan_inf); #include "paddle/phi/core/flags.h"
PHI_DECLARE_bool(check_nan_inf);
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
......
...@@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h) ...@@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h)
# phi auto cmake utils # phi auto cmake utils
include(phi) include(phi)
# Accumulated source-file lists for the unified "phi" target.  They are
# INTERNAL cache variables and are force-reset to empty here so that every
# CMake configure starts from a clean slate (the per-directory phi CMake
# files presumably append into them — see ${common_srcs} etc. gathered into
# PHI_SRCS below; confirm in the subdirectories).
set(common_srcs CACHE INTERNAL "" FORCE)
set(api_srcs CACHE INTERNAL "" FORCE)
set(capi_srcs CACHE INTERNAL "" FORCE)
set(core_srcs CACHE INTERNAL "" FORCE)
set(backends_srcs CACHE INTERNAL "" FORCE)
set(kernels_srcs CACHE INTERNAL "" FORCE)
set(infermeta_srcs CACHE INTERNAL "" FORCE)
#set(excluded_srcs CACHE INTERNAL "" FORCE)
# paddle experimental common components # paddle experimental common components
add_subdirectory(common) add_subdirectory(common)
...@@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE) ...@@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE)
add_subdirectory(capi) add_subdirectory(capi)
endif() endif()
# make an unity target for compile deps
set(PHI_DEPS set(PHI_DEPS
convert_utils phi_profiler_proto
dense_tensor auto_parallel_proto
phi_backends gflags
kernel_factory glog
kernel_context warpctc
arg_map_context warprnnt
infermeta eigen3
lod_utils xxhash
sparse_csr_tensor cblas
sparse_coo_tensor utf8proc)
string_tensor
api_scalar if(WITH_GPU)
api_int_array list(APPEND PHI_DEPS external_error_proto)
extended_tensor endif()
dist_attr
dist_mapper) if(WITH_ASCEND_CL)
list(APPEND PHI_DEPS npu_hccl)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) endif()
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
if(WITH_FLASHATTN)
cc_library(phi DEPS ${PHI_DEPS}) list(APPEND PHI_DEPS flashattn)
endif()
# Optional third-party dependencies of the unified phi target, gated on the
# corresponding build switches.
if(WITH_XBYAK)
list(APPEND PHI_DEPS xbyak)
endif()
if(WITH_MKLDNN)
list(APPEND PHI_DEPS mkldnn)
endif()
if(WITH_GLOO)
list(APPEND PHI_DEPS gloo)
endif()
if(WITH_CUDNN_FRONTEND)
list(APPEND PHI_DEPS cudnn-frontend)
endif()
if(WITH_POCKETFFT)
list(APPEND PHI_DEPS pocketfft)
endif()
if(WITH_MKLML)
# NOTE(review): "pocketfft" here looks like a copy-paste from the
# WITH_POCKETFFT branch above; likely only dynload_mklml is intended —
# confirm before changing.
list(APPEND PHI_DEPS pocketfft dynload_mklml)
endif()
if(WITH_XPU)
list(APPEND PHI_DEPS xpulib)
endif()
# All phi sources, collected from the INTERNAL cache lists populated by the
# phi subdirectories.
set(PHI_SRCS
${common_srcs}
${api_srcs}
${core_srcs}
${backends_srcs}
${kernels_srcs}
${infermeta_srcs}
${capi_srcs})
# Build phi either as one shared library (WITH_PHI_SHARED) or as a static
# archive; the choice is cached so other CMake files can read it.
if(WITH_PHI_SHARED)
set(PHI_BUILD_TYPE
SHARED
CACHE INTERNAL "" FORCE)
else()
set(PHI_BUILD_TYPE
STATIC
CACHE INTERNAL "" FORCE)
endif()
# Create the phi target with the backend-appropriate library helper.
if(WITH_GPU)
add_definitions(-DCUDA_REAL_ARCHS=${NVCC_FLAGS_EXTRA_real_archs}
)# for backends/gpu/gpu_resources.cc
nv_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
elseif(WITH_ROCM)
# hip_add_library does not take a DEPS clause, so link deps separately.
hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS})
target_link_libraries(phi ${PHI_DEPS})
elseif(WITH_XPU_KP)
xpu_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
else()
cc_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
endif()
if(WIN32)
target_link_libraries(phi shlwapi.lib)
endif()
# Record the platform-specific file name of the built phi library in the
# cache (consumed e.g. by the pybind CMake to copy phi.dll on Windows).
if(WIN32)
if(WITH_PHI_SHARED)
# Exported-symbol generation for the DLL relies on WINDOWS_EXPORT_ALL_SYMBOLS.
set_property(TARGET phi PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(PHI_NAME
phi.dll
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
phi.lib
CACHE INTERNAL "" FORCE)
endif()
elseif(APPLE)
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.dylib
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
else()
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.so
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
endif()
# Absolute path to the built phi library, cached for other CMake files.
set(PHI_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}"
CACHE FILEPATH "PHI Library" FORCE)
if(MKL_FOUND AND WITH_ONEMKL)
target_include_directories(phi PRIVATE ${MKL_INCLUDE})
endif()
# Extra build-order dependencies that are not link dependencies.
add_dependencies(phi extern_lapack)
if(WITH_CUTLASS)
add_dependencies(phi cutlass_codegen)
add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION"
)# for memory_efficient_attention.h
endif()
if(WITH_FLASHATTN)
add_dependencies(phi flashattn)
endif()
set(phi_extension_header_file set(phi_extension_header_file
${CMAKE_CURRENT_SOURCE_DIR}/extension.h ${CMAKE_CURRENT_SOURCE_DIR}/extension.h
......
add_subdirectory(profiler) add_subdirectory(profiler)
add_subdirectory(lib) add_subdirectory(lib)
cc_library(
phi_api
SRCS all.cc
DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api
strings_api)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// Anchor translation unit for the aggregated PHI C++ API header; it only
// pulls in all.h to make sure the header compiles, and intentionally
// defines nothing itself.
#include "paddle/phi/api/all.h"
namespace paddle {
namespace experimental {} // namespace experimental
} // namespace paddle
...@@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext { ...@@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext {
void EmplaceBackOutput(Tensor&& output); void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs); void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
void EmplaceBackAttr(paddle::any attr); void EmplaceBackAttr(paddle::any attr);
void EmplaceBackAttrs(const std::vector<paddle::any>& attrs) { void EmplaceBackAttrs(const std::vector<paddle::any>& attrs);
attrs_ = std::move(attrs);
}
const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const; const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const;
const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const; const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const;
...@@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext { ...@@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext {
paddle::optional<Tensor> OptionalInputAt(size_t idx); paddle::optional<Tensor> OptionalInputAt(size_t idx);
paddle::optional<std::vector<Tensor>> OptionalInputsBetween(size_t start, paddle::optional<std::vector<Tensor>> OptionalInputsBetween(size_t start,
size_t end); size_t end);
const std::vector<paddle::any>& Attrs() const { return attrs_; } const std::vector<paddle::any>& Attrs() const;
const std::vector<std::pair<size_t, size_t>>& InputRange() { const std::vector<std::pair<size_t, size_t>>& InputRange();
return input_range_; const std::vector<std::pair<size_t, size_t>>& OutputRange();
}
const std::vector<std::pair<size_t, size_t>>& OutputRange() {
return output_range_;
}
Tensor* MutableOutputAt(size_t idx); Tensor* MutableOutputAt(size_t idx);
std::vector<Tensor*> MutableOutputBetween(size_t start, size_t end); std::vector<Tensor*> MutableOutputBetween(size_t start, size_t end);
std::vector<Tensor> OutputsBetween(size_t start, size_t end); std::vector<Tensor> OutputsBetween(size_t start, size_t end);
...@@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo { ...@@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo {
//////////////// Op Meta Info Helper ///////////////// //////////////// Op Meta Info Helper /////////////////
class OpMetaInfoHelper { class OpMetaInfoHelper {
public: public:
static const std::string& GetOpName(const paddle::OpMetaInfo& info) { static const std::string& GetOpName(const paddle::OpMetaInfo& info);
return info.name_;
}
static const std::vector<std::string>& GetInputs( static const std::vector<std::string>& GetInputs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.inputs_;
}
static const std::vector<std::string>& GetOutputs( static const std::vector<std::string>& GetOutputs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.outputs_;
}
static const std::vector<std::string>& GetAttrs( static const std::vector<std::string>& GetAttrs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.attrs_;
}
static const std::unordered_map<std::string, std::string>& GetInplaceMap( static const std::unordered_map<std::string, std::string>& GetInplaceMap(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.inplace_map_;
}
static const std::unordered_map<std::string, std::string>& static const std::unordered_map<std::string, std::string>&
GetInplaceReverseMap(const paddle::OpMetaInfo& info) { GetInplaceReverseMap(const paddle::OpMetaInfo& info);
return info.inplace_reverse_map_; static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info);
} static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info);
static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) { static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info);
return info.kernel_fn_;
}
static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
}; };
//////////////// Op Meta Info Map ///////////////// //////////////// Op Meta Info Map /////////////////
......
...@@ -410,7 +410,7 @@ class PADDLE_API Tensor final { ...@@ -410,7 +410,7 @@ class PADDLE_API Tensor final {
* *
* @return const std::string& * @return const std::string&
*/ */
const std::string& name() const { return name_; } const std::string& name() const;
/** /**
* @brief Set name of Tensor. * @brief Set name of Tensor.
...@@ -419,7 +419,7 @@ class PADDLE_API Tensor final { ...@@ -419,7 +419,7 @@ class PADDLE_API Tensor final {
* *
* @param const std::string& name * @param const std::string& name
*/ */
void set_name(const std::string& name) { name_ = name; } void set_name(const std::string& name);
/* Part 5: Data Transform methods */ /* Part 5: Data Transform methods */
/* Alert!!!!: All copy method can only deep copy impl, autograd info only be /* Alert!!!!: All copy method can only deep copy impl, autograd info only be
......
# Dependencies of the phi_tensor_raw target, kept in a single variable so the
# three toolchain branches below (CUDA / ROCm / plain C++) cannot drift apart.
set(PHI_TENSOR_RAW_DEPS
    tensor_base
    dense_tensor
    phi_enforce
    context_pool
    tensor_api
    int_array
    scalar)

# Build tensor.cc with the compiler wrapper that matches the enabled device
# backend; the resulting target name and dependency set are identical in all
# three cases.
if(WITH_GPU)
  nv_library(
    phi_tensor_raw
    SRCS tensor.cc
    DEPS ${PHI_TENSOR_RAW_DEPS})
elseif(WITH_ROCM)
  hip_library(
    phi_tensor_raw
    SRCS tensor.cc
    DEPS ${PHI_TENSOR_RAW_DEPS})
else()
  cc_library(
    phi_tensor_raw
    SRCS tensor.cc
    DEPS ${PHI_TENSOR_RAW_DEPS})
endif()
set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py) set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py)
# forward api file # forward api file
...@@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND) ...@@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND)
find_package(PythonInterp REQUIRED) find_package(PythonInterp REQUIRED)
endif() endif()
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
# generate forward api # generate forward api
add_custom_command( execute_process(
OUTPUT ${api_header_file} ${api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp} ${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp}
--api_source_path ${api_source_file_tmp} --api_source_path ${api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp}
${api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp}
${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file}
${api_gen_base}
VERBATIM)
# generate backward api # generate backward api
add_custom_command( execute_process(
OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp}
${bw_api_source_file_tmp}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path ${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path
${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path
${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp} ${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp}
${bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp}
${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
${legacy_bw_api_yaml_file}
VERBATIM)
# generate fused_op api # generate fused_op api
add_custom_command( execute_process(
OUTPUT ${fused_api_header_file} ${fused_api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file} ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file}
--is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp} --is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp}
--api_source_path ${fused_api_source_file_tmp} --api_source_path ${fused_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_header_file_tmp}
${fused_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_source_file_tmp}
${fused_api_source_file}
COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}"
DEPENDS ${fused_api_yaml_file} ${api_gen_file} ${api_gen_base}
VERBATIM)
# generate fused_op backward api # generate fused_op backward api
add_custom_command( execute_process(
OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file}
${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path ${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path
${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path ${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path
${fused_bw_api_header_file_tmp} --backward_source_path ${fused_bw_api_header_file_tmp} --backward_source_path
${fused_bw_api_source_file_tmp} ${fused_bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp}
${fused_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp}
${fused_bw_api_source_file}
COMMENT
"copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}"
DEPENDS ${fused_bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
VERBATIM)
# generate sparse api # generate sparse api
add_custom_command( execute_process(
OUTPUT ${sparse_api_header_file} ${sparse_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path
${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp} ${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp}
--api_source_path ${sparse_api_source_file_tmp} --api_source_path ${sparse_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp}
${sparse_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp}
${sparse_api_source_file}
COMMENT
"copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}"
DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
# generate backward sparse api # generate backward sparse api
add_custom_command( execute_process(
OUTPUT ${sparse_bw_api_header_file} ${sparse_bw_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path
${sparse_bw_api_yaml_file} --api_header_path ${sparse_bw_api_yaml_file} --api_header_path
${sparse_bw_api_header_file_tmp} --api_source_path ${sparse_bw_api_header_file_tmp} --api_source_path
${sparse_bw_api_source_file_tmp} ${sparse_bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp}
${sparse_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp}
${sparse_bw_api_source_file}
COMMENT
"copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}"
DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base}
${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file}
VERBATIM)
# generate strings api # generate strings api
add_custom_command( execute_process(
OUTPUT ${strings_api_header_file} ${strings_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path
${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp} ${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp}
--api_source_path ${strings_api_source_file_tmp} --api_source_path ${strings_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp}
${strings_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp}
${strings_api_source_file}
COMMENT
"copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}"
DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
# generate dygraph(intermediate) api # generate dygraph(intermediate) api
add_custom_command( execute_process(
OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file} ${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file} ${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file}
--dygraph_api_header_path ${dygraph_api_header_file_tmp} --dygraph_api_header_path ${dygraph_api_header_file_tmp}
--dygraph_api_source_path ${dygraph_api_source_file_tmp} --dygraph_api_source_path ${dygraph_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp}
${dygraph_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp}
${dygraph_api_source_file}
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${sparse_api_yaml_file}
${im_api_gen_file} ${api_gen_base} ${api_gen_file}
VERBATIM)
# generate wrapped infermeta # generate wrapped infermeta
add_custom_command( execute_process(
OUTPUT ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path
${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path ${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path
${wrapped_infermeta_header_file} --wrapped_infermeta_source_path ${wrapped_infermeta_header_file} --wrapped_infermeta_source_path
${wrapped_infermeta_source_file} ${wrapped_infermeta_source_file})
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${wrapped_infermeta_gen_file}
${api_gen_base}
VERBATIM)
# generate tensor and tensor operants file # generate tensor and tensor operants file
message("create or copy auto-geneated tensor files") message("create or copy auto-geneated tensor files")
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
execute_process( execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator
COMMAND COMMAND
...@@ -324,154 +209,70 @@ if(${_result}) ...@@ -324,154 +209,70 @@ if(${_result})
message(FATAL_ERROR "tensor codegen failed, exiting.") message(FATAL_ERROR "tensor codegen failed, exiting.")
endif() endif()
set(generated_tensor_files set(generated_files
"${operants_base_file}" "${tensor_api_source_file}" "${operants_base_file}"
"${phi_tensor_operants_header_file}" "${phi_tensor_operants_source_file}" "${tensor_api_source_file}"
"${operants_manager_header_file}" "${operants_manager_source_file}") "${phi_tensor_operants_header_file}"
"${phi_tensor_operants_source_file}"
"${operants_manager_header_file}"
"${operants_manager_source_file}"
"${wrapped_infermeta_source_file}"
"${api_source_file}"
"${api_header_file}"
"${bw_api_source_file}"
"${bw_api_header_file}"
"${fused_api_source_file}"
"${fused_api_header_file}"
"${fused_bw_api_source_file}"
"${fused_bw_api_header_file}"
"${sparse_api_source_file}"
"${sparse_api_header_file}"
"${sparse_bw_api_source_file}"
"${sparse_bw_api_header_file}"
"${dygraph_api_source_file}"
"${dygraph_api_header_file}"
"${strings_api_source_file}"
"${strings_api_header_file}")
foreach(generated_tensor_file ${generated_tensor_files}) foreach(generated_file ${generated_files})
if(EXISTS "${generated_tensor_file}.tmp" AND EXISTS if(EXISTS "${generated_file}.tmp" AND EXISTS "${generated_file}")
"${generated_tensor_file}") execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
execute_process( "${generated_file}.tmp" "${generated_file}")
COMMAND ${CMAKE_COMMAND} -E copy_if_different message("copy if different ${generated_file}.tmp ${generated_file}")
"${generated_tensor_file}.tmp" "${generated_tensor_file}") elseif(EXISTS "${generated_file}.tmp")
message( execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_file}.tmp"
"copy if different ${generated_tensor_file}.tmp ${generated_tensor_file}") "${generated_file}")
elseif(EXISTS "${generated_tensor_file}.tmp") message("copy ${generated_file}.tmp ${generated_file}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy "${generated_tensor_file}.tmp"
"${generated_tensor_file}")
message("copy ${generated_tensor_file}.tmp ${generated_tensor_file}")
endif() endif()
endforeach() endforeach()
cc_library( collect_srcs(
op_meta_info api_srcs
SRCS op_meta_info.cc SRCS
DEPS phi_tensor_raw) tensor.cc
cc_library( op_meta_info.cc
wrapped_infermeta context_pool.cc
SRCS ${wrapped_infermeta_source_file} tensor_utils.cc
DEPS phi) kernel_dispatch.cc
cc_library( api_gen_utils.cc
context_pool data_transform.cc
SRCS context_pool.cc api_custom_impl.cc
DEPS phi_backends phi_enforce place init phi_device_context) tensor_method.cc
cc_library( tensor_copy.cc
api_tensor_utils scalar.cc
SRCS tensor_utils.cc int_array.cc)
DEPS phi_tensor_raw) collect_generated_srcs(
api_srcs
cc_library( SRCS
kernel_dispatch ${wrapped_infermeta_source_file}
SRCS kernel_dispatch.cc ${api_source_file}
DEPS phi_tensor_raw phi_backends kernel_factory context_pool) ${bw_api_source_file}
cc_library( ${fused_api_source_file}
api_gen_utils ${fused_bw_api_source_file}
SRCS api_gen_utils.cc ${sparse_api_source_file}
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor ${sparse_bw_api_source_file}
infermeta_utils) ${dygraph_api_source_file}
cc_library( ${strings_api_source_file}
phi_data_transform ${phi_tensor_operants_source_file}
SRCS data_transform.cc ${operants_manager_source_file}
DEPS phi_tensor_raw phi tensor) ${tensor_api_source_file})
cc_library(
api_custom_impl
SRCS api_custom_impl.cc
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
phi_data_transform
phi_profiler)
cc_library(
phi_function_api
SRCS ${api_source_file} ${fused_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
api_custom_impl
api_tensor_utils
phi_profiler)
cc_library(
phi_bw_function_api
SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
sparse_backward_infermeta
phi_data_transform
phi_function_api
api_custom_impl
global_utils
phi_profiler)
cc_library(
sparse_api
SRCS ${sparse_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
sparse_bw_api
SRCS ${sparse_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
sparse_api
sparse_backward_infermeta
phi_profiler)
cc_library(
phi_dygraph_api
SRCS ${dygraph_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
phi_function_api
sparse_api
phi_profiler)
cc_library(
strings_api
SRCS ${strings_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
phi_tensor
SRCS tensor_method.cc
DEPS phi_tensor_raw
phi_function_api
api_gen_utils
kernel_dispatch
infermeta
sparse_infermeta
sparse_api
strings_api)
cc_library(
tensor_copy
SRCS tensor_copy.cc
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(
api_scalar
SRCS scalar.cc
DEPS tensor_copy)
cc_library(
api_int_array
SRCS int_array.cc
DEPS tensor_copy)
cc_library(
phi_tensor_operants
SRCS ${phi_tensor_operants_source_file}
DEPS phi_function_api)
cc_library(
operants_manager
SRCS ${operants_manager_source_file}
DEPS phi_enforce)
cc_library(
tensor_api
SRCS ${tensor_api_source_file}
DEPS operants_manager)
...@@ -65,11 +65,12 @@ PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) { ...@@ -65,11 +65,12 @@ PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) { PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) {
PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU, PADDLE_ENFORCE_EQ(place.GetType(),
phi::errors::InvalidArgument( phi::AllocationType::GPU,
"GetCurrentCUDAStream only supports GPUPlace input. " phi::errors::InvalidArgument(
"However, your input is place=%s", "GetCurrentCUDAStream only supports GPUPlace input. "
place)); "However, your input is place=%s",
place));
auto& pool = paddle::experimental::DeviceContextPool::Instance(); auto& pool = paddle::experimental::DeviceContextPool::Instance();
const phi::GPUContext* dev_ctx = const phi::GPUContext* dev_ctx =
......
...@@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) { ...@@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) {
<< " has value of type: " << attrs_[attrs_.size() - 1].type().name(); << " has value of type: " << attrs_[attrs_.size() - 1].type().name();
} }
// Replaces the context's attribute list with a copy of `attrs`.
//
// Note: the previous implementation wrote `attrs_ = std::move(attrs);`, but
// `attrs` is a const lvalue reference, so std::move was a no-op and the copy
// assignment operator was selected anyway (clang-tidy:
// performance-move-const-arg). Assign directly to make the copy explicit.
void CustomOpKernelContext::EmplaceBackAttrs(
    const std::vector<paddle::any>& attrs) {
  attrs_ = attrs;
}
const Tensor& CustomOpKernelContext::InputAt(size_t idx) const { const Tensor& CustomOpKernelContext::InputAt(size_t idx) const {
return inputs_.at(idx); return inputs_.at(idx);
} }
...@@ -132,6 +137,10 @@ std::vector<Tensor> CustomOpKernelContext::InputsBetween(size_t start, ...@@ -132,6 +137,10 @@ std::vector<Tensor> CustomOpKernelContext::InputsBetween(size_t start,
return rlt; return rlt;
} }
// Read-only view of all attributes emplaced into this kernel context, in
// insertion order.
const std::vector<paddle::any>& CustomOpKernelContext::Attrs() const {
  return attrs_;
}
Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) { Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) {
return inputs_.at(idx); return inputs_.at(idx);
} }
...@@ -193,6 +202,16 @@ const std::pair<size_t, size_t>& CustomOpKernelContext::OutputRangeAt( ...@@ -193,6 +202,16 @@ const std::pair<size_t, size_t>& CustomOpKernelContext::OutputRangeAt(
return output_range_.at(idx); return output_range_.at(idx);
} }
// Returns the [start, end) index ranges of each logical input in the flat
// inputs_ vector (one pair per declared input).
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::InputRange() {
  return input_range_;
}

// Returns the [start, end) index ranges of each logical output in the flat
// outputs vector (one pair per declared output).
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::OutputRange() {
  return output_range_;
}
void CustomOpKernelContext::ConstructInplaceIndex( void CustomOpKernelContext::ConstructInplaceIndex(
const std::vector<std::string>& inputs, const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, const std::vector<std::string>& outputs,
...@@ -208,8 +227,9 @@ void CustomOpKernelContext::ConstructInplaceIndex( ...@@ -208,8 +227,9 @@ void CustomOpKernelContext::ConstructInplaceIndex(
continue; continue;
} }
auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input)); auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input));
PADDLE_ENFORCE( PADDLE_ENFORCE_NE(
out_iter != outputs.end(), out_iter,
outputs.end(),
phi::errors::NotFound("Can't find the mapped value of %s, please check " phi::errors::NotFound("Can't find the mapped value of %s, please check "
"the input of `Inplace` again and make " "the input of `Inplace` again and make "
"sure you registered your op accurately. ", "sure you registered your op accurately. ",
...@@ -253,8 +273,9 @@ void CustomOpKernelContext::AssignInplaceOutputs() { ...@@ -253,8 +273,9 @@ void CustomOpKernelContext::AssignInplaceOutputs() {
size_t out_start_idx = output_range_[pair.second].first; size_t out_start_idx = output_range_[pair.second].first;
size_t out_end_idx = output_range_[pair.second].second; size_t out_end_idx = output_range_[pair.second].second;
size_t assign_tensor_size = in_end_idx - in_start_idx; size_t assign_tensor_size = in_end_idx - in_start_idx;
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
assign_tensor_size == out_end_idx - out_start_idx, assign_tensor_size,
out_end_idx - out_start_idx,
phi::errors::OutOfRange("When assigning inplaced tensor, Input vector " phi::errors::OutOfRange("When assigning inplaced tensor, Input vector "
"size %d mismatch output vector size %d", "size %d mismatch output vector size %d",
in_end_idx - in_start_idx, in_end_idx - in_start_idx,
...@@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) { ...@@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) {
return *this; return *this;
} }
//////////////// Op Meta Info Helper /////////////////
// Out-of-line definitions of the OpMetaInfoHelper accessors. Each one simply
// exposes a private field of paddle::OpMetaInfo; the bodies were moved out of
// the header as part of the PHI shared-library decoupling — presumably so the
// symbols are emitted by this translation unit (TODO confirm against the
// header/ABI requirements).
const std::string& OpMetaInfoHelper::GetOpName(const paddle::OpMetaInfo& info) {
  return info.name_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetInputs(
    const paddle::OpMetaInfo& info) {
  return info.inputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetOutputs(
    const paddle::OpMetaInfo& info) {
  return info.outputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetAttrs(
    const paddle::OpMetaInfo& info) {
  return info.attrs_;
}
// Mapping from input name to the output name it is inplaced with.
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceMap(const paddle::OpMetaInfo& info) {
  return info.inplace_map_;
}
// Reverse of GetInplaceMap: output name -> input name.
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceReverseMap(const paddle::OpMetaInfo& info) {
  return info.inplace_reverse_map_;
}
const KernelFunc& OpMetaInfoHelper::GetKernelFn(
    const paddle::OpMetaInfo& info) {
  return info.kernel_fn_;
}
const InferShapeFunc& OpMetaInfoHelper::GetInferShapeFn(
    const paddle::OpMetaInfo& info) {
  return info.infer_shape_fn_;
}
const InferDtypeFunc& OpMetaInfoHelper::GetInferDtypeFn(
    const paddle::OpMetaInfo& info) {
  return info.infer_dtype_fn_;
}
//////////////// Op Meta Info Map ///////////////// //////////////// Op Meta Info Map /////////////////
std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) { std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) {
...@@ -414,21 +472,23 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap( ...@@ -414,21 +472,23 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap(
const std::vector<std::string>& outputs = const std::vector<std::string>& outputs =
OpMetaInfoHelper::GetOutputs(*info_ptr_); OpMetaInfoHelper::GetOutputs(*info_ptr_);
for (const auto& pair : inplace_map) { for (const auto& pair : inplace_map) {
PADDLE_ENFORCE( PADDLE_ENFORCE_NE(
std::find(inputs.begin(), inputs.end(), pair.first) != inputs.cend(), std::find(inputs.begin(), inputs.end(), pair.first),
inputs.cend(),
phi::errors::PreconditionNotMet( phi::errors::PreconditionNotMet(
"The register of operator %s's `SetInplaceMap` failed. " "The register of operator %s's `SetInplaceMap` failed. "
"Please make sure: 1. Call `Inputs` and `Outputs` before " "Please make sure: 1. Call `Inputs` and `Outputs` before "
"`SetInplaceMap`; 2. The keys of inplace_map are inside `Inputs`", "`SetInplaceMap`; 2. The keys of inplace_map are inside `Inputs`",
name_)); name_));
PADDLE_ENFORCE(std::find(outputs.begin(), outputs.end(), pair.second) != PADDLE_ENFORCE_NE(
outputs.cend(), std::find(outputs.begin(), outputs.end(), pair.second),
phi::errors::PreconditionNotMet( outputs.cend(),
"The register of operator %s's `SetInplaceMap` failed. " phi::errors::PreconditionNotMet(
"Please make sure: 1. Call `Inputs` and `Outputs` " "The register of operator %s's `SetInplaceMap` failed. "
"before `SetInplaceMap`; 2. The values of inplace_map " "Please make sure: 1. Call `Inputs` and `Outputs` "
"are inside `Outputs`", "before `SetInplaceMap`; 2. The values of inplace_map "
name_)); "are inside `Outputs`",
name_));
} }
info_ptr_->SetInplaceMap( info_ptr_->SetInplaceMap(
std::forward<std::unordered_map<std::string, std::string>>(inplace_map)); std::forward<std::unordered_map<std::string, std::string>>(inplace_map));
......
...@@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const { ...@@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const {
} }
#endif #endif
// Accessor for the tensor's name_ field.
const std::string &Tensor::name() const { return name_; }
// Sets the tensor's name_ field.
void Tensor::set_name(const std::string &name) { name_ = name; }
/* Part 5: Status utils methods */ /* Part 5: Status utils methods */
bool Tensor::defined() const { return impl_ != nullptr; } bool Tensor::defined() const { return impl_ != nullptr; }
......
...@@ -26,16 +26,4 @@ if(WITH_PYTHON AND EXISTS ${PADDLE_BINARY_DIR}) ...@@ -26,16 +26,4 @@ if(WITH_PYTHON AND EXISTS ${PADDLE_BINARY_DIR})
endif() endif()
endif() endif()
if(WITH_GPU OR WITH_ROCM) collect_srcs(api_srcs SRCS device_tracer.cc profiler.cc)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
endif()
cc_library(
phi_device_tracer
SRCS device_tracer.cc
DEPS phi_profiler_proto ${GPU_CTX_DEPS})
cc_library(
phi_profiler
SRCS profiler.cc
DEPS phi_os_info phi_device_tracer phi_enforce)
...@@ -2,17 +2,6 @@ add_subdirectory(dynload) ...@@ -2,17 +2,6 @@ add_subdirectory(dynload)
add_subdirectory(gpu) add_subdirectory(gpu)
set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc) set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc)
set(BACKENDS_DEPS
enforce
place
flags
eigen3
phi_device_context
generator
phi_os_info)
if(WITH_XBYAK)
list(APPEND BACKENDS_DEPS xbyak)
endif()
if(NOT APPLE AND NOT WIN32) if(NOT APPLE AND NOT WIN32)
list(APPEND BACKENDS_SRCS device_code.cc) list(APPEND BACKENDS_SRCS device_code.cc)
...@@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM)
gpu/gpu_resources.cc) gpu/gpu_resources.cc)
if(WITH_GPU) if(WITH_GPU)
list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc) list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
set_source_files_properties(
gpu/gpu_resources.cc
PROPERTIES COMPILE_FLAGS
"-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
endif() endif()
if(WITH_ROCM) if(WITH_ROCM)
list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc) list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
endif() endif()
list(APPEND BACKENDS_DEPS phi_dynload_cuda)
endif() endif()
if(WITH_XPU) if(WITH_XPU)
...@@ -45,7 +28,6 @@ if(WITH_MKLDNN) ...@@ -45,7 +28,6 @@ if(WITH_MKLDNN)
list(APPEND BACKENDS_SRCS onednn/onednn_context.cc) list(APPEND BACKENDS_SRCS onednn/onednn_context.cc)
list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc) list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc)
list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc) list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc)
list(APPEND BACKENDS_DEPS mkldnn)
endif() endif()
list( list(
...@@ -55,26 +37,25 @@ list( ...@@ -55,26 +37,25 @@ list(
device_guard.cc device_guard.cc
stream.cc stream.cc
event.cc event.cc
device_base.cc
device_manager.cc device_manager.cc
context_pool.cc) context_pool.cc)
if(WITH_GPU
OR WITH_ROCM
OR WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS device_base.cc)
endif()
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc
custom/custom_device_op_list.cc) custom/custom_device_op_list.cc)
endif() endif()
add_library(phi_backends "${BACKENDS_SRCS}") collect_srcs(backends_srcs SRCS ${BACKENDS_SRCS})
target_link_libraries(phi_backends ${BACKENDS_DEPS})
# for inference library
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
set(phi_modules ${phi_modules} phi_backends)
set_property(GLOBAL PROPERTY PHI_MODULES "${phi_modules}")
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
cc_test( cc_test(
capi_test capi_test
SRCS custom/capi_test.cc SRCS custom/capi_test.cc
DEPS phi_capi) DEPS phi)
endif() endif()
...@@ -24,6 +24,10 @@ ...@@ -24,6 +24,10 @@
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, CPUContext>::kType =
RegisterStaticType<DeviceContext>(CPUContext::name());
struct CPUContext::Impl { struct CPUContext::Impl {
Impl() : place_(CPUPlace()) {} Impl() : place_(CPUPlace()) {}
......
...@@ -19,6 +19,11 @@ limitations under the License. */ ...@@ -19,6 +19,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, CustomContext>::kType =
RegisterStaticType<DeviceContext>(CustomContext::name());
struct CustomContext::Impl { struct CustomContext::Impl {
explicit Impl(const CustomPlace& place) : place_(place) {} explicit Impl(const CustomPlace& place) : place_(place) {}
......
cc_library( set(DYNLOAD_COMMON_SRCS dynamic_loader.cc port.cc warpctc.cc warprnnt.cc
phi_dynamic_loader lapack.cc)
SRCS dynamic_loader.cc port.cc if(WITH_ASCEND_CL)
DEPS enforce glog gflags) list(REMOVE_ITEM DYNLOAD_COMMON_SRCS warprnnt.cc)
endif()
list( list(
APPEND APPEND
CUDA_SRCS CUDA_SRCS
...@@ -60,66 +60,39 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) ...@@ -60,66 +60,39 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if(CUPTI_FOUND) if(CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc) list(APPEND CUDA_SRCS cupti.cc)
endif() endif()
if(WITH_ROCM)
hip_library(
phi_dynload_cuda
SRCS ${HIP_SRCS}
DEPS phi_dynamic_loader)
cc_library(
phi_dynload_warpctc
SRCS warpctc.cc
DEPS phi_dynamic_loader warpctc)
cc_library(
phi_dynload_warprnnt
SRCS warprnnt.cc
DEPS phi_dynamic_loader warprnnt)
else()
nv_library(
phi_dynload_cuda
SRCS ${CUDA_SRCS}
DEPS phi_dynamic_loader)
cc_library(
phi_dynload_warpctc
SRCS warpctc.cc
DEPS phi_dynamic_loader warpctc)
cc_library(
phi_dynload_warprnnt
SRCS warprnnt.cc
DEPS phi_dynamic_loader warprnnt)
endif()
if(WITH_MKLML) if(WITH_MKLML)
cc_library( # Only deps libmklml.so, not link
phi_dynload_mklml add_library(dynload_mklml STATIC mklml.cc)
SRCS mklml.cc add_dependencies(dynload_mklml mklml)
DEPS phi_dynamic_loader mklml) if(WIN32)
target_link_libraries(dynload_mklml ${MKLML_IOMP_LIB})
else()
target_link_libraries(dynload_mklml
"-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
endif() endif()
if(WITH_FLASHATTN) if(WITH_FLASHATTN)
cc_library( list(APPEND DYNLOAD_COMMON_SRCS flashattn.cc)
phi_dynload_flashattn
SRCS flashattn.cc
DEPS phi_dynamic_loader flashattn)
endif() endif()
cc_library(
phi_dynload_lapack
SRCS lapack.cc
DEPS phi_dynamic_loader)
add_dependencies(phi_dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if(MKL_FOUND AND WITH_ONEMKL) if(MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}") message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library( list(APPEND DYNLOAD_COMMON_SRCS mklrt.cc)
phi_dynload_mklrt endif()
SRCS mklrt.cc
DEPS phi_dynamic_loader) if(WITH_ROCM)
target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE}) collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${HIP_SRCS})
elseif(WITH_GPU)
collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${CUDA_SRCS})
else()
collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS})
endif() endif()
if(WITH_CUDNN_FRONTEND) if(WITH_CUDNN_FRONTEND)
nv_test( nv_test(
cudnn_frontend_test cudnn_frontend_test
SRCS cudnn_frontend_test.cc SRCS cudnn_frontend_test.cc
DEPS phi_dynload_cuda cudnn-frontend) DEPS phi cudnn-frontend)
endif() endif()
cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc) collect_srcs(backends_srcs SRCS cudnn_workspace_helper.cc)
...@@ -59,6 +59,15 @@ limitations under the License. */ ...@@ -59,6 +59,15 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, GPUContext>::kType =
RegisterStaticType<DeviceContext>(GPUContext::name());
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, GPUPinnedContext>::kType =
RegisterStaticType<DeviceContext>(GPUPinnedContext::name());
namespace internal { namespace internal {
class EigenGpuStreamDevice : public Eigen::StreamInterface { class EigenGpuStreamDevice : public Eigen::StreamInterface {
......
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once #pragma once
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include <array> #include <array>
#include <functional> #include <functional>
#include <mutex> #include <mutex>
...@@ -305,3 +307,5 @@ class GPUPinnedContext ...@@ -305,3 +307,5 @@ class GPUPinnedContext
}; };
#endif #endif
} // namespace phi } // namespace phi
#endif
...@@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) { ...@@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) {
} }
} }
OneDNNContextThreadLocals::Body& OneDNNContextThreadLocals::fetch() {
thread_local Body b;
return b;
}
struct OneDNNContext::Impl { struct OneDNNContext::Impl {
Impl() : p_blobmap_() { Impl() : p_blobmap_() {
p_blobmap_.reset(new BlobMap()); p_blobmap_.reset(new BlobMap());
...@@ -462,5 +467,7 @@ const std::vector<std::string>& OneDNNContext::GetOutputsName( ...@@ -462,5 +467,7 @@ const std::vector<std::string>& OneDNNContext::GetOutputsName(
return impl_->GetOutputsName(output); return impl_->GetOutputsName(output);
} }
const char* OneDNNContext::name() { return "OneDNNContext"; }
} // namespace phi } // namespace phi
#endif #endif
...@@ -76,10 +76,7 @@ class OneDNNContextThreadLocals { ...@@ -76,10 +76,7 @@ class OneDNNContextThreadLocals {
static constexpr size_t kMKLDNNSessionID_Default = 0; static constexpr size_t kMKLDNNSessionID_Default = 0;
// mkldnn session id for cache clearing mode // mkldnn session id for cache clearing mode
static constexpr size_t kMKLDNNSessionID_CacheClearing = -1; static constexpr size_t kMKLDNNSessionID_CacheClearing = -1;
static Body& fetch() { static Body& fetch();
thread_local Body b;
return b;
}
}; };
class OneDNNContext : public CPUContext { class OneDNNContext : public CPUContext {
...@@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext { ...@@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext {
const std::vector<std::string>& GetOutputsName( const std::vector<std::string>& GetOutputsName(
const std::string& output) const; const std::string& output) const;
static const char* name() { return "OneDNNContext"; } static const char* name();
private: private:
struct Impl; struct Impl;
......
...@@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api; ...@@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api;
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, XPUContext>::kType =
RegisterStaticType<DeviceContext>(XPUContext::name());
struct XPUContext::Impl { struct XPUContext::Impl {
void SetL3Cache(int l3_size = 14155776) { void SetL3Cache(int l3_size = 14155776) {
const int MAX_XPU_NUM = 16; const int MAX_XPU_NUM = 16;
......
add_subdirectory(lib) add_subdirectory(lib)
cc_library(
phi_capi
SRCS all.cc
DEPS phi_c_data_type
phi_c_device_context
phi_c_int_array
phi_c_kernel_context
phi_c_kernel_factory
phi_c_kernel_registry
phi_c_place
phi_c_scalar
phi_c_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/capi/all.h"
namespace paddle {
namespace capi {} // namespace capi
} // namespace paddle
cc_library( collect_srcs(
phi_c_data_type capi_srcs
SRCS c_data_type.cc SRCS
DEPS dense_tensor) c_data_type.cc
c_device_context.cc
cc_library( c_int_array.cc
phi_c_device_context c_kernel_context.cc
SRCS c_device_context.cc c_kernel_factory.cc
DEPS phi_backends) c_kernel_registry.cc
c_place.cc
cc_library( c_scalar.cc
phi_c_int_array c_tensor.cc)
SRCS c_int_array.cc
DEPS int_array)
cc_library(
phi_c_kernel_context
SRCS c_kernel_context.cc
DEPS kernel_context)
cc_library(
phi_c_kernel_factory
SRCS c_kernel_factory.cc
DEPS kernel_factory)
cc_library(
phi_c_kernel_registry
SRCS c_kernel_registry.cc
DEPS dense_tensor)
cc_library(
phi_c_place
SRCS c_place.cc
DEPS phi_place)
cc_library(
phi_c_scalar
SRCS c_scalar.cc
DEPS scalar)
cc_library(
phi_c_tensor
SRCS c_tensor.cc
DEPS dense_tensor)
if(WITH_GPU) collect_srcs(common_srcs SRCS place.cc scalar.cc int_array.cc memory_utils.cc)
nv_library(
phi_place
SRCS place.cc
DEPS phi_backends)
elseif(WITH_ROCM)
hip_library(
phi_place
SRCS place.cc
DEPS phi_backends)
else()
cc_library(phi_place SRCS place.cc)
endif()
cc_library(
scalar
SRCS scalar.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
int_array
SRCS int_array.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
memory_utils
SRCS memory_utils.cc
DEPS phi_enforce phi_place)
...@@ -6,150 +6,35 @@ if(WITH_GPU) ...@@ -6,150 +6,35 @@ if(WITH_GPU)
proto_library(external_error_proto SRCS external_error.proto) proto_library(external_error_proto SRCS external_error.proto)
endif() endif()
cc_library( collect_srcs(
flags core_srcs
SRCS flags.cc SRCS
DEPS gflags) flags.cc
errors.cc
cc_library(errors SRCS errors.cc) enforce.cc
set(phi_enforce_deps errors flags) os_info.cc
if(WITH_GPU) kernel_context.cc
set(phi_enforce_deps ${phi_enforce_deps} external_error_proto) ddim.cc
endif() tensor_base.cc
cc_library( allocator.cc
phi_enforce tensor_meta.cc
SRCS enforce.cc lod_utils.cc
DEPS ${phi_enforce_deps}) threadpool.cc
dense_tensor.cc
cc_library( dense_tensor_impl.cc
phi_os_info sparse_coo_tensor.cc
SRCS os_info.cc sparse_csr_tensor.cc
DEPS phi_enforce) string_tensor.cc
tensor_array.cc
if(WITH_XPU) extended_tensor.cc
cc_library( meta_tensor.cc
kernel_factory infermeta_utils.cc
SRCS kernel_factory.cc selected_rows_impl.cc
DEPS phi_enforce convert_utils phi_backends) selected_rows.cc
else() device_context.cc
cc_library( custom_kernel.cc
kernel_factory mixed_vector.cc
SRCS kernel_factory.cc generator.cc
DEPS phi_enforce convert_utils) kernel_factory.cc
endif() tensor_utils.cc
cc_library( storage_properties.cc)
kernel_context
SRCS kernel_context.cc
DEPS phi_enforce phi_backends)
cc_library(
ddim
SRCS ddim.cc
DEPS phi_enforce)
cc_library(
tensor_base
SRCS tensor_base.cc allocator.cc
DEPS phi_enforce)
cc_library(
tensor_meta
SRCS tensor_meta.cc
DEPS phi_enforce)
cc_library(
lod_utils
SRCS lod_utils.cc
DEPS phi_enforce)
cc_library(
threadpool
SRCS threadpool.cc
DEPS phi_enforce)
cc_library(
dense_tensor
SRCS dense_tensor.cc dense_tensor_impl.cc
DEPS convert_utils tensor_meta tensor_base ddim)
target_link_libraries(dense_tensor memory_utils)
cc_library(
sparse_coo_tensor
SRCS sparse_coo_tensor.cc
DEPS tensor_meta tensor_base)
cc_library(
sparse_csr_tensor
SRCS sparse_csr_tensor.cc
DEPS dense_tensor tensor_base)
cc_library(
string_tensor
SRCS string_tensor.cc
DEPS convert_utils tensor_meta tensor_base)
cc_library(
tensor_array
SRCS tensor_array.cc
DEPS dense_tensor tensor_base)
cc_library(
extended_tensor
SRCS extended_tensor.cc
DEPS tensor_base)
cc_library(
meta_tensor
SRCS meta_tensor.cc
DEPS tensor_base tensor_meta dense_tensor)
cc_library(
infermeta_utils
SRCS infermeta_utils.cc
DEPS meta_tensor)
cc_library(
selected_rows
SRCS selected_rows_impl.cc selected_rows.cc
DEPS tensor_base dense_tensor phi_enforce ddim)
cc_library(
phi_device_context
SRCS device_context.cc
DEPS dense_tensor selected_rows)
cc_library(
custom_kernel
SRCS custom_kernel.cc
DEPS kernel_factory)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS phi_backends place memory)
cc_library(
generator
SRCS generator.cc
DEPS enforce place)
# Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
add_dependencies(tensor_base mkldnn)
endif()
if(WITH_GPU)
nv_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_ROCM)
hip_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_XPU_KP)
xpu_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
else()
cc_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS dense_tensor selected_rows memcpy phi_backends memory_utils)
endif()
cc_library( collect_srcs(core_srcs SRCS arg_map_context.cc op_utils.cc
arg_map_context get_kerneltype_forvar_utils.cc convert_utils.cc)
SRCS arg_map_context.cc
DEPS phi_enforce)
cc_library(
op_utils
SRCS op_utils.cc
DEPS arg_map_context enforce)
cc_library(
get_kerneltype_forvar_utils
SRCS get_kerneltype_forvar_utils.cc
DEPS enforce)
set(convert_utils_deps data_type place op_utils phi_backends)
if(WITH_MKLDNN)
set(convert_utils_deps ${convert_utils_deps} mkldnn)
endif()
cc_library(
convert_utils
SRCS convert_utils.cc
DEPS ${convert_utils_deps})
...@@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() { ...@@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() {
return g_op_utils_map; return g_op_utils_map;
} }
BaseKernelNameRegistrar::BaseKernelNameRegistrar(const char* op_type,
const char* base_kernel_name) {
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
ArgumentMappingFnRegistrar::ArgumentMappingFnRegistrar(
const char* op_type, ArgumentMappingFn arg_mapping_fn) {
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
} // namespace phi } // namespace phi
...@@ -210,18 +210,12 @@ class OpUtilsMap { ...@@ -210,18 +210,12 @@ class OpUtilsMap {
}; };
struct BaseKernelNameRegistrar { struct BaseKernelNameRegistrar {
BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name) { BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name);
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
}; };
struct ArgumentMappingFnRegistrar { struct ArgumentMappingFnRegistrar {
ArgumentMappingFnRegistrar(const char* op_type, ArgumentMappingFnRegistrar(const char* op_type,
ArgumentMappingFn arg_mapping_fn) { ArgumentMappingFn arg_mapping_fn);
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
}; };
#define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \ #define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \
......
...@@ -42,6 +42,11 @@ limitations under the License. */ ...@@ -42,6 +42,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, DenseTensor>::kType =
RegisterStaticType<phi::TensorBase>(DenseTensor::name());
DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta) DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta)
: meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {} : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
...@@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, ...@@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
if (fake_alloc) { if (fake_alloc) {
bytes = 0; bytes = 0;
} else { } else {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
valid(), valid(),
true,
phi::errors::PreconditionNotMet("The meta data must be valid when " phi::errors::PreconditionNotMet("The meta data must be valid when "
"call the mutable data function.")); "call the mutable data function."));
if (requested_size) { if (requested_size) {
...@@ -169,8 +175,9 @@ const T* DenseTensor::data() const { ...@@ -169,8 +175,9 @@ const T* DenseTensor::data() const {
template <typename T> template <typename T>
T* DenseTensor::data() { T* DenseTensor::data() {
T* ret = static_cast<T*>(data()); T* ret = static_cast<T*>(data());
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
(dtype() == phi::CppTypeToDataType<T>::Type()), dtype(),
phi::CppTypeToDataType<T>::Type(),
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"The type of data we are trying to retrieve (%s) does not match the " "The type of data we are trying to retrieve (%s) does not match the "
"type of data (%s) currently contained in the container.", "type of data (%s) currently contained in the container.",
...@@ -200,16 +207,18 @@ const void* DenseTensor::data() const { ...@@ -200,16 +207,18 @@ const void* DenseTensor::data() const {
} }
void DenseTensor::set_meta(DenseTensorMeta&& meta) { void DenseTensor::set_meta(DenseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(), PADDLE_ENFORCE_EQ(meta_.valid(),
phi::errors::InvalidArgument( false,
"Only when the original attribute of Tensor is " phi::errors::InvalidArgument(
"incomplete, can it be reset.")); "Only when the original attribute of Tensor is "
"incomplete, can it be reset."));
meta_ = std::move(meta); meta_ = std::move(meta);
} }
void DenseTensor::set_meta(const DenseTensorMeta& meta) { void DenseTensor::set_meta(const DenseTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
meta.valid(), meta.valid(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册