Unverified commit da50a009, authored by YuanRisheng, committed by GitHub

[PHI Decoupling]Create PHI shared lib (#53735)

* create phi so

* fix ci bugs

* fix py3 bugs

* add file

* fix py3 bugs

* fix windows bugs

* polish the shared lib

* fix py3 bugs

* delete all static target in phi

* fix windows bugs

* fix py3 bugs

* fix ci bugs

* fix windows bugs

* fix bugs: gflags can't be linked by dynamic and static lib

* fix bug that 3rd-party libraries could not be loaded

* fix ci bugs

* fix compile bugs

* fix py3 bugs

* fix conflict

* fix xpu bugs

* fix mac compile bugs

* fix psgpu bugs

* fix inference failure

* deal with conflict

* fix LIBRARY_PATH bug

* fix windows bugs

* fix onednn error

* fix windows compile bugs

* fix windows compile bugs

* fix test_cuda_graph_static_mode_error aborted

* fix windows bugs

* fix mac-python3 error

* fix hip compile bugs

* change mode to static

* change to static mode

* fix ci bugs

* fix py3 bugs

* fix windows bugs

* fix bugs

* add static flag

* add PADDLE_API

* change position of PADDLE_API

* fix windows bugs

* change mode to dynamic lib

* fix windows static bugs

* deal with conflict

* fix windows unit bug

* fix coverage

* deal with conflict

* fix windows-inference

* fix py3 bugs

* fix bugs when compile type_info

* fix compile bugs

* fix py3 bugs

* fix windows bugs

* fix windows openblas

* fix xpu bugs

* fix enforce_test in windows

* update code according to review comments

* fix windows cmake bug

* fix windows bugs

* fix windows bugs

* delete cinn unittest

* fix cinn bugs

---------
Co-authored-by: lzydev <1528794076@qq.com>
Parent 7aabdfd9
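Several of the commit messages above ("add PADDLE_API", "add static flag", "change position of PADDLE_API") refer to the standard Windows export-macro idiom that a shared phi library needs. Below is a minimal sketch of that idiom; the guard names `PADDLE_DLL_EXPORT` and `STATIC_PADDLE` are illustrative assumptions, not taken from the patch.

```cpp
// Hypothetical sketch of an export macro like PADDLE_API.
// The guard names are assumptions for illustration only.
#if defined(_WIN32) && !defined(STATIC_PADDLE)
#ifdef PADDLE_DLL_EXPORT
#define PADDLE_API __declspec(dllexport)  // set while building phi.dll itself
#else
#define PADDLE_API __declspec(dllimport)  // consumers importing from phi.dll
#endif
#else
#define PADDLE_API  // static builds and non-Windows need no decoration
#endif

// Usage: annotate symbols that must cross the DLL boundary.
class PADDLE_API Tensor { /* ... */ };
```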
......@@ -40,7 +40,6 @@ if(WITH_MKLML)
add_definitions(-DLAPACK_FOUND)
add_dependencies(cblas mklml)
target_link_libraries(cblas dynload_mklml)
message(STATUS "Found cblas and lapack in MKLML "
"(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
......
......@@ -235,3 +235,16 @@ endif()
if(WITH_CUDNN_FRONTEND)
add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
endif()
set(WITH_PHI_SHARED
ON
CACHE BOOL "" FORCE)
if(WIN32 OR WITH_ROCM)
set(WITH_PHI_SHARED
OFF
CACHE BOOL "" FORCE)
endif()
if(WITH_PHI_SHARED)
add_definitions(-DPHI_SHARED)
endif()
......@@ -122,6 +122,5 @@ get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
include_directories(${WARPCTC_INCLUDE_DIR}
)# For warpctc code to include its headers.
add_library(warpctc SHARED IMPORTED GLOBAL)
set_property(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
add_library(warpctc INTERFACE)
add_dependencies(warpctc extern_warpctc)
......@@ -364,20 +364,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc)
endif()
# Only deps libmklml.so, not link
if("${cc_library_DEPS};" MATCHES "mklml;")
list(REMOVE_ITEM cc_library_DEPS mklml)
if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml")
list(APPEND cc_library_DEPS dynload_mklml)
endif()
add_dependencies(${TARGET_NAME} mklml)
if(WIN32)
target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
else()
target_link_libraries(${TARGET_NAME}
"-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
endif()
# remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
if("${cc_library_DEPS};" MATCHES "python;")
......@@ -457,24 +444,9 @@ function(cc_test_build TARGET_NAME)
endif()
endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(
${TARGET_NAME}
${cc_test_DEPS}
${os_dependency_modules}
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
add_dependencies(
${TARGET_NAME}
${cc_test_DEPS}
paddle_gtest_main
lod_tensor
memory
gtest
gflags
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS}
${os_dependency_modules} paddle_gtest_main gtest glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main gtest
glog)
common_link(${TARGET_NAME})
if(WITH_ROCM)
......@@ -670,7 +642,7 @@ function(nv_test TARGET_NAME)
add_executable(${TARGET_NAME} ${nv_test_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS}
${os_dependency_modules} paddle_gtest_main)
${os_dependency_modules} paddle_gtest_main phi)
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
......@@ -774,8 +746,8 @@ function(hip_test TARGET_NAME)
lod_tensor
memory
gtest
gflags
glog
phi
${os_dependency_modules})
add_dependencies(
${TARGET_NAME}
......@@ -784,7 +756,7 @@ function(hip_test TARGET_NAME)
lod_tensor
memory
gtest
gflags
phi
glog)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
......@@ -881,7 +853,7 @@ function(xpu_test TARGET_NAME)
lod_tensor
memory
gtest
gflags
phi
glog
${os_dependency_modules})
add_dependencies(
......@@ -891,7 +863,7 @@ function(xpu_test TARGET_NAME)
lod_tensor
memory
gtest
gflags
phi
glog)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
......
......@@ -269,6 +269,13 @@ else()
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
copy(
inference_lib_dist
SRCS ${paddle_phi_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()
copy(
......
......@@ -61,8 +61,7 @@ function(register_cu_kernel TARGET)
"${multiValueArgs}" ${ARGN})
set(cu_srcs)
set(op_common_deps operator op_registry math_function layer
common_infer_shape_functions)
set(op_common_deps operator op_registry layer common_infer_shape_functions)
foreach(cu_src ${register_cu_kernel_SRCS})
if(${cu_src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${cu_src})
......@@ -113,7 +112,7 @@ function(register_mkldnn_kernel TARGET)
"${multiValueArgs}" ${ARGN})
set(mkldnn_cc_srcs)
set(op_common_deps operator op_registry math_function layer
set(op_common_deps operator op_registry phi layer
common_infer_shape_functions)
foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
......@@ -164,7 +163,7 @@ function(op_library TARGET)
set(MIOPEN_FILE)
set(mkldnn_cc_srcs)
set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer
set(op_common_deps operator op_registry phi layer
common_infer_shape_functions)
# Option `UNITY` is used to specify that operator `TARGET` will compile with Unity Build.
......
......@@ -94,6 +94,13 @@ function(kernel_declare TARGET_LIST)
continue()
endif()
endif()
# the fusion_group kernel is not supported on Windows and macOS
if(WIN32 OR APPLE)
string(FIND "${first_registry}" "fusion_group" pos)
if(pos GREATER 1)
continue()
endif()
endif()
# some gpu kernels can only run on cuda and do not support rocm, so we add this branch
if(WITH_ROCM)
string(FIND "${first_registry}" "cuda_only" pos)
......@@ -216,3 +223,27 @@ function(prune_declaration_h)
endif()
endforeach()
endfunction()
function(collect_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(src ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${src}"
CACHE INTERNAL "")
endforeach()
endfunction()
function(collect_generated_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(src ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${src}"
CACHE INTERNAL "")
endforeach()
endfunction()
......@@ -49,5 +49,5 @@ file(GLOB PD_DIALECT_SRCS "*.cc")
cc_library(
pd_dialect
SRCS ${PD_DIALECT_SRCS} ${op_source_file}
DEPS new_ir framework_proto dense_tensor phi_utils)
DEPS new_ir framework_proto phi phi_utils)
target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR})
cc_library(
op_dist_attr
SRCS dist_attr.cc
DEPS dist_attr process_mesh dist_mapper auto_parallel_proto proto_desc
phi_enforce)
DEPS phi auto_parallel_proto proto_desc)
add_subdirectory(test)
cc_test(
device_mesh_test
SRCS device_mesh_test.cc
DEPS device_mesh)
DEPS phi)
cc_test(
process_mesh_test
SRCS process_mesh_test.cc
DEPS process_mesh)
DEPS phi)
cc_test(
dist_attr_test
SRCS dist_attr_test.cc
DEPS dist_attr proto_desc)
DEPS phi proto_desc)
cc_test(
dist_mapper_test
SRCS dist_mapper_test.cc
DEPS dist_mapper)
DEPS phi)
cc_library(
process_group
SRCS process_group.cc
DEPS dense_tensor xxhash)
DEPS phi xxhash)
cc_library(
eager_reducer
SRCS reducer.cc
DEPS eager_api process_group phi_api string_helper)
DEPS eager_api process_group phi string_helper)
if(WITH_DISTRIBUTE)
cc_library(
process_group_gloo
SRCS process_group_gloo.cc gloo_send_recv.cc
DEPS phi_api eager_api gloo_wrapper tcp_store)
DEPS phi eager_api gloo_wrapper)
endif()
if(WITH_NCCL OR WITH_RCCL)
......@@ -20,28 +20,19 @@ if(WITH_NCCL OR WITH_RCCL)
process_group_nccl
SRCS process_group_nccl.cc nccl_tools.cc common.cc
DEPS process_group
tcp_store
phi
place
enforce
collective_helper
device_context
${DEVICE_EVENT_LIBS}
dense_tensor
comm_static_check
nccl_dynamic_check)
${DEVICE_EVENT_LIBS})
endif()
if(WITH_XPU_BKCL)
cc_library(
process_group_bkcl
SRCS process_group_bkcl.cc bkcl_tools.cc common.cc
DEPS process_group
tcp_store
place
enforce
collective_helper
device_context
dense_tensor)
DEPS process_group phi place enforce collective_helper device_context)
endif()
if(WITH_MPI)
......@@ -55,15 +46,7 @@ if(WITH_CUSTOM_DEVICE)
cc_library(
process_group_custom
SRCS process_group_custom.cc custom_ccl_tools.cc common.cc
DEPS process_group
tcp_store
phi_backends
place
enforce
collective_helper
device_context
comm_static_check
dense_tensor)
DEPS process_group phi place enforce collective_helper device_context)
endif()
set(COMM_UTILS_DEPS process_group)
......
......@@ -5,7 +5,7 @@ endif()
proto_library(interceptor_message_proto SRCS interceptor_message.proto)
if(WITH_ARM_BRPC)
set(BRPC_DEPS arm_brpc snappy gflags glog)
set(BRPC_DEPS arm_brpc snappy phi glog)
elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
set(BRPC_DEPS
brpc
......@@ -15,7 +15,7 @@ elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
zlib
leveldb
snappy
gflags
phi
glog)
else()
set(BRPC_DEPS "")
......@@ -51,7 +51,7 @@ cc_library(
collective_helper
op_registry
executor_gc_helper
gflags
phi
glog
${BRPC_DEPS})
......
......@@ -8,12 +8,11 @@ if(WITH_HETERPS)
ssl
crypto
protobuf
gflags
phi
glog
zlib
leveldb
snappy
gflags
glog
device_context
rocksdb)
......@@ -25,12 +24,11 @@ else()
ssl
crypto
protobuf
gflags
phi
glog
zlib
leveldb
snappy
gflags
glog
device_context)
......@@ -122,8 +120,7 @@ cc_library(
simple_threadpool
simple_rpc
scope
math_function
selected_rows_functor
phi
ps_gpu_wrapper
${RPC_DEPS})
......@@ -150,7 +147,7 @@ cc_library(
#cc_library(
# communicator
# SRCS communicator/communicator.cc
# DEPS scope client table math_function selected_rows_functor ${RPC_DEPS})
# DEPS scope client table phi ${RPC_DEPS})
#cc_library(
# ps_service
# SRCS ps_service/service.cc
......
......@@ -48,7 +48,7 @@ cc_library(
string_helper
simple_threadpool
xxhash
generator)
phi)
set_source_files_properties(
tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
......@@ -91,7 +91,7 @@ cc_library(
ps_framework_proto
string_helper
device_context
gflags
phi
glog
fs
afs_wrapper
......
......@@ -20,7 +20,7 @@ set(PADDLE_RPC_DEPS
zlib
leveldb
snappy
gflags
phi
glog
pybind)
proto_library(paddle_rpc_proto SRCS rpc.proto)
......
......@@ -73,7 +73,7 @@ cc_test_old(
DEPS
brpc_utils
scope
math_function
phi
${COMMON_DEPS}
${RPC_DEPS})
......
set(eager_deps
phi_api
phi_dygraph_api
phi
hook_utils
tensor_utils
utils
global_utils
backward
phi_tensor
tracer
layer
autograd_meta
......@@ -48,27 +46,26 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_library(
backward
SRCS backward.cc
DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune)
DEPS grad_tensor_holder utils autograd_meta grad_node_info phi)
endif()
cc_library(
eager_nan_inf_utils
SRCS nan_inf_utils.cc
DEPS phi_tensor nan_inf_utils enforce)
DEPS phi nan_inf_utils enforce)
cc_library(
grad_node_info
SRCS grad_node_info.cc
DEPS phi_api phi_tensor)
DEPS phi)
cc_library(
autograd_meta
SRCS autograd_meta.cc
DEPS phi_api phi_tensor)
DEPS phi)
cc_library(
utils
SRCS utils.cc
DEPS phi_api
phi_tensor
DEPS phi
global_utils
layer
proto_desc
......
......@@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
accumulation_node
SRCS accumulation_node.cc
DEPS gradient_accumulator phi_api grad_node_info)
DEPS gradient_accumulator phi grad_node_info)
endif()
cc_library(
scale_node
SRCS scale_node.cc
DEPS global_utils phi phi_api grad_node_info)
DEPS global_utils phi grad_node_info)
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
......
cc_library(
eager_scale
SRCS scale.cc
DEPS phi_api phi autograd_meta scale_node)
DEPS phi autograd_meta scale_node)
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
......
......@@ -7,7 +7,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
tensor_utils
SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info accumulation_node)
DEPS phi autograd_meta grad_node_info accumulation_node)
cc_library(
hook_utils
SRCS hook_utils.cc
......@@ -16,7 +16,7 @@ else()
cc_library(
tensor_utils
SRCS tensor_utils.cc
DEPS phi_api autograd_meta grad_node_info)
DEPS phi autograd_meta grad_node_info)
cc_library(
hook_utils
SRCS hook_utils.cc
......
......@@ -52,6 +52,15 @@ if(WIN32)
set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
endif()
if(WITH_PHI_SHARED)
message("Copied phi.dll for Eager AutoCodeGen")
add_custom_command(
OUTPUT ${eager_generator_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${eager_generator_path}
DEPENDS phi)
list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll)
endif()
if(${CBLAS_PROVIDER} STREQUAL MKLML)
message("Copied libiomp5md.dll for Eager AutoCodeGen")
add_custom_command(
......
......@@ -392,7 +392,7 @@ FORWARD_CC_FILE_TEMPLATE = """
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/phi/core/flags.h"
DECLARE_bool(check_nan_inf);
PHI_DECLARE_bool(check_nan_inf);
PHI_DECLARE_string(tensor_operants_mode);
{}
{}
......
cc_library(
custom_operator_node
SRCS custom_operator_node.cc
DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info)
DEPS phi grad_node_info custom_operator)
cc_library(
py_layer_node
SRCS py_layer_node.cc
DEPS pybind phi_api grad_node_info)
DEPS pybind phi grad_node_info)
......@@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
cc_library(
string_array
SRCS string_array.cc
DEPS utf8proc phi_enforce)
DEPS utf8proc phi)
cc_library(
data_type
......@@ -130,7 +130,7 @@ cc_test(
cc_library(
tensor
SRCS tensor_util.cc
DEPS place memory data_type device_context dense_tensor)
DEPS place memory data_type device_context phi)
cc_test(
tensor_test
......@@ -166,12 +166,12 @@ cc_test(
cc_library(
lod_tensor
SRCS lod_tensor.cc
DEPS ddim mixed_vector place tensor framework_proto version)
DEPS phi place tensor framework_proto version)
cc_test(
lod_tensor_test
SRCS lod_tensor_test.cc
DEPS lod_utils lod_tensor memory)
DEPS phi lod_tensor memory)
if(WITH_GPU)
nv_test(
......@@ -188,12 +188,12 @@ endif()
cc_library(
garbage_collector
SRCS garbage_collector.cc
DEPS device_context memory gflags glog)
DEPS device_context memory phi glog)
cc_library(
reader
SRCS reader.cc
DEPS lod_tensor ddim)
DEPS lod_tensor phi)
cc_test(
reader_test
SRCS reader_test.cc
......@@ -202,13 +202,12 @@ cc_test(
cc_test(
threadpool_test
SRCS threadpool_test.cc
DEPS threadpool)
DEPS phi)
cc_library(
var_type_traits
SRCS var_type_traits.cc
DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor
extended_tensor)
DEPS framework_proto scope phi)
if(WITH_GPU)
target_link_libraries(var_type_traits dynload_cuda)
endif()
......@@ -242,7 +241,7 @@ endif()
cc_library(
scope
SRCS scope.cc
DEPS glog threadpool xxhash var_type_traits)
DEPS glog phi xxhash var_type_traits)
cc_library(
device_worker
SRCS device_worker.cc
......@@ -273,12 +272,12 @@ if(WITH_GPU)
nv_test(
data_device_transform_test
SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function scope)
DEPS operator op_registry device_context phi scope)
elseif(WITH_ROCM)
hip_test(
data_device_transform_test
SRCS data_device_transform_test.cu
DEPS operator op_registry device_context math_function scope)
DEPS operator op_registry device_context phi scope)
endif()
if(WITH_GPU)
......@@ -333,7 +332,7 @@ endif()
cc_library(
data_layout_transform
SRCS data_layout_transform.cc
DEPS tensor math_function phi_data_layout_transform)
DEPS tensor phi)
cc_test(
data_layout_transform_test
SRCS data_layout_transform_test.cc
......@@ -342,14 +341,13 @@ cc_test(
cc_library(
data_transform
SRCS data_transform.cc
DEPS math_function
DEPS phi
tensor
framework_proto
selected_rows_utils
data_device_transform
data_type_transform
data_layout_transform
phi_data_transform)
data_layout_transform)
cc_library(
attribute
......@@ -400,7 +398,7 @@ cc_library(
cc_library(
shape_inference
SRCS shape_inference.cc
DEPS ddim attribute selected_rows_utils)
DEPS phi attribute selected_rows_utils)
# every source file that includes "dnnl.h" must depend on mkldnn
# or, the first one should depend on mkldnn
......@@ -433,30 +431,17 @@ if(WITH_XPU)
phi_utils
SRCS phi_utils.cc
DEPS lod_tensor
dense_tensor
selected_rows_utils
int_array
scalar
place
phi
var_type_traits
op_info
xpu_op_list
convert_utils)
xpu_op_list)
else()
cc_library(
phi_utils
SRCS phi_utils.cc
DEPS lod_tensor
dense_tensor
selected_rows_utils
int_array
scalar
place
phi
var_type_traits
op_info
convert_utils)
DEPS lod_tensor selected_rows_utils place phi var_type_traits op_info)
endif()
if(WITH_XPU)
......@@ -482,11 +467,10 @@ if(WITH_XPU)
unused_var_check
nan_inf_utils
phi_utils
kernel_factory
infershape_utils
op_utils
phi
op_compat_infos
get_kerneltype_forvar_utils)
type_info)
else()
cc_library(
operator
......@@ -509,11 +493,10 @@ else()
unused_var_check
nan_inf_utils
phi_utils
kernel_factory
infershape_utils
op_utils
phi
op_compat_infos
get_kerneltype_forvar_utils)
type_info)
endif()
cc_test(
......@@ -543,7 +526,7 @@ cc_library(
version
xxhash
op_dist_attr
scalar
phi
op_version_proto
op_version_registry)
......@@ -853,7 +836,7 @@ if(WITH_DISTRIBUTE)
heter_server
brpc
fleet_executor
flags)
phi)
set(DISTRIBUTE_COMPILE_FLAGS "")
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
......@@ -1071,7 +1054,7 @@ if(WITH_PSCORE)
executor
heter_server
gloo_wrapper
eigen_function
phi
${RPC_DEPS}
graph_gpu_wrapper)
else()
......@@ -1088,7 +1071,7 @@ if(WITH_PSCORE)
executor
heter_server
gloo_wrapper
eigen_function
phi
${RPC_DEPS})
endif()
else()
......@@ -1112,7 +1095,7 @@ cc_test(
cc_library(
selected_rows_utils
SRCS selected_rows_utils.cc
DEPS selected_rows device_context)
DEPS phi device_context)
cc_test(
selected_rows_utils_test
SRCS selected_rows_utils_test.cc
......@@ -1162,12 +1145,11 @@ cc_library(
phi
phi_utils
op_info
shape_inference
sparse_coo_tensor)
shape_inference)
cc_test(
infershape_utils_test
SRCS infershape_utils_test.cc
DEPS infershape_utils infermeta_utils meta_tensor)
DEPS infershape_utils phi)
# Get the current working branch
execute_process(
......@@ -1198,12 +1180,15 @@ cc_library(
operator
dynamic_loader
string_helper
phi_tensor
op_meta_info
phi_api
tensor_api
phi_tensor_operants
operants_manager)
phi
imperative_flag
layer)
cc_library(type_info SRCS type_info.cc)
add_dependencies(type_info framework_proto auto_parallel_proto xxhash)
if(WITH_MKLDNN)
add_dependencies(type_info mkldnn)
endif()
set(FLUID_FRAMEWORK_MODULES
proto_desc
......
......@@ -10,15 +10,15 @@ cc_library(
cc_library(
scale_loss_grad_op_handle
SRCS scale_loss_grad_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory)
DEPS op_handle_base scope lod_tensor phi memory)
cc_library(
fetch_op_handle
SRCS fetch_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory)
DEPS op_handle_base scope lod_tensor phi memory)
cc_library(
fetch_async_op_handle
SRCS fetch_async_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory)
DEPS op_handle_base scope lod_tensor phi memory)
cc_library(
share_tensor_buffer_functor
......@@ -78,7 +78,7 @@ if(WITH_GPU)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor)
......@@ -88,7 +88,7 @@ if(WITH_GPU)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor
......@@ -99,7 +99,7 @@ if(WITH_GPU)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor
......@@ -114,7 +114,7 @@ if(WITH_GPU)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor
......@@ -126,19 +126,17 @@ if(WITH_GPU)
nv_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda
selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi dynload_cuda)
else()
nv_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda
selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi dynload_cuda)
endif()
nv_library(
broadcast_op_handle
SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
nv_library(
fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc
......@@ -154,7 +152,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor)
......@@ -164,7 +162,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor
......@@ -175,7 +173,7 @@ elseif(WITH_ROCM)
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
dynload_cuda
variable_visitor
......@@ -187,19 +185,17 @@ elseif(WITH_ROCM)
hip_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda
selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi dynload_cuda)
else()
hip_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim dynload_cuda
selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi dynload_cuda)
endif()
hip_library(
broadcast_op_handle
SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
hip_library(
fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc
......@@ -212,14 +208,14 @@ else()
cc_library(
all_reduce_op_handle
SRCS all_reduce_op_handle.cc
DEPS op_handle_base scope lod_tensor ddim memory variable_visitor)
DEPS op_handle_base scope lod_tensor phi memory variable_visitor)
cc_library(
fused_all_reduce_op_handle
SRCS fused_all_reduce_op_handle.cc
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
variable_visitor
place)
......@@ -229,7 +225,7 @@ else()
DEPS op_handle_base
scope
lod_tensor
ddim
phi
memory
variable_visitor
place
......@@ -239,17 +235,17 @@ else()
cc_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi)
else()
cc_library(
reduce_op_handle
SRCS reduce_op_handle.cc
DEPS op_handle_base variable_visitor scope ddim selected_rows_functor)
DEPS op_handle_base variable_visitor scope phi)
endif()
cc_library(
broadcast_op_handle
SRCS broadcast_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor)
DEPS op_handle_base scope phi memory variable_visitor)
cc_library(
fused_broadcast_op_handle
SRCS fused_broadcast_op_handle.cc
......@@ -259,7 +255,7 @@ endif()
cc_library(
gather_op_handle
SRCS gather_op_handle.cc
DEPS op_handle_base scope ddim memory variable_visitor)
DEPS op_handle_base scope phi memory variable_visitor)
cc_library(
eager_deletion_op_handle
......@@ -305,7 +301,7 @@ cc_test(
DEPS var_handle
op_handle_base
scope
ddim
phi
memory
device_context
broadcast_op_handle)
......@@ -317,7 +313,7 @@ cc_test_old(
var_handle
op_handle_base
scope
ddim
phi
memory
device_context
gather_op_handle)
......@@ -330,12 +326,12 @@ cc_library(
scope_buffered_ssa_graph_executor
SRCS scope_buffered_ssa_graph_executor.cc
DEPS ssa_graph_executor scope_buffered_monitor)
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory
# device_context reduce_op_handle )
cc_library(
bind_threaded_ssa_graph_executor
SRCS bind_threaded_ssa_graph_executor.cc
DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool
DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool
device_context)
cc_library(
fast_threaded_ssa_graph_executor
......
......@@ -20,9 +20,10 @@ limitations under the License. */
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/ir/graph_printer.h"
#include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
#include "paddle/phi/core/flags.h"
DECLARE_bool(convert_all_blocks);
DECLARE_bool(use_mkldnn);
PHI_DECLARE_bool(use_mkldnn);
#ifdef PADDLE_WITH_CINN
DECLARE_bool(use_cinn);
#endif
......
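The hunk above is one instance of a pattern repeated throughout this patch: global flags move into phi, so call sites replace gflags' `DECLARE_*` with the `PHI_DECLARE_*` wrappers from `paddle/phi/core/flags.h`. A minimal consumer, as a sketch (assuming the usual `FLAGS_<name>` accessor convention carries over):

```cpp
#include "paddle/phi/core/flags.h"

// Declaration only: the flag's storage is defined inside the phi library,
// which keeps a single copy of the value when phi is built as a DLL/.so.
PHI_DECLARE_bool(use_mkldnn);

bool ShouldUseMkldnn() {
  return FLAGS_use_mkldnn;  // gflags-style accessor, unchanged for callers
}
```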
......@@ -32,7 +32,7 @@ cc_library(
cc_library(
cost_model
SRCS cost_model.cc
DEPS executor graph profiler proto_desc phi_device_tracer)
DEPS executor graph profiler proto_desc phi)
set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits)
if(WITH_TESTING)
......@@ -458,9 +458,6 @@ if(WITH_MKLDNN)
graph_to_program_pass
conv_op
conv_transpose_op
math_function
im2col
vol2col
batch_norm_op
generated_op
activation_op
......@@ -468,7 +465,7 @@ if(WITH_MKLDNN)
concat_and_split
naive_executor
device_context
eigen_function)
phi)
if(WITH_GPU OR WITH_ROCM)
set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv)
endif()
......
......@@ -221,7 +221,7 @@ bool InitAndCheckAttrs(const size_t &found_adamw_count,
}
}
// Check whether with_decay and multi_precision are matched
// Check whether with_decay and multi_precision are matched
if (config->with_decay !=
PADDLE_GET_CONST(bool, adamw_op_desc->GetAttr("with_decay")) ||
config->multi_precision !=
......
......@@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM)
cc_test(
test_code_generator
SRCS code_generator_tester.cc
DEPS code_generator phi_backends lod_tensor graph_viz_pass)
DEPS code_generator phi lod_tensor graph_viz_pass)
endif()
cc_library(
fusion_group_pass
SRCS fusion_group_pass.cc elementwise_group_detector.cc
DEPS subgraph_detector fuse_pass_base code_generator phi_backends)
DEPS subgraph_detector fuse_pass_base code_generator phi)
cc_test(
test_fusion_group_pass
SRCS fusion_group_pass_tester.cc
......
......@@ -76,5 +76,4 @@ cc_library(
cc_test(
test_reference_count_pass_last_lived_ops
SRCS test_reference_count_pass_last_lived_ops.cc
DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op
eigen_function)
DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi)
......@@ -16,4 +16,4 @@ cc_library(
cc_library(
staticgraph_executor_statistics
SRCS executor_statistics.cc
DEPS enforce glog phi_os_info)
DEPS enforce glog phi)
......@@ -6,7 +6,6 @@ set(INTERPRETER_DEPS
device_context
global_utils
op_registry
phi_tensor_utils
scope
framework_proto
data_feed_proto
......@@ -31,7 +30,7 @@ set(INTERPRETER_DEPS
enforce
scope
glog
comm_context_manager
phi
${DEVICE_EVENT_LIBS}
glog)
......
......@@ -5,7 +5,7 @@ cc_library(
cc_library(
workqueue
SRCS workqueue.cc
DEPS workqueue_utils enforce glog phi_os_info)
DEPS workqueue_utils enforce glog phi)
cc_test(
workqueue_test
SRCS workqueue_test.cc
......
......@@ -5,7 +5,7 @@ pass_library(
cinn_subgraph_detector
subgraph_detector
cinn_compiler
errors
phi
enforce)
pass_library(cinn_zero_tensor_trick_pass base)
......@@ -17,7 +17,7 @@ cc_library(
cc_library(
transform_type
SRCS transform_type.cc
DEPS errors enforce cinn)
DEPS phi enforce cinn)
cc_library(
cinn_cache_key
SRCS cinn_cache_key.cc
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <unordered_map>
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/extended_tensor.h"
#include "paddle/utils/any.h"
......@@ -52,7 +53,7 @@ class RawTensor : public phi::ExtendedTensor,
T& Get() const {
PADDLE_ENFORCE_EQ(data_.empty(),
false,
platform::errors::PreconditionNotMet(
phi::errors::PreconditionNotMet(
"The data in RawTensor is empty. Please set data "
"before using it."));
......
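The same decoupling shows up in error construction: `platform::errors` gives way to `phi::errors`, so the check compiles against phi headers alone. A self-contained sketch of the new form (a minimal example, assuming `PADDLE_ENFORCE_EQ` is available via phi's enforce header):

```cpp
#include "paddle/phi/core/enforce.h"

// Mirrors the check above: fail with a PreconditionNotMet error built
// from phi::errors instead of the old platform::errors namespace.
void EnsureDataSet(bool data_empty) {
  PADDLE_ENFORCE_EQ(data_empty,
                    false,
                    phi::errors::PreconditionNotMet(
                        "The data in RawTensor is empty. Please set data "
                        "before using it."));
}
```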
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"
namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::RawTensor>::kType =
RegisterStaticType<phi::TensorBase>(
paddle::framework::RawTensor::name());
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::Vocab>::kType =
RegisterStaticType<phi::TensorBase>(paddle::framework::Vocab::name());
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>::kType =
RegisterStaticType<phi::TensorBase>(paddle::framework::Strings::name());
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>::kType =
RegisterStaticType<phi::TensorBase>(
paddle::framework::FeedList::name());
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>::kType =
RegisterStaticType<phi::TensorBase>(egr::VariableCompatTensor::name());
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>::kType =
RegisterStaticType<phi::TensorBase>(paddle::prim::DescTensor::name());
} // namespace phi
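This new `type_info.cc` pins every `TypeInfoTraits<...>::kType` specialization in a single translation unit (see the "fix bugs when compile type_info" commit): if each module that used these types instantiated its own `kType`, the same type could register twice with different identities across the DLL boundary. A hedged usage sketch, assuming the `classof` helper that `TypeInfoTraits` conventionally provides:

```cpp
#include "paddle/fluid/framework/raw_tensor.h"

// True only if t was registered as a RawTensor; consistent across DLL
// boundaries because kType is now defined exactly once, inside phi.
bool IsRawTensor(const phi::TensorBase& t) {
  return paddle::framework::RawTensor::classof(&t);  // assumed API shape
}
```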
cc_library(
imperative_flag
SRCS flags.cc
DEPS gflags flags)
DEPS phi)
cc_library(
var_helper
SRCS var_helper.cc
DEPS tensor selected_rows extended_tensor)
DEPS tensor phi)
if(WITH_XPU)
cc_library(
prepared_operator
......@@ -20,8 +20,7 @@ if(WITH_XPU)
op_kernel_type
data_transform
nan_inf_utils
scalar
int_array
phi
var_helper
profiler
place)
......@@ -38,8 +37,7 @@ else()
op_kernel_type
data_transform
nan_inf_utils
scalar
int_array
phi
var_helper
profiler
place)
......@@ -47,14 +45,14 @@ endif()
cc_library(
layer
SRCS layer.cc
DEPS prepared_operator math_function imperative_flag variable_helper
op_registry var_helper)
DEPS prepared_operator phi imperative_flag variable_helper op_registry
var_helper)
add_subdirectory(jit)
if(WITH_GPU)
cc_library(
layout_autotune
SRCS layout_autotune.cc
DEPS op_info phi_backends)
DEPS op_info phi)
else()
cc_library(
layout_autotune
......@@ -80,15 +78,15 @@ cc_library(
cc_library(
basic_engine
SRCS basic_engine.cc
DEPS layer gradient_accumulator switch_autotune)
DEPS layer gradient_accumulator phi)
cc_library(
engine
SRCS basic_engine.cc partial_grad_engine.cc
DEPS layer gradient_accumulator switch_autotune)
DEPS layer gradient_accumulator phi)
cc_library(
imperative_profiler
SRCS profiler.cc
DEPS flags)
DEPS phi)
if(NOT WIN32)
if(WITH_NCCL OR WITH_RCCL)
cc_library(
......@@ -174,12 +172,4 @@ endif()
cc_library(
gradient_accumulator
SRCS gradient_accumulator.cc
DEPS blas
operator
lod_tensor
selected_rows_utils
selected_rows_functor
var_type_traits
layer
math_function
phi_tensor)
DEPS operator lod_tensor selected_rows_utils var_type_traits layer phi)
......@@ -32,14 +32,8 @@ endif()
# fluid_modules exclude API-interface of inference/api and inference/capi_exp
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(utils_modules pretty_log string_helper benchmark)
if(WITH_CUSTOM_DEVICE)
set(fluid_modules ${fluid_modules} phi_capi)
endif()
add_subdirectory(api)
# Create static inference library if needed
......@@ -51,7 +45,6 @@ set(STATIC_INFERENCE_API
reset_tensor_array
analysis_config
paddle_pass_builder
phi
${mkldnn_quantizer_cfg})
set(OP_LIST
......@@ -64,16 +57,14 @@ set(KERNEL_LIST
# windows GPU static library exceeds the size limit, so do not use create_static_lib; cc_library is a dummy
if(WIN32 AND WITH_GPU)
cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API}
cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API}
${utils_modules})
else()
# message("${fluid_modules}")
# message("PHI_MODULES ${phi_modules}")
# message("${phi_kernels}")
# message("${STATIC_INFERENCE_API}")
# message("${utils_modules}")
create_static_lib(paddle_inference ${fluid_modules} ${phi_modules}
${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules})
create_static_lib(paddle_inference ${fluid_modules} ${STATIC_INFERENCE_API}
${utils_modules})
endif()
if(NOT APPLE)
......@@ -103,7 +94,7 @@ set(SHARED_INFERENCE_SRCS
# shared inference library deps
list(REMOVE_ITEM fluid_modules standalone_executor
interpretercore_garbage_collector)
set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor
set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor
${utils_modules})
if(WITH_CRYPTO)
......@@ -124,12 +115,6 @@ if(WITH_ONNXRUNTIME)
${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc)
endif()
#export all symbols for paddle/phi/api/include/api.h on paddle_inference_shared, only for UNIX
if(UNIX)
set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS}
$<TARGET_OBJECTS:phi_function_api>)
endif()
# Create shared inference library
cc_library(
paddle_inference_shared SHARED
......@@ -141,12 +126,15 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules})
if(WIN32)
set_property(TARGET paddle_inference_shared
PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
target_link_libraries(paddle_inference_shared gflags)
target_link_libraries(paddle_inference_shared phi)
endif()
set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME
paddle_inference)
if(NOT APPLE AND NOT WIN32)
if(NOT APPLE
AND NOT WIN32
AND NOT WITH_TESTING
AND NOT WITH_INFERENCE_API_TEST)
# TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
set(LINK_FLAGS
"-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
......
......@@ -41,7 +41,7 @@ if(WITH_CRYPTO)
list(APPEND paddle_inference_api_deps paddle_crypto)
endif()
if(WITH_CUSTOM_DEVICE)
set(paddle_inference_api_deps ${paddle_inference_api_deps} phi_capi)
set(paddle_inference_api_deps ${paddle_inference_api_deps} phi)
endif()
cc_library(
......@@ -50,7 +50,7 @@ cc_library(
DEPS ${paddle_inference_api_deps})
if(WIN32)
target_link_libraries(paddle_inference_api gflags)
target_link_libraries(paddle_inference_api phi)
endif()
set(inference_deps ${analysis_deps} paddle_inference_api analysis
......
......@@ -72,7 +72,7 @@
#endif
#ifdef PADDLE_WITH_MKLML
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/phi/backends/dynload/mklml.h"
#endif
#ifdef PADDLE_WITH_MKLDNN
......@@ -1121,7 +1121,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers();
phi::dynload::MKL_Free_Buffers();
#endif
return true;
}
......@@ -1185,7 +1185,7 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers();
phi::dynload::MKL_Free_Buffers();
#endif
return true;
}
......@@ -2100,7 +2100,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
// Frees unused memory allocated by the Intel® MKL Memory Allocator to
// avoid memory leak. See:
// https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
platform::dynload::MKL_Free_Buffers();
phi::dynload::MKL_Free_Buffers();
#endif
return true;
}
......
......@@ -199,7 +199,7 @@ if(NOT WIN32)
${MATH_LIB}
${MKLDNN_LIB}
glog
gflags
phi
protobuf
xxhash
cryptopp
......
......@@ -29,6 +29,7 @@ WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
cd `dirname $0`
current_dir=`pwd`
if [ $2 == ON ]; then
# You can export it yourself if you move the install path
MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
......
......@@ -25,7 +25,7 @@ if(WITH_ONNXRUNTIME)
cc_library(
zero_copy_tensor_dummy
SRCS zero_copy_tensor_dummy.cc
DEPS onnxruntime phi_enforce)
DEPS onnxruntime phi)
else()
cc_library(
zero_copy_tensor
......@@ -34,7 +34,7 @@ else()
cc_library(
zero_copy_tensor_dummy
SRCS zero_copy_tensor_dummy.cc
DEPS phi_enforce)
DEPS phi)
endif()
cc_test(
......
......@@ -39,7 +39,7 @@ if(APPLE)
utf8proc
cryptopp
protobuf
gflags
phi
cblas)
endif()
......
......@@ -23,7 +23,7 @@ fi
# 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_ROOT}/build/paddle/phi/
# 3. go test
go clean -testcache
go test -v ./...
......@@ -141,8 +141,7 @@ nv_test(
nv_test(
test_custom_plugin_creater
SRCS test_custom_plugin_creater.cc
DEPS paddle_framework tensorrt_converter op_meta_info custom_operator
init_phi)
DEPS paddle_framework tensorrt_converter phi custom_operator init_phi)
if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will
......
include(ExternalProject)
set(ALLOCATOR_DEPS place stats profiler phi_backends device_context)
set(ALLOCATOR_DEPS place stats profiler phi device_context)
set(ALLOCATOR_SRCS
allocator.cc
cpu_allocator.cc
......@@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM)
endif()
if(WITH_GPU)
list(APPEND ALLOCATOR_DEPS phi_backends)
list(APPEND ALLOCATOR_DEPS phi)
endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2)
......
......@@ -124,7 +124,7 @@ class CUDAGraphAllocator
: underlying_allocator_(allocator) {}
public:
~CUDAGraphAllocator() { VLOG(10) << "CUDAGraphAllocator destructed"; }
~CUDAGraphAllocator() {}
static std::shared_ptr<Allocator> Create(
const std::shared_ptr<Allocator>& allocator) {
......@@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) {
if (ref_cnt == 0) {
cuda_graph_map_.erase(id);
cuda_graph_ref_cnt_.erase(ref_cnt_iter);
VLOG(10) << "Remove memory pool of CUDA Graph with memory ID " << id;
} else {
VLOG(10) << "Decrease memory pool ID " << id << " reference count to be "
<< ref_cnt;
......
......@@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD)
include(unity_build_rule.cmake)
endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api get_expected_kernel_func)
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func)
register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
......@@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM)
endif()
endif()
op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute)
op_library(lstm_op DEPS ${OP_HEADER_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
......@@ -136,17 +136,16 @@ if (WITH_DGC)
endif()
cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator)
cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute cudnn_workspace_helper)
cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows_utils lapack_function
lod_tensor maxouting unpooling pooling lod_rank_table context_project
sequence_pooling executor generator static_prim_api)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_utils
lod_tensor unpooling lod_rank_table context_project executor static_prim_api)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc_functor matrix_inverse matrix_solve)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} cos_sim_functor memory concat_and_split sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} beam_search)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils)
if(WITH_NCCL OR WITH_RCCL)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl)
......@@ -189,7 +188,7 @@ endif()
copy_if_different(${pybind_file} ${pybind_file_final})
if (WITH_CUSTOM_DEVICE)
cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi_api)
cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi type_info)
endif()
if(NOT "${OP_LIST}" STREQUAL "")
......
......@@ -7,7 +7,7 @@ cc_library(
cc_library(
cinn_launch_context
SRCS cinn_launch_context.cc
DEPS ddim
DEPS phi
lod_tensor
scope
proto_desc
......
......@@ -18,7 +18,7 @@ foreach(src ${OPS})
endforeach()
if(WITH_GLOO)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper comm_context_manager)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi)
endif()
register_operators(
......@@ -31,8 +31,7 @@ register_operators(
${COLLECTIVE_DEPS})
if(WITH_NCCL OR WITH_RCCL)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper
comm_context_manager nccl_comm_context)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi)
op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
endif()
......
......@@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
detection_library(generate_proposal_labels_op SRCS
generate_proposal_labels_op.cc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi)
detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
box_decoder_and_assign_op.cu)
......
......@@ -289,7 +289,7 @@ file(APPEND ${op_utils_header}
# Automatically generate the registration code of all arg map functions
# and compile the corresponding target to avoid frequent code conflicts
# when writing to the same file
register_op_utils(op_compat_infos DEPS op_utils)
register_op_utils(op_compat_infos DEPS phi)
copy_if_different(${op_utils_header} ${op_utils_header_final})
......
......@@ -17,11 +17,12 @@ limitations under the License. */
#include <memory>
#include <string>
#include "paddle/phi/core/flags.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h"
#include "paddle/phi/kernels/funcs/detail/gru_kernel.h"
DECLARE_int32(paddle_num_threads);
PHI_DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace operators {
......
......@@ -6,21 +6,20 @@ if(WITH_XPU)
endif()
# please add new math_library in alphabetical order
math_library(concat_and_split DEPS concat_and_split_functor)
math_library(context_project DEPS im2col math_function)
math_library(concat_and_split DEPS phi)
math_library(context_project DEPS phi)
math_library(cos_sim_functor)
math_library(depthwise_conv)
math_library(sample_prob)
math_library(sampler DEPS generator)
math_library(sampler DEPS phi)
# math_library(math_function DEPS blas dense_tensor tensor)
if(WITH_XPU)
math_library(beam_search DEPS math_function beam_search_xpu)
math_library(beam_search DEPS phi beam_search_xpu)
else()
math_library(beam_search DEPS math_function)
math_library(beam_search DEPS phi)
endif()
math_library(unpooling)
math_library(prelu)
math_library(bert_encoder_functor)
math_library(tree2col DEPS math_function)
math_library(tree2col DEPS phi)
......@@ -20,7 +20,7 @@ if(WITH_ARM_BRPC)
framework_proto
sendrecv_rpc
arm_brpc
gflags
phi
glog
snappy
device_context)
......@@ -42,7 +42,7 @@ else()
ssl
crypto
protobuf
gflags
phi
glog
zlib
snappy
......
......@@ -6,5 +6,5 @@ endif()
register_operators()
if(WITH_UNITY_BUILD)
target_link_libraries(paddle_operators_sequence_ops_unity sequence_pooling)
target_link_libraries(paddle_operators_sequence_ops_unity phi)
endif()
......@@ -17,7 +17,7 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/phi/backends/dynload/mklml.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
......
......@@ -6,9 +6,9 @@ cc_library(
cc_test(
errors_test
SRCS errors_test.cc
DEPS errors enforce)
DEPS phi enforce)
set(enforce_deps flags errors flags phi_enforce)
set(enforce_deps phi)
if(WITH_GPU)
set(enforce_deps ${enforce_deps} external_error_proto)
endif()
......@@ -26,20 +26,20 @@ cc_test(
cc_test(
cpu_info_test
SRCS cpu_info_test.cc
DEPS phi_backends)
DEPS phi)
cc_test(
os_info_test
SRCS os_info_test.cc
DEPS phi_os_info)
DEPS phi)
cc_library(
place
SRCS place.cc
DEPS enforce phi_place)
DEPS enforce phi)
cc_test(
place_test
SRCS place_test.cc
DEPS place glog gflags)
DEPS place glog phi)
if(WITH_MKLDNN)
set(MKLDNN_CTX_DEPS mkldnn)
......@@ -104,7 +104,7 @@ endif()
cc_library(
init
SRCS init.cc
DEPS device_context custom_kernel context_pool memcpy)
DEPS device_context phi memcpy)
# memcpy depends on device_context; add deps individually here to
# avoid cyclic dependencies
......@@ -117,7 +117,6 @@ cc_library(
xxhash
${STREAM_CALLBACK_DEPS}
place
phi_place
eigen3
cpu_helper
framework_proto
......@@ -126,12 +125,8 @@ cc_library(
${MKLDNN_CTX_DEPS}
${dgc_deps}
dlpack
cudnn_workspace_helper
${XPU_CTX_DEPS}
phi_backends
phi_device_context
generator
phi_enforce)
phi
${XPU_CTX_DEPS})
cc_library(
collective_helper
......@@ -189,12 +184,12 @@ if(WITH_GPU)
cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context
allocator phi_backends)
allocator phi)
else()
nv_library(
cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi_backends)
DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi)
endif()
nv_test(
device_context_test
......@@ -245,7 +240,7 @@ cc_test(
cc_library(
lodtensor_printer
SRCS lodtensor_printer.cc
DEPS ddim
DEPS phi
place
tensor
scope
......@@ -263,41 +258,30 @@ if(WITH_GPU)
nv_library(
profiler
SRCS profiler.cc profiler.cu
DEPS phi_os_info
phi_device_tracer
DEPS phi
gpu_info
enforce
dynload_cuda
new_profiler
stats
op_proto_maker
shape_inference
phi_profiler)
shape_inference)
elseif(WITH_ROCM)
hip_library(
profiler
SRCS profiler.cc profiler.cu
DEPS phi_os_info
phi_device_tracer
DEPS phi
gpu_info
enforce
new_profiler
stats
op_proto_maker
shape_inference
phi_profiler)
shape_inference)
else()
cc_library(
profiler
SRCS profiler.cc
DEPS phi_os_info
phi_device_tracer
enforce
new_profiler
stats
op_proto_maker
shape_inference
phi_profiler)
DEPS phi enforce new_profiler stats op_proto_maker shape_inference)
endif()
cc_test(
......@@ -333,7 +317,7 @@ if(WITH_GPU)
nv_test(
test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags)
DEPS gpu_info phi)
nv_library(
cuda_device_guard
SRCS cuda_device_guard.cc
......@@ -348,7 +332,7 @@ if(WITH_ROCM)
hip_test(
test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags)
DEPS gpu_info phi)
hip_library(
cuda_device_guard
SRCS cuda_device_guard.cc
......@@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32)
cc_test(
device_code_test
SRCS device_code_test.cc
DEPS phi_backends lod_tensor)
DEPS phi lod_tensor)
endif()
endif()
......@@ -382,4 +366,4 @@ cc_library(
cc_test(
init_phi_test
SRCS init_phi_test.cc
DEPS phi_tensor init_phi)
DEPS phi init_phi)
......@@ -17,7 +17,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLML
#include <omp.h>
#include "paddle/fluid/platform/dynload/mklml.h"
#include "paddle/phi/backends/dynload/mklml.h"
#endif
#ifdef PADDLE_USE_OPENBLAS
......@@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) {
openblas_set_num_threads(real_num_threads);
#elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1;
platform::dynload::MKL_Set_Num_Threads(real_num_threads);
phi::dynload::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(real_num_threads);
#elif defined(PADDLE_USE_REFERENCE_CBLAS)
// cblas does not support multi-threading
......
......@@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE)
cc_library(
custom_device_resource_pool
SRCS custom_device_resource_pool.cc
DEPS gflags glog enforce monitor)
DEPS phi glog enforce monitor)
cc_test(
custom_device_test
SRCS custom_device_test.cc
DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator)
DEPS phi gradient_accumulator)
endif()
......@@ -3,13 +3,7 @@ if(WITH_GPU)
nv_library(
gpu_info
SRCS gpu_info.cc
DEPS phi_backends
gflags
glog
enforce
monitor
dynload_cuda
malloc)
DEPS phi glog enforce monitor dynload_cuda malloc)
nv_test(cuda_helper_test SRCS cuda_helper_test.cu)
nv_test(
......@@ -21,7 +15,7 @@ elseif(WITH_ROCM)
hip_library(
gpu_info
SRCS gpu_info.cc
DEPS phi_backends gflags glog enforce monitor dynload_cuda)
DEPS phi glog enforce monitor dynload_cuda)
hip_test(cuda_helper_test SRCS cuda_helper_test.cu)
hip_test(
......
......@@ -14,23 +14,11 @@ set(XPU_CTX_DEPS
cc_library(
xpu_info
SRCS xpu_info.cc
DEPS gflags
glog
enforce
xpulib
device_context
place
phi_backends)
DEPS glog enforce xpulib device_context place phi)
cc_library(
xpu_op_list
SRCS xpu_op_list.cc
DEPS gflags
glog
enforce
xpulib
device_context
op_kernel_type
phi_backends)
DEPS glog enforce xpulib device_context op_kernel_type phi)
cc_library(
xpu_resource_pool
SRCS xpu_resource_pool.cc
......
cc_library(
dynamic_loader
SRCS dynamic_loader.cc
DEPS glog gflags enforce phi_dynamic_loader)
DEPS glog enforce phi)
list(
APPEND
......@@ -57,26 +57,20 @@ if(WITH_ROCM)
hip_library(
dynload_cuda
SRCS ${HIP_SRCS}
DEPS dynamic_loader phi_dynload_cuda)
DEPS dynamic_loader phi)
cc_library(
dynload_warpctc
SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc)
DEPS dynamic_loader warpctc phi)
else()
nv_library(
dynload_cuda
SRCS ${CUDA_SRCS}
DEPS dynamic_loader phi_dynload_cuda)
DEPS dynamic_loader phi)
cc_library(
dynload_warpctc
SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc)
endif()
if(WITH_MKLML)
cc_library(
dynload_mklml
SRCS mklml.cc
DEPS dynamic_loader mklml phi_dynload_mklml)
DEPS dynamic_loader warpctc phi)
endif()
# TODO(TJ): add iomp, mkldnn?
......@@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL)
cc_library(
dynload_mklrt
SRCS mklrt.cc
DEPS dynamic_loader phi_dynload_mklrt)
DEPS dynamic_loader phi)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif()
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mkl.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/mklml.h"
namespace paddle {
namespace platform {
namespace dynload {
/**
* The following macro definition generates a struct (for each function)
* that dynamically loads the corresponding mklml routine
* via operator overloading.
*/
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
DYNAMIC_LOAD_MKLML_WRAP(__name)
#define MKLML_ROUTINE_EACH(__macro) \
__macro(cblas_sgemm); \
__macro(cblas_dgemm); \
__macro(cblas_cgemm); \
__macro(cblas_zgemm); \
__macro(cblas_saxpy); \
__macro(cblas_daxpy); \
__macro(cblas_caxpy); \
__macro(cblas_zaxpy); \
__macro(cblas_scopy); \
__macro(cblas_dcopy); \
__macro(cblas_ccopy); \
__macro(cblas_zcopy); \
__macro(cblas_sgemv); \
__macro(cblas_dgemv); \
__macro(cblas_cgemv); \
__macro(cblas_zgemv); \
__macro(cblas_strsm); \
__macro(cblas_dtrsm); \
__macro(cblas_ctrsm); \
__macro(cblas_ztrsm); \
__macro(cblas_sgemm_alloc); \
__macro(cblas_dgemm_alloc); \
__macro(cblas_sgemm_pack); \
__macro(cblas_dgemm_pack); \
__macro(cblas_sgemm_compute); \
__macro(cblas_dgemm_compute); \
__macro(cblas_sgemm_free); \
__macro(cblas_dgemm_free); \
__macro(cblas_sgemm_batch); \
__macro(cblas_dgemm_batch); \
__macro(cblas_cgemm_batch); \
__macro(cblas_zgemm_batch); \
__macro(cblas_sdot); \
__macro(cblas_ddot); \
__macro(cblas_sasum); \
__macro(cblas_dasum); \
__macro(cblas_isamax); \
__macro(cblas_idamax); \
__macro(cblas_sscal); \
__macro(cblas_dscal); \
__macro(vsAdd); \
__macro(vdAdd); \
__macro(vsSub); \
__macro(vdSub); \
__macro(vsMul); \
__macro(vdMul); \
__macro(vsDiv); \
__macro(vdDiv); \
__macro(vsExp); \
__macro(vdExp); \
__macro(vsSqr); \
__macro(vdSqr); \
__macro(vsPowx); \
__macro(vdPowx); \
__macro(vsInv); \
__macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Free_Buffers); \
__macro(MKL_Set_Num_Threads); \
__macro(MKL_Get_Max_Threads);
MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm);
#endif
#undef DYNAMIC_LOAD_MKLML_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
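The header above keeps the old `paddle::platform::dynload` spelling as thin aliases over `phi::dynload`, so existing fluid call sites compile unchanged while the actual dlopen/dlsym machinery now lives in phi. A usage sketch (the `DynLoad__*` objects are functors, invocable like plain functions):

```cpp
#include "paddle/fluid/platform/dynload/mklml.h"

void ReleaseMklScratch() {
  // Resolves through phi::dynload's lazily loaded function pointer.
  paddle::platform::dynload::MKL_Free_Buffers();
}
```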
......@@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler,
DEFINE_bool(enable_record_memory, false, "enable memory recorder");
#if defined(_WIN32) && defined(PHI_SHARED)
phi::ProfilerState phi::ProfilerHelper::g_state = phi::ProfilerState::kDisabled;
bool phi::ProfilerHelper::g_enable_nvprof_hook = false;
thread_local uint64_t phi::ProfilerHelper::g_thread_id;
uint32_t phi::ProfilerHelper::g_next_thread_id = 0;
std::mutex phi::ProfilerHelper::g_all_event_lists_mutex;
std::list<std::shared_ptr<phi::EventList<phi::Event>>>
phi::ProfilerHelper::g_all_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::Event>>
phi::ProfilerHelper::g_event_list;
std::list<std::shared_ptr<phi::EventList<phi::MemEvent>>>
phi::ProfilerHelper::g_all_mem_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::MemEvent>>
phi::ProfilerHelper::g_mem_event_list;
std::mutex phi::ProfilerHelper::g_all_mem_event_lists_mutex;
#endif
namespace paddle {
namespace platform {
......
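The `_WIN32 && PHI_SHARED` block above exists because class static data members need exactly one definition, and on Windows that definition must be pinned in a specific binary; otherwise each module including the profiler headers could end up with its own copy of the state. A generic illustration of the rule (not Paddle-specific):

```cpp
// header.h: declaration, visible to every module that includes it
struct Counter {
  static int value;  // declaration only; no storage yet
};

// exactly one .cc compiled into the DLL provides the single definition
int Counter::value = 0;
```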
cc_library(
host_tracer
SRCS host_tracer.cc
DEPS framework_proto enforce ddim var_type_traits)
DEPS framework_proto enforce phi var_type_traits)
cc_library(
cuda_tracer
SRCS cuda_tracer.cc cupti_data_process.cc
......@@ -28,7 +28,7 @@ cc_library(
cc_library(
cpu_utilization
SRCS cpu_utilization.cc
DEPS phi_backends phi_os_info enforce glog)
DEPS phi enforce glog)
cc_library(
new_profiler
SRCS profiler.cc
......
......@@ -28,7 +28,6 @@ set(PYBIND_DEPS
gloo_wrapper
infer_io_utils
heter_wrapper
generator
op_version_registry
ps_gpu_wrapper
custom_operator
......@@ -37,16 +36,13 @@ set(PYBIND_DEPS
fleet_executor
global_utils
phi_utils
tcp_store
comm_context_manager
phi
new_profiler
auto_parallel
jit_layer
jit_property
prim_utils
operants_manager
phi_tensor_operants
static_tensor_operants)
static_tensor_operants
type_info)
if(WITH_PSCORE)
set(PYBIND_DEPS ${PYBIND_DEPS} ps_service)
......@@ -65,7 +61,7 @@ if(WITH_RPC)
zlib
leveldb
snappy
gflags
phi
glog)
endif()
if(WITH_GPU OR WITH_ROCM)
......@@ -148,7 +144,6 @@ set(PYBIND_SRCS
auto_parallel_py.cc)
if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi)
set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
endif()
......@@ -334,6 +329,14 @@ if(WITH_PYTHON)
")\n"
"exit /b 0")
if(WITH_PHI_SHARED)
add_custom_command(
OUTPUT ${op_impl_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${op_impl_path}
DEPENDS phi)
list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/phi.dll)
endif()
if(${CBLAS_PROVIDER} STREQUAL MKLML)
add_custom_command(
OUTPUT ${op_impl_path}/libiomp5md.dll
......@@ -481,10 +484,8 @@ if(WITH_PYTHON)
list(APPEND PYBIND_DEPS python)
list(APPEND PYBIND_DEPS custom_operator)
list(APPEND PYBIND_DEPS custom_operator_node)
list(APPEND PYBIND_DEPS tensor_api)
list(APPEND PYBIND_DEPS eager_tensor_operants)
list(APPEND PYBIND_DEPS pybind_util)
list(APPEND PYBIND_DEPS flags)
endif()
# On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so,
......
......@@ -38,7 +38,9 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
DECLARE_bool(check_nan_inf);
#include "paddle/phi/core/flags.h"
PHI_DECLARE_bool(check_nan_inf);
namespace paddle {
namespace pybind {
......
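Replacing the glog/gflags-style DECLARE_bool with PHI_DECLARE_bool re-points the declaration at the flag registry that now lives inside libphi. Following the usual DEFINE/DECLARE convention, the declared object is named FLAGS_check_nan_inf and is read like an ordinary bool; a usage sketch, assuming that convention:

    #include "paddle/phi/core/flags.h"

    PHI_DECLARE_bool(check_nan_inf);  // flag is defined and owned by libphi

    void RunOpWithDebugCheck() {
      if (FLAGS_check_nan_inf) {
        // walk the op's outputs and raise on the first NaN/Inf value
      }
    }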
......@@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h)
# phi auto cmake utils
include(phi)
set(common_srcs CACHE INTERNAL "" FORCE)
set(api_srcs CACHE INTERNAL "" FORCE)
set(capi_srcs CACHE INTERNAL "" FORCE)
set(core_srcs CACHE INTERNAL "" FORCE)
set(backends_srcs CACHE INTERNAL "" FORCE)
set(kernels_srcs CACHE INTERNAL "" FORCE)
set(infermeta_srcs CACHE INTERNAL "" FORCE)
#set(excluded_srcs CACHE INTERNAL "" FORCE)
# paddle experimental common components
add_subdirectory(common)
......@@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE)
add_subdirectory(capi)
endif()
# make an unity target for compile deps
set(PHI_DEPS
convert_utils
dense_tensor
phi_backends
kernel_factory
kernel_context
arg_map_context
infermeta
lod_utils
sparse_csr_tensor
sparse_coo_tensor
string_tensor
api_scalar
api_int_array
extended_tensor
dist_attr
dist_mapper)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
cc_library(phi DEPS ${PHI_DEPS})
phi_profiler_proto
auto_parallel_proto
gflags
glog
warpctc
warprnnt
eigen3
xxhash
cblas
utf8proc)
if(WITH_GPU)
list(APPEND PHI_DEPS external_error_proto)
endif()
if(WITH_ASCEND_CL)
list(APPEND PHI_DEPS npu_hccl)
endif()
if(WITH_FLASHATTN)
list(APPEND PHI_DEPS flashattn)
endif()
if(WITH_XBYAK)
list(APPEND PHI_DEPS xbyak)
endif()
if(WITH_MKLDNN)
list(APPEND PHI_DEPS mkldnn)
endif()
if(WITH_GLOO)
list(APPEND PHI_DEPS gloo)
endif()
if(WITH_CUDNN_FRONTEND)
list(APPEND PHI_DEPS cudnn-frontend)
endif()
if(WITH_POCKETFFT)
list(APPEND PHI_DEPS pocketfft)
endif()
if(WITH_MKLML)
list(APPEND PHI_DEPS pocketfft dynload_mklml)
endif()
if(WITH_XPU)
list(APPEND PHI_DEPS xpulib)
endif()
set(PHI_SRCS
${common_srcs}
${api_srcs}
${core_srcs}
${backends_srcs}
${kernels_srcs}
${infermeta_srcs}
${capi_srcs})
if(WITH_PHI_SHARED)
set(PHI_BUILD_TYPE
SHARED
CACHE INTERNAL "" FORCE)
else()
set(PHI_BUILD_TYPE
STATIC
CACHE INTERNAL "" FORCE)
endif()
if(WITH_GPU)
add_definitions(-DCUDA_REAL_ARCHS=${NVCC_FLAGS_EXTRA_real_archs}
)# for backends/gpu/gpu_resources.cc
nv_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
elseif(WITH_ROCM)
hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS})
target_link_libraries(phi ${PHI_DEPS})
elseif(WITH_XPU_KP)
xpu_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
else()
cc_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
endif()
if(WIN32)
target_link_libraries(phi shlwapi.lib)
endif()
if(WIN32)
if(WITH_PHI_SHARED)
set_property(TARGET phi PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(PHI_NAME
phi.dll
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
phi.lib
CACHE INTERNAL "" FORCE)
endif()
elseif(APPLE)
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.dylib
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
else()
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.so
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
endif()
set(PHI_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}"
CACHE FILEPATH "PHI Library" FORCE)
if(MKL_FOUND AND WITH_ONEMKL)
target_include_directories(phi PRIVATE ${MKL_INCLUDE})
endif()
add_dependencies(phi extern_lapack)
if(WITH_CUTLASS)
add_dependencies(phi cutlass_codegen)
add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION"
)# for memory_efficient_attention.h
endif()
if(WITH_FLASHATTN)
add_dependencies(phi flashattn)
endif()
set(phi_extension_header_file
${CMAKE_CURRENT_SOURCE_DIR}/extension.h
......
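The WINDOWS_EXPORT_ALL_SYMBOLS property and the PHI_SHARED definition configured above work together with the PADDLE_API annotation that appears on classes elsewhere in this diff. The sketch below shows how such an export macro is typically defined; the PHI_INSIDE_DLL guard is hypothetical, and Paddle's actual definition may differ:

    #if defined(_WIN32) && defined(PHI_SHARED)
    #if defined(PHI_INSIDE_DLL)  // hypothetical: set while compiling phi.dll
    #define PADDLE_API __declspec(dllexport)
    #else  // consumers importing symbols from phi.dll
    #define PADDLE_API __declspec(dllimport)
    #endif
    #else
    #define PADDLE_API __attribute__((visibility("default")))
    #endif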
add_subdirectory(profiler)
add_subdirectory(lib)
cc_library(
phi_api
SRCS all.cc
DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api
strings_api)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/api/all.h"
namespace paddle {
namespace experimental {} // namespace experimental
} // namespace paddle
......@@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext {
void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
void EmplaceBackAttr(paddle::any attr);
void EmplaceBackAttrs(const std::vector<paddle::any>& attrs) {
attrs_ = std::move(attrs);
}
void EmplaceBackAttrs(const std::vector<paddle::any>& attrs);
const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const;
const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const;
......@@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext {
paddle::optional<Tensor> OptionalInputAt(size_t idx);
paddle::optional<std::vector<Tensor>> OptionalInputsBetween(size_t start,
size_t end);
const std::vector<paddle::any>& Attrs() const { return attrs_; }
const std::vector<std::pair<size_t, size_t>>& InputRange() {
return input_range_;
}
const std::vector<std::pair<size_t, size_t>>& OutputRange() {
return output_range_;
}
const std::vector<paddle::any>& Attrs() const;
const std::vector<std::pair<size_t, size_t>>& InputRange();
const std::vector<std::pair<size_t, size_t>>& OutputRange();
Tensor* MutableOutputAt(size_t idx);
std::vector<Tensor*> MutableOutputBetween(size_t start, size_t end);
std::vector<Tensor> OutputsBetween(size_t start, size_t end);
......@@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo {
//////////////// Op Meta Info Helper /////////////////
class OpMetaInfoHelper {
public:
static const std::string& GetOpName(const paddle::OpMetaInfo& info) {
return info.name_;
}
static const std::string& GetOpName(const paddle::OpMetaInfo& info);
static const std::vector<std::string>& GetInputs(
const paddle::OpMetaInfo& info) {
return info.inputs_;
}
const paddle::OpMetaInfo& info);
static const std::vector<std::string>& GetOutputs(
const paddle::OpMetaInfo& info) {
return info.outputs_;
}
const paddle::OpMetaInfo& info);
static const std::vector<std::string>& GetAttrs(
const paddle::OpMetaInfo& info) {
return info.attrs_;
}
const paddle::OpMetaInfo& info);
static const std::unordered_map<std::string, std::string>& GetInplaceMap(
const paddle::OpMetaInfo& info) {
return info.inplace_map_;
}
const paddle::OpMetaInfo& info);
static const std::unordered_map<std::string, std::string>&
GetInplaceReverseMap(const paddle::OpMetaInfo& info) {
return info.inplace_reverse_map_;
}
static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) {
return info.kernel_fn_;
}
static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
GetInplaceReverseMap(const paddle::OpMetaInfo& info);
static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info);
static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info);
static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info);
};
//////////////// Op Meta Info Map /////////////////
......
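The accessor bodies deleted here reappear later in this diff as out-of-line definitions in op_meta_info.cc. With phi built as a shared library, each of these functions should have exactly one definition, exported from libphi, instead of an inline copy instantiated in every importing module. The shape of the refactor, reduced to a toy example:

    // meta.h, the header shipped with the shared library
    struct Meta {
      const char* name() const;  // declaration only
     private:
      const char* name_ = "demo";
    };

    // meta.cc, compiled into the shared library: the single exported definition
    const char* Meta::name() const { return name_; }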
......@@ -410,7 +410,7 @@ class PADDLE_API Tensor final {
*
* @return const std::string&
*/
const std::string& name() const { return name_; }
const std::string& name() const;
/**
* @brief Set name of Tensor.
......@@ -419,7 +419,7 @@ class PADDLE_API Tensor final {
*
* @param const std::string& name
*/
void set_name(const std::string& name) { name_ = name; }
void set_name(const std::string& name);
/* Part 5: Data Transform methods */
/* Alert!!!!: All copy method can only deep copy impl, autograd info only be
......
if(WITH_GPU)
nv_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
elseif(WITH_ROCM)
hip_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
else()
cc_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
endif()
set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py)
# forward api file
......@@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND)
find_package(PythonInterp REQUIRED)
endif()
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
# generate forward api
add_custom_command(
OUTPUT ${api_header_file} ${api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp}
--api_source_path ${api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp}
${api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp}
${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file}
${api_gen_base}
VERBATIM)
--api_source_path ${api_source_file_tmp})
# generate backward api
add_custom_command(
OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp}
${bw_api_source_file_tmp}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path
${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path
${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp}
${bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp}
${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
${legacy_bw_api_yaml_file}
VERBATIM)
${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp})
# generate fused_op api
add_custom_command(
OUTPUT ${fused_api_header_file} ${fused_api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file}
--is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp}
--api_source_path ${fused_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_header_file_tmp}
${fused_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_source_file_tmp}
${fused_api_source_file}
COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}"
DEPENDS ${fused_api_yaml_file} ${api_gen_file} ${api_gen_base}
VERBATIM)
--api_source_path ${fused_api_source_file_tmp})
# generate fused_op backward api
add_custom_command(
OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file}
${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path
${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path
${fused_bw_api_header_file_tmp} --backward_source_path
${fused_bw_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp}
${fused_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp}
${fused_bw_api_source_file}
COMMENT
"copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}"
DEPENDS ${fused_bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
VERBATIM)
${fused_bw_api_source_file_tmp})
# generate sparse api
add_custom_command(
OUTPUT ${sparse_api_header_file} ${sparse_api_source_file}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path
${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp}
--api_source_path ${sparse_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp}
${sparse_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp}
${sparse_api_source_file}
COMMENT
"copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}"
DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
--api_source_path ${sparse_api_source_file_tmp})
# generate backward sparse api
add_custom_command(
OUTPUT ${sparse_bw_api_header_file} ${sparse_bw_api_source_file}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path
${sparse_bw_api_yaml_file} --api_header_path
${sparse_bw_api_header_file_tmp} --api_source_path
${sparse_bw_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp}
${sparse_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp}
${sparse_bw_api_source_file}
COMMENT
"copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}"
DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base}
${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file}
VERBATIM)
${sparse_bw_api_source_file_tmp})
# generate strings api
add_custom_command(
OUTPUT ${strings_api_header_file} ${strings_api_source_file}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path
${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp}
--api_source_path ${strings_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp}
${strings_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp}
${strings_api_source_file}
COMMENT
"copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}"
DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
--api_source_path ${strings_api_source_file_tmp})
# generate dygraph(intermediate) api
add_custom_command(
OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file}
--dygraph_api_header_path ${dygraph_api_header_file_tmp}
--dygraph_api_source_path ${dygraph_api_source_file_tmp}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp}
${dygraph_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp}
${dygraph_api_source_file}
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${sparse_api_yaml_file}
${im_api_gen_file} ${api_gen_base} ${api_gen_file}
VERBATIM)
--dygraph_api_source_path ${dygraph_api_source_file_tmp})
# generate wrapped infermeta
add_custom_command(
OUTPUT ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file}
execute_process(
COMMAND
${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path
${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path
${wrapped_infermeta_header_file} --wrapped_infermeta_source_path
${wrapped_infermeta_source_file}
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${wrapped_infermeta_gen_file}
${api_gen_base}
VERBATIM)
${wrapped_infermeta_source_file})
# generate tensor and tensor operants file
message("create or copy auto-geneated tensor files")
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator
COMMAND
......@@ -324,154 +209,70 @@ if(${_result})
message(FATAL_ERROR "tensor codegen failed, exiting.")
endif()
set(generated_tensor_files
"${operants_base_file}" "${tensor_api_source_file}"
"${phi_tensor_operants_header_file}" "${phi_tensor_operants_source_file}"
"${operants_manager_header_file}" "${operants_manager_source_file}")
set(generated_files
"${operants_base_file}"
"${tensor_api_source_file}"
"${phi_tensor_operants_header_file}"
"${phi_tensor_operants_source_file}"
"${operants_manager_header_file}"
"${operants_manager_source_file}"
"${wrapped_infermeta_source_file}"
"${api_source_file}"
"${api_header_file}"
"${bw_api_source_file}"
"${bw_api_header_file}"
"${fused_api_source_file}"
"${fused_api_header_file}"
"${fused_bw_api_source_file}"
"${fused_bw_api_header_file}"
"${sparse_api_source_file}"
"${sparse_api_header_file}"
"${sparse_bw_api_source_file}"
"${sparse_bw_api_header_file}"
"${dygraph_api_source_file}"
"${dygraph_api_header_file}"
"${strings_api_source_file}"
"${strings_api_header_file}")
foreach(generated_tensor_file ${generated_tensor_files})
if(EXISTS "${generated_tensor_file}.tmp" AND EXISTS
"${generated_tensor_file}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${generated_tensor_file}.tmp" "${generated_tensor_file}")
message(
"copy if different ${generated_tensor_file}.tmp ${generated_tensor_file}")
elseif(EXISTS "${generated_tensor_file}.tmp")
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy "${generated_tensor_file}.tmp"
"${generated_tensor_file}")
message("copy ${generated_tensor_file}.tmp ${generated_tensor_file}")
foreach(generated_file ${generated_files})
if(EXISTS "${generated_file}.tmp" AND EXISTS "${generated_file}")
execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
"${generated_file}.tmp" "${generated_file}")
message("copy if different ${generated_file}.tmp ${generated_file}")
elseif(EXISTS "${generated_file}.tmp")
execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_file}.tmp"
"${generated_file}")
message("copy ${generated_file}.tmp ${generated_file}")
endif()
endforeach()
cc_library(
op_meta_info
SRCS op_meta_info.cc
DEPS phi_tensor_raw)
cc_library(
wrapped_infermeta
SRCS ${wrapped_infermeta_source_file}
DEPS phi)
cc_library(
context_pool
SRCS context_pool.cc
DEPS phi_backends phi_enforce place init phi_device_context)
cc_library(
api_tensor_utils
SRCS tensor_utils.cc
DEPS phi_tensor_raw)
cc_library(
kernel_dispatch
SRCS kernel_dispatch.cc
DEPS phi_tensor_raw phi_backends kernel_factory context_pool)
cc_library(
api_gen_utils
SRCS api_gen_utils.cc
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor
infermeta_utils)
cc_library(
phi_data_transform
SRCS data_transform.cc
DEPS phi_tensor_raw phi tensor)
cc_library(
api_custom_impl
SRCS api_custom_impl.cc
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
phi_data_transform
phi_profiler)
cc_library(
phi_function_api
SRCS ${api_source_file} ${fused_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
api_custom_impl
api_tensor_utils
phi_profiler)
cc_library(
phi_bw_function_api
SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
sparse_backward_infermeta
phi_data_transform
phi_function_api
api_custom_impl
global_utils
phi_profiler)
cc_library(
sparse_api
SRCS ${sparse_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
sparse_bw_api
SRCS ${sparse_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
sparse_api
sparse_backward_infermeta
phi_profiler)
cc_library(
phi_dygraph_api
SRCS ${dygraph_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
phi_function_api
sparse_api
phi_profiler)
cc_library(
strings_api
SRCS ${strings_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
phi_tensor
SRCS tensor_method.cc
DEPS phi_tensor_raw
phi_function_api
api_gen_utils
kernel_dispatch
infermeta
sparse_infermeta
sparse_api
strings_api)
cc_library(
tensor_copy
SRCS tensor_copy.cc
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(
api_scalar
SRCS scalar.cc
DEPS tensor_copy)
cc_library(
api_int_array
SRCS int_array.cc
DEPS tensor_copy)
cc_library(
phi_tensor_operants
SRCS ${phi_tensor_operants_source_file}
DEPS phi_function_api)
cc_library(
operants_manager
SRCS ${operants_manager_source_file}
DEPS phi_enforce)
cc_library(
tensor_api
SRCS ${tensor_api_source_file}
DEPS operants_manager)
collect_srcs(
api_srcs
SRCS
tensor.cc
op_meta_info.cc
context_pool.cc
tensor_utils.cc
kernel_dispatch.cc
api_gen_utils.cc
data_transform.cc
api_custom_impl.cc
tensor_method.cc
tensor_copy.cc
scalar.cc
int_array.cc)
collect_generated_srcs(
api_srcs
SRCS
${wrapped_infermeta_source_file}
${api_source_file}
${bw_api_source_file}
${fused_api_source_file}
${fused_bw_api_source_file}
${sparse_api_source_file}
${sparse_bw_api_source_file}
${dygraph_api_source_file}
${strings_api_source_file}
${phi_tensor_operants_source_file}
${operants_manager_source_file}
${tensor_api_source_file})
......@@ -65,7 +65,8 @@ PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) {
PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU,
PADDLE_ENFORCE_EQ(place.GetType(),
phi::AllocationType::GPU,
phi::errors::InvalidArgument(
"GetCurrentCUDAStream only supports GPUPlace input. "
"However, your input is place=%s",
......
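This hunk, like several below, converts a generic PADDLE_ENFORCE(cond, ...) check into the comparison form PADDLE_ENFORCE_EQ (or PADDLE_ENFORCE_NE). Because the comparison macros receive the two operands separately, the raised error can embed both actual values rather than only reporting that an opaque boolean condition failed:

    // before: the failure message carries no operand values
    PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU, ...);

    // after: both operands are available to the error formatter
    PADDLE_ENFORCE_EQ(place.GetType(),
                      phi::AllocationType::GPU,
                      phi::errors::InvalidArgument(
                          "GetCurrentCUDAStream only supports GPUPlace input."));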
......@@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) {
<< " has value of type: " << attrs_[attrs_.size() - 1].type().name();
}
void CustomOpKernelContext::EmplaceBackAttrs(
const std::vector<paddle::any>& attrs) {
attrs_ = std::move(attrs);
}
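A side note on the definition above: since the parameter is a const lvalue reference, std::move(attrs) cannot move anything; overload resolution falls back to the copy assignment, so the statement behaves exactly like attrs_ = attrs;. A genuine move needs a by-value (or rvalue-reference) parameter, as in this sketch:

    #include <any>
    #include <utility>
    #include <vector>

    struct KernelContextDemo {
      // Pass by value so the caller chooses copy vs. move; the std::move
      // below then really transfers the buffer.
      void EmplaceBackAttrs(std::vector<std::any> attrs) {
        attrs_ = std::move(attrs);
      }
      std::vector<std::any> attrs_;
    };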
const Tensor& CustomOpKernelContext::InputAt(size_t idx) const {
return inputs_.at(idx);
}
......@@ -132,6 +137,10 @@ std::vector<Tensor> CustomOpKernelContext::InputsBetween(size_t start,
return rlt;
}
const std::vector<paddle::any>& CustomOpKernelContext::Attrs() const {
return attrs_;
}
Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) {
return inputs_.at(idx);
}
......@@ -193,6 +202,16 @@ const std::pair<size_t, size_t>& CustomOpKernelContext::OutputRangeAt(
return output_range_.at(idx);
}
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::InputRange() {
return input_range_;
}
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::OutputRange() {
return output_range_;
}
void CustomOpKernelContext::ConstructInplaceIndex(
const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs,
......@@ -208,8 +227,9 @@ void CustomOpKernelContext::ConstructInplaceIndex(
continue;
}
auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input));
PADDLE_ENFORCE(
out_iter != outputs.end(),
PADDLE_ENFORCE_NE(
out_iter,
outputs.end(),
phi::errors::NotFound("Can't find the mapped value of %s, please check "
"the input of `Inplace` again and make "
"sure you registered your op accurately. ",
......@@ -253,8 +273,9 @@ void CustomOpKernelContext::AssignInplaceOutputs() {
size_t out_start_idx = output_range_[pair.second].first;
size_t out_end_idx = output_range_[pair.second].second;
size_t assign_tensor_size = in_end_idx - in_start_idx;
PADDLE_ENFORCE(
assign_tensor_size == out_end_idx - out_start_idx,
PADDLE_ENFORCE_EQ(
assign_tensor_size,
out_end_idx - out_start_idx,
phi::errors::OutOfRange("When assigning inplaced tensor, Input vector "
"size %d mismatch output vector size %d",
in_end_idx - in_start_idx,
......@@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) {
return *this;
}
//////////////// Op Meta Info Helper /////////////////
const std::string& OpMetaInfoHelper::GetOpName(const paddle::OpMetaInfo& info) {
return info.name_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetInputs(
const paddle::OpMetaInfo& info) {
return info.inputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetOutputs(
const paddle::OpMetaInfo& info) {
return info.outputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetAttrs(
const paddle::OpMetaInfo& info) {
return info.attrs_;
}
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceMap(const paddle::OpMetaInfo& info) {
return info.inplace_map_;
}
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceReverseMap(const paddle::OpMetaInfo& info) {
return info.inplace_reverse_map_;
}
const KernelFunc& OpMetaInfoHelper::GetKernelFn(
const paddle::OpMetaInfo& info) {
return info.kernel_fn_;
}
const InferShapeFunc& OpMetaInfoHelper::GetInferShapeFn(
const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
const InferDtypeFunc& OpMetaInfoHelper::GetInferDtypeFn(
const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
//////////////// Op Meta Info Map /////////////////
std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) {
......@@ -414,14 +472,16 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap(
const std::vector<std::string>& outputs =
OpMetaInfoHelper::GetOutputs(*info_ptr_);
for (const auto& pair : inplace_map) {
PADDLE_ENFORCE(
std::find(inputs.begin(), inputs.end(), pair.first) != inputs.cend(),
PADDLE_ENFORCE_NE(
std::find(inputs.begin(), inputs.end(), pair.first),
inputs.cend(),
phi::errors::PreconditionNotMet(
"The register of operator %s's `SetInplaceMap` failed. "
"Please make sure: 1. Call `Inputs` and `Outputs` before "
"`SetInplaceMap`; 2. The keys of inplace_map are inside `Inputs`",
name_));
PADDLE_ENFORCE(std::find(outputs.begin(), outputs.end(), pair.second) !=
PADDLE_ENFORCE_NE(
std::find(outputs.begin(), outputs.end(), pair.second),
outputs.cend(),
phi::errors::PreconditionNotMet(
"The register of operator %s's `SetInplaceMap` failed. "
......
......@@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const {
}
#endif
const std::string &Tensor::name() const { return name_; }
void Tensor::set_name(const std::string &name) { name_ = name; }
/* Part 5: Status utils methods */
bool Tensor::defined() const { return impl_ != nullptr; }
......
......@@ -26,16 +26,4 @@ if(WITH_PYTHON AND EXISTS ${PADDLE_BINARY_DIR})
endif()
endif()
if(WITH_GPU OR WITH_ROCM)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
endif()
cc_library(
phi_device_tracer
SRCS device_tracer.cc
DEPS phi_profiler_proto ${GPU_CTX_DEPS})
cc_library(
phi_profiler
SRCS profiler.cc
DEPS phi_os_info phi_device_tracer phi_enforce)
collect_srcs(api_srcs SRCS device_tracer.cc profiler.cc)
......@@ -2,17 +2,6 @@ add_subdirectory(dynload)
add_subdirectory(gpu)
set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc)
set(BACKENDS_DEPS
enforce
place
flags
eigen3
phi_device_context
generator
phi_os_info)
if(WITH_XBYAK)
list(APPEND BACKENDS_DEPS xbyak)
endif()
if(NOT APPLE AND NOT WIN32)
list(APPEND BACKENDS_SRCS device_code.cc)
......@@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM)
gpu/gpu_resources.cc)
if(WITH_GPU)
list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
set_source_files_properties(
gpu/gpu_resources.cc
PROPERTIES COMPILE_FLAGS
"-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
endif()
if(WITH_ROCM)
list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
endif()
list(APPEND BACKENDS_DEPS phi_dynload_cuda)
endif()
if(WITH_XPU)
......@@ -45,7 +28,6 @@ if(WITH_MKLDNN)
list(APPEND BACKENDS_SRCS onednn/onednn_context.cc)
list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc)
list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc)
list(APPEND BACKENDS_DEPS mkldnn)
endif()
list(
......@@ -55,26 +37,25 @@ list(
device_guard.cc
stream.cc
event.cc
device_base.cc
device_manager.cc
context_pool.cc)
if(WITH_GPU
OR WITH_ROCM
OR WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS device_base.cc)
endif()
if(WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc
custom/custom_device_op_list.cc)
endif()
add_library(phi_backends "${BACKENDS_SRCS}")
target_link_libraries(phi_backends ${BACKENDS_DEPS})
# for inference library
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
set(phi_modules ${phi_modules} phi_backends)
set_property(GLOBAL PROPERTY PHI_MODULES "${phi_modules}")
collect_srcs(backends_srcs SRCS ${BACKENDS_SRCS})
if(WITH_CUSTOM_DEVICE)
cc_test(
capi_test
SRCS custom/capi_test.cc
DEPS phi_capi)
DEPS phi)
endif()
......@@ -24,6 +24,10 @@
namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, CPUContext>::kType =
RegisterStaticType<DeviceContext>(CPUContext::name());
struct CPUContext::Impl {
Impl() : place_(CPUPlace()) {}
......
......@@ -19,6 +19,11 @@ limitations under the License. */
namespace phi {
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, CustomContext>::kType =
RegisterStaticType<DeviceContext>(CustomContext::name());
struct CustomContext::Impl {
explicit Impl(const CustomPlace& place) : place_(place) {}
......
cc_library(
phi_dynamic_loader
SRCS dynamic_loader.cc port.cc
DEPS enforce glog gflags)
set(DYNLOAD_COMMON_SRCS dynamic_loader.cc port.cc warpctc.cc warprnnt.cc
lapack.cc)
if(WITH_ASCEND_CL)
list(REMOVE_ITEM DYNLOAD_COMMON_SRCS warprnnt.cc)
endif()
list(
APPEND
CUDA_SRCS
......@@ -60,66 +60,39 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if(CUPTI_FOUND)
list(APPEND CUDA_SRCS cupti.cc)
endif()
if(WITH_ROCM)
hip_library(
phi_dynload_cuda
SRCS ${HIP_SRCS}
DEPS phi_dynamic_loader)
cc_library(
phi_dynload_warpctc
SRCS warpctc.cc
DEPS phi_dynamic_loader warpctc)
cc_library(
phi_dynload_warprnnt
SRCS warprnnt.cc
DEPS phi_dynamic_loader warprnnt)
else()
nv_library(
phi_dynload_cuda
SRCS ${CUDA_SRCS}
DEPS phi_dynamic_loader)
cc_library(
phi_dynload_warpctc
SRCS warpctc.cc
DEPS phi_dynamic_loader warpctc)
cc_library(
phi_dynload_warprnnt
SRCS warprnnt.cc
DEPS phi_dynamic_loader warprnnt)
endif()
if(WITH_MKLML)
cc_library(
phi_dynload_mklml
SRCS mklml.cc
DEPS phi_dynamic_loader mklml)
# Only deps libmklml.so, not link
add_library(dynload_mklml STATIC mklml.cc)
add_dependencies(dynload_mklml mklml)
if(WIN32)
target_link_libraries(dynload_mklml ${MKLML_IOMP_LIB})
else()
target_link_libraries(dynload_mklml
"-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
endif()
if(WITH_FLASHATTN)
cc_library(
phi_dynload_flashattn
SRCS flashattn.cc
DEPS phi_dynamic_loader flashattn)
list(APPEND DYNLOAD_COMMON_SRCS flashattn.cc)
endif()
cc_library(
phi_dynload_lapack
SRCS lapack.cc
DEPS phi_dynamic_loader)
add_dependencies(phi_dynload_lapack extern_lapack)
# TODO(TJ): add iomp, mkldnn?
if(MKL_FOUND AND WITH_ONEMKL)
message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
cc_library(
phi_dynload_mklrt
SRCS mklrt.cc
DEPS phi_dynamic_loader)
target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE})
list(APPEND DYNLOAD_COMMON_SRCS mklrt.cc)
endif()
if(WITH_ROCM)
collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${HIP_SRCS})
elseif(WITH_GPU)
collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${CUDA_SRCS})
else()
collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS})
endif()
if(WITH_CUDNN_FRONTEND)
nv_test(
cudnn_frontend_test
SRCS cudnn_frontend_test.cc
DEPS phi_dynload_cuda cudnn-frontend)
DEPS phi cudnn-frontend)
endif()
cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc)
collect_srcs(backends_srcs SRCS cudnn_workspace_helper.cc)
......@@ -59,6 +59,15 @@ limitations under the License. */
namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, GPUContext>::kType =
RegisterStaticType<DeviceContext>(GPUContext::name());
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, GPUPinnedContext>::kType =
RegisterStaticType<DeviceContext>(GPUPinnedContext::name());
namespace internal {
class EigenGpuStreamDevice : public Eigen::StreamInterface {
......
......@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include <array>
#include <functional>
#include <mutex>
......@@ -305,3 +307,5 @@ class GPUPinnedContext
};
#endif
} // namespace phi
#endif
......@@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) {
}
}
OneDNNContextThreadLocals::Body& OneDNNContextThreadLocals::fetch() {
thread_local Body b;
return b;
}
struct OneDNNContext::Impl {
Impl() : p_blobmap_() {
p_blobmap_.reset(new BlobMap());
......@@ -462,5 +467,7 @@ const std::vector<std::string>& OneDNNContext::GetOutputsName(
return impl_->GetOutputsName(output);
}
const char* OneDNNContext::name() { return "OneDNNContext"; }
} // namespace phi
#endif
......@@ -76,10 +76,7 @@ class OneDNNContextThreadLocals {
static constexpr size_t kMKLDNNSessionID_Default = 0;
// mkldnn session id for cache clearing mode
static constexpr size_t kMKLDNNSessionID_CacheClearing = -1;
static Body& fetch() {
thread_local Body b;
return b;
}
static Body& fetch();
};
class OneDNNContext : public CPUContext {
......@@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext {
const std::vector<std::string>& GetOutputsName(
const std::string& output) const;
static const char* name() { return "OneDNNContext"; }
static const char* name();
private:
struct Impl;
......
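Moving fetch() and its thread_local Body out of the header matters for the shared-library build: an inline function holding a function-local thread_local can be instantiated separately in each module that includes the header, yielding one Body per module per thread instead of one per thread. Keeping the body in a single .cc of libphi avoids that, as in this sketch:

    // counters.h, declaration only; no body in the header
    struct Counters {
      static int& per_thread();
    };

    // counters.cc, compiled into the shared library: the one definition
    int& Counters::per_thread() {
      thread_local int value = 0;
      return value;
    }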
......@@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api;
namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, XPUContext>::kType =
RegisterStaticType<DeviceContext>(XPUContext::name());
struct XPUContext::Impl {
void SetL3Cache(int l3_size = 14155776) {
const int MAX_XPU_NUM = 16;
......
add_subdirectory(lib)
cc_library(
phi_capi
SRCS all.cc
DEPS phi_c_data_type
phi_c_device_context
phi_c_int_array
phi_c_kernel_context
phi_c_kernel_factory
phi_c_kernel_registry
phi_c_place
phi_c_scalar
phi_c_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/capi/all.h"
namespace paddle {
namespace capi {} // namespace capi
} // namespace paddle
cc_library(
phi_c_data_type
SRCS c_data_type.cc
DEPS dense_tensor)
cc_library(
phi_c_device_context
SRCS c_device_context.cc
DEPS phi_backends)
cc_library(
phi_c_int_array
SRCS c_int_array.cc
DEPS int_array)
cc_library(
phi_c_kernel_context
SRCS c_kernel_context.cc
DEPS kernel_context)
cc_library(
phi_c_kernel_factory
SRCS c_kernel_factory.cc
DEPS kernel_factory)
cc_library(
phi_c_kernel_registry
SRCS c_kernel_registry.cc
DEPS dense_tensor)
cc_library(
phi_c_place
SRCS c_place.cc
DEPS phi_place)
cc_library(
phi_c_scalar
SRCS c_scalar.cc
DEPS scalar)
cc_library(
phi_c_tensor
SRCS c_tensor.cc
DEPS dense_tensor)
collect_srcs(
capi_srcs
SRCS
c_data_type.cc
c_device_context.cc
c_int_array.cc
c_kernel_context.cc
c_kernel_factory.cc
c_kernel_registry.cc
c_place.cc
c_scalar.cc
c_tensor.cc)
if(WITH_GPU)
nv_library(
phi_place
SRCS place.cc
DEPS phi_backends)
elseif(WITH_ROCM)
hip_library(
phi_place
SRCS place.cc
DEPS phi_backends)
else()
cc_library(phi_place SRCS place.cc)
endif()
cc_library(
scalar
SRCS scalar.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
int_array
SRCS int_array.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
memory_utils
SRCS memory_utils.cc
DEPS phi_enforce phi_place)
collect_srcs(common_srcs SRCS place.cc scalar.cc int_array.cc memory_utils.cc)
......@@ -6,150 +6,35 @@ if(WITH_GPU)
proto_library(external_error_proto SRCS external_error.proto)
endif()
cc_library(
flags
SRCS flags.cc
DEPS gflags)
cc_library(errors SRCS errors.cc)
set(phi_enforce_deps errors flags)
if(WITH_GPU)
set(phi_enforce_deps ${phi_enforce_deps} external_error_proto)
endif()
cc_library(
phi_enforce
SRCS enforce.cc
DEPS ${phi_enforce_deps})
cc_library(
phi_os_info
SRCS os_info.cc
DEPS phi_enforce)
if(WITH_XPU)
cc_library(
kernel_factory
SRCS kernel_factory.cc
DEPS phi_enforce convert_utils phi_backends)
else()
cc_library(
kernel_factory
SRCS kernel_factory.cc
DEPS phi_enforce convert_utils)
endif()
cc_library(
kernel_context
SRCS kernel_context.cc
DEPS phi_enforce phi_backends)
cc_library(
ddim
SRCS ddim.cc
DEPS phi_enforce)
cc_library(
tensor_base
SRCS tensor_base.cc allocator.cc
DEPS phi_enforce)
cc_library(
tensor_meta
SRCS tensor_meta.cc
DEPS phi_enforce)
cc_library(
lod_utils
SRCS lod_utils.cc
DEPS phi_enforce)
cc_library(
threadpool
SRCS threadpool.cc
DEPS phi_enforce)
cc_library(
dense_tensor
SRCS dense_tensor.cc dense_tensor_impl.cc
DEPS convert_utils tensor_meta tensor_base ddim)
target_link_libraries(dense_tensor memory_utils)
cc_library(
sparse_coo_tensor
SRCS sparse_coo_tensor.cc
DEPS tensor_meta tensor_base)
cc_library(
sparse_csr_tensor
SRCS sparse_csr_tensor.cc
DEPS dense_tensor tensor_base)
cc_library(
string_tensor
SRCS string_tensor.cc
DEPS convert_utils tensor_meta tensor_base)
cc_library(
tensor_array
SRCS tensor_array.cc
DEPS dense_tensor tensor_base)
cc_library(
extended_tensor
SRCS extended_tensor.cc
DEPS tensor_base)
cc_library(
meta_tensor
SRCS meta_tensor.cc
DEPS tensor_base tensor_meta dense_tensor)
cc_library(
infermeta_utils
SRCS infermeta_utils.cc
DEPS meta_tensor)
cc_library(
selected_rows
SRCS selected_rows_impl.cc selected_rows.cc
DEPS tensor_base dense_tensor phi_enforce ddim)
cc_library(
phi_device_context
SRCS device_context.cc
DEPS dense_tensor selected_rows)
cc_library(
custom_kernel
SRCS custom_kernel.cc
DEPS kernel_factory)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS phi_backends place memory)
cc_library(
generator
SRCS generator.cc
DEPS enforce place)
# Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
add_dependencies(tensor_base mkldnn)
endif()
if(WITH_GPU)
nv_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_ROCM)
hip_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_XPU_KP)
xpu_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
else()
cc_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS dense_tensor selected_rows memcpy phi_backends memory_utils)
endif()
collect_srcs(
core_srcs
SRCS
flags.cc
errors.cc
enforce.cc
os_info.cc
kernel_context.cc
ddim.cc
tensor_base.cc
allocator.cc
tensor_meta.cc
lod_utils.cc
threadpool.cc
dense_tensor.cc
dense_tensor_impl.cc
sparse_coo_tensor.cc
sparse_csr_tensor.cc
string_tensor.cc
tensor_array.cc
extended_tensor.cc
meta_tensor.cc
infermeta_utils.cc
selected_rows_impl.cc
selected_rows.cc
device_context.cc
custom_kernel.cc
mixed_vector.cc
generator.cc
kernel_factory.cc
tensor_utils.cc
storage_properties.cc)
cc_library(
arg_map_context
SRCS arg_map_context.cc
DEPS phi_enforce)
cc_library(
op_utils
SRCS op_utils.cc
DEPS arg_map_context enforce)
cc_library(
get_kerneltype_forvar_utils
SRCS get_kerneltype_forvar_utils.cc
DEPS enforce)
set(convert_utils_deps data_type place op_utils phi_backends)
if(WITH_MKLDNN)
set(convert_utils_deps ${convert_utils_deps} mkldnn)
endif()
cc_library(
convert_utils
SRCS convert_utils.cc
DEPS ${convert_utils_deps})
collect_srcs(core_srcs SRCS arg_map_context.cc op_utils.cc
get_kerneltype_forvar_utils.cc convert_utils.cc)
......@@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() {
return g_op_utils_map;
}
BaseKernelNameRegistrar::BaseKernelNameRegistrar(const char* op_type,
const char* base_kernel_name) {
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
ArgumentMappingFnRegistrar::ArgumentMappingFnRegistrar(
const char* op_type, ArgumentMappingFn arg_mapping_fn) {
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
} // namespace phi
......@@ -210,18 +210,12 @@ class OpUtilsMap {
};
struct BaseKernelNameRegistrar {
BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name) {
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name);
};
struct ArgumentMappingFnRegistrar {
ArgumentMappingFnRegistrar(const char* op_type,
ArgumentMappingFn arg_mapping_fn) {
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
ArgumentMappingFn arg_mapping_fn);
};
#define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \
......
......@@ -42,6 +42,11 @@ limitations under the License. */
namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, DenseTensor>::kType =
RegisterStaticType<phi::TensorBase>(DenseTensor::name());
DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta)
: meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
......@@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
if (fake_alloc) {
bytes = 0;
} else {
PADDLE_ENFORCE(
PADDLE_ENFORCE_EQ(
valid(),
true,
phi::errors::PreconditionNotMet("The meta data must be valid when "
"call the mutable data function."));
if (requested_size) {
......@@ -169,8 +175,9 @@ const T* DenseTensor::data() const {
template <typename T>
T* DenseTensor::data() {
T* ret = static_cast<T*>(data());
PADDLE_ENFORCE(
(dtype() == phi::CppTypeToDataType<T>::Type()),
PADDLE_ENFORCE_EQ(
dtype(),
phi::CppTypeToDataType<T>::Type(),
phi::errors::InvalidArgument(
"The type of data we are trying to retrieve (%s) does not match the "
"type of data (%s) currently contained in the container.",
......@@ -200,7 +207,8 @@ const void* DenseTensor::data() const {
}
void DenseTensor::set_meta(DenseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(),
PADDLE_ENFORCE_EQ(meta_.valid(),
false,
phi::errors::InvalidArgument(
"Only when the original attribute of Tensor is "
"incomplete, can it be reset."));
......@@ -208,8 +216,9 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
}
void DenseTensor::set_meta(const DenseTensorMeta& meta) {
PADDLE_ENFORCE(
PADDLE_ENFORCE_EQ(
meta.valid(),
true,
phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims;
......
(37 more file diffs in this commit are collapsed and not shown.)