Unverified commit da50a009, authored by YuanRisheng, committed by GitHub

[PHI Decoupling] Create PHI shared lib (#53735)

* create phi so

* fix ci bugs

* fix py3 bugs

* add file

* fix py3 bugs

* fix windows bugs

* perfect so

* fix py3 bugs

* delete all static target in phi

* fix windows bugs

* fix py3 bugs

* fix ci bugs

* fix windows bugs

* fix bug: gflags can't be linked by both the dynamic and the static lib

* fix bug: third-party libraries could not be loaded

* fix ci bugs

* fix compile bugs

* fix py3 bugs

* fix conflict

* fix xpu bugs

* fix mac compile bugs

* fix psgpu bugs

* fix inference failure

* deal with conflict

* fix LIBRARY_PATH bug

* fix windows bugs

* fix onednn error

* fix windows compile bugs

* fix windows compile bugs

* fix test_cuda_graph_static_mode_error aborted

* fix windows bugs

* fix mac-python3 error

* fix hip compile bugs

* change mode to static

* change to static mode

* fix ci bugs

* fix py3 bugs

* fix windows bugs

* fix bugs

* add static flag

* add PADDLE_API

* change position of PADDLE_API

* fix windows bugs

* change mode to dynamic lib

* fix windows static bugs

* deal with conflict

* fix windows unit bug

* fix coverage

* deal with conflict

* fix windows-inference

* fix py3 bugs

* fix bugs when compiling type_info

* fix compile bugs

* fix py3 bugs

* fix windows bugs

* fix windows openblas

* fix xpu bugs

* fix enforce_test in windows

* update code according to review comments

* fix windows cmake bug

* fix windows bugs

* fix windows bugs

* delete cinn unittest

* fix cinn bugs

---------
Co-authored-by: lzydev <1528794076@qq.com>
Parent commit: 7aabdfd9
@@ -40,7 +40,6 @@ if(WITH_MKLML)
   add_definitions(-DLAPACK_FOUND)
   add_dependencies(cblas mklml)
-  target_link_libraries(cblas dynload_mklml)
   message(STATUS "Found cblas and lapack in MKLML "
                  "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
......
@@ -235,3 +235,16 @@ endif()
 if(WITH_CUDNN_FRONTEND)
   add_definitions(-DPADDLE_WITH_CUDNN_FRONTEND)
 endif()
+
+set(WITH_PHI_SHARED
+    ON
+    CACHE BOOL "" FORCE)
+
+if(WIN32 OR WITH_ROCM)
+  set(WITH_PHI_SHARED
+      OFF
+      CACHE BOOL "" FORCE)
+endif()
+
+if(WITH_PHI_SHARED)
+  add_definitions(-DPHI_SHARED)
+endif()
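A minimal sketch, assuming a phi target is already defined at this point in the configure step (which may not match Paddle's actual CMake ordering), of a more scoped alternative to the global add_definitions call above; target_compile_definitions with PUBLIC would attach the define to phi and propagate it only to targets that link phi:

# Sketch only; not the code this PR adds.
if(WITH_PHI_SHARED)
  target_compile_definitions(phi PUBLIC PHI_SHARED)
endif()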
@@ -122,6 +122,5 @@ get_filename_component(WARPCTC_LIBRARY_PATH ${WARPCTC_LIBRARIES} DIRECTORY)
 include_directories(${WARPCTC_INCLUDE_DIR}
 )# For warpctc code to include its headers.
-add_library(warpctc SHARED IMPORTED GLOBAL)
-set_property(TARGET warpctc PROPERTY IMPORTED_LOCATION ${WARPCTC_LIBRARIES})
+add_library(warpctc INTERFACE)
 add_dependencies(warpctc extern_warpctc)
@@ -364,20 +364,7 @@ function(cc_library TARGET_NAME)
       list(REMOVE_ITEM cc_library_DEPS warpctc)
       add_dependencies(${TARGET_NAME} warpctc)
     endif()
-    # Only deps libmklml.so, not link
-    if("${cc_library_DEPS};" MATCHES "mklml;")
-      list(REMOVE_ITEM cc_library_DEPS mklml)
-      if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml")
-        list(APPEND cc_library_DEPS dynload_mklml)
-      endif()
-      add_dependencies(${TARGET_NAME} mklml)
-      if(WIN32)
-        target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
-      else()
-        target_link_libraries(${TARGET_NAME}
-                              "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
-      endif()
-    endif()
     # remove link to python, see notes at:
     # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
     if("${cc_library_DEPS};" MATCHES "python;")
@@ -457,24 +444,9 @@ function(cc_test_build TARGET_NAME)
       endif()
     endif()
     get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
-    target_link_libraries(
-      ${TARGET_NAME}
-      ${cc_test_DEPS}
-      ${os_dependency_modules}
-      paddle_gtest_main
-      lod_tensor
-      memory
-      gtest
-      gflags
-      glog)
-    add_dependencies(
-      ${TARGET_NAME}
-      ${cc_test_DEPS}
-      paddle_gtest_main
-      lod_tensor
-      memory
-      gtest
-      gflags
-      glog)
+    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS}
+                          ${os_dependency_modules} paddle_gtest_main gtest glog)
+    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main gtest
+                     glog)
     common_link(${TARGET_NAME})
     if(WITH_ROCM)
@@ -670,7 +642,7 @@ function(nv_test TARGET_NAME)
     add_executable(${TARGET_NAME} ${nv_test_SRCS})
     get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
     target_link_libraries(${TARGET_NAME} ${nv_test_DEPS}
-                          ${os_dependency_modules} paddle_gtest_main)
+                          ${os_dependency_modules} paddle_gtest_main phi)
     add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main)
     common_link(${TARGET_NAME})
     add_test(${TARGET_NAME} ${TARGET_NAME})
@@ -774,8 +746,8 @@ function(hip_test TARGET_NAME)
       lod_tensor
       memory
       gtest
-      gflags
       glog
+      phi
       ${os_dependency_modules})
     add_dependencies(
       ${TARGET_NAME}
@@ -784,7 +756,7 @@ function(hip_test TARGET_NAME)
       lod_tensor
       memory
       gtest
-      gflags
+      phi
       glog)
     common_link(${TARGET_NAME})
     add_test(${TARGET_NAME} ${TARGET_NAME})
@@ -881,7 +853,7 @@ function(xpu_test TARGET_NAME)
       lod_tensor
       memory
       gtest
-      gflags
+      phi
       glog
       ${os_dependency_modules})
     add_dependencies(
@@ -891,7 +863,7 @@ function(xpu_test TARGET_NAME)
       lod_tensor
       memory
       gtest
-      gflags
+      phi
       glog)
     common_link(${TARGET_NAME})
     add_test(${TARGET_NAME} ${TARGET_NAME})
......
@@ -269,6 +269,13 @@ else()
     SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
     DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
          ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
+
+  set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
+  copy(
+    inference_lib_dist
+    SRCS ${paddle_phi_lib}
+    DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
 endif()
 copy(
......
@@ -61,8 +61,7 @@ function(register_cu_kernel TARGET)
                         "${multiValueArgs}" ${ARGN})
   set(cu_srcs)
-  set(op_common_deps operator op_registry math_function layer
-      common_infer_shape_functions)
+  set(op_common_deps operator op_registry layer common_infer_shape_functions)
   foreach(cu_src ${register_cu_kernel_SRCS})
     if(${cu_src} MATCHES ".*\\.cu$")
       list(APPEND cu_srcs ${cu_src})
@@ -113,7 +112,7 @@ function(register_mkldnn_kernel TARGET)
                         "${multiValueArgs}" ${ARGN})
   set(mkldnn_cc_srcs)
-  set(op_common_deps operator op_registry math_function layer
+  set(op_common_deps operator op_registry phi layer
       common_infer_shape_functions)
   foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
     if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
@@ -164,7 +163,7 @@ function(op_library TARGET)
   set(MIOPEN_FILE)
   set(mkldnn_cc_srcs)
   set(MKLDNN_FILE)
-  set(op_common_deps operator op_registry math_function layer
+  set(op_common_deps operator op_registry phi layer
       common_infer_shape_functions)
   # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
......
@@ -94,6 +94,13 @@ function(kernel_declare TARGET_LIST)
         continue()
       endif()
     endif()
+    # fusion group kernel is not supported in windows and mac
+    if(WIN32 OR APPLE)
+      string(FIND "${first_registry}" "fusion_group" pos)
+      if(pos GREATER 1)
+        continue()
+      endif()
+    endif()
     # some gpu kernel only can run on cuda, not support rocm, so we add this branch
     if(WITH_ROCM)
       string(FIND "${first_registry}" "cuda_only" pos)
@@ -216,3 +223,27 @@ function(prune_declaration_h)
     endif()
   endforeach()
 endfunction()
+
+function(collect_srcs SRC_GROUP)
+  set(options)
+  set(oneValueArgs)
+  set(multiValueArgs "SRCS")
+  cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
+  foreach(src ${prefix_SRCS})
+    set(${SRC_GROUP}
+        "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${src}"
+        CACHE INTERNAL "")
+  endforeach()
+endfunction()
+
+function(collect_generated_srcs SRC_GROUP)
+  set(options)
+  set(oneValueArgs)
+  set(multiValueArgs "SRCS")
+  cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
+  foreach(src ${prefix_SRCS})
+    set(${SRC_GROUP}
+        "${${SRC_GROUP}};${src}"
+        CACHE INTERNAL "")
+  endforeach()
+endfunction()
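A usage sketch for the two helpers added above, with a hypothetical group name and file names (the real call sites live in phi's subdirectory CMakeLists files): each call appends sources to a cached global list, so a single top-level add_library(phi ...) can compile everything collected across subdirectories. collect_srcs prefixes the current source dir, while collect_generated_srcs takes paths as given (useful for files generated into the binary dir).

# Hypothetical call sites, sketch only.
collect_srcs(phi_group_srcs SRCS dense_tensor.cc tensor_meta.cc)
collect_generated_srcs(phi_group_srcs SRCS
                       ${CMAKE_CURRENT_BINARY_DIR}/generated_kernels.cc)
# ${phi_group_srcs} now accumulates absolute paths across subdirectories.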
@@ -49,5 +49,5 @@ file(GLOB PD_DIALECT_SRCS "*.cc")
 cc_library(
   pd_dialect
   SRCS ${PD_DIALECT_SRCS} ${op_source_file}
-  DEPS new_ir framework_proto dense_tensor phi_utils)
+  DEPS new_ir framework_proto phi phi_utils)
 target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR})

 cc_library(
   op_dist_attr
   SRCS dist_attr.cc
-  DEPS dist_attr process_mesh dist_mapper auto_parallel_proto proto_desc
-       phi_enforce)
+  DEPS phi auto_parallel_proto proto_desc)
 add_subdirectory(test)

 cc_test(
   device_mesh_test
   SRCS device_mesh_test.cc
-  DEPS device_mesh)
+  DEPS phi)
 cc_test(
   process_mesh_test
   SRCS process_mesh_test.cc
-  DEPS process_mesh)
+  DEPS phi)
 cc_test(
   dist_attr_test
   SRCS dist_attr_test.cc
-  DEPS dist_attr proto_desc)
+  DEPS phi proto_desc)
 cc_test(
   dist_mapper_test
   SRCS dist_mapper_test.cc
-  DEPS dist_mapper)
+  DEPS phi)

 cc_library(
   process_group
   SRCS process_group.cc
-  DEPS dense_tensor xxhash)
+  DEPS phi xxhash)
 cc_library(
   eager_reducer
   SRCS reducer.cc
-  DEPS eager_api process_group phi_api string_helper)
+  DEPS eager_api process_group phi string_helper)
 if(WITH_DISTRIBUTE)
   cc_library(
     process_group_gloo
     SRCS process_group_gloo.cc gloo_send_recv.cc
-    DEPS phi_api eager_api gloo_wrapper tcp_store)
+    DEPS phi eager_api gloo_wrapper)
 endif()
 if(WITH_NCCL OR WITH_RCCL)
@@ -20,28 +20,19 @@ if(WITH_NCCL OR WITH_RCCL)
     process_group_nccl
     SRCS process_group_nccl.cc nccl_tools.cc common.cc
     DEPS process_group
-         tcp_store
+         phi
          place
          enforce
          collective_helper
          device_context
-         ${DEVICE_EVENT_LIBS}
-         dense_tensor
-         comm_static_check
-         nccl_dynamic_check)
+         ${DEVICE_EVENT_LIBS})
 endif()
 if(WITH_XPU_BKCL)
   cc_library(
     process_group_bkcl
     SRCS process_group_bkcl.cc bkcl_tools.cc common.cc
-    DEPS process_group
-         tcp_store
-         place
-         enforce
-         collective_helper
-         device_context
-         dense_tensor)
+    DEPS process_group phi place enforce collective_helper device_context)
 endif()
 if(WITH_MPI)
@@ -55,15 +46,7 @@ if(WITH_CUSTOM_DEVICE)
   cc_library(
     process_group_custom
     SRCS process_group_custom.cc custom_ccl_tools.cc common.cc
-    DEPS process_group
-         tcp_store
-         phi_backends
-         place
-         enforce
-         collective_helper
-         device_context
-         comm_static_check
-         dense_tensor)
+    DEPS process_group phi place enforce collective_helper device_context)
 endif()
 set(COMM_UTILS_DEPS process_group)
......
@@ -5,7 +5,7 @@ endif()
 proto_library(interceptor_message_proto SRCS interceptor_message.proto)
 if(WITH_ARM_BRPC)
-  set(BRPC_DEPS arm_brpc snappy gflags glog)
+  set(BRPC_DEPS arm_brpc snappy phi glog)
 elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
   set(BRPC_DEPS
       brpc
@@ -15,7 +15,7 @@ elseif(WITH_DISTRIBUTE AND NOT WITH_PSLIB)
       zlib
       leveldb
       snappy
-      gflags
+      phi
       glog)
 else()
   set(BRPC_DEPS "")
@@ -51,7 +51,7 @@ cc_library(
   collective_helper
   op_registry
   executor_gc_helper
-  gflags
+  phi
   glog
   ${BRPC_DEPS})
......
@@ -8,12 +8,11 @@ if(WITH_HETERPS)
       ssl
       crypto
       protobuf
-      gflags
+      phi
       glog
       zlib
       leveldb
       snappy
-      gflags
       glog
       device_context
       rocksdb)
@@ -25,12 +24,11 @@ else()
       ssl
       crypto
       protobuf
-      gflags
+      phi
       glog
       zlib
       leveldb
       snappy
-      gflags
       glog
       device_context)
@@ -122,8 +120,7 @@ cc_library(
     simple_threadpool
     simple_rpc
     scope
-    math_function
-    selected_rows_functor
+    phi
     ps_gpu_wrapper
     ${RPC_DEPS})
@@ -150,7 +147,7 @@ cc_library(
 #cc_library(
 #  communicator
 #  SRCS communicator/communicator.cc
-#  DEPS scope client table math_function selected_rows_functor ${RPC_DEPS})
+#  DEPS scope client table phi ${RPC_DEPS})
 #cc_library(
 #  ps_service
 #  SRCS ps_service/service.cc
......
@@ -48,7 +48,7 @@ cc_library(
     string_helper
     simple_threadpool
     xxhash
-    generator)
+    phi)
 set_source_files_properties(
   tensor_accessor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
@@ -91,7 +91,7 @@ cc_library(
     ps_framework_proto
     string_helper
     device_context
-    gflags
+    phi
     glog
     fs
     afs_wrapper
......
@@ -20,7 +20,7 @@ set(PADDLE_RPC_DEPS
     zlib
     leveldb
     snappy
-    gflags
+    phi
     glog
     pybind)
 proto_library(paddle_rpc_proto SRCS rpc.proto)
......
@@ -73,7 +73,7 @@ cc_test_old(
   DEPS
   brpc_utils
   scope
-  math_function
+  phi
   ${COMMON_DEPS}
   ${RPC_DEPS})
......
 set(eager_deps
-    phi_api
-    phi_dygraph_api
+    phi
     hook_utils
     tensor_utils
     utils
     global_utils
     backward
-    phi_tensor
     tracer
     layer
     autograd_meta
@@ -48,27 +46,26 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
   cc_library(
     backward
     SRCS backward.cc
-    DEPS grad_tensor_holder utils autograd_meta grad_node_info switch_autotune)
+    DEPS grad_tensor_holder utils autograd_meta grad_node_info phi)
 endif()
 cc_library(
   eager_nan_inf_utils
   SRCS nan_inf_utils.cc
-  DEPS phi_tensor nan_inf_utils enforce)
+  DEPS phi nan_inf_utils enforce)
 cc_library(
   grad_node_info
   SRCS grad_node_info.cc
-  DEPS phi_api phi_tensor)
+  DEPS phi)
 cc_library(
   autograd_meta
   SRCS autograd_meta.cc
-  DEPS phi_api phi_tensor)
+  DEPS phi)
 cc_library(
   utils
   SRCS utils.cc
-  DEPS phi_api
-       phi_tensor
+  DEPS phi
        global_utils
        layer
        proto_desc
......
@@ -2,5 +2,5 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
   cc_library(
     accumulation_node
     SRCS accumulation_node.cc
-    DEPS gradient_accumulator phi_api grad_node_info)
+    DEPS gradient_accumulator phi grad_node_info)
 endif()

 cc_library(
   scale_node
   SRCS scale_node.cc
-  DEPS global_utils phi phi_api grad_node_info)
+  DEPS global_utils phi grad_node_info)
 if(NOT (NOT WITH_PYTHON AND ON_INFER))
   cc_library(
......

 cc_library(
   eager_scale
   SRCS scale.cc
-  DEPS phi_api phi autograd_meta scale_node)
+  DEPS phi autograd_meta scale_node)
 if(NOT (NOT WITH_PYTHON AND ON_INFER))
   cc_library(
......
@@ -7,7 +7,7 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
   cc_library(
     tensor_utils
     SRCS tensor_utils.cc
-    DEPS phi_api autograd_meta grad_node_info accumulation_node)
+    DEPS phi autograd_meta grad_node_info accumulation_node)
   cc_library(
     hook_utils
     SRCS hook_utils.cc
@@ -16,7 +16,7 @@ else()
   cc_library(
     tensor_utils
     SRCS tensor_utils.cc
-    DEPS phi_api autograd_meta grad_node_info)
+    DEPS phi autograd_meta grad_node_info)
   cc_library(
     hook_utils
     SRCS hook_utils.cc
......
@@ -52,6 +52,15 @@ if(WIN32)
     set(eager_generator_path "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}")
   endif()
+
+  if(WITH_PHI_SHARED)
+    message("Copied phi.dll for Eager AutoCodeGen")
+    add_custom_command(
+      OUTPUT ${eager_generator_path}/phi.dll
+      COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${eager_generator_path}
+      DEPENDS phi)
+    list(APPEND EAGER_CODEGEN_DEPS ${eager_generator_path}/phi.dll)
+  endif()
+
   if(${CBLAS_PROVIDER} STREQUAL MKLML)
     message("Copied libiomp5md.dll for Eager AutoCodeGen")
     add_custom_command(
......
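A side note on the copy rule above, as a sketch only: it resolves the DLL through the ${PHI_LIB} variable. If phi is an ordinary CMake target visible at this point, the same copy can be expressed with a generator expression, which avoids hard-coding the library path; whether that fits Paddle's variable plumbing here is not verified.

# Sketch of an alternative form, not the code this PR adds.
add_custom_command(
  OUTPUT ${eager_generator_path}/phi.dll
  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:phi> ${eager_generator_path}
  DEPENDS phi)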
@@ -392,7 +392,7 @@ FORWARD_CC_FILE_TEMPLATE = """
 #include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
 #include "paddle/phi/core/flags.h"
-DECLARE_bool(check_nan_inf);
+PHI_DECLARE_bool(check_nan_inf);
 PHI_DECLARE_string(tensor_operants_mode);
 {}
 {}
......

 cc_library(
   custom_operator_node
   SRCS custom_operator_node.cc
-  DEPS phi_tensor phi_api grad_node_info custom_operator op_meta_info)
+  DEPS phi grad_node_info custom_operator)
 cc_library(
   py_layer_node
   SRCS py_layer_node.cc
-  DEPS pybind phi_api grad_node_info)
+  DEPS pybind phi grad_node_info)
@@ -115,7 +115,7 @@ proto_library(trainer_desc_proto SRCS trainer_desc.proto DEPS framework_proto
 cc_library(
   string_array
   SRCS string_array.cc
-  DEPS utf8proc phi_enforce)
+  DEPS utf8proc phi)
 cc_library(
   data_type
@@ -130,7 +130,7 @@ cc_test(
 cc_library(
   tensor
   SRCS tensor_util.cc
-  DEPS place memory data_type device_context dense_tensor)
+  DEPS place memory data_type device_context phi)
 cc_test(
   tensor_test
@@ -166,12 +166,12 @@ cc_test(
 cc_library(
   lod_tensor
   SRCS lod_tensor.cc
-  DEPS ddim mixed_vector place tensor framework_proto version)
+  DEPS phi place tensor framework_proto version)
 cc_test(
   lod_tensor_test
   SRCS lod_tensor_test.cc
-  DEPS lod_utils lod_tensor memory)
+  DEPS phi lod_tensor memory)
 if(WITH_GPU)
   nv_test(
@@ -188,12 +188,12 @@ endif()
 cc_library(
   garbage_collector
   SRCS garbage_collector.cc
-  DEPS device_context memory gflags glog)
+  DEPS device_context memory phi glog)
 cc_library(
   reader
   SRCS reader.cc
-  DEPS lod_tensor ddim)
+  DEPS lod_tensor phi)
 cc_test(
   reader_test
   SRCS reader_test.cc
@@ -202,13 +202,12 @@ cc_test(
 cc_test(
   threadpool_test
   SRCS threadpool_test.cc
-  DEPS threadpool)
+  DEPS phi)
 cc_library(
   var_type_traits
   SRCS var_type_traits.cc
-  DEPS framework_proto scope tensor_array sparse_coo_tensor sparse_csr_tensor
-       extended_tensor)
+  DEPS framework_proto scope phi)
 if(WITH_GPU)
   target_link_libraries(var_type_traits dynload_cuda)
 endif()
@@ -242,7 +241,7 @@ endif()
 cc_library(
   scope
   SRCS scope.cc
-  DEPS glog threadpool xxhash var_type_traits)
+  DEPS glog phi xxhash var_type_traits)
 cc_library(
   device_worker
   SRCS device_worker.cc
@@ -273,12 +272,12 @@ if(WITH_GPU)
   nv_test(
     data_device_transform_test
     SRCS data_device_transform_test.cu
-    DEPS operator op_registry device_context math_function scope)
+    DEPS operator op_registry device_context phi scope)
 elseif(WITH_ROCM)
   hip_test(
     data_device_transform_test
     SRCS data_device_transform_test.cu
-    DEPS operator op_registry device_context math_function scope)
+    DEPS operator op_registry device_context phi scope)
 endif()
 if(WITH_GPU)
@@ -333,7 +332,7 @@ endif()
 cc_library(
   data_layout_transform
   SRCS data_layout_transform.cc
-  DEPS tensor math_function phi_data_layout_transform)
+  DEPS tensor phi)
 cc_test(
   data_layout_transform_test
   SRCS data_layout_transform_test.cc
@@ -342,14 +341,13 @@ cc_test(
 cc_library(
   data_transform
   SRCS data_transform.cc
-  DEPS math_function
+  DEPS phi
        tensor
        framework_proto
        selected_rows_utils
        data_device_transform
        data_type_transform
-       data_layout_transform
-       phi_data_transform)
+       data_layout_transform)
 cc_library(
   attribute
@@ -400,7 +398,7 @@ cc_library(
 cc_library(
   shape_inference
   SRCS shape_inference.cc
-  DEPS ddim attribute selected_rows_utils)
+  DEPS phi attribute selected_rows_utils)
 # every source file that includes "dnnl.h" must depends on mkldnn
 # or, the first one should depends on mkldnn
@@ -433,30 +431,17 @@ if(WITH_XPU)
     phi_utils
     SRCS phi_utils.cc
     DEPS lod_tensor
-         dense_tensor
          selected_rows_utils
-         int_array
-         scalar
          place
         phi
         var_type_traits
         op_info
-         xpu_op_list
-         convert_utils)
+         xpu_op_list)
 else()
   cc_library(
     phi_utils
     SRCS phi_utils.cc
-    DEPS lod_tensor
-         dense_tensor
-         selected_rows_utils
-         int_array
-         scalar
-         place
-         phi
-         var_type_traits
-         op_info
-         convert_utils)
+    DEPS lod_tensor selected_rows_utils place phi var_type_traits op_info)
 endif()
 if(WITH_XPU)
@@ -482,11 +467,10 @@ if(WITH_XPU)
     unused_var_check
     nan_inf_utils
     phi_utils
-    kernel_factory
     infershape_utils
-    op_utils
+    phi
     op_compat_infos
-    get_kerneltype_forvar_utils)
+    type_info)
 else()
   cc_library(
     operator
@@ -509,11 +493,10 @@ else()
     unused_var_check
     nan_inf_utils
     phi_utils
-    kernel_factory
     infershape_utils
-    op_utils
+    phi
     op_compat_infos
-    get_kerneltype_forvar_utils)
+    type_info)
 endif()
 cc_test(
@@ -543,7 +526,7 @@ cc_library(
   version
   xxhash
   op_dist_attr
-  scalar
+  phi
   op_version_proto
   op_version_registry)
@@ -853,7 +836,7 @@ if(WITH_DISTRIBUTE)
     heter_server
     brpc
     fleet_executor
-    flags)
+    phi)
   set(DISTRIBUTE_COMPILE_FLAGS "")
   if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
     set(DISTRIBUTE_COMPILE_FLAGS "${DISTRIBUTE_COMPILE_FLAGS} -faligned-new")
@@ -1071,7 +1054,7 @@ if(WITH_PSCORE)
       executor
       heter_server
       gloo_wrapper
-      eigen_function
+      phi
       ${RPC_DEPS}
       graph_gpu_wrapper)
   else()
@@ -1088,7 +1071,7 @@ if(WITH_PSCORE)
       executor
       heter_server
       gloo_wrapper
-      eigen_function
+      phi
       ${RPC_DEPS})
   endif()
 else()
@@ -1112,7 +1095,7 @@ cc_test(
 cc_library(
   selected_rows_utils
   SRCS selected_rows_utils.cc
-  DEPS selected_rows device_context)
+  DEPS phi device_context)
 cc_test(
   selected_rows_utils_test
   SRCS selected_rows_utils_test.cc
@@ -1162,12 +1145,11 @@ cc_library(
        phi
        phi_utils
        op_info
-       shape_inference
-       sparse_coo_tensor)
+       shape_inference)
 cc_test(
   infershape_utils_test
   SRCS infershape_utils_test.cc
-  DEPS infershape_utils infermeta_utils meta_tensor)
+  DEPS infershape_utils phi)
 # Get the current working branch
 execute_process(
@@ -1198,12 +1180,15 @@ cc_library(
     operator
     dynamic_loader
     string_helper
-    phi_tensor
-    op_meta_info
-    phi_api
-    tensor_api
-    phi_tensor_operants
-    operants_manager)
+    phi
+    imperative_flag
+    layer)
+
+cc_library(type_info SRCS type_info.cc)
+add_dependencies(type_info framework_proto auto_parallel_proto xxhash)
+if(WITH_MKLDNN)
+  add_dependencies(type_info mkldnn)
+endif()
 set(FLUID_FRAMEWORK_MODULES
     proto_desc
......
@@ -10,15 +10,15 @@ cc_library(
 cc_library(
   scale_loss_grad_op_handle
   SRCS scale_loss_grad_op_handle.cc
-  DEPS op_handle_base scope lod_tensor ddim memory)
+  DEPS op_handle_base scope lod_tensor phi memory)
 cc_library(
   fetch_op_handle
   SRCS fetch_op_handle.cc
-  DEPS op_handle_base scope lod_tensor ddim memory)
+  DEPS op_handle_base scope lod_tensor phi memory)
 cc_library(
   fetch_async_op_handle
   SRCS fetch_async_op_handle.cc
-  DEPS op_handle_base scope lod_tensor ddim memory)
+  DEPS op_handle_base scope lod_tensor phi memory)
 cc_library(
   share_tensor_buffer_functor
@@ -78,7 +78,7 @@ if(WITH_GPU)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor)
@@ -88,7 +88,7 @@ if(WITH_GPU)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor
@@ -99,7 +99,7 @@ if(WITH_GPU)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor
@@ -114,7 +114,7 @@ if(WITH_GPU)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor
@@ -126,19 +126,17 @@ if(WITH_GPU)
     nv_library(
       reduce_op_handle
       SRCS reduce_op_handle.cc
-      DEPS op_handle_base variable_visitor scope ddim dynload_cuda
-           selected_rows_functor)
+      DEPS op_handle_base variable_visitor scope phi dynload_cuda)
   else()
     nv_library(
       reduce_op_handle
       SRCS reduce_op_handle.cc
-      DEPS op_handle_base variable_visitor scope ddim dynload_cuda
-           selected_rows_functor)
+      DEPS op_handle_base variable_visitor scope phi dynload_cuda)
   endif()
   nv_library(
     broadcast_op_handle
     SRCS broadcast_op_handle.cc
-    DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
+    DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
   nv_library(
     fused_broadcast_op_handle
     SRCS fused_broadcast_op_handle.cc
@@ -154,7 +152,7 @@ elseif(WITH_ROCM)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor)
@@ -164,7 +162,7 @@ elseif(WITH_ROCM)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor
@@ -175,7 +173,7 @@ elseif(WITH_ROCM)
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          dynload_cuda
          variable_visitor
@@ -187,19 +185,17 @@ elseif(WITH_ROCM)
   hip_library(
     reduce_op_handle
     SRCS reduce_op_handle.cc
-    DEPS op_handle_base variable_visitor scope ddim dynload_cuda
-         selected_rows_functor)
+    DEPS op_handle_base variable_visitor scope phi dynload_cuda)
   else()
     hip_library(
       reduce_op_handle
       SRCS reduce_op_handle.cc
-      DEPS op_handle_base variable_visitor scope ddim dynload_cuda
-           selected_rows_functor)
+      DEPS op_handle_base variable_visitor scope phi dynload_cuda)
   endif()
   hip_library(
     broadcast_op_handle
     SRCS broadcast_op_handle.cc
-    DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
+    DEPS op_handle_base scope phi memory variable_visitor dynload_cuda)
   hip_library(
     fused_broadcast_op_handle
     SRCS fused_broadcast_op_handle.cc
@@ -212,14 +208,14 @@ else()
   cc_library(
     all_reduce_op_handle
     SRCS all_reduce_op_handle.cc
-    DEPS op_handle_base scope lod_tensor ddim memory variable_visitor)
+    DEPS op_handle_base scope lod_tensor phi memory variable_visitor)
   cc_library(
     fused_all_reduce_op_handle
     SRCS fused_all_reduce_op_handle.cc
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          variable_visitor
          place)
@@ -229,7 +225,7 @@ else()
     DEPS op_handle_base
          scope
          lod_tensor
-         ddim
+         phi
          memory
          variable_visitor
         place
@@ -239,17 +235,17 @@ else()
   cc_library(
     reduce_op_handle
     SRCS reduce_op_handle.cc
-    DEPS op_handle_base variable_visitor scope ddim selected_rows_functor)
+    DEPS op_handle_base variable_visitor scope phi)
   else()
     cc_library(
       reduce_op_handle
       SRCS reduce_op_handle.cc
-      DEPS op_handle_base variable_visitor scope ddim selected_rows_functor)
+      DEPS op_handle_base variable_visitor scope phi)
   endif()
   cc_library(
     broadcast_op_handle
     SRCS broadcast_op_handle.cc
-    DEPS op_handle_base scope ddim memory variable_visitor)
+    DEPS op_handle_base scope phi memory variable_visitor)
   cc_library(
     fused_broadcast_op_handle
     SRCS fused_broadcast_op_handle.cc
@@ -259,7 +255,7 @@ endif()
 cc_library(
   gather_op_handle
   SRCS gather_op_handle.cc
-  DEPS op_handle_base scope ddim memory variable_visitor)
+  DEPS op_handle_base scope phi memory variable_visitor)
 cc_library(
   eager_deletion_op_handle
@@ -305,7 +301,7 @@ cc_test(
   DEPS var_handle
        op_handle_base
        scope
-       ddim
+       phi
        memory
        device_context
        broadcast_op_handle)
@@ -317,7 +313,7 @@ cc_test_old(
   var_handle
   op_handle_base
   scope
-  ddim
+  phi
   memory
   device_context
   gather_op_handle)
@@ -330,12 +326,12 @@ cc_library(
   scope_buffered_ssa_graph_executor
   SRCS scope_buffered_ssa_graph_executor.cc
   DEPS ssa_graph_executor scope_buffered_monitor)
-#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory
+#cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope phi memory
 #               device_context reduce_op_handle )
 cc_library(
   bind_threaded_ssa_graph_executor
   SRCS bind_threaded_ssa_graph_executor.cc
-  DEPS fetch_op_handle gflags ssa_graph_executor scope simple_threadpool
+  DEPS fetch_op_handle phi ssa_graph_executor scope simple_threadpool
        device_context)
 cc_library(
   fast_threaded_ssa_graph_executor
......
@@ -20,9 +20,10 @@ limitations under the License. */
 #include "paddle/fluid/framework/details/reduce_op_handle.h"
 #include "paddle/fluid/framework/ir/graph_printer.h"
 #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
+#include "paddle/phi/core/flags.h"
 DECLARE_bool(convert_all_blocks);
-DECLARE_bool(use_mkldnn);
+PHI_DECLARE_bool(use_mkldnn);
 #ifdef PADDLE_WITH_CINN
 DECLARE_bool(use_cinn);
 #endif
......
@@ -32,7 +32,7 @@ cc_library(
 cc_library(
   cost_model
   SRCS cost_model.cc
-  DEPS executor graph profiler proto_desc phi_device_tracer)
+  DEPS executor graph profiler proto_desc phi)
 set(GRAPH_PATTERN_DETECTOR_DEPS graph graph_helper graph_traits)
 if(WITH_TESTING)
@@ -458,9 +458,6 @@ if(WITH_MKLDNN)
       graph_to_program_pass
       conv_op
       conv_transpose_op
-      math_function
-      im2col
-      vol2col
       batch_norm_op
       generated_op
       activation_op
@@ -468,7 +465,7 @@ if(WITH_MKLDNN)
       concat_and_split
       naive_executor
       device_context
-      eigen_function)
+      phi)
   if(WITH_GPU OR WITH_ROCM)
     set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv)
   endif()
......
@@ -221,7 +221,7 @@ bool InitAndCheckAttrs(const size_t &found_adamw_count,
     }
   }
   // Check whether with_decay and multi_precision are matched
   if (config->with_decay !=
           PADDLE_GET_CONST(bool, adamw_op_desc->GetAttr("with_decay")) ||
       config->multi_precision !=
......
@@ -6,13 +6,13 @@ if(WITH_GPU OR WITH_ROCM)
   cc_test(
     test_code_generator
     SRCS code_generator_tester.cc
-    DEPS code_generator phi_backends lod_tensor graph_viz_pass)
+    DEPS code_generator phi lod_tensor graph_viz_pass)
 endif()
 cc_library(
   fusion_group_pass
   SRCS fusion_group_pass.cc elementwise_group_detector.cc
-  DEPS subgraph_detector fuse_pass_base code_generator phi_backends)
+  DEPS subgraph_detector fuse_pass_base code_generator phi)
 cc_test(
   test_fusion_group_pass
   SRCS fusion_group_pass_tester.cc
......
@@ -76,5 +76,4 @@ cc_library(
 cc_test(
   test_reference_count_pass_last_lived_ops
   SRCS test_reference_count_pass_last_lived_ops.cc
-  DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op
-       eigen_function)
+  DEPS parallel_executor elementwise_mul_op elementwise_add_op generated_op phi)
@@ -16,4 +16,4 @@ cc_library(
 cc_library(
   staticgraph_executor_statistics
   SRCS executor_statistics.cc
-  DEPS enforce glog phi_os_info)
+  DEPS enforce glog phi)
@@ -6,7 +6,6 @@ set(INTERPRETER_DEPS
     device_context
     global_utils
     op_registry
-    phi_tensor_utils
     scope
     framework_proto
     data_feed_proto
@@ -31,7 +30,7 @@ set(INTERPRETER_DEPS
     enforce
     scope
     glog
-    comm_context_manager
+    phi
     ${DEVICE_EVENT_LIBS}
     glog)
......
@@ -5,7 +5,7 @@ cc_library(
 cc_library(
   workqueue
   SRCS workqueue.cc
-  DEPS workqueue_utils enforce glog phi_os_info)
+  DEPS workqueue_utils enforce glog phi)
 cc_test(
   workqueue_test
   SRCS workqueue_test.cc
......
@@ -5,7 +5,7 @@ pass_library(
   cinn_subgraph_detector
   subgraph_detector
   cinn_compiler
-  errors
+  phi
   enforce)
 pass_library(cinn_zero_tensor_trick_pass base)
@@ -17,7 +17,7 @@ cc_library(
 cc_library(
   transform_type
   SRCS transform_type.cc
-  DEPS errors enforce cinn)
+  DEPS phi enforce cinn)
 cc_library(
   cinn_cache_key
   SRCS cinn_cache_key.cc
......
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <unordered_map>
+#include "paddle/phi/core/enforce.h"
 #include "paddle/phi/core/extended_tensor.h"
 #include "paddle/utils/any.h"
@@ -52,7 +53,7 @@ class RawTensor : public phi::ExtendedTensor,
   T& Get() const {
     PADDLE_ENFORCE_EQ(data_.empty(),
                       false,
-                      platform::errors::PreconditionNotMet(
+                      phi::errors::PreconditionNotMet(
                           "The data in RawTensor is empty. Please set data "
                           "before using it."));
......
/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/raw_tensor.h"
#include "paddle/fluid/framework/string_array.h"
#include "paddle/fluid/prim/utils/static/desc_tensor.h"

namespace phi {

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, paddle::framework::RawTensor>::kType =
        RegisterStaticType<phi::TensorBase>(
            paddle::framework::RawTensor::name());

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, paddle::framework::Vocab>::kType =
        RegisterStaticType<phi::TensorBase>(paddle::framework::Vocab::name());

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>::kType =
        RegisterStaticType<phi::TensorBase>(paddle::framework::Strings::name());

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>::kType =
        RegisterStaticType<phi::TensorBase>(
            paddle::framework::FeedList::name());

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>::kType =
        RegisterStaticType<phi::TensorBase>(egr::VariableCompatTensor::name());

template <>
const TypeInfo<phi::TensorBase>
    TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>::kType =
        RegisterStaticType<phi::TensorBase>(paddle::prim::DescTensor::name());

}  // namespace phi
 cc_library(
   imperative_flag
   SRCS flags.cc
-  DEPS gflags flags)
+  DEPS phi)
 cc_library(
   var_helper
   SRCS var_helper.cc
-  DEPS tensor selected_rows extended_tensor)
+  DEPS tensor phi)
 if(WITH_XPU)
   cc_library(
     prepared_operator
@@ -20,8 +20,7 @@ if(WITH_XPU)
       op_kernel_type
       data_transform
       nan_inf_utils
-      scalar
-      int_array
+      phi
       var_helper
       profiler
       place)
@@ -38,8 +37,7 @@ else()
       op_kernel_type
       data_transform
       nan_inf_utils
-      scalar
-      int_array
+      phi
      var_helper
      profiler
      place)
@@ -47,14 +45,14 @@ endif()
 cc_library(
   layer
   SRCS layer.cc
-  DEPS prepared_operator math_function imperative_flag variable_helper
-       op_registry var_helper)
+  DEPS prepared_operator phi imperative_flag variable_helper op_registry
+       var_helper)
 add_subdirectory(jit)
 if(WITH_GPU)
   cc_library(
     layout_autotune
     SRCS layout_autotune.cc
-    DEPS op_info phi_backends)
+    DEPS op_info phi)
 else()
   cc_library(
     layout_autotune
@@ -80,15 +78,15 @@ cc_library(
 cc_library(
   basic_engine
   SRCS basic_engine.cc
-  DEPS layer gradient_accumulator switch_autotune)
+  DEPS layer gradient_accumulator phi)
 cc_library(
   engine
   SRCS basic_engine.cc partial_grad_engine.cc
-  DEPS layer gradient_accumulator switch_autotune)
+  DEPS layer gradient_accumulator phi)
 cc_library(
   imperative_profiler
   SRCS profiler.cc
-  DEPS flags)
+  DEPS phi)
 if(NOT WIN32)
   if(WITH_NCCL OR WITH_RCCL)
     cc_library(
@@ -174,12 +172,4 @@ endif()
 cc_library(
   gradient_accumulator
   SRCS gradient_accumulator.cc
-  DEPS blas
-       operator
-       lod_tensor
-       selected_rows_utils
-       selected_rows_functor
-       var_type_traits
-       layer
-       math_function
-       phi_tensor)
+  DEPS operator lod_tensor selected_rows_utils var_type_traits layer phi)
@@ -32,14 +32,8 @@ endif()
 # fluid_modules exclude API-interface of inference/api and inference/capi_exp
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
-get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
-get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS)
 set(utils_modules pretty_log string_helper benchmark)
-if(WITH_CUSTOM_DEVICE)
-  set(fluid_modules ${fluid_modules} phi_capi)
-endif()
 add_subdirectory(api)
 # Create static inference library if needed
@@ -51,7 +45,6 @@ set(STATIC_INFERENCE_API
     reset_tensor_array
     analysis_config
     paddle_pass_builder
-    phi
     ${mkldnn_quantizer_cfg})
 set(OP_LIST
@@ -64,16 +57,14 @@ set(KERNEL_LIST
 #windows GPU static library over the limit, so not create_static_lib, and cc_library is dummy
 if(WIN32 AND WITH_GPU)
-  cc_library(paddle_inference DEPS ${fluid_modules} phi ${STATIC_INFERENCE_API}
+  cc_library(paddle_inference DEPS ${fluid_modules} ${STATIC_INFERENCE_API}
              ${utils_modules})
 else()
   # message("${fluid_modules}")
-  # message("PHI_MODULES ${phi_modules}")
-  # message("${phi_kernels}")
   # message("${STATIC_INFERENCE_API}")
   # message("${utils_modules}")
-  create_static_lib(paddle_inference ${fluid_modules} ${phi_modules}
-                    ${phi_kernels} ${STATIC_INFERENCE_API} ${utils_modules})
+  create_static_lib(paddle_inference ${fluid_modules} ${STATIC_INFERENCE_API}
+                    ${utils_modules})
 endif()
 if(NOT APPLE)
@@ -103,7 +94,7 @@ set(SHARED_INFERENCE_SRCS
 # shared inference library deps
 list(REMOVE_ITEM fluid_modules standalone_executor
      interpretercore_garbage_collector)
-set(SHARED_INFERENCE_DEPS ${fluid_modules} phi analysis_predictor
+set(SHARED_INFERENCE_DEPS phi ${fluid_modules} analysis_predictor
     ${utils_modules})
 if(WITH_CRYPTO)
@@ -124,12 +115,6 @@ if(WITH_ONNXRUNTIME)
       ${CMAKE_CURRENT_SOURCE_DIR}/api/onnxruntime_predictor.cc)
 endif()
-#export all symbols for paddle/phi/api/include/api.h on paddle_inference_shared, only for UNIX
-if(UNIX)
-  set(SHARED_INFERENCE_DEPS ${SHARED_INFERENCE_DEPS}
-      $<TARGET_OBJECTS:phi_function_api>)
-endif()
 # Create shared inference library
 cc_library(
   paddle_inference_shared SHARED
@@ -141,12 +126,15 @@ target_link_libraries(paddle_inference_shared ${os_dependency_modules})
 if(WIN32)
   set_property(TARGET paddle_inference_shared
                PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
-  target_link_libraries(paddle_inference_shared gflags)
+  target_link_libraries(paddle_inference_shared phi)
 endif()
 set_target_properties(paddle_inference_shared PROPERTIES OUTPUT_NAME
                                                          paddle_inference)
-if(NOT APPLE AND NOT WIN32)
+if(NOT APPLE
+   AND NOT WIN32
+   AND NOT WITH_TESTING
+   AND NOT WITH_INFERENCE_API_TEST)
   # TODO(liuyiqun): Temporarily disable the link flag because it is not support on Mac.
   set(LINK_FLAGS
       "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/paddle_inference.map")
......
@@ -41,7 +41,7 @@ if(WITH_CRYPTO)
   list(APPEND paddle_inference_api_deps paddle_crypto)
 endif()
 if(WITH_CUSTOM_DEVICE)
-  set(paddle_inference_api_deps ${paddle_inference_api_deps} phi_capi)
+  set(paddle_inference_api_deps ${paddle_inference_api_deps} phi)
 endif()
 cc_library(
@@ -50,7 +50,7 @@ cc_library(
   DEPS ${paddle_inference_api_deps})
 if(WIN32)
-  target_link_libraries(paddle_inference_api gflags)
+  target_link_libraries(paddle_inference_api phi)
 endif()
 set(inference_deps ${analysis_deps} paddle_inference_api analysis
......
@@ -72,7 +72,7 @@
 #endif
 #ifdef PADDLE_WITH_MKLML
-#include "paddle/fluid/platform/dynload/mklml.h"
+#include "paddle/phi/backends/dynload/mklml.h"
 #endif
 #ifdef PADDLE_WITH_MKLDNN
@@ -1121,7 +1121,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
   // Frees unused memory allocated by the Intel® MKL Memory Allocator to
   // avoid memory leak. See:
   // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
-  platform::dynload::MKL_Free_Buffers();
+  phi::dynload::MKL_Free_Buffers();
 #endif
   return true;
 }
@@ -1185,7 +1185,7 @@ bool AnalysisPredictor::Run(const std::vector<paddle::Tensor> &inputs,
   // Frees unused memory allocated by the Intel® MKL Memory Allocator to
   // avoid memory leak. See:
   // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
-  platform::dynload::MKL_Free_Buffers();
+  phi::dynload::MKL_Free_Buffers();
 #endif
   return true;
 }
@@ -2100,7 +2100,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
   // Frees unused memory allocated by the Intel® MKL Memory Allocator to
   // avoid memory leak. See:
   // https://software.intel.com/en-us/mkl-developer-reference-c-mkl-free-buffers
-  platform::dynload::MKL_Free_Buffers();
+  phi::dynload::MKL_Free_Buffers();
 #endif
   return true;
 }
......
@@ -199,7 +199,7 @@ if(NOT WIN32)
       ${MATH_LIB}
       ${MKLDNN_LIB}
       glog
-      gflags
+      phi
       protobuf
       xxhash
       cryptopp
......
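Because libphi.* is now copied into paddle/lib of the inference package (see the inference_lib_dist hunk earlier) and the demo above links phi, an external project consuming the installed package needs phi on its link line as well. A minimal, hypothetical consumer sketch follows; the lib-dir layout comes from the diff, while the project name, source file, and the omitted third-party dependencies are illustrative:

# Hypothetical consumer of the paddle_inference install tree (sketch only).
cmake_minimum_required(VERSION 3.16)
project(paddle_infer_demo CXX)

set(PADDLE_INFER_DIR "" CACHE PATH "paddle_inference install directory")

add_executable(demo demo.cc)
target_include_directories(demo PRIVATE ${PADDLE_INFER_DIR}/paddle/include)
target_link_libraries(demo
  ${PADDLE_INFER_DIR}/paddle/lib/libpaddle_inference.so
  # new with this PR: phi ships as its own shared library
  ${PADDLE_INFER_DIR}/paddle/lib/libphi.so)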
@@ -29,6 +29,7 @@ WIN_DETECT=$(echo `uname` | grep "Win") # detect current platform
 cd `dirname $0`
 current_dir=`pwd`
 if [ $2 == ON ]; then
+  # You can export yourself if move the install path
   MKL_LIB=${inference_install_dir}/third_party/install/mklml/lib
......
@@ -25,7 +25,7 @@ if(WITH_ONNXRUNTIME)
   cc_library(
     zero_copy_tensor_dummy
     SRCS zero_copy_tensor_dummy.cc
-    DEPS onnxruntime phi_enforce)
+    DEPS onnxruntime phi)
 else()
   cc_library(
     zero_copy_tensor
@@ -34,7 +34,7 @@ else()
   cc_library(
     zero_copy_tensor_dummy
     SRCS zero_copy_tensor_dummy.cc
-    DEPS phi_enforce)
+    DEPS phi)
 endif()
 cc_test(
......
@@ -39,7 +39,7 @@ if(APPLE)
     utf8proc
     cryptopp
     protobuf
-    gflags
+    phi
     cblas)
 endif()
......
...@@ -23,7 +23,7 @@ fi ...@@ -23,7 +23,7 @@ fi
# 2. set LD_LIBRARY_PATH # 2. set LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/mklml/lib/:$PWD/paddle_inference_c/third_party/install/mkldnn/lib/:$PWD/paddle_inference_c/paddle/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PWD/paddle_inference_c/third_party/install/onnxruntime/lib/:$PWD/paddle_inference_c/third_party/install/paddle2onnx/lib/
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_ROOT}/build/paddle/phi/
# 3. go test # 3. go test
go clean -testcache go clean -testcache
go test -v ./... go test -v ./...
...@@ -141,8 +141,7 @@ nv_test( ...@@ -141,8 +141,7 @@ nv_test(
nv_test( nv_test(
test_custom_plugin_creater test_custom_plugin_creater
SRCS test_custom_plugin_creater.cc SRCS test_custom_plugin_creater.cc
DEPS paddle_framework tensorrt_converter op_meta_info custom_operator DEPS paddle_framework tensorrt_converter phi custom_operator init_phi)
init_phi)
if(WITH_ONNXRUNTIME AND WIN32) if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will # Copy onnxruntime for some c++ test in Windows, since the test will
......
include(ExternalProject) include(ExternalProject)
set(ALLOCATOR_DEPS place stats profiler phi_backends device_context) set(ALLOCATOR_DEPS place stats profiler phi device_context)
set(ALLOCATOR_SRCS set(ALLOCATOR_SRCS
allocator.cc allocator.cc
cpu_allocator.cc cpu_allocator.cc
...@@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -32,7 +32,7 @@ if(WITH_GPU OR WITH_ROCM)
endif() endif()
if(WITH_GPU) if(WITH_GPU)
list(APPEND ALLOCATOR_DEPS phi_backends) list(APPEND ALLOCATOR_DEPS phi)
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2) if(CUDA_VERSION VERSION_GREATER_EQUAL 10.2)
......
...@@ -124,7 +124,7 @@ class CUDAGraphAllocator ...@@ -124,7 +124,7 @@ class CUDAGraphAllocator
: underlying_allocator_(allocator) {} : underlying_allocator_(allocator) {}
public: public:
~CUDAGraphAllocator() { VLOG(10) << "CUDAGraphAllocator destructed"; } ~CUDAGraphAllocator() {}
static std::shared_ptr<Allocator> Create( static std::shared_ptr<Allocator> Create(
const std::shared_ptr<Allocator>& allocator) { const std::shared_ptr<Allocator>& allocator) {
...@@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) { ...@@ -1137,7 +1137,6 @@ void AllocatorFacade::RemoveMemoryPoolOfCUDAGraph(int64_t id) {
if (ref_cnt == 0) { if (ref_cnt == 0) {
cuda_graph_map_.erase(id); cuda_graph_map_.erase(id);
cuda_graph_ref_cnt_.erase(ref_cnt_iter); cuda_graph_ref_cnt_.erase(ref_cnt_iter);
VLOG(10) << "Remove memory pool of CUDA Graph with memory ID " << id;
} else { } else {
VLOG(10) << "Decrease memory pool ID " << id << " reference count to be " VLOG(10) << "Decrease memory pool ID " << id << " reference count to be "
<< ref_cnt; << ref_cnt;
......
...@@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD) ...@@ -90,7 +90,7 @@ if(WITH_UNITY_BUILD)
include(unity_build_rule.cmake) include(unity_build_rule.cmake)
endif() endif()
set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils backward_infermeta sparse_backward_infermeta static_prim_api get_expected_kernel_func) set(OP_HEADER_DEPS ${OP_HEADER_DEPS} phi phi_utils static_prim_api get_expected_kernel_func)
register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op register_operators(EXCLUDES py_func_op dgc_op generated_op1 generated_op2 generated_op3 generated_op4 load_combine_op lstm_op run_program_op quantize_linear_op
recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS}) recurrent_op save_combine_op sparse_attention_op sync_batch_norm_op activation_op ${OP_MKL_DEPS} DEPS ${OP_HEADER_DEPS})
...@@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM) ...@@ -125,7 +125,7 @@ if (WITH_GPU OR WITH_ROCM)
endif() endif()
endif() endif()
op_library(lstm_op DEPS ${OP_HEADER_DEPS} lstm_compute) op_library(lstm_op DEPS ${OP_HEADER_DEPS})
op_library(recurrent_op DEPS ${OP_HEADER_DEPS}) op_library(recurrent_op DEPS ${OP_HEADER_DEPS})
set(COMMON_OP_DEPS ${OP_HEADER_DEPS}) set(COMMON_OP_DEPS ${OP_HEADER_DEPS})
...@@ -136,17 +136,16 @@ if (WITH_DGC) ...@@ -136,17 +136,16 @@ if (WITH_DGC)
endif() endif()
cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator) cc_library(common_infer_shape_functions SRCS common_infer_shape_functions.cc DEPS operator)
cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute cudnn_workspace_helper) cc_library(ops_extra_info SRCS ops_extra_info.cc DEPS attribute phi)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows_utils lapack_function set(COMMON_OP_DEPS ${COMMON_OP_DEPS} phi)
lod_tensor maxouting unpooling pooling lod_rank_table context_project set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_utils
sequence_pooling executor generator static_prim_api) lod_tensor unpooling lod_rank_table context_project executor static_prim_api)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc static_prim_api static_utils static_global_utils prim_utils)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel_helper concat_and_split cross_entropy softmax vol2col im2col sampler sample_prob tree2col) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} cos_sim_functor memory concat_and_split sampler sample_prob tree2col)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc_functor matrix_inverse matrix_solve) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} beam_search)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper ps_gpu_wrapper)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} processgroup_comm_utils)
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl) set(COMMON_OP_DEPS ${COMMON_OP_DEPS} process_group_nccl)
...@@ -189,7 +188,7 @@ endif() ...@@ -189,7 +188,7 @@ endif()
copy_if_different(${pybind_file} ${pybind_file_final}) copy_if_different(${pybind_file} ${pybind_file_final})
if (WITH_CUSTOM_DEVICE) if (WITH_CUSTOM_DEVICE)
cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi_api) cc_library(custom_device_common_op_registry SRCS custom_device_common_op_registry.cc DEPS operator phi type_info)
endif() endif()
if(NOT "${OP_LIST}" STREQUAL "") if(NOT "${OP_LIST}" STREQUAL "")
......
...@@ -7,7 +7,7 @@ cc_library( ...@@ -7,7 +7,7 @@ cc_library(
cc_library( cc_library(
cinn_launch_context cinn_launch_context
SRCS cinn_launch_context.cc SRCS cinn_launch_context.cc
DEPS ddim DEPS phi
lod_tensor lod_tensor
scope scope
proto_desc proto_desc
......
...@@ -18,7 +18,7 @@ foreach(src ${OPS}) ...@@ -18,7 +18,7 @@ foreach(src ${OPS})
endforeach() endforeach()
if(WITH_GLOO) if(WITH_GLOO)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper comm_context_manager) set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} gloo_wrapper phi)
endif() endif()
register_operators( register_operators(
...@@ -31,8 +31,7 @@ register_operators( ...@@ -31,8 +31,7 @@ register_operators(
${COLLECTIVE_DEPS}) ${COLLECTIVE_DEPS})
if(WITH_NCCL OR WITH_RCCL) if(WITH_NCCL OR WITH_RCCL)
set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper set(COLLECTIVE_DEPS ${COLLECTIVE_DEPS} nccl_common collective_helper phi)
comm_context_manager nccl_comm_context)
op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(c_gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS}) op_library(gen_nccl_id_op DEPS ${COLLECTIVE_DEPS})
endif() endif()
......
...@@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc ...@@ -51,8 +51,8 @@ detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc
detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc) detection_library(rpn_target_assign_op SRCS rpn_target_assign_op.cc)
detection_library(generate_proposal_labels_op SRCS detection_library(generate_proposal_labels_op SRCS
generate_proposal_labels_op.cc) generate_proposal_labels_op.cc)
detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS gpc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc DEPS phi)
detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS gpc) detection_library(locality_aware_nms_op SRCS locality_aware_nms_op.cc DEPS phi)
detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu) detection_library(box_clip_op SRCS box_clip_op.cc box_clip_op.cu)
detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc detection_library(box_decoder_and_assign_op SRCS box_decoder_and_assign_op.cc
box_decoder_and_assign_op.cu) box_decoder_and_assign_op.cu)
......
...@@ -289,7 +289,7 @@ file(APPEND ${op_utils_header} ...@@ -289,7 +289,7 @@ file(APPEND ${op_utils_header}
# Automatically generate the registration code of all arg map functions # Automatically generate the registration code of all arg map functions
# and compile the corresponding target to avoid frequent code conflicts # and compile the corresponding target to avoid frequent code conflicts
# when writing to the same file # when writing to the same file
register_op_utils(op_compat_infos DEPS op_utils) register_op_utils(op_compat_infos DEPS phi)
copy_if_different(${op_utils_header} ${op_utils_header_final}) copy_if_different(${op_utils_header} ${op_utils_header_final})
......
...@@ -17,11 +17,12 @@ limitations under the License. */ ...@@ -17,11 +17,12 @@ limitations under the License. */
#include <memory> #include <memory>
#include <string> #include <string>
#include "paddle/phi/core/flags.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h"
#include "paddle/phi/kernels/funcs/detail/gru_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_kernel.h"
DECLARE_int32(paddle_num_threads); PHI_DECLARE_int32(paddle_num_threads);
namespace paddle { namespace paddle {
namespace operators { namespace operators {
......
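A minimal sketch of the flag migration above, assuming PHI_DECLARE_int32 keeps the usual gflags-style FLAGS_<name> accessor (the unchanged uses of the flag elsewhere suggest it does):

#include "paddle/phi/core/flags.h"

PHI_DECLARE_int32(paddle_num_threads);  // declaration only; the flag itself is defined inside phi

static int ConfiguredThreadCount() {  // hypothetical reader, for illustration
  return FLAGS_paddle_num_threads;
}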
...@@ -6,21 +6,20 @@ if(WITH_XPU) ...@@ -6,21 +6,20 @@ if(WITH_XPU)
endif() endif()
# please add new math_library in alphabetical order # please add new math_library in alphabetical order
math_library(concat_and_split DEPS concat_and_split_functor) math_library(concat_and_split DEPS phi)
math_library(context_project DEPS im2col math_function) math_library(context_project DEPS phi)
math_library(cos_sim_functor) math_library(cos_sim_functor)
math_library(depthwise_conv) math_library(depthwise_conv)
math_library(sample_prob) math_library(sample_prob)
math_library(sampler DEPS generator) math_library(sampler DEPS phi)
# math_library(math_function DEPS blas dense_tensor tensor)
if(WITH_XPU) if(WITH_XPU)
math_library(beam_search DEPS math_function beam_search_xpu) math_library(beam_search DEPS phi beam_search_xpu)
else() else()
math_library(beam_search DEPS math_function) math_library(beam_search DEPS phi)
endif() endif()
math_library(unpooling) math_library(unpooling)
math_library(prelu) math_library(prelu)
math_library(bert_encoder_functor) math_library(bert_encoder_functor)
math_library(tree2col DEPS math_function) math_library(tree2col DEPS phi)
...@@ -20,7 +20,7 @@ if(WITH_ARM_BRPC) ...@@ -20,7 +20,7 @@ if(WITH_ARM_BRPC)
framework_proto framework_proto
sendrecv_rpc sendrecv_rpc
arm_brpc arm_brpc
gflags phi
glog glog
snappy snappy
device_context) device_context)
...@@ -42,7 +42,7 @@ else() ...@@ -42,7 +42,7 @@ else()
ssl ssl
crypto crypto
protobuf protobuf
gflags phi
glog glog
zlib zlib
snappy snappy
......
...@@ -6,5 +6,5 @@ endif() ...@@ -6,5 +6,5 @@ endif()
register_operators() register_operators()
if(WITH_UNITY_BUILD) if(WITH_UNITY_BUILD)
target_link_libraries(paddle_operators_sequence_ops_unity sequence_pooling) target_link_libraries(paddle_operators_sequence_ops_unity phi)
endif() endif()
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -6,9 +6,9 @@ cc_library( ...@@ -6,9 +6,9 @@ cc_library(
cc_test( cc_test(
errors_test errors_test
SRCS errors_test.cc SRCS errors_test.cc
DEPS errors enforce) DEPS phi enforce)
set(enforce_deps flags errors flags phi_enforce) set(enforce_deps phi)
if(WITH_GPU) if(WITH_GPU)
set(enforce_deps ${enforce_deps} external_error_proto) set(enforce_deps ${enforce_deps} external_error_proto)
endif() endif()
...@@ -26,20 +26,20 @@ cc_test( ...@@ -26,20 +26,20 @@ cc_test(
cc_test( cc_test(
cpu_info_test cpu_info_test
SRCS cpu_info_test.cc SRCS cpu_info_test.cc
DEPS phi_backends) DEPS phi)
cc_test( cc_test(
os_info_test os_info_test
SRCS os_info_test.cc SRCS os_info_test.cc
DEPS phi_os_info) DEPS phi)
cc_library( cc_library(
place place
SRCS place.cc SRCS place.cc
DEPS enforce phi_place) DEPS enforce phi)
cc_test( cc_test(
place_test place_test
SRCS place_test.cc SRCS place_test.cc
DEPS place glog gflags) DEPS place glog phi)
if(WITH_MKLDNN) if(WITH_MKLDNN)
set(MKLDNN_CTX_DEPS mkldnn) set(MKLDNN_CTX_DEPS mkldnn)
...@@ -104,7 +104,7 @@ endif() ...@@ -104,7 +104,7 @@ endif()
cc_library( cc_library(
init init
SRCS init.cc SRCS init.cc
DEPS device_context custom_kernel context_pool memcpy) DEPS device_context phi memcpy)
# memcpy depends on device_context, here add deps individually for # memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies # avoiding cycle dependencies
...@@ -117,7 +117,6 @@ cc_library( ...@@ -117,7 +117,6 @@ cc_library(
xxhash xxhash
${STREAM_CALLBACK_DEPS} ${STREAM_CALLBACK_DEPS}
place place
phi_place
eigen3 eigen3
cpu_helper cpu_helper
framework_proto framework_proto
...@@ -126,12 +125,8 @@ cc_library( ...@@ -126,12 +125,8 @@ cc_library(
${MKLDNN_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} ${dgc_deps}
dlpack dlpack
cudnn_workspace_helper phi
${XPU_CTX_DEPS} ${XPU_CTX_DEPS})
phi_backends
phi_device_context
generator
phi_enforce)
cc_library( cc_library(
collective_helper collective_helper
...@@ -189,12 +184,12 @@ if(WITH_GPU) ...@@ -189,12 +184,12 @@ if(WITH_GPU)
cuda_graph_with_memory_pool cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context DEPS ${DEVICE_EVENT_LIBS} device_event_custom_device device_context
allocator phi_backends) allocator phi)
else() else()
nv_library( nv_library(
cuda_graph_with_memory_pool cuda_graph_with_memory_pool
SRCS cuda_graph_with_memory_pool.cc SRCS cuda_graph_with_memory_pool.cc
DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi_backends) DEPS ${DEVICE_EVENT_LIBS} device_context allocator phi)
endif() endif()
nv_test( nv_test(
device_context_test device_context_test
...@@ -245,7 +240,7 @@ cc_test( ...@@ -245,7 +240,7 @@ cc_test(
cc_library( cc_library(
lodtensor_printer lodtensor_printer
SRCS lodtensor_printer.cc SRCS lodtensor_printer.cc
DEPS ddim DEPS phi
place place
tensor tensor
scope scope
...@@ -263,41 +258,30 @@ if(WITH_GPU) ...@@ -263,41 +258,30 @@ if(WITH_GPU)
nv_library( nv_library(
profiler profiler
SRCS profiler.cc profiler.cu SRCS profiler.cc profiler.cu
DEPS phi_os_info DEPS phi
phi_device_tracer
gpu_info gpu_info
enforce enforce
dynload_cuda dynload_cuda
new_profiler new_profiler
stats stats
op_proto_maker op_proto_maker
shape_inference shape_inference)
phi_profiler)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_library( hip_library(
profiler profiler
SRCS profiler.cc profiler.cu SRCS profiler.cc profiler.cu
DEPS phi_os_info DEPS phi
phi_device_tracer
gpu_info gpu_info
enforce enforce
new_profiler new_profiler
stats stats
op_proto_maker op_proto_maker
shape_inference shape_inference)
phi_profiler)
else() else()
cc_library( cc_library(
profiler profiler
SRCS profiler.cc SRCS profiler.cc
DEPS phi_os_info DEPS phi enforce new_profiler stats op_proto_maker shape_inference)
phi_device_tracer
enforce
new_profiler
stats
op_proto_maker
shape_inference
phi_profiler)
endif() endif()
cc_test( cc_test(
...@@ -333,7 +317,7 @@ if(WITH_GPU) ...@@ -333,7 +317,7 @@ if(WITH_GPU)
nv_test( nv_test(
test_limit_gpu_memory test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags) DEPS gpu_info phi)
nv_library( nv_library(
cuda_device_guard cuda_device_guard
SRCS cuda_device_guard.cc SRCS cuda_device_guard.cc
...@@ -348,7 +332,7 @@ if(WITH_ROCM) ...@@ -348,7 +332,7 @@ if(WITH_ROCM)
hip_test( hip_test(
test_limit_gpu_memory test_limit_gpu_memory
SRCS test_limit_gpu_memory.cu SRCS test_limit_gpu_memory.cu
DEPS gpu_info flags) DEPS gpu_info phi)
hip_library( hip_library(
cuda_device_guard cuda_device_guard
SRCS cuda_device_guard.cc SRCS cuda_device_guard.cc
...@@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32) ...@@ -360,7 +344,7 @@ if(NOT APPLE AND NOT WIN32)
cc_test( cc_test(
device_code_test device_code_test
SRCS device_code_test.cc SRCS device_code_test.cc
DEPS phi_backends lod_tensor) DEPS phi lod_tensor)
endif() endif()
endif() endif()
...@@ -382,4 +366,4 @@ cc_library( ...@@ -382,4 +366,4 @@ cc_library(
cc_test( cc_test(
init_phi_test init_phi_test
SRCS init_phi_test.cc SRCS init_phi_test.cc
DEPS phi_tensor init_phi) DEPS phi init_phi)
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
#include <omp.h> #include <omp.h>
#include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/backends/dynload/mklml.h"
#endif #endif
#ifdef PADDLE_USE_OPENBLAS #ifdef PADDLE_USE_OPENBLAS
...@@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) { ...@@ -40,7 +40,7 @@ void SetNumThreads(int num_threads) {
openblas_set_num_threads(real_num_threads); openblas_set_num_threads(real_num_threads);
#elif defined(PADDLE_WITH_MKLML) #elif defined(PADDLE_WITH_MKLML)
int real_num_threads = num_threads > 1 ? num_threads : 1; int real_num_threads = num_threads > 1 ? num_threads : 1;
platform::dynload::MKL_Set_Num_Threads(real_num_threads); phi::dynload::MKL_Set_Num_Threads(real_num_threads);
omp_set_num_threads(real_num_threads); omp_set_num_threads(real_num_threads);
#elif defined(PADDLE_USE_REFERENCE_CBLAS) #elif defined(PADDLE_USE_REFERENCE_CBLAS)
// cblas not support multi-thread // cblas not support multi-thread
......
...@@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE) ...@@ -2,9 +2,9 @@ if(WITH_CUSTOM_DEVICE)
cc_library( cc_library(
custom_device_resource_pool custom_device_resource_pool
SRCS custom_device_resource_pool.cc SRCS custom_device_resource_pool.cc
DEPS gflags glog enforce monitor) DEPS phi glog enforce monitor)
cc_test( cc_test(
custom_device_test custom_device_test
SRCS custom_device_test.cc SRCS custom_device_test.cc
DEPS phi_tensor_utils phi_backends phi_device_context gradient_accumulator) DEPS phi gradient_accumulator)
endif() endif()
...@@ -3,13 +3,7 @@ if(WITH_GPU) ...@@ -3,13 +3,7 @@ if(WITH_GPU)
nv_library( nv_library(
gpu_info gpu_info
SRCS gpu_info.cc SRCS gpu_info.cc
DEPS phi_backends DEPS phi glog enforce monitor dynload_cuda malloc)
gflags
glog
enforce
monitor
dynload_cuda
malloc)
nv_test(cuda_helper_test SRCS cuda_helper_test.cu) nv_test(cuda_helper_test SRCS cuda_helper_test.cu)
nv_test( nv_test(
...@@ -21,7 +15,7 @@ elseif(WITH_ROCM) ...@@ -21,7 +15,7 @@ elseif(WITH_ROCM)
hip_library( hip_library(
gpu_info gpu_info
SRCS gpu_info.cc SRCS gpu_info.cc
DEPS phi_backends gflags glog enforce monitor dynload_cuda) DEPS phi glog enforce monitor dynload_cuda)
hip_test(cuda_helper_test SRCS cuda_helper_test.cu) hip_test(cuda_helper_test SRCS cuda_helper_test.cu)
hip_test( hip_test(
......
...@@ -14,23 +14,11 @@ set(XPU_CTX_DEPS ...@@ -14,23 +14,11 @@ set(XPU_CTX_DEPS
cc_library( cc_library(
xpu_info xpu_info
SRCS xpu_info.cc SRCS xpu_info.cc
DEPS gflags DEPS glog enforce xpulib device_context place phi)
glog
enforce
xpulib
device_context
place
phi_backends)
cc_library( cc_library(
xpu_op_list xpu_op_list
SRCS xpu_op_list.cc SRCS xpu_op_list.cc
DEPS gflags DEPS glog enforce xpulib device_context op_kernel_type phi)
glog
enforce
xpulib
device_context
op_kernel_type
phi_backends)
cc_library( cc_library(
xpu_resource_pool xpu_resource_pool
SRCS xpu_resource_pool.cc SRCS xpu_resource_pool.cc
......
cc_library( cc_library(
dynamic_loader dynamic_loader
SRCS dynamic_loader.cc SRCS dynamic_loader.cc
DEPS glog gflags enforce phi_dynamic_loader) DEPS glog enforce phi)
list( list(
APPEND APPEND
...@@ -57,26 +57,20 @@ if(WITH_ROCM) ...@@ -57,26 +57,20 @@ if(WITH_ROCM)
hip_library( hip_library(
dynload_cuda dynload_cuda
SRCS ${HIP_SRCS} SRCS ${HIP_SRCS}
DEPS dynamic_loader phi_dynload_cuda) DEPS dynamic_loader phi)
cc_library( cc_library(
dynload_warpctc dynload_warpctc
SRCS warpctc.cc SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc) DEPS dynamic_loader warpctc phi)
else() else()
nv_library( nv_library(
dynload_cuda dynload_cuda
SRCS ${CUDA_SRCS} SRCS ${CUDA_SRCS}
DEPS dynamic_loader phi_dynload_cuda) DEPS dynamic_loader phi)
cc_library( cc_library(
dynload_warpctc dynload_warpctc
SRCS warpctc.cc SRCS warpctc.cc
DEPS dynamic_loader warpctc phi_dynload_warpctc) DEPS dynamic_loader warpctc phi)
endif()
if(WITH_MKLML)
cc_library(
dynload_mklml
SRCS mklml.cc
DEPS dynamic_loader mklml phi_dynload_mklml)
endif() endif()
# TODO(TJ): add iomp, mkldnn? # TODO(TJ): add iomp, mkldnn?
...@@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL) ...@@ -86,6 +80,6 @@ if(MKL_FOUND AND WITH_ONEMKL)
cc_library( cc_library(
dynload_mklrt dynload_mklrt
SRCS mklrt.cc SRCS mklrt.cc
DEPS dynamic_loader phi_dynload_mklrt) DEPS dynamic_loader phi)
target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE}) target_include_directories(dynload_mklrt PRIVATE ${MKL_INCLUDE})
endif() endif()
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <mkl.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/mklml.h"
namespace paddle {
namespace platform {
namespace dynload {
/**
 * The following macro definition generates a struct
 * (for each function) to dynamically load the mklml routine
 * via operator overloading.
*/
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
DYNAMIC_LOAD_MKLML_WRAP(__name)
#define MKLML_ROUTINE_EACH(__macro) \
__macro(cblas_sgemm); \
__macro(cblas_dgemm); \
__macro(cblas_cgemm); \
__macro(cblas_zgemm); \
__macro(cblas_saxpy); \
__macro(cblas_daxpy); \
__macro(cblas_caxpy); \
__macro(cblas_zaxpy); \
__macro(cblas_scopy); \
__macro(cblas_dcopy); \
__macro(cblas_ccopy); \
__macro(cblas_zcopy); \
__macro(cblas_sgemv); \
__macro(cblas_dgemv); \
__macro(cblas_cgemv); \
__macro(cblas_zgemv); \
__macro(cblas_strsm); \
__macro(cblas_dtrsm); \
__macro(cblas_ctrsm); \
__macro(cblas_ztrsm); \
__macro(cblas_sgemm_alloc); \
__macro(cblas_dgemm_alloc); \
__macro(cblas_sgemm_pack); \
__macro(cblas_dgemm_pack); \
__macro(cblas_sgemm_compute); \
__macro(cblas_dgemm_compute); \
__macro(cblas_sgemm_free); \
__macro(cblas_dgemm_free); \
__macro(cblas_sgemm_batch); \
__macro(cblas_dgemm_batch); \
__macro(cblas_cgemm_batch); \
__macro(cblas_zgemm_batch); \
__macro(cblas_sdot); \
__macro(cblas_ddot); \
__macro(cblas_sasum); \
__macro(cblas_dasum); \
__macro(cblas_isamax); \
__macro(cblas_idamax); \
__macro(cblas_sscal); \
__macro(cblas_dscal); \
__macro(vsAdd); \
__macro(vdAdd); \
__macro(vsSub); \
__macro(vdSub); \
__macro(vsMul); \
__macro(vdMul); \
__macro(vsDiv); \
__macro(vdDiv); \
__macro(vsExp); \
__macro(vdExp); \
__macro(vsSqr); \
__macro(vdSqr); \
__macro(vsPowx); \
__macro(vdPowx); \
__macro(vsInv); \
__macro(vdInv); \
__macro(vmsErf); \
__macro(vmdErf); \
__macro(MKL_Free_Buffers); \
__macro(MKL_Set_Num_Threads); \
__macro(MKL_Get_Max_Threads);
MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm);
#endif
#undef DYNAMIC_LOAD_MKLML_WRAP
} // namespace dynload
} // namespace platform
} // namespace paddle
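A hedged usage sketch of the compatibility header above: legacy call sites keep the paddle::platform::dynload spelling, while the aliased phi::dynload structs do the actual loading.

#include "paddle/fluid/platform/dynload/mklml.h"  // the wrapper defined above

void UseSingleMklThread() {  // hypothetical caller, for illustration
  // Resolves to the same dynamically loaded symbol as phi::dynload::MKL_Set_Num_Threads.
  paddle::platform::dynload::MKL_Set_Num_Threads(1);
}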
...@@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler, ...@@ -40,6 +40,22 @@ PADDLE_DEFINE_EXPORTED_bool(enable_rpc_profiler,
DEFINE_bool(enable_record_memory, false, "enable memory recorder"); DEFINE_bool(enable_record_memory, false, "enable memory recorder");
#if defined(_WIN32) && defined(PHI_SHARED)
phi::ProfilerState phi::ProfilerHelper::g_state = phi::ProfilerState::kDisabled;
bool phi::ProfilerHelper::g_enable_nvprof_hook = false;
thread_local uint64_t phi::ProfilerHelper::g_thread_id;
uint32_t phi::ProfilerHelper::g_next_thread_id = 0;
std::mutex phi::ProfilerHelper::g_all_event_lists_mutex;
std::list<std::shared_ptr<phi::EventList<phi::Event>>>
phi::ProfilerHelper::g_all_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::Event>>
phi::ProfilerHelper::g_event_list;
std::list<std::shared_ptr<phi::EventList<phi::MemEvent>>>
phi::ProfilerHelper::g_all_mem_event_lists;
thread_local std::shared_ptr<phi::EventList<phi::MemEvent>>
phi::ProfilerHelper::g_mem_event_list;
std::mutex phi::ProfilerHelper::g_all_mem_event_lists_mutex;
#endif
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
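The guarded block above defines the phi::ProfilerHelper statics in this module when phi is built as a Windows DLL. A generic sketch of that pattern, with hypothetical names (not the real phi classes):

#ifndef PADDLE_API          // stand-in so the sketch is self-contained
#define PADDLE_API
#endif

class PADDLE_API ExampleHelper {
 public:
  static int g_counter;     // static data member of an exported class
};

#if defined(_WIN32) && defined(PHI_SHARED)
int ExampleHelper::g_counter = 0;  // definition supplied by the importing module
#endif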
cc_library( cc_library(
host_tracer host_tracer
SRCS host_tracer.cc SRCS host_tracer.cc
DEPS framework_proto enforce ddim var_type_traits) DEPS framework_proto enforce phi var_type_traits)
cc_library( cc_library(
cuda_tracer cuda_tracer
SRCS cuda_tracer.cc cupti_data_process.cc SRCS cuda_tracer.cc cupti_data_process.cc
...@@ -28,7 +28,7 @@ cc_library( ...@@ -28,7 +28,7 @@ cc_library(
cc_library( cc_library(
cpu_utilization cpu_utilization
SRCS cpu_utilization.cc SRCS cpu_utilization.cc
DEPS phi_backends phi_os_info enforce glog) DEPS phi enforce glog)
cc_library( cc_library(
new_profiler new_profiler
SRCS profiler.cc SRCS profiler.cc
......
...@@ -28,7 +28,6 @@ set(PYBIND_DEPS ...@@ -28,7 +28,6 @@ set(PYBIND_DEPS
gloo_wrapper gloo_wrapper
infer_io_utils infer_io_utils
heter_wrapper heter_wrapper
generator
op_version_registry op_version_registry
ps_gpu_wrapper ps_gpu_wrapper
custom_operator custom_operator
...@@ -37,16 +36,13 @@ set(PYBIND_DEPS ...@@ -37,16 +36,13 @@ set(PYBIND_DEPS
fleet_executor fleet_executor
global_utils global_utils
phi_utils phi_utils
tcp_store phi
comm_context_manager
new_profiler new_profiler
auto_parallel
jit_layer jit_layer
jit_property jit_property
prim_utils prim_utils
operants_manager static_tensor_operants
phi_tensor_operants type_info)
static_tensor_operants)
if(WITH_PSCORE) if(WITH_PSCORE)
set(PYBIND_DEPS ${PYBIND_DEPS} ps_service) set(PYBIND_DEPS ${PYBIND_DEPS} ps_service)
...@@ -65,7 +61,7 @@ if(WITH_RPC) ...@@ -65,7 +61,7 @@ if(WITH_RPC)
zlib zlib
leveldb leveldb
snappy snappy
gflags phi
glog) glog)
endif() endif()
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
...@@ -148,7 +144,6 @@ set(PYBIND_SRCS ...@@ -148,7 +144,6 @@ set(PYBIND_SRCS
auto_parallel_py.cc) auto_parallel_py.cc)
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
set(PYBIND_DEPS ${PYBIND_DEPS} phi_capi)
set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry) set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
endif() endif()
...@@ -334,6 +329,14 @@ if(WITH_PYTHON) ...@@ -334,6 +329,14 @@ if(WITH_PYTHON)
")\n" ")\n"
"exit /b 0") "exit /b 0")
if(WITH_PHI_SHARED)
add_custom_command(
OUTPUT ${op_impl_path}/phi.dll
COMMAND ${CMAKE_COMMAND} -E copy ${PHI_LIB} ${op_impl_path}
DEPENDS phi)
list(APPEND EAGER_OP_IMPL_DEPS ${op_impl_path}/libiomp5md.dll)
endif()
if(${CBLAS_PROVIDER} STREQUAL MKLML) if(${CBLAS_PROVIDER} STREQUAL MKLML)
add_custom_command( add_custom_command(
OUTPUT ${op_impl_path}/libiomp5md.dll OUTPUT ${op_impl_path}/libiomp5md.dll
...@@ -481,10 +484,8 @@ if(WITH_PYTHON) ...@@ -481,10 +484,8 @@ if(WITH_PYTHON)
list(APPEND PYBIND_DEPS python) list(APPEND PYBIND_DEPS python)
list(APPEND PYBIND_DEPS custom_operator) list(APPEND PYBIND_DEPS custom_operator)
list(APPEND PYBIND_DEPS custom_operator_node) list(APPEND PYBIND_DEPS custom_operator_node)
list(APPEND PYBIND_DEPS tensor_api)
list(APPEND PYBIND_DEPS eager_tensor_operants) list(APPEND PYBIND_DEPS eager_tensor_operants)
list(APPEND PYBIND_DEPS pybind_util) list(APPEND PYBIND_DEPS pybind_util)
list(APPEND PYBIND_DEPS flags)
endif() endif()
# On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so, # On Linux, cc_library(paddle SHARED ..) will generate the libpaddle.so,
......
...@@ -38,7 +38,9 @@ limitations under the License. */ ...@@ -38,7 +38,9 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h" #include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
DECLARE_bool(check_nan_inf); #include "paddle/phi/core/flags.h"
PHI_DECLARE_bool(check_nan_inf);
namespace paddle { namespace paddle {
namespace pybind { namespace pybind {
......
...@@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h) ...@@ -3,6 +3,15 @@ configure_file(config.h.in ${CMAKE_CURRENT_SOURCE_DIR}/config.h)
# phi auto cmake utils # phi auto cmake utils
include(phi) include(phi)
set(common_srcs CACHE INTERNAL "" FORCE)
set(api_srcs CACHE INTERNAL "" FORCE)
set(capi_srcs CACHE INTERNAL "" FORCE)
set(core_srcs CACHE INTERNAL "" FORCE)
set(backends_srcs CACHE INTERNAL "" FORCE)
set(kernels_srcs CACHE INTERNAL "" FORCE)
set(infermeta_srcs CACHE INTERNAL "" FORCE)
#set(excluded_srcs CACHE INTERNAL "" FORCE)
# paddle experimental common components # paddle experimental common components
add_subdirectory(common) add_subdirectory(common)
...@@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE) ...@@ -24,29 +33,153 @@ if(WITH_CUSTOM_DEVICE)
add_subdirectory(capi) add_subdirectory(capi)
endif() endif()
# make a unity target for compile deps
set(PHI_DEPS set(PHI_DEPS
convert_utils phi_profiler_proto
dense_tensor auto_parallel_proto
phi_backends gflags
kernel_factory glog
kernel_context warpctc
arg_map_context warprnnt
infermeta eigen3
lod_utils xxhash
sparse_csr_tensor cblas
sparse_coo_tensor utf8proc)
string_tensor
api_scalar if(WITH_GPU)
api_int_array list(APPEND PHI_DEPS external_error_proto)
extended_tensor endif()
dist_attr
dist_mapper) if(WITH_ASCEND_CL)
list(APPEND PHI_DEPS npu_hccl)
get_property(phi_kernels GLOBAL PROPERTY PHI_KERNELS) endif()
set(PHI_DEPS ${PHI_DEPS} ${phi_kernels})
if(WITH_FLASHATTN)
cc_library(phi DEPS ${PHI_DEPS}) list(APPEND PHI_DEPS flashattn)
endif()
if(WITH_XBYAK)
list(APPEND PHI_DEPS xbyak)
endif()
if(WITH_MKLDNN)
list(APPEND PHI_DEPS mkldnn)
endif()
if(WITH_GLOO)
list(APPEND PHI_DEPS gloo)
endif()
if(WITH_CUDNN_FRONTEND)
list(APPEND PHI_DEPS cudnn-frontend)
endif()
if(WITH_POCKETFFT)
list(APPEND PHI_DEPS pocketfft)
endif()
if(WITH_MKLML)
list(APPEND PHI_DEPS pocketfft dynload_mklml)
endif()
if(WITH_XPU)
list(APPEND PHI_DEPS xpulib)
endif()
set(PHI_SRCS
${common_srcs}
${api_srcs}
${core_srcs}
${backends_srcs}
${kernels_srcs}
${infermeta_srcs}
${capi_srcs})
if(WITH_PHI_SHARED)
set(PHI_BUILD_TYPE
SHARED
CACHE INTERNAL "" FORCE)
else()
set(PHI_BUILD_TYPE
STATIC
CACHE INTERNAL "" FORCE)
endif()
if(WITH_GPU)
add_definitions(-DCUDA_REAL_ARCHS=${NVCC_FLAGS_EXTRA_real_archs}
)# for backends/gpu/gpu_resources.cc
nv_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
elseif(WITH_ROCM)
hip_add_library(phi ${PHI_BUILD_TYPE} ${PHI_SRCS})
target_link_libraries(phi ${PHI_DEPS})
elseif(WITH_XPU_KP)
xpu_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
else()
cc_library(
phi ${PHI_BUILD_TYPE}
SRCS ${PHI_SRCS}
DEPS ${PHI_DEPS})
endif()
if(WIN32)
target_link_libraries(phi shlwapi.lib)
endif()
if(WIN32)
if(WITH_PHI_SHARED)
set_property(TARGET phi PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(PHI_NAME
phi.dll
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
phi.lib
CACHE INTERNAL "" FORCE)
endif()
elseif(APPLE)
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.dylib
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
else()
if(WITH_PHI_SHARED)
set(PHI_NAME
libphi.so
CACHE INTERNAL "" FORCE)
else()
set(PHI_NAME
libphi.a
CACHE INTERNAL "" FORCE)
endif()
endif()
set(PHI_LIB
"${CMAKE_CURRENT_BINARY_DIR}/${PHI_NAME}"
CACHE FILEPATH "PHI Library" FORCE)
if(MKL_FOUND AND WITH_ONEMKL)
target_include_directories(phi PRIVATE ${MKL_INCLUDE})
endif()
add_dependencies(phi extern_lapack)
if(WITH_CUTLASS)
add_dependencies(phi cutlass_codegen)
add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION"
)# for memory_efficient_attention.h
endif()
if(WITH_FLASHATTN)
add_dependencies(phi flashattn)
endif()
set(phi_extension_header_file set(phi_extension_header_file
${CMAKE_CURRENT_SOURCE_DIR}/extension.h ${CMAKE_CURRENT_SOURCE_DIR}/extension.h
......
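A rough C++ sketch of what the WITH_PHI_SHARED / PHI_SHARED switch above means for the code: symbols that cross the phi library boundary need an export/import annotation (the real code uses the PADDLE_API macro seen on the class declarations later in this diff). The macro and class names below are hypothetical:

#if defined(_WIN32) && defined(PHI_SHARED)
#define EXAMPLE_API __declspec(dllexport)   // consumers would see dllimport instead
#elif defined(PHI_SHARED)
#define EXAMPLE_API __attribute__((visibility("default")))
#else
#define EXAMPLE_API
#endif

class EXAMPLE_API ExampleRegistry {  // anything used across the boundary is annotated
 public:
  void Register();
};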
add_subdirectory(profiler) add_subdirectory(profiler)
add_subdirectory(lib) add_subdirectory(lib)
cc_library(
phi_api
SRCS all.cc
DEPS phi_function_api phi_bw_function_api sparse_api sparse_bw_api
strings_api)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/api/all.h"
namespace paddle {
namespace experimental {} // namespace experimental
} // namespace paddle
...@@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext { ...@@ -112,9 +112,7 @@ class PADDLE_API CustomOpKernelContext {
void EmplaceBackOutput(Tensor&& output); void EmplaceBackOutput(Tensor&& output);
void EmplaceBackOutputs(const std::vector<Tensor>& outputs); void EmplaceBackOutputs(const std::vector<Tensor>& outputs);
void EmplaceBackAttr(paddle::any attr); void EmplaceBackAttr(paddle::any attr);
void EmplaceBackAttrs(const std::vector<paddle::any>& attrs) { void EmplaceBackAttrs(const std::vector<paddle::any>& attrs);
attrs_ = std::move(attrs);
}
const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const; const std::pair<size_t, size_t>& InputRangeAt(size_t idx) const;
const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const; const std::pair<size_t, size_t>& OutputRangeAt(size_t idx) const;
...@@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext { ...@@ -125,13 +123,9 @@ class PADDLE_API CustomOpKernelContext {
paddle::optional<Tensor> OptionalInputAt(size_t idx); paddle::optional<Tensor> OptionalInputAt(size_t idx);
paddle::optional<std::vector<Tensor>> OptionalInputsBetween(size_t start, paddle::optional<std::vector<Tensor>> OptionalInputsBetween(size_t start,
size_t end); size_t end);
const std::vector<paddle::any>& Attrs() const { return attrs_; } const std::vector<paddle::any>& Attrs() const;
const std::vector<std::pair<size_t, size_t>>& InputRange() { const std::vector<std::pair<size_t, size_t>>& InputRange();
return input_range_; const std::vector<std::pair<size_t, size_t>>& OutputRange();
}
const std::vector<std::pair<size_t, size_t>>& OutputRange() {
return output_range_;
}
Tensor* MutableOutputAt(size_t idx); Tensor* MutableOutputAt(size_t idx);
std::vector<Tensor*> MutableOutputBetween(size_t start, size_t end); std::vector<Tensor*> MutableOutputBetween(size_t start, size_t end);
std::vector<Tensor> OutputsBetween(size_t start, size_t end); std::vector<Tensor> OutputsBetween(size_t start, size_t end);
...@@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo { ...@@ -811,38 +805,20 @@ class PADDLE_API OpMetaInfo {
//////////////// Op Meta Info Helper ///////////////// //////////////// Op Meta Info Helper /////////////////
class OpMetaInfoHelper { class OpMetaInfoHelper {
public: public:
static const std::string& GetOpName(const paddle::OpMetaInfo& info) { static const std::string& GetOpName(const paddle::OpMetaInfo& info);
return info.name_;
}
static const std::vector<std::string>& GetInputs( static const std::vector<std::string>& GetInputs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.inputs_;
}
static const std::vector<std::string>& GetOutputs( static const std::vector<std::string>& GetOutputs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.outputs_;
}
static const std::vector<std::string>& GetAttrs( static const std::vector<std::string>& GetAttrs(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.attrs_;
}
static const std::unordered_map<std::string, std::string>& GetInplaceMap( static const std::unordered_map<std::string, std::string>& GetInplaceMap(
const paddle::OpMetaInfo& info) { const paddle::OpMetaInfo& info);
return info.inplace_map_;
}
static const std::unordered_map<std::string, std::string>& static const std::unordered_map<std::string, std::string>&
GetInplaceReverseMap(const paddle::OpMetaInfo& info) { GetInplaceReverseMap(const paddle::OpMetaInfo& info);
return info.inplace_reverse_map_; static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info);
} static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info);
static const KernelFunc& GetKernelFn(const paddle::OpMetaInfo& info) { static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info);
return info.kernel_fn_;
}
static const InferShapeFunc& GetInferShapeFn(const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
static const InferDtypeFunc& GetInferDtypeFn(const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
}; };
//////////////// Op Meta Info Map ///////////////// //////////////// Op Meta Info Map /////////////////
......
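The accessor bodies above move out of the header, so with PHI_SHARED their definitions live inside the phi library instead of being inlined into every consumer; call sites are unchanged. A minimal hypothetical caller:

#include "paddle/phi/api/ext/op_meta_info.h"  // assumed header for the classes above

void PrintOpSummary(const paddle::OpMetaInfo& info) {  // illustration only
  const std::string& name = paddle::OpMetaInfoHelper::GetOpName(info);
  const std::vector<std::string>& inputs = paddle::OpMetaInfoHelper::GetInputs(info);
  (void)name;    // the call sites read exactly as they did before this change
  (void)inputs;
}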
...@@ -410,7 +410,7 @@ class PADDLE_API Tensor final { ...@@ -410,7 +410,7 @@ class PADDLE_API Tensor final {
* *
* @return const std::string& * @return const std::string&
*/ */
const std::string& name() const { return name_; } const std::string& name() const;
/** /**
* @brief Set name of Tensor. * @brief Set name of Tensor.
...@@ -419,7 +419,7 @@ class PADDLE_API Tensor final { ...@@ -419,7 +419,7 @@ class PADDLE_API Tensor final {
* *
* @param const std::string& name * @param const std::string& name
*/ */
void set_name(const std::string& name) { name_ = name; } void set_name(const std::string& name);
/* Part 5: Data Transform methods */ /* Part 5: Data Transform methods */
/* Alert!!!!: All copy method can only deep copy impl, autograd info only be /* Alert!!!!: All copy method can only deep copy impl, autograd info only be
......
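Tensor::name/set_name follow the same pattern: only the definition location changes, so usage is identical before and after. A hedged sketch:

#include "paddle/phi/api/include/tensor.h"  // assumed header for paddle::Tensor

void TagTensor(paddle::Tensor* t) {  // illustration only
  t->set_name("weight");             // now defined in tensor.cc rather than inline
  const std::string& n = t->name();
  (void)n;
}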
if(WITH_GPU)
nv_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
elseif(WITH_ROCM)
hip_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
else()
cc_library(
phi_tensor_raw
SRCS tensor.cc
DEPS tensor_base
dense_tensor
phi_enforce
context_pool
tensor_api
int_array
scalar)
endif()
set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py) set(api_gen_base ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator/api_base.py)
# forward api file # forward api file
...@@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND) ...@@ -157,157 +122,77 @@ if(NOT PYTHONINTERP_FOUND)
find_package(PythonInterp REQUIRED) find_package(PythonInterp REQUIRED)
endif() endif()
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
# generate forward api # generate forward api
add_custom_command( execute_process(
OUTPUT ${api_header_file} ${api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp} ${legacy_api_yaml_file} --api_header_path ${api_header_file_tmp}
--api_source_path ${api_source_file_tmp} --api_source_path ${api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp}
${api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp}
${api_source_file}
COMMENT "copy_if_different ${api_header_file} ${api_source_file}"
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file}
${api_gen_base}
VERBATIM)
# generate backward api # generate backward api
add_custom_command( execute_process(
OUTPUT ${bw_api_header_file} ${bw_api_source_file} ${bw_api_header_file_tmp}
${bw_api_source_file_tmp}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path ${PYTHON_EXECUTABLE} ${bw_api_gen_file} --backward_yaml_path
${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} --backward_header_path
${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp} ${bw_api_header_file_tmp} --backward_source_path ${bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_header_file_tmp}
${bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp}
${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
${legacy_bw_api_yaml_file}
VERBATIM)
# generate fused_op api # generate fused_op api
add_custom_command( execute_process(
OUTPUT ${fused_api_header_file} ${fused_api_source_file}
COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file} ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${fused_api_yaml_file}
--is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp} --is_fused_ops_yaml --api_header_path ${fused_api_header_file_tmp}
--api_source_path ${fused_api_source_file_tmp} --api_source_path ${fused_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_header_file_tmp}
${fused_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_api_source_file_tmp}
${fused_api_source_file}
COMMENT "copy_if_different ${fused_api_header_file} ${fused_api_source_file}"
DEPENDS ${fused_api_yaml_file} ${api_gen_file} ${api_gen_base}
VERBATIM)
# generate fused_op backward api # generate fused_op backward api
add_custom_command( execute_process(
OUTPUT ${fused_bw_api_header_file} ${fused_bw_api_source_file}
${fused_bw_api_header_file_tmp} ${fused_bw_api_source_file_tmp}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path ${PYTHON_EXECUTABLE} ${fused_bw_api_gen_file} --backward_yaml_path
${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path ${fused_bw_api_yaml_file} --is_fused_backward_yaml --backward_header_path
${fused_bw_api_header_file_tmp} --backward_source_path ${fused_bw_api_header_file_tmp} --backward_source_path
${fused_bw_api_source_file_tmp} ${fused_bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_header_file_tmp}
${fused_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${fused_bw_api_source_file_tmp}
${fused_bw_api_source_file}
COMMENT
"copy_if_different ${fused_bw_api_header_file} ${fused_bw_api_source_file}"
DEPENDS ${fused_bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
VERBATIM)
# generate sparse api # generate sparse api
add_custom_command( execute_process(
OUTPUT ${sparse_api_header_file} ${sparse_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${sparse_api_gen_file} --api_yaml_path
${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp} ${sparse_api_yaml_file} --api_header_path ${sparse_api_header_file_tmp}
--api_source_path ${sparse_api_source_file_tmp} --api_source_path ${sparse_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_header_file_tmp}
${sparse_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_api_source_file_tmp}
${sparse_api_source_file}
COMMENT
"copy_if_different ${sparse_api_header_file} ${sparse_sparse_api_source_file}"
DEPENDS ${sparse_api_yaml_file} ${sparse_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
# generate backward sparse api # generate backward sparse api
add_custom_command( execute_process(
OUTPUT ${sparse_bw_api_header_file} ${sparse_bw_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${sparse_bw_api_gen_file} --api_yaml_path
${sparse_bw_api_yaml_file} --api_header_path ${sparse_bw_api_yaml_file} --api_header_path
${sparse_bw_api_header_file_tmp} --api_source_path ${sparse_bw_api_header_file_tmp} --api_source_path
${sparse_bw_api_source_file_tmp} ${sparse_bw_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_header_file_tmp}
${sparse_bw_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${sparse_bw_api_source_file_tmp}
${sparse_bw_api_source_file}
COMMENT
"copy_if_different ${sparse_bw_api_header_file} ${sparse_bw_sparse_api_source_file}"
DEPENDS ${sparse_bw_api_yaml_file} ${sparse_bw_api_gen_file} ${api_gen_base}
${api_gen_file} ${sparse_api_gen_file} ${bw_api_gen_file}
VERBATIM)
# generate strings api # generate strings api
add_custom_command( execute_process(
OUTPUT ${strings_api_header_file} ${strings_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${strings_api_gen_file} --api_yaml_path
${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp} ${strings_api_yaml_file} --api_header_path ${strings_api_header_file_tmp}
--api_source_path ${strings_api_source_file_tmp} --api_source_path ${strings_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_header_file_tmp}
${strings_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${strings_api_source_file_tmp}
${strings_api_source_file}
COMMENT
"copy_if_different ${strings_api_header_file} ${strings_strings_api_source_file}"
DEPENDS ${strings_api_yaml_file} ${strings_api_gen_file} ${api_gen_base}
${api_gen_file}
VERBATIM)
# generate dygraph(intermediate) api # generate dygraph(intermediate) api
add_custom_command( execute_process(
OUTPUT ${dygraph_api_header_file} ${dygraph_api_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file} ${PYTHON_EXECUTABLE} ${im_api_gen_file} --api_yaml_path ${api_yaml_file}
${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file} ${legacy_api_yaml_file} --sparse_api_yaml_path ${sparse_api_yaml_file}
--dygraph_api_header_path ${dygraph_api_header_file_tmp} --dygraph_api_header_path ${dygraph_api_header_file_tmp}
--dygraph_api_source_path ${dygraph_api_source_file_tmp} --dygraph_api_source_path ${dygraph_api_source_file_tmp})
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_header_file_tmp}
${dygraph_api_header_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${dygraph_api_source_file_tmp}
${dygraph_api_source_file}
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${sparse_api_yaml_file}
${im_api_gen_file} ${api_gen_base} ${api_gen_file}
VERBATIM)
# generate wrapped infermeta # generate wrapped infermeta
add_custom_command( execute_process(
OUTPUT ${wrapped_infermeta_header_file} ${wrapped_infermeta_source_file}
COMMAND COMMAND
${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path ${PYTHON_EXECUTABLE} ${wrapped_infermeta_gen_file} --api_yaml_path
${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path ${api_yaml_file} ${legacy_api_yaml_file} --wrapped_infermeta_header_path
${wrapped_infermeta_header_file} --wrapped_infermeta_source_path ${wrapped_infermeta_header_file} --wrapped_infermeta_source_path
${wrapped_infermeta_source_file} ${wrapped_infermeta_source_file})
DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${wrapped_infermeta_gen_file}
${api_gen_base}
VERBATIM)
# generate tensor and tensor operants file # generate tensor and tensor operants file
message("create or copy auto-geneated tensor files") message("create or copy auto-geneated tensor files")
execute_process(COMMAND ${PYTHON_EXECUTABLE} -m pip install pyyaml)
execute_process( execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/phi/api/yaml/generator
COMMAND COMMAND
...@@ -324,154 +209,70 @@ if(${_result}) ...@@ -324,154 +209,70 @@ if(${_result})
message(FATAL_ERROR "tensor codegen failed, exiting.") message(FATAL_ERROR "tensor codegen failed, exiting.")
endif() endif()
set(generated_tensor_files set(generated_files
"${operants_base_file}" "${tensor_api_source_file}" "${operants_base_file}"
"${phi_tensor_operants_header_file}" "${phi_tensor_operants_source_file}" "${tensor_api_source_file}"
"${operants_manager_header_file}" "${operants_manager_source_file}") "${phi_tensor_operants_header_file}"
"${phi_tensor_operants_source_file}"
"${operants_manager_header_file}"
"${operants_manager_source_file}"
"${wrapped_infermeta_source_file}"
"${api_source_file}"
"${api_header_file}"
"${bw_api_source_file}"
"${bw_api_header_file}"
"${fused_api_source_file}"
"${fused_api_header_file}"
"${fused_bw_api_source_file}"
"${fused_bw_api_header_file}"
"${sparse_api_source_file}"
"${sparse_api_header_file}"
"${sparse_bw_api_source_file}"
"${sparse_bw_api_header_file}"
"${dygraph_api_source_file}"
"${dygraph_api_header_file}"
"${strings_api_source_file}"
"${strings_api_header_file}")
foreach(generated_tensor_file ${generated_tensor_files}) foreach(generated_file ${generated_files})
if(EXISTS "${generated_tensor_file}.tmp" AND EXISTS if(EXISTS "${generated_file}.tmp" AND EXISTS "${generated_file}")
"${generated_tensor_file}") execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
execute_process( "${generated_file}.tmp" "${generated_file}")
COMMAND ${CMAKE_COMMAND} -E copy_if_different message("copy if different ${generated_file}.tmp ${generated_file}")
"${generated_tensor_file}.tmp" "${generated_tensor_file}") elseif(EXISTS "${generated_file}.tmp")
message( execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${generated_file}.tmp"
"copy if different ${generated_tensor_file}.tmp ${generated_tensor_file}") "${generated_file}")
elseif(EXISTS "${generated_tensor_file}.tmp") message("copy ${generated_file}.tmp ${generated_file}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E copy "${generated_tensor_file}.tmp"
"${generated_tensor_file}")
message("copy ${generated_tensor_file}.tmp ${generated_tensor_file}")
endif() endif()
endforeach() endforeach()
cc_library( collect_srcs(
op_meta_info api_srcs
SRCS op_meta_info.cc SRCS
DEPS phi_tensor_raw) tensor.cc
cc_library( op_meta_info.cc
wrapped_infermeta context_pool.cc
SRCS ${wrapped_infermeta_source_file} tensor_utils.cc
DEPS phi) kernel_dispatch.cc
cc_library( api_gen_utils.cc
context_pool data_transform.cc
SRCS context_pool.cc api_custom_impl.cc
DEPS phi_backends phi_enforce place init phi_device_context) tensor_method.cc
cc_library( tensor_copy.cc
api_tensor_utils scalar.cc
SRCS tensor_utils.cc int_array.cc)
DEPS phi_tensor_raw) collect_generated_srcs(
api_srcs
cc_library( SRCS
kernel_dispatch ${wrapped_infermeta_source_file}
SRCS kernel_dispatch.cc ${api_source_file}
DEPS phi_tensor_raw phi_backends kernel_factory context_pool) ${bw_api_source_file}
cc_library( ${fused_api_source_file}
api_gen_utils ${fused_bw_api_source_file}
SRCS api_gen_utils.cc ${sparse_api_source_file}
DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor ${sparse_bw_api_source_file}
infermeta_utils) ${dygraph_api_source_file}
cc_library( ${strings_api_source_file}
phi_data_transform ${phi_tensor_operants_source_file}
SRCS data_transform.cc ${operants_manager_source_file}
DEPS phi_tensor_raw phi tensor) ${tensor_api_source_file})
cc_library(
api_custom_impl
SRCS api_custom_impl.cc
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
phi_data_transform
phi_profiler)
cc_library(
phi_function_api
SRCS ${api_source_file} ${fused_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
api_custom_impl
api_tensor_utils
phi_profiler)
cc_library(
phi_bw_function_api
SRCS ${bw_api_source_file} ${fused_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
backward_infermeta
sparse_backward_infermeta
phi_data_transform
phi_function_api
api_custom_impl
global_utils
phi_profiler)
cc_library(
sparse_api
SRCS ${sparse_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
sparse_bw_api
SRCS ${sparse_bw_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
sparse_api
sparse_backward_infermeta
phi_profiler)
cc_library(
phi_dygraph_api
SRCS ${dygraph_api_source_file}
DEPS phi_tensor_raw
phi
kernel_dispatch
api_gen_utils
phi_data_transform
phi_function_api
sparse_api
phi_profiler)
cc_library(
strings_api
SRCS ${strings_api_source_file}
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_profiler)
cc_library(
phi_tensor
SRCS tensor_method.cc
DEPS phi_tensor_raw
phi_function_api
api_gen_utils
kernel_dispatch
infermeta
sparse_infermeta
sparse_api
strings_api)
cc_library(
tensor_copy
SRCS tensor_copy.cc
DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils)
cc_library(
api_scalar
SRCS scalar.cc
DEPS tensor_copy)
cc_library(
api_int_array
SRCS int_array.cc
DEPS tensor_copy)
cc_library(
phi_tensor_operants
SRCS ${phi_tensor_operants_source_file}
DEPS phi_function_api)
cc_library(
operants_manager
SRCS ${operants_manager_source_file}
DEPS phi_enforce)
cc_library(
tensor_api
SRCS ${tensor_api_source_file}
DEPS operants_manager)
...@@ -65,7 +65,8 @@ PADDLE_API phi::Allocator* GetAllocator(const phi::Place& place) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PADDLE_API phi::CUDAStream* GetCurrentCUDAStream(const phi::Place& place) {
-  PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU,
+  PADDLE_ENFORCE_EQ(place.GetType(),
+                    phi::AllocationType::GPU,
                     phi::errors::InvalidArgument(
                         "GetCurrentCUDAStream only supports GPUPlace input. "
                         "However, your input is place=%s",
......
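A pattern that repeats throughout this commit: boolean-style `PADDLE_ENFORCE(cond, ...)` checks are rewritten with the comparison macros (`PADDLE_ENFORCE_EQ`, `PADDLE_ENFORCE_NE`, ...), which take the two operands separately so the generated message can report both values. A minimal sketch of the style, assuming the PHI enforce/error headers; the function and message below are illustrative, not part of this diff:

// Illustrative only: shows the PADDLE_ENFORCE_EQ style used after this change.
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h"

void CheckGpuPlace(const phi::Place& place) {
  // Old style: PADDLE_ENFORCE(place.GetType() == phi::AllocationType::GPU, ...);
  // New style: pass both operands to the comparison macro.
  PADDLE_ENFORCE_EQ(place.GetType(),
                    phi::AllocationType::GPU,
                    phi::errors::InvalidArgument(
                        "This helper only supports GPUPlace input."));
}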
...@@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) { ...@@ -119,6 +119,11 @@ void CustomOpKernelContext::EmplaceBackAttr(paddle::any attr) {
<< " has value of type: " << attrs_[attrs_.size() - 1].type().name(); << " has value of type: " << attrs_[attrs_.size() - 1].type().name();
} }
void CustomOpKernelContext::EmplaceBackAttrs(
const std::vector<paddle::any>& attrs) {
attrs_ = std::move(attrs);
}
const Tensor& CustomOpKernelContext::InputAt(size_t idx) const { const Tensor& CustomOpKernelContext::InputAt(size_t idx) const {
return inputs_.at(idx); return inputs_.at(idx);
} }
...@@ -132,6 +137,10 @@ std::vector<Tensor> CustomOpKernelContext::InputsBetween(size_t start, ...@@ -132,6 +137,10 @@ std::vector<Tensor> CustomOpKernelContext::InputsBetween(size_t start,
return rlt; return rlt;
} }
const std::vector<paddle::any>& CustomOpKernelContext::Attrs() const {
return attrs_;
}
Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) { Tensor& CustomOpKernelContext::MutableInputAt(size_t idx) {
return inputs_.at(idx); return inputs_.at(idx);
} }
...@@ -193,6 +202,16 @@ const std::pair<size_t, size_t>& CustomOpKernelContext::OutputRangeAt( ...@@ -193,6 +202,16 @@ const std::pair<size_t, size_t>& CustomOpKernelContext::OutputRangeAt(
return output_range_.at(idx); return output_range_.at(idx);
} }
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::InputRange() {
return input_range_;
}
const std::vector<std::pair<size_t, size_t>>&
CustomOpKernelContext::OutputRange() {
return output_range_;
}
void CustomOpKernelContext::ConstructInplaceIndex( void CustomOpKernelContext::ConstructInplaceIndex(
const std::vector<std::string>& inputs, const std::vector<std::string>& inputs,
const std::vector<std::string>& outputs, const std::vector<std::string>& outputs,
...@@ -208,8 +227,9 @@ void CustomOpKernelContext::ConstructInplaceIndex(
      continue;
    }
    auto out_iter = find(outputs.begin(), outputs.end(), inplace_map.at(input));
-    PADDLE_ENFORCE(
-        out_iter != outputs.end(),
+    PADDLE_ENFORCE_NE(
+        out_iter,
+        outputs.end(),
        phi::errors::NotFound("Can't find the mapped value of %s, please check "
                              "the input of `Inplace` again and make "
                              "sure you registered your op accurately. ",
...@@ -253,8 +273,9 @@ void CustomOpKernelContext::AssignInplaceOutputs() {
    size_t out_start_idx = output_range_[pair.second].first;
    size_t out_end_idx = output_range_[pair.second].second;
    size_t assign_tensor_size = in_end_idx - in_start_idx;
-    PADDLE_ENFORCE(
-        assign_tensor_size == out_end_idx - out_start_idx,
+    PADDLE_ENFORCE_EQ(
+        assign_tensor_size,
+        out_end_idx - out_start_idx,
        phi::errors::OutOfRange("When assigning inplaced tensor, Input vector "
                                "size %d mismatch output vector size %d",
                                in_end_idx - in_start_idx,
...@@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) { ...@@ -316,6 +337,43 @@ OpMetaInfo& OpMetaInfo::SetInferDtypeFn(InferDtypeFunc&& func) {
return *this; return *this;
} }
//////////////// Op Meta Info Helper /////////////////
const std::string& OpMetaInfoHelper::GetOpName(const paddle::OpMetaInfo& info) {
return info.name_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetInputs(
const paddle::OpMetaInfo& info) {
return info.inputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetOutputs(
const paddle::OpMetaInfo& info) {
return info.outputs_;
}
const std::vector<std::string>& OpMetaInfoHelper::GetAttrs(
const paddle::OpMetaInfo& info) {
return info.attrs_;
}
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceMap(const paddle::OpMetaInfo& info) {
return info.inplace_map_;
}
const std::unordered_map<std::string, std::string>&
OpMetaInfoHelper::GetInplaceReverseMap(const paddle::OpMetaInfo& info) {
return info.inplace_reverse_map_;
}
const KernelFunc& OpMetaInfoHelper::GetKernelFn(
const paddle::OpMetaInfo& info) {
return info.kernel_fn_;
}
const InferShapeFunc& OpMetaInfoHelper::GetInferShapeFn(
const paddle::OpMetaInfo& info) {
return info.infer_shape_fn_;
}
const InferDtypeFunc& OpMetaInfoHelper::GetInferDtypeFn(
const paddle::OpMetaInfo& info) {
return info.infer_dtype_fn_;
}
//////////////// Op Meta Info Map ///////////////// //////////////// Op Meta Info Map /////////////////
std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) { std::vector<OpMetaInfo>& OpMetaInfoMap::operator[](const std::string& name) {
...@@ -414,14 +472,16 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInplaceMap(
  const std::vector<std::string>& outputs =
      OpMetaInfoHelper::GetOutputs(*info_ptr_);
  for (const auto& pair : inplace_map) {
-    PADDLE_ENFORCE(
-        std::find(inputs.begin(), inputs.end(), pair.first) != inputs.cend(),
+    PADDLE_ENFORCE_NE(
+        std::find(inputs.begin(), inputs.end(), pair.first),
+        inputs.cend(),
        phi::errors::PreconditionNotMet(
            "The register of operator %s's `SetInplaceMap` failed. "
            "Please make sure: 1. Call `Inputs` and `Outputs` before "
            "`SetInplaceMap`; 2. The keys of inplace_map are inside `Inputs`",
            name_));
-    PADDLE_ENFORCE(std::find(outputs.begin(), outputs.end(), pair.second) !=
-                       outputs.cend(),
+    PADDLE_ENFORCE_NE(
+        std::find(outputs.begin(), outputs.end(), pair.second),
+        outputs.cend(),
        phi::errors::PreconditionNotMet(
            "The register of operator %s's `SetInplaceMap` failed. "
......
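The OpMetaInfoHelper accessors made out-of-line in this file are what the custom-operator machinery uses to inspect a registered op. A hedged usage sketch, assuming the usual registry entry points (`OpMetaInfoMap::Instance().GetMap()` and the helper getters shown above); exact accessor names may differ across Paddle versions:

// Illustrative only: dump the ops currently registered in the custom-op map.
#include <iostream>
#include "paddle/phi/api/ext/op_meta_info.h"

void DumpRegisteredCustomOps() {
  for (auto& pair : paddle::OpMetaInfoMap::Instance().GetMap()) {
    // pair.second holds the forward/backward OpMetaInfo entries for one op.
    const paddle::OpMetaInfo& info = pair.second.front();
    std::cout << paddle::OpMetaInfoHelper::GetOpName(info) << ": "
              << paddle::OpMetaInfoHelper::GetInputs(info).size() << " inputs, "
              << paddle::OpMetaInfoHelper::GetOutputs(info).size()
              << " outputs\n";
  }
}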
...@@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const { ...@@ -358,6 +358,10 @@ gpuStream_t Tensor::stream() const {
} }
#endif #endif
const std::string &Tensor::name() const { return name_; }
void Tensor::set_name(const std::string &name) { name_ = name; }
/* Part 5: Status utils methods */ /* Part 5: Status utils methods */
bool Tensor::defined() const { return impl_ != nullptr; } bool Tensor::defined() const { return impl_ != nullptr; }
......
...@@ -26,16 +26,4 @@ if(WITH_PYTHON AND EXISTS ${PADDLE_BINARY_DIR})
  endif()
endif()
-if(WITH_GPU OR WITH_ROCM)
-  set(GPU_CTX_DEPS dynload_cuda dynamic_loader)
-endif()
-
-cc_library(
-  phi_device_tracer
-  SRCS device_tracer.cc
-  DEPS phi_profiler_proto ${GPU_CTX_DEPS})
-cc_library(
-  phi_profiler
-  SRCS profiler.cc
-  DEPS phi_os_info phi_device_tracer phi_enforce)
+collect_srcs(api_srcs SRCS device_tracer.cc profiler.cc)
...@@ -2,17 +2,6 @@ add_subdirectory(dynload) ...@@ -2,17 +2,6 @@ add_subdirectory(dynload)
add_subdirectory(gpu) add_subdirectory(gpu)
set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc) set(BACKENDS_SRCS all_context.cc cpu/cpu_context.cc cpu/cpu_info.cc)
set(BACKENDS_DEPS
enforce
place
flags
eigen3
phi_device_context
generator
phi_os_info)
if(WITH_XBYAK)
list(APPEND BACKENDS_DEPS xbyak)
endif()
if(NOT APPLE AND NOT WIN32) if(NOT APPLE AND NOT WIN32)
list(APPEND BACKENDS_SRCS device_code.cc) list(APPEND BACKENDS_SRCS device_code.cc)
...@@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM) ...@@ -23,16 +12,10 @@ if(WITH_GPU OR WITH_ROCM)
gpu/gpu_resources.cc) gpu/gpu_resources.cc)
if(WITH_GPU) if(WITH_GPU)
list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc) list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
set_source_files_properties(
gpu/gpu_resources.cc
PROPERTIES COMPILE_FLAGS
"-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
endif() endif()
if(WITH_ROCM) if(WITH_ROCM)
list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc) list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
endif() endif()
list(APPEND BACKENDS_DEPS phi_dynload_cuda)
endif() endif()
if(WITH_XPU) if(WITH_XPU)
...@@ -45,7 +28,6 @@ if(WITH_MKLDNN) ...@@ -45,7 +28,6 @@ if(WITH_MKLDNN)
list(APPEND BACKENDS_SRCS onednn/onednn_context.cc) list(APPEND BACKENDS_SRCS onednn/onednn_context.cc)
list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc) list(APPEND BACKENDS_SRCS onednn/axpy_handler.cc)
list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc) list(APPEND BACKENDS_SRCS onednn/matmul_utils.cc)
list(APPEND BACKENDS_DEPS mkldnn)
endif() endif()
list( list(
...@@ -55,26 +37,25 @@ list( ...@@ -55,26 +37,25 @@ list(
device_guard.cc device_guard.cc
stream.cc stream.cc
event.cc event.cc
device_base.cc
device_manager.cc device_manager.cc
context_pool.cc) context_pool.cc)
if(WITH_GPU
OR WITH_ROCM
OR WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS device_base.cc)
endif()
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc
custom/custom_device_op_list.cc) custom/custom_device_op_list.cc)
endif() endif()
add_library(phi_backends "${BACKENDS_SRCS}") collect_srcs(backends_srcs SRCS ${BACKENDS_SRCS})
target_link_libraries(phi_backends ${BACKENDS_DEPS})
# for inference library
get_property(phi_modules GLOBAL PROPERTY PHI_MODULES)
set(phi_modules ${phi_modules} phi_backends)
set_property(GLOBAL PROPERTY PHI_MODULES "${phi_modules}")
if(WITH_CUSTOM_DEVICE) if(WITH_CUSTOM_DEVICE)
cc_test( cc_test(
capi_test capi_test
SRCS custom/capi_test.cc SRCS custom/capi_test.cc
DEPS phi_capi) DEPS phi)
endif() endif()
...@@ -24,6 +24,10 @@ ...@@ -24,6 +24,10 @@
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, CPUContext>::kType =
RegisterStaticType<DeviceContext>(CPUContext::name());
struct CPUContext::Impl { struct CPUContext::Impl {
Impl() : place_(CPUPlace()) {} Impl() : place_(CPUPlace()) {}
......
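The same pattern recurs below for CustomContext, GPUContext, XPUContext, DenseTensor, SelectedRows, the sparse tensors, StringTensor and TensorArray: the static `kType` member of `TypeInfoTraits<BaseT, DerivedT>` is no longer defined generically in the header (that definition is removed in the type_info.h hunk further down) but explicitly specialized in each type's .cc file, so the static registration is emitted in exactly one translation unit inside the phi library. A stripped-down, self-contained sketch of the mechanism with simplified names (not the actual phi implementation):

// Sketch of "register static type info via an explicitly specialized static
// member"; all names here are illustrative.
#include <iostream>
#include <string>

template <typename BaseT>
struct TypeInfo {
  std::string name;
};

template <typename BaseT>
TypeInfo<BaseT> RegisterStaticType(const std::string& name) {
  // The real implementation records `name` in a registry and returns an id.
  return TypeInfo<BaseT>{name};
}

template <typename BaseT, typename DerivedT>
struct TypeInfoTraits {
  static const TypeInfo<BaseT> kType;  // declared here, defined per DerivedT
};

struct TensorBase {};
struct DenseTensorLike : TensorBase {
  static const char* name() { return "DenseTensorLike"; }
};

// One explicit specialization per concrete type, placed in that type's .cc,
// so the definition lives in a single translation unit of the shared library.
template <>
const TypeInfo<TensorBase> TypeInfoTraits<TensorBase, DenseTensorLike>::kType =
    RegisterStaticType<TensorBase>(DenseTensorLike::name());

int main() {
  std::cout << TypeInfoTraits<TensorBase, DenseTensorLike>::kType.name << "\n";
}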
...@@ -19,6 +19,11 @@ limitations under the License. */ ...@@ -19,6 +19,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, CustomContext>::kType =
RegisterStaticType<DeviceContext>(CustomContext::name());
struct CustomContext::Impl { struct CustomContext::Impl {
explicit Impl(const CustomPlace& place) : place_(place) {} explicit Impl(const CustomPlace& place) : place_(place) {}
......
-cc_library(
-  phi_dynamic_loader
-  SRCS dynamic_loader.cc port.cc
-  DEPS enforce glog gflags)
+set(DYNLOAD_COMMON_SRCS dynamic_loader.cc port.cc warpctc.cc warprnnt.cc
+    lapack.cc)
+if(WITH_ASCEND_CL)
+  list(REMOVE_ITEM DYNLOAD_COMMON_SRCS warprnnt.cc)
+endif()
list(
  APPEND
  CUDA_SRCS
...@@ -60,66 +60,39 @@ configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h)
if(CUPTI_FOUND)
  list(APPEND CUDA_SRCS cupti.cc)
endif()
-if(WITH_ROCM)
-  hip_library(
-    phi_dynload_cuda
-    SRCS ${HIP_SRCS}
-    DEPS phi_dynamic_loader)
-  cc_library(
-    phi_dynload_warpctc
-    SRCS warpctc.cc
-    DEPS phi_dynamic_loader warpctc)
-  cc_library(
-    phi_dynload_warprnnt
-    SRCS warprnnt.cc
-    DEPS phi_dynamic_loader warprnnt)
-else()
-  nv_library(
-    phi_dynload_cuda
-    SRCS ${CUDA_SRCS}
-    DEPS phi_dynamic_loader)
-  cc_library(
-    phi_dynload_warpctc
-    SRCS warpctc.cc
-    DEPS phi_dynamic_loader warpctc)
-  cc_library(
-    phi_dynload_warprnnt
-    SRCS warprnnt.cc
-    DEPS phi_dynamic_loader warprnnt)
-endif()
if(WITH_MKLML)
-  cc_library(
-    phi_dynload_mklml
-    SRCS mklml.cc
-    DEPS phi_dynamic_loader mklml)
+  # Only deps libmklml.so, not link
+  add_library(dynload_mklml STATIC mklml.cc)
+  add_dependencies(dynload_mklml mklml)
+  if(WIN32)
+    target_link_libraries(dynload_mklml ${MKLML_IOMP_LIB})
+  else()
+    target_link_libraries(dynload_mklml
+                          "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
+  endif()
endif()
if(WITH_FLASHATTN)
-  cc_library(
-    phi_dynload_flashattn
-    SRCS flashattn.cc
-    DEPS phi_dynamic_loader flashattn)
+  list(APPEND DYNLOAD_COMMON_SRCS flashattn.cc)
endif()
-cc_library(
-  phi_dynload_lapack
-  SRCS lapack.cc
-  DEPS phi_dynamic_loader)
-add_dependencies(phi_dynload_lapack extern_lapack)
-# TODO(TJ): add iomp, mkldnn?
if(MKL_FOUND AND WITH_ONEMKL)
  message("ONEMKL INCLUDE directory is ${MKL_INCLUDE}")
-  cc_library(
-    phi_dynload_mklrt
-    SRCS mklrt.cc
-    DEPS phi_dynamic_loader)
-  target_include_directories(phi_dynload_mklrt PRIVATE ${MKL_INCLUDE})
+  list(APPEND DYNLOAD_COMMON_SRCS mklrt.cc)
endif()
+if(WITH_ROCM)
+  collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${HIP_SRCS})
+elseif(WITH_GPU)
+  collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS} ${CUDA_SRCS})
+else()
+  collect_srcs(backends_srcs SRCS ${DYNLOAD_COMMON_SRCS})
+endif()
if(WITH_CUDNN_FRONTEND)
  nv_test(
    cudnn_frontend_test
    SRCS cudnn_frontend_test.cc
-    DEPS phi_dynload_cuda cudnn-frontend)
+    DEPS phi cudnn-frontend)
endif()
-cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc)
+collect_srcs(backends_srcs SRCS cudnn_workspace_helper.cc)
...@@ -59,6 +59,15 @@ limitations under the License. */ ...@@ -59,6 +59,15 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, GPUContext>::kType =
RegisterStaticType<DeviceContext>(GPUContext::name());
template <>
const TypeInfo<DeviceContext>
TypeInfoTraits<DeviceContext, GPUPinnedContext>::kType =
RegisterStaticType<DeviceContext>(GPUPinnedContext::name());
namespace internal { namespace internal {
class EigenGpuStreamDevice : public Eigen::StreamInterface { class EigenGpuStreamDevice : public Eigen::StreamInterface {
......
...@@ -15,6 +15,8 @@ limitations under the License. */ ...@@ -15,6 +15,8 @@ limitations under the License. */
#pragma once #pragma once
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include <array> #include <array>
#include <functional> #include <functional>
#include <mutex> #include <mutex>
...@@ -305,3 +307,5 @@ class GPUPinnedContext ...@@ -305,3 +307,5 @@ class GPUPinnedContext
}; };
#endif #endif
} // namespace phi } // namespace phi
#endif
...@@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) { ...@@ -83,6 +83,11 @@ void OneDNNContextThreadLocals::Body::log_lib_version(void) {
} }
} }
OneDNNContextThreadLocals::Body& OneDNNContextThreadLocals::fetch() {
thread_local Body b;
return b;
}
struct OneDNNContext::Impl { struct OneDNNContext::Impl {
Impl() : p_blobmap_() { Impl() : p_blobmap_() {
p_blobmap_.reset(new BlobMap()); p_blobmap_.reset(new BlobMap());
...@@ -462,5 +467,7 @@ const std::vector<std::string>& OneDNNContext::GetOutputsName( ...@@ -462,5 +467,7 @@ const std::vector<std::string>& OneDNNContext::GetOutputsName(
return impl_->GetOutputsName(output); return impl_->GetOutputsName(output);
} }
const char* OneDNNContext::name() { return "OneDNNContext"; }
} // namespace phi } // namespace phi
#endif #endif
...@@ -76,10 +76,7 @@ class OneDNNContextThreadLocals { ...@@ -76,10 +76,7 @@ class OneDNNContextThreadLocals {
static constexpr size_t kMKLDNNSessionID_Default = 0; static constexpr size_t kMKLDNNSessionID_Default = 0;
// mkldnn session id for cache clearing mode // mkldnn session id for cache clearing mode
static constexpr size_t kMKLDNNSessionID_CacheClearing = -1; static constexpr size_t kMKLDNNSessionID_CacheClearing = -1;
static Body& fetch() { static Body& fetch();
thread_local Body b;
return b;
}
}; };
class OneDNNContext : public CPUContext { class OneDNNContext : public CPUContext {
...@@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext { ...@@ -157,7 +154,7 @@ class OneDNNContext : public CPUContext {
const std::vector<std::string>& GetOutputsName( const std::vector<std::string>& GetOutputsName(
const std::string& output) const; const std::string& output) const;
static const char* name() { return "OneDNNContext"; } static const char* name();
private: private:
struct Impl; struct Impl;
......
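Moving `fetch()` and `name()` out of the header is part of preparing phi to live behind a shared-library boundary: a `thread_local` (or any static local) defined in an inline header function can end up with one copy per module that includes the header, while an out-of-line definition pins a single instance inside libphi. A generic sketch of the idiom, with illustrative names only:

// registry.h -- illustrative sketch, not phi code.
class ThreadLocalRegistry {
 public:
  struct Body {
    int session_id = 0;
  };
  // Declared only; defined once in the .cc so every user of the shared
  // library sees the same per-thread instance.
  static Body& fetch();
};

// registry.cc
ThreadLocalRegistry::Body& ThreadLocalRegistry::fetch() {
  thread_local Body b;  // one instance per thread, owned by this library
  return b;
}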
...@@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api; ...@@ -30,6 +30,9 @@ namespace xpu = baidu::xpu::api;
namespace phi { namespace phi {
template <>
const TypeInfo<DeviceContext> TypeInfoTraits<DeviceContext, XPUContext>::kType =
RegisterStaticType<DeviceContext>(XPUContext::name());
struct XPUContext::Impl { struct XPUContext::Impl {
void SetL3Cache(int l3_size = 14155776) { void SetL3Cache(int l3_size = 14155776) {
const int MAX_XPU_NUM = 16; const int MAX_XPU_NUM = 16;
......
add_subdirectory(lib) add_subdirectory(lib)
cc_library(
phi_capi
SRCS all.cc
DEPS phi_c_data_type
phi_c_device_context
phi_c_int_array
phi_c_kernel_context
phi_c_kernel_factory
phi_c_kernel_registry
phi_c_place
phi_c_scalar
phi_c_tensor)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/capi/all.h"
namespace paddle {
namespace capi {} // namespace capi
} // namespace paddle
cc_library(
  phi_c_data_type
  SRCS c_data_type.cc
  DEPS dense_tensor)

cc_library(
  phi_c_device_context
  SRCS c_device_context.cc
  DEPS phi_backends)

cc_library(
  phi_c_int_array
SRCS c_int_array.cc
DEPS int_array)
cc_library(
phi_c_kernel_context
SRCS c_kernel_context.cc
DEPS kernel_context)
cc_library(
phi_c_kernel_factory
SRCS c_kernel_factory.cc
DEPS kernel_factory)
cc_library(
phi_c_kernel_registry
SRCS c_kernel_registry.cc
DEPS dense_tensor)
cc_library(
phi_c_place
SRCS c_place.cc
DEPS phi_place)
cc_library(
phi_c_scalar
SRCS c_scalar.cc
DEPS scalar)
cc_library(
phi_c_tensor
SRCS c_tensor.cc
  DEPS dense_tensor)

+collect_srcs(
+  capi_srcs
+  SRCS
+  c_data_type.cc
+  c_device_context.cc
+  c_int_array.cc
+  c_kernel_context.cc
+  c_kernel_factory.cc
+  c_kernel_registry.cc
+  c_place.cc
+  c_scalar.cc
+  c_tensor.cc)
if(WITH_GPU)
nv_library(
phi_place
SRCS place.cc
DEPS phi_backends)
elseif(WITH_ROCM)
hip_library(
phi_place
SRCS place.cc
DEPS phi_backends)
else()
cc_library(phi_place SRCS place.cc)
endif()
cc_library(
scalar
SRCS scalar.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
int_array
SRCS int_array.cc
DEPS phi_enforce phi_tensor_utils)
cc_library(
memory_utils
SRCS memory_utils.cc
  DEPS phi_enforce phi_place)

+collect_srcs(common_srcs SRCS place.cc scalar.cc int_array.cc memory_utils.cc)
...@@ -6,150 +6,35 @@ if(WITH_GPU)
  proto_library(external_error_proto SRCS external_error.proto)
endif()

cc_library(
  flags
  SRCS flags.cc
  DEPS gflags)

cc_library(errors SRCS errors.cc)
set(phi_enforce_deps errors flags)
if(WITH_GPU)
  set(phi_enforce_deps ${phi_enforce_deps} external_error_proto)
endif()
cc_library(
  phi_enforce
  SRCS enforce.cc
  DEPS ${phi_enforce_deps})

cc_library(
  phi_os_info
  SRCS os_info.cc
  DEPS phi_enforce)

if(WITH_XPU)
  cc_library(
    kernel_factory
    SRCS kernel_factory.cc
    DEPS phi_enforce convert_utils phi_backends)
else()
  cc_library(
    kernel_factory
    SRCS kernel_factory.cc
    DEPS phi_enforce convert_utils)
endif()
cc_library(
kernel_context
SRCS kernel_context.cc
DEPS phi_enforce phi_backends)
cc_library(
ddim
SRCS ddim.cc
DEPS phi_enforce)
cc_library(
tensor_base
SRCS tensor_base.cc allocator.cc
DEPS phi_enforce)
cc_library(
tensor_meta
SRCS tensor_meta.cc
DEPS phi_enforce)
cc_library(
lod_utils
SRCS lod_utils.cc
DEPS phi_enforce)
cc_library(
threadpool
SRCS threadpool.cc
DEPS phi_enforce)
cc_library(
dense_tensor
SRCS dense_tensor.cc dense_tensor_impl.cc
DEPS convert_utils tensor_meta tensor_base ddim)
target_link_libraries(dense_tensor memory_utils)
cc_library(
sparse_coo_tensor
SRCS sparse_coo_tensor.cc
DEPS tensor_meta tensor_base)
cc_library(
sparse_csr_tensor
SRCS sparse_csr_tensor.cc
DEPS dense_tensor tensor_base)
cc_library(
string_tensor
SRCS string_tensor.cc
DEPS convert_utils tensor_meta tensor_base)
cc_library(
tensor_array
SRCS tensor_array.cc
DEPS dense_tensor tensor_base)
cc_library(
extended_tensor
SRCS extended_tensor.cc
DEPS tensor_base)
cc_library(
meta_tensor
SRCS meta_tensor.cc
DEPS tensor_base tensor_meta dense_tensor)
cc_library(
infermeta_utils
SRCS infermeta_utils.cc
DEPS meta_tensor)
cc_library(
selected_rows
SRCS selected_rows_impl.cc selected_rows.cc
DEPS tensor_base dense_tensor phi_enforce ddim)
cc_library(
phi_device_context
SRCS device_context.cc
DEPS dense_tensor selected_rows)
cc_library(
custom_kernel
SRCS custom_kernel.cc
DEPS kernel_factory)
cc_library(
mixed_vector
SRCS mixed_vector.cc
DEPS phi_backends place memory)
cc_library(
generator
SRCS generator.cc
DEPS enforce place)
# Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
add_dependencies(tensor_base mkldnn)
endif()
if(WITH_GPU)
nv_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_ROCM)
hip_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
elseif(WITH_XPU_KP)
xpu_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS phi_backends dense_tensor selected_rows memcpy memory_utils)
else()
cc_library(
phi_tensor_utils
SRCS tensor_utils.cc
DEPS dense_tensor selected_rows memcpy phi_backends memory_utils)
endif()
+collect_srcs(
+  core_srcs
+  SRCS
+  flags.cc
+  errors.cc
+  enforce.cc
+  os_info.cc
+  kernel_context.cc
+  ddim.cc
+  tensor_base.cc
+  allocator.cc
+  tensor_meta.cc
+  lod_utils.cc
+  threadpool.cc
+  dense_tensor.cc
+  dense_tensor_impl.cc
+  sparse_coo_tensor.cc
+  sparse_csr_tensor.cc
+  string_tensor.cc
+  tensor_array.cc
+  extended_tensor.cc
+  meta_tensor.cc
+  infermeta_utils.cc
+  selected_rows_impl.cc
+  selected_rows.cc
+  device_context.cc
+  custom_kernel.cc
+  mixed_vector.cc
+  generator.cc
+  kernel_factory.cc
+  tensor_utils.cc
+  storage_properties.cc)

cc_library(
  arg_map_context
  SRCS arg_map_context.cc
  DEPS phi_enforce)
cc_library(
  op_utils
  SRCS op_utils.cc
  DEPS arg_map_context enforce)
cc_library(
  get_kerneltype_forvar_utils
  SRCS get_kerneltype_forvar_utils.cc
  DEPS enforce)
set(convert_utils_deps data_type place op_utils phi_backends)
if(WITH_MKLDNN)
  set(convert_utils_deps ${convert_utils_deps} mkldnn)
endif()
cc_library(
  convert_utils
  SRCS convert_utils.cc
  DEPS ${convert_utils_deps})

+collect_srcs(core_srcs SRCS arg_map_context.cc op_utils.cc
+             get_kerneltype_forvar_utils.cc convert_utils.cc)
...@@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() { ...@@ -26,4 +26,16 @@ OpUtilsMap& OpUtilsMap::Instance() {
return g_op_utils_map; return g_op_utils_map;
} }
BaseKernelNameRegistrar::BaseKernelNameRegistrar(const char* op_type,
const char* base_kernel_name) {
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
ArgumentMappingFnRegistrar::ArgumentMappingFnRegistrar(
const char* op_type, ArgumentMappingFn arg_mapping_fn) {
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
} // namespace phi } // namespace phi
...@@ -210,18 +210,12 @@ class OpUtilsMap { ...@@ -210,18 +210,12 @@ class OpUtilsMap {
}; };
struct BaseKernelNameRegistrar { struct BaseKernelNameRegistrar {
BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name) { BaseKernelNameRegistrar(const char* op_type, const char* base_kernel_name);
OpUtilsMap::Instance().InsertBaseKernelName(op_type, base_kernel_name);
OpUtilsMap::Instance().InsertFluidOplName(op_type, base_kernel_name);
}
}; };
struct ArgumentMappingFnRegistrar { struct ArgumentMappingFnRegistrar {
ArgumentMappingFnRegistrar(const char* op_type, ArgumentMappingFnRegistrar(const char* op_type,
ArgumentMappingFn arg_mapping_fn) { ArgumentMappingFn arg_mapping_fn);
OpUtilsMap::Instance().InsertArgumentMappingFn(op_type,
std::move(arg_mapping_fn));
}
}; };
#define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \ #define PD_REGISTER_BASE_KERNEL_NAME(op_type, base_kernel_name) \
......
...@@ -42,6 +42,11 @@ limitations under the License. */ ...@@ -42,6 +42,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, DenseTensor>::kType =
RegisterStaticType<phi::TensorBase>(DenseTensor::name());
DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta) DenseTensor::DenseTensor(Allocator* a, const DenseTensorMeta& meta)
: meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {} : meta_(meta), holder_(a->Allocate(SizeOf(dtype()) * numel())) {}
...@@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, ...@@ -115,8 +120,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
if (fake_alloc) { if (fake_alloc) {
bytes = 0; bytes = 0;
} else { } else {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
valid(), valid(),
true,
phi::errors::PreconditionNotMet("The meta data must be valid when " phi::errors::PreconditionNotMet("The meta data must be valid when "
"call the mutable data function.")); "call the mutable data function."));
if (requested_size) { if (requested_size) {
...@@ -169,8 +175,9 @@ const T* DenseTensor::data() const { ...@@ -169,8 +175,9 @@ const T* DenseTensor::data() const {
template <typename T> template <typename T>
T* DenseTensor::data() { T* DenseTensor::data() {
T* ret = static_cast<T*>(data()); T* ret = static_cast<T*>(data());
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
(dtype() == phi::CppTypeToDataType<T>::Type()), dtype(),
phi::CppTypeToDataType<T>::Type(),
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"The type of data we are trying to retrieve (%s) does not match the " "The type of data we are trying to retrieve (%s) does not match the "
"type of data (%s) currently contained in the container.", "type of data (%s) currently contained in the container.",
...@@ -200,7 +207,8 @@ const void* DenseTensor::data() const { ...@@ -200,7 +207,8 @@ const void* DenseTensor::data() const {
} }
void DenseTensor::set_meta(DenseTensorMeta&& meta) { void DenseTensor::set_meta(DenseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(), PADDLE_ENFORCE_EQ(meta_.valid(),
false,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only when the original attribute of Tensor is " "Only when the original attribute of Tensor is "
"incomplete, can it be reset.")); "incomplete, can it be reset."));
...@@ -208,8 +216,9 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) { ...@@ -208,8 +216,9 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
} }
void DenseTensor::set_meta(const DenseTensorMeta& meta) { void DenseTensor::set_meta(const DenseTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
meta.valid(), meta.valid(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
......
...@@ -2,32 +2,14 @@ add_subdirectory(check)
add_subdirectory(store)
add_subdirectory(auto_parallel)
-set(COMM_CONTEXT_MANAGER_DEPS tcp_store)
+set(DISTRIBUTED_COMMON_SRCS comm_context_manager.cc)
if(WITH_NCCL OR WITH_RCCL)
-  cc_library(
-    nccl_comm_context
-    SRCS nccl_comm_context.cc
-    DEPS dense_tensor comm_static_check nccl_dynamic_check)
-  list(APPEND COMM_CONTEXT_MANAGER_DEPS nccl_comm_context)
+  list(APPEND DISTRIBUTED_COMMON_SRCS nccl_comm_context.cc)
endif()
if(WITH_GLOO)
-  cc_library(
-    gloo_utils
-    SRCS gloo_utils.cc
-    DEPS gloo dense_tensor enforce tcp_store)
-  cc_library(
-    gloo_comm_context
-    SRCS gloo_comm_context.cc
-    DEPS gloo_utils comm_static_check)
-  list(APPEND COMM_CONTEXT_MANAGER_DEPS gloo_comm_context gloo_store)
+  list(APPEND DISTRIBUTED_COMMON_SRCS gloo_utils.cc gloo_comm_context.cc)
endif()
-cc_library(
-  comm_context_manager
-  SRCS comm_context_manager.cc
-  DEPS ${COMM_CONTEXT_MANAGER_DEPS})
+collect_srcs(core_srcs SRCS ${DISTRIBUTED_COMMON_SRCS})
proto_library(auto_parallel_proto SRCS auto_parallel.proto)
-cc_library(
-  device_mesh
-  SRCS device_mesh.cc
-  DEPS auto_parallel_proto phi_enforce)
-cc_library(
-  process_mesh
-  SRCS process_mesh.cc
-  DEPS auto_parallel_proto phi_enforce)
-cc_library(
-  dist_attr
-  SRCS dist_attr.cc
-  DEPS process_mesh auto_parallel_proto proto_desc phi_enforce)
-cc_library(
-  dist_mapper
-  SRCS dist_mapper.cc
-  DEPS device_mesh auto_parallel_proto phi_enforce)
-cc_library(auto_parallel DEPS device_mesh process_mesh dist_attr dist_mapper)
+collect_srcs(core_srcs SRCS device_mesh.cc process_mesh.cc dist_attr.cc
+             dist_mapper.cc)
-cc_library(
-  comm_static_check
-  SRCS static_check.cc
-  DEPS place dense_tensor enforce)
+set(CHECK_COMMON_SRCS static_check.cc)
if(WITH_NCCL OR WITH_RCCL)
-  cc_library(
-    nccl_dynamic_check
-    SRCS nccl_dynamic_check.cc
-    DEPS dense_tensor)
+  list(APPEND CHECK_COMMON_SRCS nccl_dynamic_check.cc)
endif()
+collect_srcs(core_srcs SRCS ${CHECK_COMMON_SRCS})
-cc_library(
-  tcp_store
-  SRCS tcp_store.cc tcp_utils.cc socket.cpp store.cc
-  DEPS enforce glog)
+set(STORE_COMMON_SRCS tcp_store.cc tcp_utils.cc socket.cpp store.cc)
if(WITH_GLOO)
-  cc_library(
-    gloo_store
-    SRCS gloo_store.cc
-    DEPS gloo)
+  list(APPEND STORE_COMMON_SRCS gloo_store.cc)
endif()
+collect_srcs(core_srcs SRCS ${STORE_COMMON_SRCS})
if(NOT WIN32)
  cc_test(
    test_c_tcp_store
    SRCS test_tcp_store.cc
-    DEPS tcp_store)
+    DEPS phi)
endif()
...@@ -139,7 +139,8 @@ void MasterDaemon::StopByControlFd() { ...@@ -139,7 +139,8 @@ void MasterDaemon::StopByControlFd() {
#else #else
void MasterDaemon::InitControlFd() { void MasterDaemon::InitControlFd() {
ghStopEvent_ = CreateEvent(NULL, TRUE, FALSE, NULL); ghStopEvent_ = CreateEvent(NULL, TRUE, FALSE, NULL);
PADDLE_ENFORCE(ghStopEvent_, PADDLE_ENFORCE_NE(ghStopEvent_,
nullptr,
phi::errors::Fatal("failed to cread control pipe")); phi::errors::Fatal("failed to cread control pipe"));
} }
void MasterDaemon::CloseControlFd() { CloseHandle(ghStopEvent_); } void MasterDaemon::CloseControlFd() { CloseHandle(ghStopEvent_); }
...@@ -422,8 +423,9 @@ void TCPStore::wait(const std::string& key) { ...@@ -422,8 +423,9 @@ void TCPStore::wait(const std::string& key) {
VLOG(3) << "TCPStore wait."; VLOG(3) << "TCPStore wait.";
_client->send_command_for_key(Command::WAIT, _key_prefix + key); _client->send_command_for_key(Command::WAIT, _key_prefix + key);
reply = _client->receive_value<ReplyType>(); reply = _client->receive_value<ReplyType>();
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
reply == ReplyType::STOP_WAIT, reply == ReplyType::STOP_WAIT,
true,
phi::errors::InvalidArgument("Stop_waiting response is expected")); phi::errors::InvalidArgument("Stop_waiting response is expected"));
} }
......
...@@ -280,13 +280,19 @@ std::string GetExternalErrorMsg(T status) {
    if (std::string::npos != last_slash_idx) {
      strModule.erase(last_slash_idx, std::string::npos);
    }
-    if (compare_path.compare("avx.so") == 0) {
+    // TODO(lizhiyu02): I don't know what the 'compare_path.compare("avx.so")
+    // == 0' means, while
+    // 'compare_path.find("dist-packages") != std::string::npos' means that
+    // after using 'pip install paddle'.
+    if (compare_path.compare("avx.so") == 0 ||
+        strModule.find("dist-packages") != std::string::npos) {
      filePath =
          strModule +
          "/../include/third_party/externalError/data/externalErrorMsg.pb";
    } else {
+      // Just for unittest
      filePath = strModule +
-                 "/../../third_party/externalError/data/externalErrorMsg.pb";
+                 "/../third_party/externalError/data/externalErrorMsg.pb";
    }
  }
#else
...@@ -303,14 +309,14 @@ std::string GetExternalErrorMsg(T status) {
    if (std::string::npos != last_slash_idx) {
      strModule.erase(last_slash_idx, std::string::npos);
    }
-    if (compare_path.compare("avx.pyd") == 0) {
+    if (strModule.find("dist-packages") != std::string::npos) {
      filePath = strModule +
                 "\\..\\include\\third_"
                 "party\\externalerror\\data\\externalErrorMsg.pb";
    } else {
-      filePath =
-          strModule +
-          "\\..\\..\\third_party\\externalerror\\data\\externalErrorMsg.pb";
+      filePath = strModule +
+                 "\\..\\..\\third_party"
+                 "\\externalerror\\data\\externalErrorMsg.pb";
    }
#endif
  std::ifstream fin(filePath, std::ios::in | std::ios::binary);
......
...@@ -24,7 +24,7 @@
#include "paddle/utils/variant.h"
-#if defined(_WIN32) && defined(BUILD_PHI_SHARED)
+#if defined(_WIN32)
#define PHI_EXPORT_FLAG __declspec(dllexport)
#define PHI_IMPORT_FLAG __declspec(dllimport)
#else
......
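With phi now always prepared for a shared build, the export/import flag no longer depends on a separate BUILD_PHI_SHARED switch. For context, the usual shape of such a PADDLE_API-style macro is sketched below; this is a generic illustration with made-up names, not the exact phi header:

// Illustrative Windows export/import macro, as commonly paired with the
// PHI_EXPORT_FLAG / PHI_IMPORT_FLAG definitions above.
#if defined(_WIN32)
#define MYLIB_EXPORT_FLAG __declspec(dllexport)
#define MYLIB_IMPORT_FLAG __declspec(dllimport)
#else
#define MYLIB_EXPORT_FLAG __attribute__((visibility("default")))
#define MYLIB_IMPORT_FLAG
#endif

// MYLIB_COMPILE is assumed to be defined only while building the library
// itself (e.g. via target_compile_definitions), so consumers see dllimport.
#if defined(MYLIB_COMPILE)
#define MYLIB_API MYLIB_EXPORT_FLAG
#else
#define MYLIB_API MYLIB_IMPORT_FLAG
#endif

class MYLIB_API Tensor {  // each public class/function is annotated once
 public:
  int numel() const;
};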
...@@ -32,8 +32,9 @@ LoD ToAbsOffset(const LoD &in) { ...@@ -32,8 +32,9 @@ LoD ToAbsOffset(const LoD &in) {
} }
void AppendLoD(LoD *lod, const LoD &lod_length) { void AppendLoD(LoD *lod, const LoD &lod_length) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
lod->empty() || lod->size() == lod_length.size(), (lod->empty() || lod->size() == lod_length.size()),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"The input LoD length should be equal to the appended LoD size, but " "The input LoD length should be equal to the appended LoD size, but "
"received input LoD length is %d, actual LoD size is %d.", "received input LoD length is %d, actual LoD size is %d.",
......
...@@ -16,6 +16,11 @@ limitations under the License. */ ...@@ -16,6 +16,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, SelectedRows>::kType =
RegisterStaticType<phi::TensorBase>(SelectedRows::name());
SelectedRows::SelectedRows(const std::vector<int64_t>& rows, SelectedRows::SelectedRows(const std::vector<int64_t>& rows,
const int64_t& height) const int64_t& height)
: impl_(std::make_shared<phi::SelectedRowsImpl>(rows, height)) {} : impl_(std::make_shared<phi::SelectedRowsImpl>(rows, height)) {}
......
...@@ -16,6 +16,11 @@ limitations under the License. */ ...@@ -16,6 +16,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, SparseCooTensor>::kType =
RegisterStaticType<phi::TensorBase>(SparseCooTensor::name());
SparseCooTensor::SparseCooTensor() { SparseCooTensor::SparseCooTensor() {
DenseTensor non_zero_indices, non_zero_elements; DenseTensor non_zero_indices, non_zero_elements;
this->SetMember(non_zero_indices, non_zero_elements, {1}, true); this->SetMember(non_zero_indices, non_zero_elements, {1}, true);
...@@ -155,7 +160,8 @@ int32_t SparseCooTensor::dense_dim() const { ...@@ -155,7 +160,8 @@ int32_t SparseCooTensor::dense_dim() const {
} }
void SparseCooTensor::set_meta(SparseTensorMeta&& meta) { void SparseCooTensor::set_meta(SparseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(), PADDLE_ENFORCE_EQ(meta_.valid(),
false,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only when the original attribute of Tensor is " "Only when the original attribute of Tensor is "
"incomplete, can it be reset.")); "incomplete, can it be reset."));
...@@ -163,8 +169,9 @@ void SparseCooTensor::set_meta(SparseTensorMeta&& meta) { ...@@ -163,8 +169,9 @@ void SparseCooTensor::set_meta(SparseTensorMeta&& meta) {
} }
void SparseCooTensor::set_meta(const SparseTensorMeta& meta) { void SparseCooTensor::set_meta(const SparseTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
meta.valid(), meta.valid(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
......
...@@ -16,6 +16,11 @@ limitations under the License. */ ...@@ -16,6 +16,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, SparseCsrTensor>::kType =
RegisterStaticType<phi::TensorBase>(SparseCsrTensor::name());
SparseCsrTensor::SparseCsrTensor() { SparseCsrTensor::SparseCsrTensor() {
DenseTensor crows, cols, values; DenseTensor crows, cols, values;
this->non_zero_crows_ = crows; this->non_zero_crows_ = crows;
...@@ -26,8 +31,9 @@ SparseCsrTensor::SparseCsrTensor() { ...@@ -26,8 +31,9 @@ SparseCsrTensor::SparseCsrTensor() {
inline void check_shape(const DDim& dims) { inline void check_shape(const DDim& dims) {
bool valid = dims.size() == 2 || dims.size() == 3; bool valid = dims.size() == 2 || dims.size() == 3;
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
valid, valid,
true,
phi::errors::InvalidArgument("the SparseCsrTensor only support 2-D or " phi::errors::InvalidArgument("the SparseCsrTensor only support 2-D or "
"3-D Tensor, but get %d-D Tensor", "3-D Tensor, but get %d-D Tensor",
dims.size())); dims.size()));
...@@ -96,7 +102,9 @@ void SparseCsrTensor::set_layout(const DataLayout layout) { ...@@ -96,7 +102,9 @@ void SparseCsrTensor::set_layout(const DataLayout layout) {
void SparseCsrTensor::Resize(const DDim& dense_dims, void SparseCsrTensor::Resize(const DDim& dense_dims,
const int64_t non_zero_num) { const int64_t non_zero_num) {
PADDLE_ENFORCE(this->initialized(), PADDLE_ENFORCE_EQ(
this->initialized(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"the SparseCsrTensor must be initialized when call Resize " "the SparseCsrTensor must be initialized when call Resize "
"function.")); "function."));
...@@ -139,7 +147,8 @@ void SparseCsrTensor::SetMember(const DenseTensor& non_zero_crows, ...@@ -139,7 +147,8 @@ void SparseCsrTensor::SetMember(const DenseTensor& non_zero_crows,
} }
void SparseCsrTensor::set_meta(SparseTensorMeta&& meta) { void SparseCsrTensor::set_meta(SparseTensorMeta&& meta) {
PADDLE_ENFORCE(!meta_.valid(), PADDLE_ENFORCE_EQ(meta_.valid(),
false,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Only when the original attribute of Tensor is " "Only when the original attribute of Tensor is "
"incomplete, can it be reset.")); "incomplete, can it be reset."));
...@@ -147,8 +156,9 @@ void SparseCsrTensor::set_meta(SparseTensorMeta&& meta) { ...@@ -147,8 +156,9 @@ void SparseCsrTensor::set_meta(SparseTensorMeta&& meta) {
} }
void SparseCsrTensor::set_meta(const SparseTensorMeta& meta) { void SparseCsrTensor::set_meta(const SparseTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
meta.valid(), meta.valid(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
......
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
...@@ -12,21 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/fluid/platform/dynload/mklml.h"
+#include "paddle/phi/core/storage_properties.h"
-namespace paddle {
-namespace platform {
-namespace dynload {
+namespace phi {
-#define DEFINE_WRAP(__name) DynLoad__##__name __name
-MKLML_ROUTINE_EACH(DEFINE_WRAP);
-#if !defined(_WIN32)
-DEFINE_WRAP(mkl_scsrmm);
-DEFINE_WRAP(mkl_dcsrmm);
+#ifdef PADDLE_WITH_MKLDNN
+template <>
+const TypeInfo<StorageProperties>
+    TypeInfoTraits<StorageProperties, OneDNNStorageProperties>::kType =
+        RegisterStaticType<StorageProperties>(OneDNNStorageProperties::name());
#endif
-}  // namespace dynload
-}  // namespace platform
-}  // namespace paddle
+template <>
+const TypeInfo<StorageProperties>
+    TypeInfoTraits<StorageProperties, NPUStorageProperties>::kType =
+        RegisterStaticType<StorageProperties>(NPUStorageProperties::name());
+}  // namespace phi
...@@ -21,6 +21,11 @@ limitations under the License. */ ...@@ -21,6 +21,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, StringTensor>::kType =
RegisterStaticType<phi::TensorBase>(StringTensor::name());
StringTensor::StringTensor() { meta_.offset = 0; } StringTensor::StringTensor() { meta_.offset = 0; }
StringTensor::StringTensor(Allocator* a, const StringTensorMeta& meta) StringTensor::StringTensor(Allocator* a, const StringTensorMeta& meta)
...@@ -91,8 +96,9 @@ dtype::pstring* StringTensor::data() { ...@@ -91,8 +96,9 @@ dtype::pstring* StringTensor::data() {
} }
void StringTensor::set_meta(const StringTensorMeta& meta) { void StringTensor::set_meta(const StringTensorMeta& meta) {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
meta.valid(), meta.valid(),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Input meta is invalid, please check the meta attribute.")); "Input meta is invalid, please check the meta attribute."));
meta_.dims = meta.dims; meta_.dims = meta.dims;
...@@ -143,8 +149,9 @@ void* StringTensor::AllocateFrom(Allocator* allocator, ...@@ -143,8 +149,9 @@ void* StringTensor::AllocateFrom(Allocator* allocator,
if (fake_alloc) { if (fake_alloc) {
bytes = 0; bytes = 0;
} else { } else {
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
valid(), valid(),
true,
errors::PreconditionNotMet("The meta data must be valid when call the " errors::PreconditionNotMet("The meta data must be valid when call the "
"mutable data function.")); "mutable data function."));
if (requested_size) { if (requested_size) {
......
...@@ -16,6 +16,11 @@ limitations under the License. */ ...@@ -16,6 +16,11 @@ limitations under the License. */
namespace phi { namespace phi {
template <>
const TypeInfo<phi::TensorBase>
TypeInfoTraits<phi::TensorBase, TensorArray>::kType =
RegisterStaticType<phi::TensorBase>(TensorArray::name());
TensorArray::TensorArray(const std::vector<DenseTensor>& vec) { TensorArray::TensorArray(const std::vector<DenseTensor>& vec) {
tensors_ = vec; tensors_ = vec;
} }
......
...@@ -52,8 +52,4 @@ class TypeInfoTraits { ...@@ -52,8 +52,4 @@ class TypeInfoTraits {
template <typename BaseT> template <typename BaseT>
TypeInfo<BaseT> RegisterStaticType(const std::string& type); TypeInfo<BaseT> RegisterStaticType(const std::string& type);
template <typename BaseT, typename DerivedT>
const TypeInfo<BaseT> TypeInfoTraits<BaseT, DerivedT>::kType =
RegisterStaticType<BaseT>(DerivedT::name());
} // namespace phi } // namespace phi
cc_library(
infermeta
SRCS nullary.cc unary.cc binary.cc ternary.cc multiary.cc fusion.cc
DEPS convert_utils meta_tensor infermeta_utils xxhash)
cc_library(
backward_infermeta
SRCS backward.cc
DEPS meta_tensor convert_utils)
add_subdirectory(strings)
add_subdirectory(sparse)
collect_srcs(
infermeta_srcs
SRCS
nullary.cc
unary.cc
binary.cc
ternary.cc
multiary.cc
fusion.cc
backward.cc)
...@@ -1668,9 +1668,10 @@ static void Interpolate2DInferShapeCheck( ...@@ -1668,9 +1668,10 @@ static void Interpolate2DInferShapeCheck(
MetaConfig config) { MetaConfig config) {
auto dim_x = x.dims(); auto dim_x = x.dims();
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
"bilinear" == interp_method || "nearest" == interp_method || ("bilinear" == interp_method || "nearest" == interp_method ||
"bicubic" == interp_method, "bicubic" == interp_method),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Interpolation method can only be \"bilinear\" or \"nearest\" when " "Interpolation method can only be \"bilinear\" or \"nearest\" when "
"Input(X) dimension is 4, but got method = %s.", "Input(X) dimension is 4, but got method = %s.",
...@@ -1818,7 +1819,9 @@ static void Interpolate3DInferShapeCheck( ...@@ -1818,7 +1819,9 @@ static void Interpolate3DInferShapeCheck(
MetaConfig config) { MetaConfig config) {
auto dim_x = x.dims(); auto dim_x = x.dims();
PADDLE_ENFORCE("nearest" == interp_method || "trilinear" == interp_method, PADDLE_ENFORCE_EQ(
("nearest" == interp_method || "trilinear" == interp_method),
true,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"Interpolation method can only be \"trilinear\" or " "Interpolation method can only be \"trilinear\" or "
"\"nearest\" when Input(X) " "\"nearest\" when Input(X) "
...@@ -1972,8 +1975,9 @@ void InterpolateInferMeta( ...@@ -1972,8 +1975,9 @@ void InterpolateInferMeta(
MetaTensor* output, MetaTensor* output,
MetaConfig config) { MetaConfig config) {
auto dim_x = x.dims(); // NCHW format auto dim_x = x.dims(); // NCHW format
PADDLE_ENFORCE( PADDLE_ENFORCE_EQ(
dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5, (dim_x.size() == 3 || dim_x.size() == 4 || dim_x.size() == 5),
true,
phi::errors::Unimplemented( phi::errors::Unimplemented(
"Input(X) dimension must be 3, 4 or 5, but got dimension = %d .", "Input(X) dimension must be 3, 4 or 5, but got dimension = %d .",
dim_x.size())); dim_x.size()));
......
-cc_library(
-  sparse_infermeta
-  SRCS unary.cc binary.cc multiary.cc
-  DEPS convert_utils infermeta_utils)
-cc_library(
-  sparse_backward_infermeta
-  SRCS backward.cc
-  DEPS meta_tensor convert_utils)
+collect_srcs(infermeta_srcs SRCS unary.cc binary.cc multiary.cc backward.cc)
-cc_library(
-  string_infermeta
-  SRCS nullary.cc unary.cc
-  DEPS convert_utils infermeta_utils)
+collect_srcs(infermeta_srcs SRCS nullary.cc unary.cc)
...@@ -2088,7 +2088,9 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, ...@@ -2088,7 +2088,9 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
auto x_dims = x.dims(); auto x_dims = x.dims();
PADDLE_ENFORCE(x_dims.size() == 4 || x_dims.size() == 5, PADDLE_ENFORCE_EQ(
(x_dims.size() == 4 || x_dims.size() == 5),
true,
errors::InvalidArgument("Pooling intput should be 4-D or " errors::InvalidArgument("Pooling intput should be 4-D or "
"5-D tensor but received %dD-Tensor", "5-D tensor but received %dD-Tensor",
x_dims.size())); x_dims.size()));
...@@ -4430,11 +4432,11 @@ void TransposeInferMeta(const MetaTensor& x, ...@@ -4430,11 +4432,11 @@ void TransposeInferMeta(const MetaTensor& x,
// Note: x_rank > axis_size when fuse squeeze2 + transpose2, else x_rank == // Note: x_rank > axis_size when fuse squeeze2 + transpose2, else x_rank ==
// axis_size // axis_size
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(x_rank,
x_rank,
axis_size, axis_size,
errors::InvalidArgument("The input tensor's dimension " errors::InvalidArgument(
"should be equal to the axis's size. " "The input tensor's dimension "
"should be equal to or greater than the axis's size. "
"But received input tensor's dimension is %d, " "But received input tensor's dimension is %d, "
"axis's size is %d", "axis's size is %d",
x_rank, x_rank,
......
...@@ -19,84 +19,6 @@ add_subdirectory(funcs) ...@@ -19,84 +19,6 @@ add_subdirectory(funcs)
# kernel autotune # kernel autotune
add_subdirectory(autotune) add_subdirectory(autotune)
# phi depends all phi kernel targets
set_property(GLOBAL PROPERTY PHI_KERNELS "")
# [ 1. Common kernel compilation dependencies ]
set(COMMON_KERNEL_DEPS
dense_tensor
string_tensor
sparse_coo_tensor
sparse_csr_tensor
tensor_array
int_array
scalar
kernel_context
kernel_factory
arg_map_context
convert_utils
lod_utils
custom_kernel
string_infermeta
phi_tensor_utils)
set(COMMON_KERNEL_DEPS
${COMMON_KERNEL_DEPS}
eigen_function
blas
math_function
im2col
vol2col
concat_and_split_functor
selected_rows_functor)
# remove this dep after removing fluid deps on tensor creation
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} lod_utils)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta infermeta_utils
sparse_infermeta)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} switch_autotune)
set(COMMON_KERNEL_DEPS
${COMMON_KERNEL_DEPS}
threadpool
jit_kernel_helper
softmax
cross_entropy
matrix_bit_code
lapack_function
lstm_compute
gru_compute
deformable_conv_functor
matrix_reduce
segment_pooling
pooling
maxouting
matrix_inverse
matrix_solve
phi_dynload_warpctc
phi_dynload_warprnnt
sequence_padding
sequence_pooling
sequence_scale
fft
phi_data_layout_transform
gpc
utf8proc
gather_scatter_functor)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} process_group)
if(WITH_FLASHATTN)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} phi_dynload_flashattn)
endif()
if(WITH_NCCL OR WITH_RCCL)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} nccl_comm_context)
endif()
if(WITH_GLOO)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} gloo_comm_context)
endif()
if(WITH_CUDNN_FRONTEND)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cudnn-frontend)
endif()
copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
file(GLOB kernel_h "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h") file(GLOB kernel_h "*.h" "selected_rows/*.h" "sparse/*.h" "strings/*.h")
...@@ -105,8 +27,8 @@ file(GLOB kernel_primitive_h "primitive/*.h") ...@@ -105,8 +27,8 @@ file(GLOB kernel_primitive_h "primitive/*.h")
# fusion ops would be included here # fusion ops would be included here
file( file(
GLOB GLOB kernel_cu
kernel_cu RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"gpu/*.cu" "gpu/*.cu"
"gpu/*.cu.cc" "gpu/*.cu.cc"
"gpudnn/*.cu" "gpudnn/*.cu"
...@@ -118,6 +40,10 @@ file( ...@@ -118,6 +40,10 @@ file(
"strings/gpu/*.cu" "strings/gpu/*.cu"
"fusion/gpu/*.cu") "fusion/gpu/*.cu")
if(APPLE OR WIN32)
list(REMOVE_ITEM kernel_cu "fusion/gpu/fusion_group_kernel.cu")
endif()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE) if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$") list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cc$")
list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$") list(FILTER kernel_cu EXCLUDE REGEX ".*_grad_kernel\\.cu$")
...@@ -146,22 +72,19 @@ if(WITH_CUTLASS) ...@@ -146,22 +72,19 @@ if(WITH_CUTLASS)
) )
endif() endif()
file(GLOB cutlass_cu "fusion/cutlass/conv2d/generated/*.cu" file(
"fusion/cutlass/conv2d/*.cu" "fusion/cutlass/*.cu" GLOB cutlass_cu
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"fusion/cutlass/conv2d/generated/*.cu" "fusion/cutlass/conv2d/*.cu"
"fusion/cutlass/*.cu"
"fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu") "fusion/cutlass/memory_efficient_attention/autogen/impl/*.cu")
add_definitions("-DPADDLE_WITH_MEMORY_EFFICIENT_ATTENTION")
list(APPEND kernel_cu ${cutlass_cu}) list(APPEND kernel_cu ${cutlass_cu})
endif() endif()
if(APPLE OR WIN32)
list(REMOVE_ITEM kernel_cu
"${CMAKE_CURRENT_SOURCE_DIR}/fusion/gpu/fusion_group_kernel.cu")
endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
file( file(
GLOB GLOB kernel_cc
kernel_cc RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc" "*.cc"
"cpu/*.cc" "cpu/*.cc"
"legacy/*.cc" "legacy/*.cc"
...@@ -171,6 +94,8 @@ if(WITH_MKLDNN) ...@@ -171,6 +94,8 @@ if(WITH_MKLDNN)
"selected_rows/cpu/*.cc" "selected_rows/cpu/*.cc"
"sparse/*.cc" "sparse/*.cc"
"sparse/cpu/*.cc" "sparse/cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"strings/*.cc" "strings/*.cc"
"strings/cpu/*.cc" "strings/cpu/*.cc"
"onednn/*.cc" "onednn/*.cc"
...@@ -179,8 +104,8 @@ if(WITH_MKLDNN) ...@@ -179,8 +104,8 @@ if(WITH_MKLDNN)
"fusion/cpu/*.cc") "fusion/cpu/*.cc")
else() else()
file( file(
GLOB GLOB kernel_cc
kernel_cc RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc" "*.cc"
"cpu/*.cc" "cpu/*.cc"
"legacy/*.cc" "legacy/*.cc"
...@@ -189,6 +114,8 @@ else() ...@@ -189,6 +114,8 @@ else()
"selected_rows/cpu/*.cc" "selected_rows/cpu/*.cc"
"sparse/*.cc" "sparse/*.cc"
"sparse/cpu/*.cc" "sparse/cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"strings/*.cc" "strings/*.cc"
"strings/cpu/*.cc" "strings/cpu/*.cc"
"fusion/*.cc" "fusion/*.cc"
...@@ -200,32 +127,17 @@ if(DEFINED REDUCE_INFERENCE_LIB_SIZE) ...@@ -200,32 +127,17 @@ if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
endif() endif()
file( file(
GLOB GLOB kernel_xpu
kernel_xpu RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"xpu/*.cc" "xpu/*.cc" "legacy/xpu/*.cc" "selected_rows/xpu/*.cc" "fusion/xpu/*.cc"
"legacy/xpu/*.cc"
"selected_rows/xpu/*.cc"
"fusion/xpu/*.cc"
"sparse/xpu/*.cc") "sparse/xpu/*.cc")
if(WITH_MKLDNN)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} get_kerneltype_forvar_utils)
endif()
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
if(WITH_GPU) collect_srcs(kernels_srcs SRCS ${kernel_cu})
add_library(phi_gpu ${kernel_cu} ${kernel_cc})
if(WITH_CUTLASS)
add_dependencies(phi_gpu cutlass_codegen)
endif()
elseif(WITH_ROCM)
hip_add_library(phi_gpu STATIC ${kernel_cu} ${kernel_cc})
endif()
kernel_declare("${kernel_cu}") kernel_declare("${kernel_cu}")
kernel_declare("${kernel_cc}") endif()
target_link_libraries(phi_gpu ${COMMON_KERNEL_DEPS})
set(ADD_PHI_KERNELS ${ADD_PHI_KERNELS} phi_gpu) if(WITH_XPU)
elseif(WITH_XPU)
if(WITH_XPU_KP) if(WITH_XPU_KP)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/) DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/)
...@@ -237,52 +149,23 @@ elseif(WITH_XPU) ...@@ -237,52 +149,23 @@ elseif(WITH_XPU)
file(RENAME ${kernel} "${CMAKE_CURRENT_BINARY_DIR}/kps/${name}.kps") file(RENAME ${kernel} "${CMAKE_CURRENT_BINARY_DIR}/kps/${name}.kps")
endforeach() endforeach()
file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.kps") file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.kps")
file(
GLOB kernel_cc_relative foreach(kernel ${kernel_cc})
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc"
"cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"selected_rows/*.cc"
"selected_rows/cpu/*.cc"
"sparse/*.cc"
"sparse/cpu/*.cc"
"strings/*.cc"
"strings/cpu/*.cc"
"fusion/*.cc"
"fusion/cpu/*.cc")
foreach(kernel ${kernel_cc_relative})
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${kernel} file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${kernel}
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${kernel}) DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/${kernel})
endforeach() endforeach()
file(GLOB_RECURSE kernel_xpu_cc "${CMAKE_CURRENT_BINARY_DIR}/*.cc") file(GLOB_RECURSE kernel_xpu_cc "${CMAKE_CURRENT_BINARY_DIR}/*.cc")
xpu_add_library(
phi_xpu set(kernel_cc ${kernel_xpu_cc})
STATIC collect_generated_srcs(kernels_srcs SRCS ${kernel_xpu_kps})
${kernel_xpu}
${kernel_xpu_kps}
${kernel_xpu_cc}
DEPENDS
${COMMON_KERNEL_DEPS})
kernel_declare("${kernel_xpu_cc}")
else()
add_library(phi_xpu ${kernel_xpu} ${kernel_cc})
kernel_declare("${kernel_cc}")
endif() endif()
collect_srcs(kernels_srcs SRCS ${kernel_xpu})
kernel_declare("${kernel_xpu}") kernel_declare("${kernel_xpu}")
kernel_declare("${kernel_xpu_kps}") kernel_declare("${kernel_xpu_kps}")
target_link_libraries(phi_xpu ${COMMON_KERNEL_DEPS})
set(ADD_PHI_KERNELS ${ADD_PHI_KERNELS} phi_xpu)
else()
add_library(phi_cpu ${kernel_cc})
target_link_libraries(phi_cpu ${COMMON_KERNEL_DEPS})
kernel_declare("${kernel_cc}")
set(ADD_PHI_KERNELS phi_cpu)
endif() endif()
set_property(GLOBAL PROPERTY PHI_KERNELS ${ADD_PHI_KERNELS}) collect_srcs(kernels_srcs SRCS ${kernel_cc})
kernel_declare("${kernel_cc}")
if(NOT "${KERNEL_LIST}" STREQUAL "") if(NOT "${KERNEL_LIST}" STREQUAL "")
prune_declaration_h() prune_declaration_h()
......
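Note on the recurring collect_srcs(kernels_srcs SRCS ...) calls that replace the old per-directory cc_library/add_library targets in the hunks above and below: the helper itself is defined in CMake code outside this excerpt, so the following is only a minimal sketch, under assumed names, of how such a source-collecting function could gather files into a global property for the single phi target to consume later; it is not the PR's actual implementation.

# Hypothetical sketch only -- not the actual helper shipped by this PR.
# Accumulate source files from many subdirectories into one global
# property so that a single shared "phi" target can be built from them.
function(collect_srcs SRC_GROUP)
  set(multiValueArgs SRCS)
  cmake_parse_arguments(arg "" "" "${multiValueArgs}" ${ARGN})
  foreach(src ${arg_SRCS})
    # Store absolute paths so the entries remain valid outside this directory.
    get_filename_component(abs_src ${src} ABSOLUTE)
    set_property(GLOBAL APPEND PROPERTY ${SRC_GROUP} ${abs_src})
  endforeach()
endfunction()
# A top-level CMakeLists could then read the accumulated list, e.g.:
#   get_property(phi_kernel_srcs GLOBAL PROPERTY kernels_srcs)
#   add_library(phi SHARED ${phi_kernel_srcs})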
if(WITH_CUDNN_FRONTEND) collect_srcs(kernels_srcs SRCS cache.cc switch_autotune.cc)
cc_library(
cache
SRCS cache.cc
DEPS cudnn-frontend phi_enforce)
else()
cc_library(
cache
SRCS cache.cc
DEPS phi_enforce)
endif()
cc_library(
switch_autotune
SRCS switch_autotune.cc
DEPS cache flags)
...@@ -18,11 +18,11 @@ ...@@ -18,11 +18,11 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "gflags/gflags.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/errors.h" #include "paddle/phi/core/errors.h"
#include "paddle/phi/core/flags.h"
DECLARE_int32(search_cache_max_number); PHI_DECLARE_int32(search_cache_max_number);
inline void HashCombine(std::size_t* seed UNUSED) {} inline void HashCombine(std::size_t* seed UNUSED) {}
......
...@@ -105,10 +105,6 @@ struct RmsFunctor<T, phi::CPUContext> { ...@@ -105,10 +105,6 @@ struct RmsFunctor<T, phi::CPUContext> {
} }
}; };
template struct RmsFunctor<phi::GPUContext, float>;
template struct RmsFunctor<phi::GPUContext, double>;
template struct RmsFunctor<phi::GPUContext, phi::dtype::float16>;
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
rmsprop, CPU, ALL_LAYOUT, phi::RmspropDenseKernel, float, double) {} rmsprop, CPU, ALL_LAYOUT, phi::RmspropDenseKernel, float, double) {}
......
...@@ -4,67 +4,15 @@ add_subdirectory(lapack) ...@@ -4,67 +4,15 @@ add_subdirectory(lapack)
add_subdirectory(detail) add_subdirectory(detail)
add_subdirectory(jit) add_subdirectory(jit)
math_library(deformable_conv_functor DEPS dense_tensor) file(
math_library(concat_and_split_functor DEPS dense_tensor) GLOB func_cc_srcs
math_library(fc_functor DEPS blas jit_kernel_helper) RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
math_library(gpc DEPS phi_enforce) "*.cc")
math_library(gru_compute DEPS activation_functions math_function)
math_library(lstm_compute DEPS activation_functions)
math_library(math_function DEPS blas dense_tensor)
math_library(matrix_reduce DEPS dense_tensor)
math_library(matrix_inverse DEPS dense_tensor eigen3 blas)
math_library(pooling DEPS dense_tensor)
math_library(segment_pooling)
math_library(sequence2batch)
math_library(matrix_solve DEPS dense_tensor eigen3 blas math_function)
math_library(cross_entropy)
math_library(im2col)
math_library(vol2col)
math_library(softmax DEPS math_function)
math_library(maxouting)
math_library(matrix_bit_code)
math_library(sequence_scale)
math_library(sequence_padding DEPS lod_utils)
math_library(sequence_pooling DEPS math_function jit_kernel_helper)
cc_library(
phi_data_layout_transform
SRCS data_layout_transform.cc
DEPS tensor blas)
if(WITH_GPU OR WITH_ROCM) if(WITH_GPU OR WITH_ROCM)
if(MKL_FOUND AND WITH_ONEMKL) file(
math_library(fft spectral_op.cu DEPS dynload_cuda dynload_mklrt GLOB func_cu_srcs
dense_tensor) RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
target_include_directories(fft PRIVATE ${MKL_INCLUDE}) "*.cu")
else()
math_library(fft spectral_op.cu DEPS dynload_cuda dense_tensor pocketfft)
endif()
else()
if(MKL_FOUND AND WITH_ONEMKL)
mathp_library(fft DEPS dynload_mklrt dense_tensor)
target_include_directories(fft PRIVATE ${MKL_INCLUDE})
else()
math_library(fft DEPS dense_tensor pocketfft)
endif()
endif() endif()
if(WITH_MKLDNN) collect_srcs(kernels_srcs SRCS ${func_cc_srcs} ${func_cu_srcs})
math_library(selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector)
else()
math_library(selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector)
endif()
if(WITH_ROCM)
hip_library(
gather_scatter_functor
SRCS gather_scatter_functor.cc gather_scatter_functor.cu
DEPS tensor)
else()
cc_library(
gather_scatter_functor
SRCS gather_scatter_functor.cc gather_scatter_functor.cu
DEPS tensor)
endif()
cc_library( collect_srcs(kernels_srcs SRCS blas.cc)
blas
SRCS blas.cc
DEPS cblas framework_proto phi_backends)
...@@ -19,10 +19,11 @@ ...@@ -19,10 +19,11 @@
#include "paddle/phi/backends/dynload/cublas.h" #include "paddle/phi/backends/dynload/cublas.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/flags.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
DECLARE_bool(enable_cublas_tensor_op_math); PHI_DECLARE_bool(enable_cublas_tensor_op_math);
DECLARE_bool(gemm_use_half_precision_compute_type); PHI_DECLARE_bool(gemm_use_half_precision_compute_type);
namespace phi { namespace phi {
namespace funcs { namespace funcs {
......
cc_library(activation_functions SRCS avx_functions.cc) collect_srcs(kernels_srcs SRCS avx_functions.cc)
...@@ -6,19 +6,5 @@ file( ...@@ -6,19 +6,5 @@ file(
GLOB EIGEN_CU_SOURCES GLOB EIGEN_CU_SOURCES
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cu") "*.cu")
if(WITH_GPU)
nv_library( collect_srcs(kernels_srcs SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES})
eigen_function
SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES}
DEPS eigen3)
elseif(WITH_ROCM)
hip_library(
eigen_function
SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES}
DEPS eigen3)
else()
cc_library(
eigen_function
SRCS ${EIGEN_CC_SOURCES}
DEPS eigen3)
endif()
...@@ -9,17 +9,13 @@ file(APPEND ${jit_file} "\#include \"paddle/phi/kernels/funcs/jit/helper.h\"\n") ...@@ -9,17 +9,13 @@ file(APPEND ${jit_file} "\#include \"paddle/phi/kernels/funcs/jit/helper.h\"\n")
file(APPEND ${jit_file} file(APPEND ${jit_file}
"\#include \"paddle/phi/kernels/funcs/jit/registry.h\"\n\n") "\#include \"paddle/phi/kernels/funcs/jit/registry.h\"\n\n")
set(JIT_KERNEL_DEPS device_context cblas gflags enforce place xxhash)
file( file(
GLOB jit_kernel_cc_srcs GLOB jit_kernel_cc_srcs
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc") "*.cc")
list(REMOVE_ITEM jit_kernel_cc_srcs test.cc benchmark.cc) list(REMOVE_ITEM jit_kernel_cc_srcs test.cc benchmark.cc)
cc_library(
jit_kernel_base collect_srcs(kernels_srcs SRCS ${jit_kernel_cc_srcs})
SRCS ${jit_kernel_cc_srcs}
DEPS ${JIT_KERNEL_DEPS})
copy_if_different(${jit_file} ${jit_file_final}) copy_if_different(${jit_file} ${jit_file_final})
...@@ -30,14 +26,11 @@ if(WITH_XBYAK) ...@@ -30,14 +26,11 @@ if(WITH_XBYAK)
add_subdirectory(gen) add_subdirectory(gen)
endif() endif()
cc_library(
jit_kernel_helper INTERFACE
SRCS ${jit_kernel_cc_srcs}
DEPS jit_kernel_base ${JIT_KERNEL_DEPS})
cc_test( cc_test(
jit_kernel_test jit_kernel_test
SRCS test.cc SRCS test.cc
DEPS jit_kernel_helper) DEPS phi)
if(NOT WIN32) if(NOT WIN32)
set(cuda_less12_and_gcc_greater12 false) set(cuda_less12_and_gcc_greater12 false)
if(DEFINED CMAKE_CUDA_COMPILER_VERSION) if(DEFINED CMAKE_CUDA_COMPILER_VERSION)
...@@ -47,14 +40,7 @@ if(NOT WIN32) ...@@ -47,14 +40,7 @@ if(NOT WIN32)
endif() endif()
endif() endif()
if(NOT cuda_less12_and_gcc_greater12) if(NOT cuda_less12_and_gcc_greater12)
cc_binary( cc_binary(jit_kernel_benchmark SRCS benchmark.cc DEPS phi)
jit_kernel_benchmark
SRCS
benchmark.cc
DEPS
jit_kernel_helper
phi_device_tracer
tensor)
endif() endif()
endif() endif()
if(WITH_TESTING AND TEST jit_kernel_test) if(WITH_TESTING AND TEST jit_kernel_test)
......
...@@ -3,13 +3,7 @@ file( ...@@ -3,13 +3,7 @@ file(
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc") "*.cc")
cc_library( collect_srcs(kernels_srcs SRCS ${jitcode_cc_srcs})
jit_kernel_jitcode
SRCS ${jitcode_cc_srcs}
DEPS jit_kernel_base xbyak)
set(JIT_KERNEL_DEPS
${JIT_KERNEL_DEPS} xbyak jit_kernel_jitcode
PARENT_SCOPE)
function(USE_JITKERNEL_GEN TARGET) function(USE_JITKERNEL_GEN TARGET)
file(APPEND ${jit_file} "USE_JITKERNEL_GEN(${TARGET});\n") file(APPEND ${jit_file} "USE_JITKERNEL_GEN(${TARGET});\n")
......
...@@ -33,7 +33,7 @@ namespace jit { ...@@ -33,7 +33,7 @@ namespace jit {
class GenBase : public Kernel { class GenBase : public Kernel {
public: public:
virtual ~GenBase() = default; virtual ~GenBase() {}
virtual std::string name() const = 0; virtual std::string name() const = 0;
virtual size_t getSize() const = 0; virtual size_t getSize() const = 0;
virtual const unsigned char* getCodeInternal() const = 0; virtual const unsigned char* getCodeInternal() const = 0;
......
...@@ -12,7 +12,3 @@ endif() ...@@ -12,7 +12,3 @@ endif()
# mix should be last # mix should be last
add_subdirectory(mix) add_subdirectory(mix)
set(JIT_KERNEL_DEPS
${JIT_KERNEL_DEPS}
PARENT_SCOPE)
...@@ -2,14 +2,8 @@ file( ...@@ -2,14 +2,8 @@ file(
GLOB jit_kernel_cc_intrinsic GLOB jit_kernel_cc_intrinsic
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc") "*.cc")
cc_library(
jit_kernel_intrinsic
SRCS ${jit_kernel_cc_intrinsic}
DEPS jit_kernel_base)
set(JIT_KERNEL_DEPS collect_srcs(kernels_srcs SRCS ${jit_kernel_cc_intrinsic})
${JIT_KERNEL_DEPS} jit_kernel_intrinsic
PARENT_SCOPE)
# use mkl kernels by name and type # use mkl kernels by name and type
use_jitkernel_more(kCRFDecoding, intrinsic) use_jitkernel_more(kCRFDecoding, intrinsic)
......
...@@ -2,14 +2,8 @@ file( ...@@ -2,14 +2,8 @@ file(
GLOB jit_kernel_mix_cc GLOB jit_kernel_mix_cc
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc") "*.cc")
cc_library(
jit_kernel_mix
SRCS ${jit_kernel_mix_cc}
DEPS jit_kernel_base)
set(JIT_KERNEL_DEPS collect_srcs(kernels_srcs SRCS ${jit_kernel_mix_cc})
${JIT_KERNEL_DEPS} jit_kernel_mix
PARENT_SCOPE)
use_jitkernel_more(kVSigmoid, mix) use_jitkernel_more(kVSigmoid, mix)
use_jitkernel_more(kVTanh, mix) use_jitkernel_more(kVTanh, mix)
......
cc_library( collect_srcs(kernels_srcs SRCS mkl.cc)
jit_kernel_mkl
SRCS mkl.cc
DEPS jit_kernel_base dynload_mklml)
set(JIT_KERNEL_DEPS
${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl
PARENT_SCOPE)
# use mkl kernels by name and type # use mkl kernels by name and type
use_jitkernel_more(kMatMul, mkl) use_jitkernel_more(kMatMul, mkl)
......
cc_library( collect_srcs(kernels_srcs SRCS refer.cc)
jit_kernel_refer
SRCS refer.cc
DEPS jit_kernel_base)
set(JIT_KERNEL_DEPS
${JIT_KERNEL_DEPS} jit_kernel_refer
PARENT_SCOPE)
function(USE_JITKERNEL_REFER TARGET) function(USE_JITKERNEL_REFER TARGET)
file(APPEND ${jit_file} "USE_JITKERNEL_REFER(${TARGET});\n") file(APPEND ${jit_file} "USE_JITKERNEL_REFER(${TARGET});\n")
......
math_library(lapack_function DEPS phi_dynload_lapack) collect_srcs(kernels_srcs SRCS lapack_function.cc)
...@@ -25,6 +25,7 @@ limitations under the License. */ ...@@ -25,6 +25,7 @@ limitations under the License. */
namespace phi { namespace phi {
namespace funcs { namespace funcs {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
template <typename T> template <typename T>
void BatchTranspose(T* output, void BatchTranspose(T* output,
const T* input, const T* input,
...@@ -32,7 +33,7 @@ void BatchTranspose(T* output, ...@@ -32,7 +33,7 @@ void BatchTranspose(T* output,
int64_t m, int64_t m,
int64_t n, int64_t n,
const phi::GPUContext* dev_ctx); const phi::GPUContext* dev_ctx);
#endif
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
struct TransposeNormal { struct TransposeNormal {
// for dims >= 7 situation // for dims >= 7 situation
......
...@@ -12,17 +12,21 @@ ...@@ -12,17 +12,21 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h" #include "glog/logging.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/errors.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/errors.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/gemm_kernel_utils.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h" #include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h"
namespace phi { namespace phi {
namespace fusion { namespace fusion {
namespace cutlass_internal { namespace cutlass_internal {
using gemm_kernel_utils::getMaximumSharedMemoryPerBlockKb;
template <typename T, typename Context> template <typename T, typename Context>
void MemoryEfficientAttentionForwardKernel( void MemoryEfficientAttentionForwardKernel(
const Context& ctx, const Context& ctx,
...@@ -124,9 +128,9 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -124,9 +128,9 @@ void MemoryEfficientAttentionForwardKernel(
VLOG(3) << "kAlignLSE" << kAlignLSE; VLOG(3) << "kAlignLSE" << kAlignLSE;
typename KernelType::Params p; typename KernelType::Params p;
p.query_ptr = SafeGetTensorPtr<scalar_t>(query); p.query_ptr = phi::SafeGetTensorPtr<scalar_t>(query);
p.key_ptr = SafeGetTensorPtr<scalar_t>(key); p.key_ptr = phi::SafeGetTensorPtr<scalar_t>(key);
p.value_ptr = SafeGetTensorPtr<scalar_t>(value); p.value_ptr = phi::SafeGetTensorPtr<scalar_t>(value);
p.logsumexp_ptr = is_test ? nullptr : logsumexp->data<float>(); p.logsumexp_ptr = is_test ? nullptr : logsumexp->data<float>();
VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr; VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr;
...@@ -134,19 +138,19 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -134,19 +138,19 @@ void MemoryEfficientAttentionForwardKernel(
if (KernelType::kNeedsOutputAccumulatorBuffer) { if (KernelType::kNeedsOutputAccumulatorBuffer) {
out_accum.Resize(output->dims()); out_accum.Resize(output->dims());
p.output_accum_ptr = p.output_accum_ptr =
SafeAllocTensor<typename KernelType::output_accum_t, Context>( phi::SafeAllocTensor<typename KernelType::output_accum_t, Context>(
ctx, &out_accum); ctx, &out_accum);
VLOG(3) << "output_accum_ptr " << p.output_accum_ptr; VLOG(3) << "output_accum_ptr " << p.output_accum_ptr;
} else { } else {
p.output_accum_ptr = nullptr; p.output_accum_ptr = nullptr;
} }
p.output_ptr = p.output_ptr = phi::SafeAllocTensor<typename KernelType::output_t, Context>(
SafeAllocTensor<typename KernelType::output_t, Context>(ctx, output); ctx, output);
VLOG(3) << "output_ptr " << p.output_ptr; VLOG(3) << "output_ptr " << p.output_ptr;
if (cu_seqlens_q) { if (cu_seqlens_q) {
p.seqstart_q_ptr = SafeGetTensorPtr<int32_t>(cu_seqlens_q); p.seqstart_q_ptr = phi::SafeGetTensorPtr<int32_t>(cu_seqlens_q);
p.seqstart_k_ptr = SafeGetTensorPtr<int32_t>(cu_seqlens_k); p.seqstart_k_ptr = phi::SafeGetTensorPtr<int32_t>(cu_seqlens_k);
VLOG(3) << "seqstart_q_ptr " << p.seqstart_q_ptr; VLOG(3) << "seqstart_q_ptr " << p.seqstart_q_ptr;
} else { } else {
p.seqstart_q_ptr = nullptr; p.seqstart_q_ptr = nullptr;
...@@ -164,7 +168,7 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -164,7 +168,7 @@ void MemoryEfficientAttentionForwardKernel(
cu_seqlens_q ? cu_seqlens_q.get().dims()[0] - 1 : q_dims[0]); cu_seqlens_q ? cu_seqlens_q.get().dims()[0] - 1 : q_dims[0]);
p.causal = causal; p.causal = causal;
if (causal_diagonal) { if (causal_diagonal) {
p.causal_diagonal_ptr = SafeGetTensorPtr<int32_t>(causal_diagonal); p.causal_diagonal_ptr = phi::SafeGetTensorPtr<int32_t>(causal_diagonal);
} else { } else {
p.causal_diagonal_ptr = nullptr; p.causal_diagonal_ptr = nullptr;
} }
...@@ -172,7 +176,7 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -172,7 +176,7 @@ void MemoryEfficientAttentionForwardKernel(
p.seqlen_k_ptr = nullptr; p.seqlen_k_ptr = nullptr;
if (seqlen_k) { if (seqlen_k) {
p.seqlen_k_ptr = SafeGetTensorPtr<int32_t>(seqlen_k); p.seqlen_k_ptr = phi::SafeGetTensorPtr<int32_t>(seqlen_k);
} else { } else {
p.seqlen_k_ptr = nullptr; p.seqlen_k_ptr = nullptr;
} }
...@@ -197,7 +201,7 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -197,7 +201,7 @@ void MemoryEfficientAttentionForwardKernel(
PD_MEA_CHECK_OVERFLOW(p.o_strideM, DimStride(output->dims(), 1)); PD_MEA_CHECK_OVERFLOW(p.o_strideM, DimStride(output->dims(), 1));
if (bias) { if (bias) {
p.attn_bias_ptr = SafeGetTensorPtr<scalar_t>(bias); p.attn_bias_ptr = phi::SafeGetTensorPtr<scalar_t>(bias);
PD_MEA_CHECK_OVERFLOW( PD_MEA_CHECK_OVERFLOW(
p.bias_strideB, p.bias_strideB,
GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims)); GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims));
...@@ -215,7 +219,8 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -215,7 +219,8 @@ void MemoryEfficientAttentionForwardKernel(
seed_dims[0] = 2; seed_dims[0] = 2;
seed_and_offset->Resize(seed_dims); seed_and_offset->Resize(seed_dims);
ctx.template HostAlloc<int64_t>(seed_and_offset); ctx.template HostAlloc<int64_t>(seed_and_offset);
int64_t* seed_and_offset_ptr = SafeGetTensorPtr<int64_t>(seed_and_offset); int64_t* seed_and_offset_ptr =
phi::SafeGetTensorPtr<int64_t>(seed_and_offset);
auto gen = ctx.GetGenerator(); auto gen = ctx.GetGenerator();
uint64_t inc = query.dims()[0] * query.dims()[2] * 32; uint64_t inc = query.dims()[0] * query.dims()[2] * 32;
...@@ -254,10 +259,10 @@ void MemoryEfficientAttentionForwardKernel( ...@@ -254,10 +259,10 @@ void MemoryEfficientAttentionForwardKernel(
ctx.stream()>>>(p); ctx.stream()>>>(p);
}; };
dispatch_cutlass_forward<T>(ctx, launchKernel); dispatch_cutlass_forward<T>(ctx, launchKernel);
PADDLE_ENFORCE_EQ(kernel_launched, PADDLE_ENFORCE_EQ(
kernel_launched,
true, true,
paddle::platform::errors::InvalidArgument( phi::errors::InvalidArgument("the kernel should not be launched"));
"the kernel should not be launched"));
} }
} // namespace cutlass_internal } // namespace cutlass_internal
......
...@@ -15,16 +15,16 @@ ...@@ -15,16 +15,16 @@
#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/errors.h"
#include "paddle/phi/api/include/tensor_operants.h" #include "paddle/phi/api/include/tensor_operants.h"
#include "paddle/phi/common/memory_utils.h"
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h"
#include "paddle/phi/kernels/cast_kernel.h" #include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/cum_kernel.h" #include "paddle/phi/kernels/cum_kernel.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/funcs/get_pad_lse.cu.h" #include "paddle/phi/kernels/funcs/get_pad_lse.cu.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention/autogen/memory_efficient_attention.h"
#include "paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_utils.h"
#include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/kernels/matmul_kernel.h"
#include "paddle/phi/kernels/reduce_sum_kernel.h" #include "paddle/phi/kernels/reduce_sum_kernel.h"
#include "paddle/phi/kernels/reshape_kernel.h" #include "paddle/phi/kernels/reshape_kernel.h"
...@@ -34,6 +34,8 @@ namespace phi { ...@@ -34,6 +34,8 @@ namespace phi {
namespace fusion { namespace fusion {
namespace cutlass_internal { namespace cutlass_internal {
using gemm_kernel_utils::getMaximumSharedMemoryPerBlockKb;
template <typename T, typename Context> template <typename T, typename Context>
void MemoryEfficientAttentionBackwardKernel( void MemoryEfficientAttentionBackwardKernel(
const Context& ctx, const Context& ctx,
...@@ -387,9 +389,9 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -387,9 +389,9 @@ void MemoryEfficientAttentionBackwardKernel(
VLOG(3) << "delta has been set" << delta.data(); VLOG(3) << "delta has been set" << delta.data();
typename KernelType::Params p; typename KernelType::Params p;
p.query_ptr = SafeGetTensorPtr<scalar_t>(query); p.query_ptr = phi::SafeGetTensorPtr<scalar_t>(query);
p.key_ptr = SafeGetTensorPtr<scalar_t>(key); p.key_ptr = phi::SafeGetTensorPtr<scalar_t>(key);
p.value_ptr = SafeGetTensorPtr<scalar_t>(value); p.value_ptr = phi::SafeGetTensorPtr<scalar_t>(value);
bool force_pad_inf = (compute_capacity == 75); bool force_pad_inf = (compute_capacity == 75);
const std::string data_format = "NCHW"; const std::string data_format = "NCHW";
...@@ -400,14 +402,14 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -400,14 +402,14 @@ void MemoryEfficientAttentionBackwardKernel(
32, 32,
data_format, data_format,
force_pad_inf); force_pad_inf);
p.logsumexp_ptr = SafeGetTensorPtr<float>(padded_lse); p.logsumexp_ptr = phi::SafeGetTensorPtr<float>(padded_lse);
VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr; VLOG(3) << "logsumexp_ptr" << p.logsumexp_ptr;
p.output_ptr = SafeGetTensorPtr<scalar_t>(output); p.output_ptr = phi::SafeGetTensorPtr<scalar_t>(output);
p.grad_output_ptr = SafeGetTensorPtr<scalar_t>(output_grad); p.grad_output_ptr = phi::SafeGetTensorPtr<scalar_t>(output_grad);
p.grad_query_ptr = SafeAllocTensor<scalar_t, Context>(ctx, query_grad); p.grad_query_ptr = phi::SafeAllocTensor<scalar_t, Context>(ctx, query_grad);
p.grad_key_ptr = SafeAllocTensor<scalar_t, Context>(ctx, key_grad); p.grad_key_ptr = phi::SafeAllocTensor<scalar_t, Context>(ctx, key_grad);
p.grad_value_ptr = SafeAllocTensor<scalar_t, Context>(ctx, value_grad); p.grad_value_ptr = phi::SafeAllocTensor<scalar_t, Context>(ctx, value_grad);
p.delta_ptr = SafeGetTensorPtr<float>(delta); p.delta_ptr = phi::SafeGetTensorPtr<float>(delta);
PD_MEA_CHECK_OVERFLOW(p.head_dim, q_dims[3]); PD_MEA_CHECK_OVERFLOW(p.head_dim, q_dims[3]);
PD_MEA_CHECK_OVERFLOW(p.head_dim_value, v_dims[3]); PD_MEA_CHECK_OVERFLOW(p.head_dim_value, v_dims[3]);
...@@ -427,8 +429,8 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -427,8 +429,8 @@ void MemoryEfficientAttentionBackwardKernel(
VLOG(3) << "p.scale" << p.scale; VLOG(3) << "p.scale" << p.scale;
if (cu_seqlens_q) { if (cu_seqlens_q) {
p.cu_seqlens_q_ptr = SafeGetTensorPtr<int32_t>(cu_seqlens_q); p.cu_seqlens_q_ptr = phi::SafeGetTensorPtr<int32_t>(cu_seqlens_q);
p.cu_seqlens_k_ptr = SafeGetTensorPtr<int32_t>(cu_seqlens_k); p.cu_seqlens_k_ptr = phi::SafeGetTensorPtr<int32_t>(cu_seqlens_k);
VLOG(3) << "p.cu_seqlens_q_ptr" << p.cu_seqlens_q_ptr; VLOG(3) << "p.cu_seqlens_q_ptr" << p.cu_seqlens_q_ptr;
} }
...@@ -483,7 +485,7 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -483,7 +485,7 @@ void MemoryEfficientAttentionBackwardKernel(
PD_MEA_CHECK_OVERFLOW(p.delta_strideB, DimStride(delta.dims(), 0)); PD_MEA_CHECK_OVERFLOW(p.delta_strideB, DimStride(delta.dims(), 0));
if (bias) { if (bias) {
p.bias_ptr = SafeGetTensorPtr<scalar_t>(bias); p.bias_ptr = phi::SafeGetTensorPtr<scalar_t>(bias);
PD_MEA_CHECK_OVERFLOW( PD_MEA_CHECK_OVERFLOW(
p.bias_strideB, p.bias_strideB,
GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims)); GetMemoryEfficientBiasStrideB(bias.get().dims(), q_dims, k_dims));
...@@ -491,7 +493,8 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -491,7 +493,8 @@ void MemoryEfficientAttentionBackwardKernel(
PD_MEA_CHECK_OVERFLOW(p.bias_strideM, k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.bias_strideM, k_dims[1]);
VLOG(3) << "p.bias_ptr" << p.bias_ptr; VLOG(3) << "p.bias_ptr" << p.bias_ptr;
if (bias_grad) { if (bias_grad) {
p.grad_bias_ptr = SafeAllocTensor<scalar_t, Context>(ctx, bias_grad); p.grad_bias_ptr =
phi::SafeAllocTensor<scalar_t, Context>(ctx, bias_grad);
PD_MEA_CHECK_OVERFLOW(p.gB_strideB, q_dims[2] * q_dims[1] * k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.gB_strideB, q_dims[2] * q_dims[1] * k_dims[1]);
PD_MEA_CHECK_OVERFLOW(p.gB_strideH, q_dims[1] * k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.gB_strideH, q_dims[1] * k_dims[1]);
PD_MEA_CHECK_OVERFLOW(p.gB_strideM, k_dims[1]); PD_MEA_CHECK_OVERFLOW(p.gB_strideM, k_dims[1]);
...@@ -504,7 +507,8 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -504,7 +507,8 @@ void MemoryEfficientAttentionBackwardKernel(
p.grad_bias_ptr = nullptr; p.grad_bias_ptr = nullptr;
} }
if (dropout_p != 0) { if (dropout_p != 0) {
int64_t* seed_and_offset_ptr = SafeGetTensorPtr<int64_t>(seed_and_offset); int64_t* seed_and_offset_ptr =
phi::SafeGetTensorPtr<int64_t>(seed_and_offset);
p.seed = (uint64_t)seed_and_offset_ptr[0]; p.seed = (uint64_t)seed_and_offset_ptr[0];
p.offset = (uint64_t)seed_and_offset_ptr[1]; p.offset = (uint64_t)seed_and_offset_ptr[1];
p.dropout_prob = dropout_p; p.dropout_prob = dropout_p;
...@@ -514,9 +518,9 @@ void MemoryEfficientAttentionBackwardKernel( ...@@ -514,9 +518,9 @@ void MemoryEfficientAttentionBackwardKernel(
} }
int64_t size_bytes = p.workspace_size(); int64_t size_bytes = p.workspace_size();
paddle::memory::AllocationPtr temp_workspace{nullptr}; phi::Allocator::AllocationPtr temp_workspace{nullptr};
VLOG(3) << "size_bytes " << size_bytes; VLOG(3) << "size_bytes " << size_bytes;
temp_workspace = paddle::memory::Alloc( temp_workspace = phi::memory_utils::Alloc(
ctx.GetPlace(), ctx.GetPlace(),
size_bytes, size_bytes,
phi::Stream(reinterpret_cast<phi::StreamId>(ctx.stream()))); phi::Stream(reinterpret_cast<phi::StreamId>(ctx.stream())));
......
...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef PADDLE_WITH_HIP
#include "paddle/phi/kernels/eigvalsh_kernel.h" #include "paddle/phi/kernels/eigvalsh_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_context.h"
...@@ -29,3 +31,5 @@ PD_REGISTER_KERNEL(eigvalsh, // cuda_only ...@@ -29,3 +31,5 @@ PD_REGISTER_KERNEL(eigvalsh, // cuda_only
phi::dtype::complex<double>) { phi::dtype::complex<double>) {
kernel->InputAt(1).SetDataType(phi::dtype::ToReal(kernel_key.dtype())); kernel->InputAt(1).SetDataType(phi::dtype::ToReal(kernel_key.dtype()));
} }
#endif // not PADDLE_WITH_HIP
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#include "paddle/phi/core/flags.h" #include "paddle/phi/core/flags.h"
#include "paddle/phi/kernels/funcs/aligned_vector.h" #include "paddle/phi/kernels/funcs/aligned_vector.h"
DECLARE_bool(use_fast_math); PHI_DECLARE_bool(use_fast_math);
namespace phi { namespace phi {
......
...@@ -52,19 +52,6 @@ struct GetTensorValue<phi::CPUContext, T> { ...@@ -52,19 +52,6 @@ struct GetTensorValue<phi::CPUContext, T> {
} }
}; };
template <typename T>
struct GetTensorValue<phi::GPUContext, T> {
T operator()(const phi::GPUContext& dev_ctx,
const DenseTensor& tensor) const {
const T* data = tensor.data<T>();
T value;
const auto gpu_place = dev_ctx.GetPlace();
memory_utils::Copy(
phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());
return value;
}
};
template <typename T> template <typename T>
struct IscloseFunctor<phi::CPUContext, T> { struct IscloseFunctor<phi::CPUContext, T> {
void operator()(const phi::CPUContext& ctx, void operator()(const phi::CPUContext& ctx,
...@@ -127,6 +114,19 @@ __global__ void IscloseCUDAKernel(const T* in_data, ...@@ -127,6 +114,19 @@ __global__ void IscloseCUDAKernel(const T* in_data,
} }
} }
template <typename T>
struct GetTensorValue<phi::GPUContext, T> {
T operator()(const phi::GPUContext& dev_ctx,
const DenseTensor& tensor) const {
const T* data = tensor.data<T>();
T value;
const auto gpu_place = dev_ctx.GetPlace();
memory_utils::Copy(
phi::CPUPlace(), &value, gpu_place, data, sizeof(T), dev_ctx.stream());
return value;
}
};
template <typename T> template <typename T>
struct IscloseFunctor<phi::GPUContext, T> { struct IscloseFunctor<phi::GPUContext, T> {
void operator()(const phi::GPUContext& dev_ctx, void operator()(const phi::GPUContext& dev_ctx,
......
...@@ -30,7 +30,7 @@ void LaunchEigenPadding( ...@@ -30,7 +30,7 @@ void LaunchEigenPadding(
const DDim& in_dims, const DDim& in_dims,
const DenseTensor* d_out, const DenseTensor* d_out,
const DDim& out_dims, const DDim& out_dims,
const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) { const std::array<std::pair<int64_t, int64_t>, D>& paddings) {
auto& place = *context.eigen_device(); auto& place = *context.eigen_device();
auto d_in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From( auto d_in_t = EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
*d_input, in_dims); *d_input, in_dims);
...@@ -40,7 +40,7 @@ void LaunchEigenPadding( ...@@ -40,7 +40,7 @@ void LaunchEigenPadding(
if (d_input->numel() <= Eigen::NumTraits<int>::highest()) { if (d_input->numel() <= Eigen::NumTraits<int>::highest()) {
// similar to tf.pad: // similar to tf.pad:
// if element number less than INT_MAX, change the type of index to int // if element number less than INT_MAX, change the type of index to int
Eigen::array<std::pair<int, int>, D> paddings_32bit; std::array<std::pair<int, int>, D> paddings_32bit;
for (size_t i = 0; i < D; i++) { for (size_t i = 0; i < D; i++) {
paddings_32bit[i] = std::make_pair(paddings[i].first, paddings[i].second); paddings_32bit[i] = std::make_pair(paddings[i].first, paddings[i].second);
} }
...@@ -63,7 +63,7 @@ void EigenPaddingCompute( ...@@ -63,7 +63,7 @@ void EigenPaddingCompute(
const DDim& in_dims, const DDim& in_dims,
const DenseTensor* d_out, const DenseTensor* d_out,
const DDim& out_dims, const DDim& out_dims,
const Eigen::array<std::pair<int64_t, int64_t>, D>& paddings) { const std::array<std::pair<int64_t, int64_t>, D>& paddings) {
if (D <= 3) { if (D <= 3) {
// if dimension less than 3, cannot reduce dimension // if dimension less than 3, cannot reduce dimension
LaunchEigenPadding<T, Context, D>( LaunchEigenPadding<T, Context, D>(
...@@ -97,7 +97,7 @@ void EigenPaddingCompute( ...@@ -97,7 +97,7 @@ void EigenPaddingCompute(
// only last dimension need padding, // only last dimension need padding,
// reshape the dimension of tensor in 2: [preceding, padding] // reshape the dimension of tensor in 2: [preceding, padding]
std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1); std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding; std::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
// first dimension is the accumulate of preceding dimension // first dimension is the accumulate of preceding dimension
for (int i = 0; i < pad_dim; i++) { for (int i = 0; i < pad_dim; i++) {
...@@ -119,7 +119,7 @@ void EigenPaddingCompute( ...@@ -119,7 +119,7 @@ void EigenPaddingCompute(
reshaped_padding[1].first = paddings[pad_dim].first; reshaped_padding[1].first = paddings[pad_dim].first;
reshaped_padding[1].second = paddings[pad_dim].second; reshaped_padding[1].second = paddings[pad_dim].second;
LaunchEigenPadding<T, Context>(context, LaunchEigenPadding<T, Context, 2>(context,
d_input, d_input,
reshaped_in_dims, reshaped_in_dims,
d_out, d_out,
...@@ -130,7 +130,7 @@ void EigenPaddingCompute( ...@@ -130,7 +130,7 @@ void EigenPaddingCompute(
// reshape the dimension of tensor in 2: [padding, succeeding] // reshape the dimension of tensor in 2: [padding, succeeding]
// similar to (D - 1) // similar to (D - 1)
std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1); std::vector<int64_t> in_tore_shape(2, 1), out_tore_shape(2, 1);
Eigen::array<std::pair<int64_t, int64_t>, 2> reshaped_padding; std::array<std::pair<int64_t, int64_t>, 2> reshaped_padding;
// first dimension is the padding dimension // first dimension is the padding dimension
in_tore_shape[0] = in_dims[pad_dim]; in_tore_shape[0] = in_dims[pad_dim];
...@@ -163,7 +163,7 @@ void EigenPaddingCompute( ...@@ -163,7 +163,7 @@ void EigenPaddingCompute(
// reshape the dimension of tensor in 3: // reshape the dimension of tensor in 3:
// [preceding, padding, succeeding] // [preceding, padding, succeeding]
std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1); std::vector<int64_t> in_tore_shape(3, 1), out_tore_shape(3, 1);
Eigen::array<std::pair<int64_t, int64_t>, 3> reshaped_padding; std::array<std::pair<int64_t, int64_t>, 3> reshaped_padding;
// first dimension is the accumulate of preceding dimension // first dimension is the accumulate of preceding dimension
for (int i = 0; i < pad_dim; i++) { for (int i = 0; i < pad_dim; i++) {
...@@ -261,7 +261,7 @@ void SliceGradCompute(const Context& ctx, ...@@ -261,7 +261,7 @@ void SliceGradCompute(const Context& ctx,
offsets[axis] = start; offsets[axis] = start;
} }
Eigen::array<std::pair<int64_t, int64_t>, D> paddings; std::array<std::pair<int64_t, int64_t>, D> paddings;
for (size_t i = 0; i < paddings.size(); ++i) { for (size_t i = 0; i < paddings.size(); ++i) {
paddings[i].first = offsets[i]; paddings[i].first = offsets[i];
paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i]; paddings[i].second = (in_dims[i] - out_dims[i]) - offsets[i];
......
...@@ -112,6 +112,7 @@ void TransferLayoutGeneral(const Context& dev_ctx, ...@@ -112,6 +112,7 @@ void TransferLayoutGeneral(const Context& dev_ctx,
} }
} }
#endif #endif
PD_VISIT_ALL_TYPES(x.dtype(), "CastDataLayout", ([&] { PD_VISIT_ALL_TYPES(x.dtype(), "CastDataLayout", ([&] {
CastDataLayout<data_t, Context>(dev_ctx, x, axis, out); CastDataLayout<data_t, Context>(dev_ctx, x, axis, out);
})); }));
......
...@@ -3568,6 +3568,7 @@ function run_setup_mac(){ ...@@ -3568,6 +3568,7 @@ function run_setup_mac(){
if [ -d "/Library/Frameworks/Python.framework/Versions/3.7" ]; then if [ -d "/Library/Frameworks/Python.framework/Versions/3.7" ]; then
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/ export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.7/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.7/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.7/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib
export PATH=/Library/Frameworks/Python.framework/Versions/3.7/bin/:${PATH} export PATH=/Library/Frameworks/Python.framework/Versions/3.7/bin/:${PATH}
#after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export
export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3 export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.7/bin/python3
...@@ -3581,6 +3582,7 @@ function run_setup_mac(){ ...@@ -3581,6 +3582,7 @@ function run_setup_mac(){
if [ -d "/Library/Frameworks/Python.framework/Versions/3.8" ]; then if [ -d "/Library/Frameworks/Python.framework/Versions/3.8" ]; then
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/ export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.8/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.8/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.8/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib
export PATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/:${PATH} export PATH=/Library/Frameworks/Python.framework/Versions/3.8/bin/:${PATH}
#after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export
export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.8/bin/python3
...@@ -3594,6 +3596,7 @@ function run_setup_mac(){ ...@@ -3594,6 +3596,7 @@ function run_setup_mac(){
if [ -d "/Library/Frameworks/Python.framework/Versions/3.9" ]; then if [ -d "/Library/Frameworks/Python.framework/Versions/3.9" ]; then
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.9/lib/ export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.9/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.9/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.9/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib
export PATH=/Library/Frameworks/Python.framework/Versions/3.9/bin/:${PATH} export PATH=/Library/Frameworks/Python.framework/Versions/3.9/bin/:${PATH}
#after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export
export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.9/bin/python3 export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.9/bin/python3
...@@ -3607,6 +3610,7 @@ function run_setup_mac(){ ...@@ -3607,6 +3610,7 @@ function run_setup_mac(){
if [ -d "/Library/Frameworks/Python.framework/Versions/3.10" ]; then if [ -d "/Library/Frameworks/Python.framework/Versions/3.10" ]; then
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.10/lib/ export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/3.10/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.10/lib/ export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:/Library/Frameworks/Python.framework/Versions/3.10/lib/
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${PADDLE_ROOT}/build/third_party/install/lapack/lib
export PATH=/Library/Frameworks/Python.framework/Versions/3.10/bin/:${PATH} export PATH=/Library/Frameworks/Python.framework/Versions/3.10/bin/:${PATH}
#after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export #after changing "PYTHON_LIBRARY:FILEPATH" to "PYTHON_LIBRARY" ,we can use export
export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.10/bin/python3 export PYTHON_EXECUTABLE=/Library/Frameworks/Python.framework/Versions/3.10/bin/python3
......
...@@ -4,7 +4,7 @@ if(WITH_TESTING) ...@@ -4,7 +4,7 @@ if(WITH_TESTING)
set(paddle_gtest_main_deps set(paddle_gtest_main_deps
device_context device_context
gtest gtest
gflags phi
init init
memory memory
phi_utils phi_utils
......
...@@ -3,11 +3,11 @@ add_subdirectory(string) ...@@ -3,11 +3,11 @@ add_subdirectory(string)
cc_test( cc_test(
array_ref_test array_ref_test
SRCS array_ref_test.cc SRCS array_ref_test.cc
DEPS gtest gflags) DEPS gtest phi)
cc_test( cc_test(
small_vector_test small_vector_test
SRCS small_vector_test.cc SRCS small_vector_test.cc
DEPS gtest gflags) DEPS gtest phi)
cc_test( cc_test(
variant_test variant_test
SRCS variant_test.cc SRCS variant_test.cc
...@@ -17,5 +17,5 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) ...@@ -17,5 +17,5 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
cc_library( cc_library(
pybind_util pybind_util
SRCS pybind.cc SRCS pybind.cc
DEPS phi_tensor_raw flags) DEPS phi)
endif() endif()
cc_library( cc_library(
pretty_log pretty_log
SRCS pretty_log.cc SRCS pretty_log.cc
DEPS flags) DEPS phi)
cc_library( cc_library(
string_helper string_helper
SRCS string_helper.cc SRCS string_helper.cc
DEPS flags) DEPS phi)
cc_test( cc_test(
stringprintf_test stringprintf_test
SRCS printf_test.cc SRCS printf_test.cc
DEPS gflags) DEPS phi)
cc_test(to_string_test SRCS to_string_test.cc) cc_test(to_string_test SRCS to_string_test.cc)
cc_test(split_test SRCS split_test.cc) cc_test(split_test SRCS split_test.cc)
cc_test( cc_test(
......
...@@ -10,6 +10,9 @@ env_dict={ ...@@ -10,6 +10,9 @@ env_dict={
'CUDA_VERSION':'@CUDA_VERSION@', 'CUDA_VERSION':'@CUDA_VERSION@',
'WITH_PSLI':'@WITH_PSLI@', 'WITH_PSLI':'@WITH_PSLI@',
'FLUID_CORE_NAME':'@FLUID_CORE_NAME@', 'FLUID_CORE_NAME':'@FLUID_CORE_NAME@',
'PHI_LIB':'@PHI_LIB@',
'PHI_NAME':'@PHI_NAME@',
'WITH_PHI_SHARED':'@WITH_PHI_SHARED@',
'WARPCTC_LIBRARIES':'@WARPCTC_LIBRARIES@', 'WARPCTC_LIBRARIES':'@WARPCTC_LIBRARIES@',
'WARPRNNT_LIBRARIES':'@WARPRNNT_LIBRARIES@', 'WARPRNNT_LIBRARIES':'@WARPRNNT_LIBRARIES@',
'FLASHATTN_LIBRARIES':'@FLASHATTN_LIBRARIES@', 'FLASHATTN_LIBRARIES':'@FLASHATTN_LIBRARIES@',
......
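The '@PHI_LIB@', '@PHI_NAME@' and '@WITH_PHI_SHARED@' placeholders added to env_dict above are substituted at configure time; the CMake code that sets them is not part of this excerpt. The lines below are only an assumed illustration, with made-up paths and file names, of how such variables could be populated and expanded through configure_file.

# Hypothetical sketch only -- the real values come from CMake code outside this diff.
if(WITH_PHI_SHARED)
  set(PHI_NAME "libphi.so") # assumed library name on Linux
  set(PHI_LIB "${PADDLE_BINARY_DIR}/paddle/phi/${PHI_NAME}") # assumed build location
endif()
# @VAR@ placeholders in the env_dict template are replaced by configure_file():
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/env_dict.py.in
               ${PADDLE_BINARY_DIR}/python/env_dict.py @ONLY)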
...@@ -1134,14 +1134,6 @@ foreach(TEST_CINN_OPS ${TEST_CINN_OPS}) ...@@ -1134,14 +1134,6 @@ foreach(TEST_CINN_OPS ${TEST_CINN_OPS})
endforeach() endforeach()
if(WITH_CINN AND WITH_TESTING) if(WITH_CINN AND WITH_TESTING)
set_tests_properties(
test_resnet50_with_cinn
PROPERTIES
LABELS
"RUN_TYPE=CINN"
ENVIRONMENT
FLAGS_allow_cinn_ops="conv2d;conv2d_grad;elementwise_add;elementwise_add_grad;relu;relu_grad;sum"
)
set_tests_properties( set_tests_properties(
test_parallel_executor_run_cinn test_parallel_executor_run_cinn
PROPERTIES PROPERTIES
......
...@@ -123,9 +123,9 @@ class TestParallelExecutorRunCinn(unittest.TestCase): ...@@ -123,9 +123,9 @@ class TestParallelExecutorRunCinn(unittest.TestCase):
shutil.rmtree(self.tmpdir) shutil.rmtree(self.tmpdir)
def test_run_with_cinn(self): def test_run_with_cinn(self):
cinn_losses = train(self.tmpdir, "paddle") cinn_losses = np.array(train(self.tmpdir, "paddle")).flatten()
set_cinn_flag(False) set_cinn_flag(False)
pd_losses = train(self.tmpdir, "cinn") pd_losses = np.array(train(self.tmpdir, "cinn")).flatten()
np.testing.assert_allclose( np.testing.assert_allclose(
cinn_losses, pd_losses, rtol=1e-05, atol=1e-05 cinn_losses, pd_losses, rtol=1e-05, atol=1e-05
) )
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import unittest
import numpy as np
import paddle
from paddle.fluid import core
paddle.enable_static()
logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO
)
logger = logging.getLogger(__name__)
def set_cinn_flag(val):
cinn_compiled = False
try:
paddle.set_flags({'FLAGS_use_cinn': val})
cinn_compiled = True
except ValueError:
logger.warning("The used paddle is not compiled with CINN.")
return cinn_compiled
@unittest.skipIf(not set_cinn_flag(True), "Paddle is not compiled with CINN.")
class TestResnet50Accuracy(unittest.TestCase):
def reader(self, limit):
for _ in range(limit):
yield {
'image': np.random.randint(
0, 256, size=[32, 3, 224, 224]
).astype('float32'),
'label': np.random.randint(0, 1000, size=[32]).astype('int64'),
}
def generate_random_data(self, loop_num=10):
feed = []
data = self.reader(loop_num)
for _ in range(loop_num):
feed.append(next(data))
return feed
def build_program(self, main_program, startup_program):
with paddle.static.program_guard(main_program, startup_program):
image = paddle.static.data(
name='image', shape=[32, 3, 224, 224], dtype='float32'
)
label = paddle.static.data(name='label', shape=[32], dtype='int64')
# TODO: stop_gradient slower training speed, need fix
image.stop_gradient = False
model = paddle.vision.models.resnet50()
prediction = model(image)
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label
)
loss = paddle.mean(loss)
adam = paddle.optimizer.Adam(learning_rate=0.001)
adam.minimize(loss)
return loss
def train(self, place, iters, feed, use_cinn=False, seed=1234):
np.random.seed(seed)
paddle.seed(seed)
if paddle.is_compiled_with_cuda():
paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
set_cinn_flag(use_cinn)
startup_program = paddle.static.Program()
main_program = paddle.static.Program()
loss = self.build_program(main_program, startup_program)
exe = paddle.static.Executor(place)
compiled_prog = paddle.static.CompiledProgram(main_program)
loss_vals = []
scope = paddle.static.Scope()
with paddle.static.scope_guard(scope):
exe.run(startup_program)
for step in range(iters):
loss_v = exe.run(
compiled_prog,
feed=feed[step],
fetch_list=[loss],
return_numpy=True,
)
loss_vals.append(loss_v[0])
return loss_vals
def test_check_resnet50_accuracy(self):
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
loop_num = 10
feed = self.generate_random_data(loop_num)
loss_c = self.train(place, loop_num, feed, use_cinn=True)
loss_p = self.train(place, loop_num, feed, use_cinn=False)
print("Losses of CINN:")
print(loss_c)
print("Losses of Paddle")
print(loss_p)
np.testing.assert_allclose(loss_c, loss_p, rtol=1e-05, atol=1e-05)
def test_check_resnet50_accuracy_with_composite(self):
place = (
paddle.CUDAPlace(0)
if paddle.is_compiled_with_cuda()
else paddle.CPUPlace()
)
loop_num = 10
feed = self.generate_random_data(loop_num)
core._set_prim_backward_enabled(True)
core._add_skip_comp_ops("batch_norm")
loss_c = self.train(place, loop_num, feed, use_cinn=True)
core._set_prim_backward_enabled(False)
loss_p = self.train(place, loop_num, feed, use_cinn=True)
print("Losses of Composite + CINN:")
print(loss_c)
print("Losses of CINN: ")
print(loss_p)
np.testing.assert_allclose(loss_c, loss_p, rtol=1e-05, atol=1e-05)
if __name__ == '__main__':
unittest.main()
...@@ -561,7 +561,11 @@ package_dir={ ...@@ -561,7 +561,11 @@ package_dir={
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
package_data['paddle.libs']= [] package_data['paddle.libs']= []
package_data['paddle.libs']=[ if('${WITH_PHI_SHARED}' == 'ON'):
package_data['paddle.libs'] = [('libphi' if os.name != 'nt' else 'phi') + ext_name]
shutil.copy('${PHI_LIB}', libs_path)
package_data['paddle.libs']+=[
('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name, ('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name,
('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_name, ('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_name,
] ]
...@@ -722,8 +726,14 @@ if '${CMAKE_BUILD_TYPE}' == 'Release': ...@@ -722,8 +726,14 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
if "@APPLE@" == "1": if "@APPLE@" == "1":
commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'] commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so') commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so')
if('${WITH_PHI_SHARED}' == 'ON'):
# change rpath of phi.ext for loading 3rd party lib
commands.append("install_name_tool -add_rpath '@loader_path' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}")
else: else:
commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'] commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
if('${WITH_PHI_SHARED}' == 'ON'):
# change rpath of phi.ext for loading 3rd party lib
commands.append("patchelf --set-rpath '$ORIGIN' ${PADDLE_BINARY_DIR}/python/paddle/libs/${PHI_NAME}")
# The sw_64 does not support patchelf, so we just disable that. # The sw_64 does not support patchelf, so we just disable that.
if platform.machine() != 'sw_64' and platform.machine() != 'mips64': if platform.machine() != 'sw_64' and platform.machine() != 'mips64':
for command in commands: for command in commands:
......
...@@ -966,7 +966,14 @@ def get_package_data_and_package_dir(): ...@@ -966,7 +966,14 @@ def get_package_data_and_package_dir():
# put all thirdparty libraries in paddle.libs # put all thirdparty libraries in paddle.libs
libs_path = paddle_binary_dir + '/python/paddle/libs' libs_path = paddle_binary_dir + '/python/paddle/libs'
package_data['paddle.libs'] = [] package_data['paddle.libs'] = []
if env_dict.get("WITH_PHI_SHARED") == "ON":
package_data['paddle.libs'] = [ package_data['paddle.libs'] = [
('libphi' if os.name != 'nt' else 'phi') + ext_suffix
]
shutil.copy(env_dict.get("PHI_LIB"), libs_path)
package_data['paddle.libs'] += [
('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_suffix, ('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_suffix,
('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_suffix, ('libwarprnnt' if os.name != 'nt' else 'warprnnt') + ext_suffix,
] ]
...@@ -1204,6 +1211,13 @@ def get_package_data_and_package_dir(): ...@@ -1204,6 +1211,13 @@ def get_package_data_and_package_dir():
+ env_dict.get("FLUID_CORE_NAME") + env_dict.get("FLUID_CORE_NAME")
+ '.so' + '.so'
) )
if env_dict.get("WITH_PHI_SHARED") == "ON":
commands.append(
"install_name_tool -add_rpath '@loader_path' "
+ env_dict.get("PADDLE_BINARY_DIR")
+ '/python/paddle/libs/'
+ env_dict.get("PHI_NAME")
)
else: else:
commands = [ commands = [
"patchelf --set-rpath '$ORIGIN/../libs/' " "patchelf --set-rpath '$ORIGIN/../libs/' "
...@@ -1212,6 +1226,13 @@ def get_package_data_and_package_dir(): ...@@ -1212,6 +1226,13 @@ def get_package_data_and_package_dir():
+ env_dict.get("FLUID_CORE_NAME") + env_dict.get("FLUID_CORE_NAME")
+ '.so' + '.so'
] ]
if env_dict.get("WITH_PHI_SHARED") == "ON":
commands.append(
"patchelf --set-rpath '$ORIGIN' "
+ env_dict.get("PADDLE_BINARY_DIR")
+ '/python/paddle/libs/'
+ env_dict.get("PHI_NAME")
)
# The sw_64 does not support patchelf, so we just disable that. # The sw_64 does not support patchelf, so we just disable that.
if platform.machine() != 'sw_64' and platform.machine() != 'mips64': if platform.machine() != 'sw_64' and platform.machine() != 'mips64':
for command in commands: for command in commands:
......
...@@ -168,6 +168,7 @@ if(${len} GREATER_EQUAL 1) ...@@ -168,6 +168,7 @@ if(${len} GREATER_EQUAL 1)
add_executable(${test_name} ${test_src}) add_executable(${test_name} ${test_src})
target_link_libraries(${test_name} paddle_gtest_main_new) target_link_libraries(${test_name} paddle_gtest_main_new)
target_link_libraries(${test_name} $<TARGET_LINKER_FILE:${paddle_lib}>) target_link_libraries(${test_name} $<TARGET_LINKER_FILE:${paddle_lib}>)
target_link_libraries(${test_name} $<TARGET_LINKER_FILE:phi>)
add_dependencies(${test_name} ${paddle_lib} paddle_gtest_main_new) add_dependencies(${test_name} ${paddle_lib} paddle_gtest_main_new)
if(WITH_GPU) if(WITH_GPU)
target_link_libraries(${test_name} ${CUDA_CUDART_LIBRARY} target_link_libraries(${test_name} ${CUDA_CUDART_LIBRARY}
...@@ -177,8 +178,10 @@ if(${len} GREATER_EQUAL 1) ...@@ -177,8 +178,10 @@ if(${len} GREATER_EQUAL 1)
target_link_libraries(${test_name} ${ROCM_HIPRTC_LIB}) target_link_libraries(${test_name} ${ROCM_HIPRTC_LIB})
endif() endif()
if(APPLE) if(APPLE)
target_link_libraries(${test_name} target_link_libraries(
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}>") ${test_name}
"-Wl,-rpath,$<TARGET_FILE_DIR:${paddle_lib}> -Wl,-rpath,$<TARGET_FILE_DIR:phi>"
)
endif() endif()
if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
target_link_libraries(${test_name} ${PYTHON_LIBRARIES}) target_link_libraries(${test_name} ${PYTHON_LIBRARIES})
......
set(eager_deps set(eager_deps
phi_api phi
phi_dygraph_api
hook_utils hook_utils
tensor_utils tensor_utils
utils utils
global_utils global_utils
backward backward
phi_tensor
tracer tracer
layer layer
autograd_meta autograd_meta
......
...@@ -42,7 +42,7 @@ cc_test( ...@@ -42,7 +42,7 @@ cc_test(
test_common_infer_shape_functions test_common_infer_shape_functions
SRCS test_common_infer_shape_functions.cc SRCS test_common_infer_shape_functions.cc
DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op DEPS common_infer_shape_functions ${COMMON_OP_DEPS} activation_op
elementwise_add_op softmax generated_static_op) elementwise_add_op phi generated_static_op)
cc_test( cc_test(
gather_test gather_test
SRCS gather_test.cc SRCS gather_test.cc
...@@ -54,7 +54,7 @@ cc_test( ...@@ -54,7 +54,7 @@ cc_test(
cc_test( cc_test(
scatter_test scatter_test
SRCS scatter_test.cc SRCS scatter_test.cc
DEPS tensor math_function) DEPS tensor phi)
cc_test( cc_test(
beam_search_decode_op_test beam_search_decode_op_test
SRCS beam_search_decode_op_test.cc SRCS beam_search_decode_op_test.cc
...@@ -72,7 +72,7 @@ if(WITH_GPU) ...@@ -72,7 +72,7 @@ if(WITH_GPU)
nv_test( nv_test(
dropout_op_test dropout_op_test
SRCS dropout_op_test.cc SRCS dropout_op_test.cc
DEPS dropout_op tensor generator) DEPS dropout_op tensor phi)
nv_test( nv_test(
test_leaky_relu_grad_grad_functor test_leaky_relu_grad_grad_functor
SRCS test_leaky_relu_grad_grad_functor.cc SRCS test_leaky_relu_grad_grad_functor.cc
...@@ -81,12 +81,12 @@ if(WITH_GPU) ...@@ -81,12 +81,12 @@ if(WITH_GPU)
nv_test( nv_test(
feed_forward_test feed_forward_test
SRCS feed_forward_test.cu SRCS feed_forward_test.cu
DEPS elementwise_add_op matmul_op tensor generator) DEPS elementwise_add_op matmul_op tensor phi)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_test( hip_test(
dropout_op_test dropout_op_test
SRCS dropout_op_test.cc SRCS dropout_op_test.cc
DEPS dropout_op tensor generator) DEPS dropout_op tensor phi)
hip_test( hip_test(
test_leaky_relu_grad_grad_functor test_leaky_relu_grad_grad_functor
SRCS test_leaky_relu_grad_grad_functor.cc SRCS test_leaky_relu_grad_grad_functor.cc
......
...@@ -11,7 +11,7 @@ cc_test( ...@@ -11,7 +11,7 @@ cc_test(
scope scope
${GLOB_OP_LIB} ${GLOB_OP_LIB}
${GLOB_OPERATOR_DEPS} ${GLOB_OPERATOR_DEPS}
eigen_function) phi)
if(WITH_ONNXRUNTIME AND WIN32) if(WITH_ONNXRUNTIME AND WIN32)
# Copy onnxruntime for some c++ test in Windows, since the test will # Copy onnxruntime for some c++ test in Windows, since the test will
......
cc_test_old( if(WITH_TESTING)
cc_test_old(
cinn_launch_context_test cinn_launch_context_test
SRCS SRCS
cinn_launch_context_test.cc cinn_launch_context_test.cc
DEPS DEPS
ddim phi
lod_tensor lod_tensor
scope scope
proto_desc proto_desc
...@@ -11,27 +12,28 @@ cc_test_old( ...@@ -11,27 +12,28 @@ cc_test_old(
cinn_launch_context cinn_launch_context
cinn_instruction_run_op cinn_instruction_run_op
cinn) cinn)
target_link_libraries(cinn_launch_context_test ${PYTHON_LIBRARIES}) target_link_libraries(cinn_launch_context_test ${PYTHON_LIBRARIES})
set_tests_properties(cinn_launch_context_test PROPERTIES LABELS "RUN_TYPE=CINN") set_tests_properties(cinn_launch_context_test PROPERTIES LABELS
"RUN_TYPE=CINN")
set(CINN_RUN_ENVIRONMENT set(CINN_RUN_ENVIRONMENT
"OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda" "OMP_NUM_THREADS=1;runtime_include_dir=${PADDLE_BINARY_DIR}/third_party/CINN/src/external_cinn/cinn/runtime/cuda"
) )
# cc_test_old( # cc_test_old(
# cinn_launch_op_test # cinn_launch_op_test
# SRCS # SRCS
# cinn_launch_op_test.cc # cinn_launch_op_test.cc
# DEPS # DEPS
# cinn_compiler # cinn_compiler
# cinn_launch_op # cinn_launch_op
# cinn_instruction_run_op # cinn_instruction_run_op
# elementwise_add_op # elementwise_add_op
# gflags) # gflags)
# set_tests_properties( # set_tests_properties(
# cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT # cinn_launch_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT
# "${CINN_RUN_ENVIRONMENT}") # "${CINN_RUN_ENVIRONMENT}")
cc_test_old( cc_test_old(
cinn_instruction_run_op_test cinn_instruction_run_op_test
SRCS SRCS
cinn_instruction_run_op_test.cc cinn_instruction_run_op_test.cc
...@@ -40,7 +42,8 @@ cc_test_old( ...@@ -40,7 +42,8 @@ cc_test_old(
cinn_launch_op cinn_launch_op
cinn_instruction_run_op cinn_instruction_run_op
elementwise_add_op) elementwise_add_op)
target_link_libraries(cinn_instruction_run_op_test ${PYTHON_LIBRARIES}) target_link_libraries(cinn_instruction_run_op_test ${PYTHON_LIBRARIES})
set_tests_properties( set_tests_properties(
cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT cinn_instruction_run_op_test PROPERTIES LABELS "RUN_TYPE=CINN" ENVIRONMENT
"${CINN_RUN_ENVIRONMENT}") "${CINN_RUN_ENVIRONMENT}")
endif()
@@ -15,7 +15,7 @@ if(WITH_GPU OR WITH_ROCM)
dropout_op
generated_op
device_context
-generator
+phi
memory)
nv_test(
test_fused_dropout_act_bias
@@ -25,7 +25,7 @@ if(WITH_GPU OR WITH_ROCM)
dropout_op
generated_op
device_context
-generator
+phi
memory)
nv_test(
test_fused_layernorm_residual_dropout_bias
@@ -35,7 +35,7 @@ if(WITH_GPU OR WITH_ROCM)
dropout_op
generated_op
device_context
-generator
+phi
memory)
endif()
# resnet_unit needs cudnn 8.0 above
@@ -44,15 +44,11 @@ if(WITH_GPU OR WITH_ROCM)
test_cudnn_norm_conv
SRCS cudnn_norm_conv_test.cc
DEPS conv_op
-blas
-im2col
-vol2col
depthwise_conv
-eigen_function
tensor
op_registry
device_context
-generator
+phi
memory)
cc_test(
test_cudnn_bn_add_relu
@@ -62,7 +58,7 @@ if(WITH_GPU OR WITH_ROCM)
tensor
op_registry
device_context
-generator
+phi
memory)
endif()
endif()
cc_test(
selected_rows_functor_test
SRCS selected_rows_functor_test.cc
-DEPS allocator selected_rows_functor)
+DEPS allocator phi)
cc_test(
im2col_test
SRCS im2col_test.cc
-DEPS im2col)
+DEPS phi)
cc_test(
vol2col_test
SRCS vol2col_test.cc
-DEPS vol2col)
+DEPS phi)
cc_test(
beam_search_test
SRCS beam_search_test.cc
@@ -18,13 +18,13 @@ if(WITH_GPU)
nv_test(
selected_rows_functor_gpu_test
SRCS selected_rows_functor_test.cu.cc
-DEPS selected_rows_functor math_function)
+DEPS phi)
endif()
if(WITH_ROCM)
hip_test(
selected_rows_functor_gpu_test
SRCS selected_rows_functor_test.cu.cc
-DEPS selected_rows_functor math_function)
+DEPS phi)
endif()
cc_test(
concat_test
...
@@ -4,7 +4,7 @@ cc_test(
DEPS op_registry
elementwise_add_op
activation_op
-softmax
+phi
scope
device_context
enforce
@@ -17,9 +17,7 @@ set(TEST_MKLDNN_CACHING_DEPS
elementwise_add_op
activation_op
conv_op
-im2col
-vol2col
-softmax
+phi
scope
device_context
enforce
@@ -44,7 +42,7 @@ cc_test_old(
crop_op
activation_op
generated_op
-pooling
+phi
transpose_op
fused_transpose_op
scope
...
@@ -68,7 +68,7 @@ cc_test_old(
scope
proto_desc
generated_op
-eigen_function)
+phi)
set_source_files_properties(
send_and_recv_op_cpu_test.cc PROPERTIES COMPILE_FLAGS
@@ -85,7 +85,7 @@ cc_test_old(
send_and_recv_op
${RPC_DEPS}
${DISTRIBUTE_DEPS}
-eigen_function)
+phi)
set_source_files_properties(
send_and_recv_op_gpu_test.cc PROPERTIES COMPILE_FLAGS
@@ -102,7 +102,7 @@ cc_test_old(
send_and_recv_op
${RPC_DEPS}
${DISTRIBUTE_DEPS}
-eigen_function)
+phi)
set_source_files_properties(
heter_listen_and_server_test.cc PROPERTIES COMPILE_FLAGS
@@ -119,10 +119,10 @@ cc_test_old(
heter_listen_and_serv_op
${RPC_DEPS}
${DISTRIBUTE_DEPS}
-eigen_function)
+phi)
#set_source_files_properties(heter_cloud_comm_cpu_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-#cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc generated_static_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} eigen_function)
+#cc_test(heter_cloud_comm_cpu_test SRCS heter_cloud_comm_cpu_test.cc DEPS executor scope proto_desc generated_static_op heter_listen_and_serv_op ${RPC_DEPS} ${DISTRIBUTE_DEPS} phi)
set_source_files_properties(
switch_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
@@ -138,4 +138,4 @@ cc_binary(
heter_listen_and_serv_op
${RPC_DEPS}
${DISTRIBUTE_DEPS}
-eigen_function)
+phi)
@@ -33,14 +33,7 @@ endif()
cc_test(
test_gradient_accmulator
SRCS test_gradient_accmulator.cc
-DEPS memcpy
-selected_rows_utils
-selected_rows_functor
-gradient_accumulator
-math_function
-phi_tensor
-phi_api
-phi_utils)
+DEPS memcpy selected_rows_utils gradient_accumulator phi phi_utils)
cc_test(
test_layer
SRCS test_layer.cc
...
@@ -24,6 +24,7 @@
#include "paddle/fluid/imperative/hooks.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/core/flags.h"
#include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
@@ -35,7 +36,7 @@ namespace platform = paddle::platform;
namespace framework = paddle::framework;
namespace memory = paddle::memory;
-DECLARE_bool(sort_sum_gradient);
+PHI_DECLARE_bool(sort_sum_gradient);
namespace paddle {
namespace imperative {
...
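This hunk, and the similar one further down, replaces the gflags-style DECLARE_* macro in test code with the PHI_DECLARE_* macro from "paddle/phi/core/flags.h", so the test refers to the flag definition that now lives in the phi library. A minimal sketch of the resulting pattern in a test translation unit; the gtest scaffolding, test names, and the FLAGS_sort_sum_gradient accessor are assumptions for illustration, not taken from this diff:

```cpp
// Sketch of the flag-declaration migration in a test file.
// Previously: DECLARE_bool(sort_sum_gradient);  (gflags-based macro)
#include "gtest/gtest.h"
#include "paddle/phi/core/flags.h"  // provides the PHI_DECLARE_* macros

PHI_DECLARE_bool(sort_sum_gradient);  // flag defined inside the phi library

TEST(GradientAccumulatorTest, ToggleSortSumGradient) {
  // Assumption: the macro exposes the usual FLAGS_<name> global variable.
  FLAGS_sort_sum_gradient = true;
  EXPECT_TRUE(FLAGS_sort_sum_gradient);
}
```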
@@ -224,7 +224,7 @@ if(NOT WIN32)
${MATH_LIB}
${MKLDNN_LIB}
glog
-gflags
+phi
protobuf
xxhash
cryptopp
@@ -235,7 +235,7 @@ else()
${MATH_LIB}
${MKLDNN_LIB}
glog
-gflags_static
+phi
libprotobuf
xxhash
cryptopp-static
...
@@ -8,7 +8,6 @@ if(WITH_TESTING AND NOT WIN32)
WORKING_DIRECTORY "${CC_TESTS_DIR}")
set(JIT_DEPS
phi
-phi_api
elementwise_add_op
matmul_v2_op
activation_op
...
@@ -37,8 +37,7 @@ if(WITH_GPU
fetch_v2_op)
# All deps of the operators above, part of GLOB_OPERATOR_DEPS.
-set(OP_DEPS generator softmax selected_rows_functor jit_kernel_helper
-concat_and_split cross_entropy)
+set(OP_DEPS phi concat_and_split cross_entropy)
cc_test(standalone_executor_test SRCS standalone_executor_test.cc)
# add_dependencies(standalone_executor_test download_program)
...
-set(COMMON_API_TEST_DEPS phi_tensor phi_api api_tensor_utils)
+set(COMMON_API_TEST_DEPS phi)
if(WITH_GPU)
nv_test(
test_phi_tensor
SRCS test_phi_tensor.cc
-DEPS glog selected_rows ${COMMON_API_TEST_DEPS})
+DEPS glog ${COMMON_API_TEST_DEPS})
nv_test(
test_allocator
SRCS test_allocator.cu
-DEPS place device_context context_pool)
+DEPS place device_context phi)
nv_test(
test_cuda_stream
SRCS test_cuda_stream.cu
-DEPS context_pool)
+DEPS phi)
nv_test(
test_from_blob
SRCS test_from_blob.cc
-DEPS phi_backends ${COMMON_API_TEST_DEPS})
+DEPS ${COMMON_API_TEST_DEPS})
elseif(WITH_ROCM)
hip_test(
test_phi_tensor
SRCS test_phi_tensor.cc
-DEPS glog selected_rows ${COMMON_API_TEST_DEPS})
+DEPS glog ${COMMON_API_TEST_DEPS})
hip_test(
test_allocator
SRCS test_allocator.cu
-DEPS place device_context context_pool)
+DEPS place device_context phi)
hip_test(
test_cuda_stream
SRCS test_cuda_stream.cu
-DEPS context_pool)
+DEPS phi)
hip_test(
test_from_blob
SRCS test_from_blob.cc
-DEPS phi_backends ${COMMON_API_TEST_DEPS})
+DEPS ${COMMON_API_TEST_DEPS})
else()
cc_test(
test_phi_tensor
SRCS test_phi_tensor.cc
-DEPS glog selected_rows ${COMMON_API_TEST_DEPS})
+DEPS glog ${COMMON_API_TEST_DEPS})
cc_test(
test_from_blob
SRCS test_from_blob.cc
-DEPS phi_backends ${COMMON_API_TEST_DEPS})
+DEPS ${COMMON_API_TEST_DEPS})
endif()
cc_test(
...
@@ -21,12 +21,13 @@
#include "paddle/phi/api/lib/utils/allocator.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
+#include "paddle/phi/core/flags.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/scale_kernel.h"
-DECLARE_int32(low_precision_op_list);
+PHI_DECLARE_int32(low_precision_op_list);
namespace paddle {
namespace experimental {
...
@@ -13,32 +13,32 @@ cc_test(
cc_test(
phi_test_place
SRCS test_place.cc
-DEPS phi_place)
+DEPS phi)
cc_test(
phi_test_int_array
SRCS test_int_array.cc
-DEPS int_array api_int_array phi phi_api)
+DEPS phi)
cc_test(
phi_test_scalar_cpu
SRCS test_scalar.cc
-DEPS scalar api_scalar)
+DEPS phi)
if(WITH_GPU)
nv_test(
phi_test_scalar
SRCS test_scalar.cu
-DEPS scalar api_scalar)
+DEPS phi)
nv_test(
transform_test
SRCS transform_test.cu
-DEPS memory place phi_backends)
+DEPS memory place phi)
endif()
if(WITH_ROCM)
hip_test(
phi_test_scalar
SRCS test_scalar.cu
-DEPS scalar api_scalar)
+DEPS phi)
hip_test(
transform_test
SRCS transform_test.cu
-DEPS memory place phi_backends)
+DEPS memory place phi)
endif()
cc_test(
test_custom_kernel
SRCS test_custom_kernel.cc
-DEPS custom_kernel scalar)
+DEPS phi)
cc_test(
test_dense_tensor
SRCS test_dense_tensor.cc
-DEPS dense_tensor)
+DEPS phi)
cc_test(test_intrusive_ptr SRCS test_intrusive_ptr.cc)
cc_test(test_type_info SRCS test_type_info.cc)
cc_test(
test_kernel_factory
SRCS test_kernel_factory.cc
-DEPS kernel_factory phi)
+DEPS phi)
cc_test(
test_sparse_coo_tensor
SRCS test_sparse_coo_tensor.cc
-DEPS dense_tensor sparse_coo_tensor)
+DEPS phi)
cc_test(
test_sparse_csr_tensor
SRCS test_sparse_csr_tensor.cc
-DEPS dense_tensor sparse_csr_tensor)
+DEPS phi)
cc_test(
test_op_utils
SRCS test_op_utils.cc
DEPS op_compat_infos)
-cc_test_old(
-test_meta_fn_utils
-SRCS
-test_meta_fn_utils.cc
-DEPS
-dense_tensor
-wrapped_infermeta
-infermeta
-infermeta_utils)
+cc_test_old(test_meta_fn_utils SRCS test_meta_fn_utils.cc DEPS phi)
cc_test(
test_ddim
SRCS test_ddim.cc
-DEPS ddim)
+DEPS phi)
if(WITH_GPU)
nv_test(
test_dim
SRCS test_dim.cu
-DEPS ddim)
+DEPS phi)
elseif(WITH_ROCM)
hip_test(
test_dim
SRCS test_dim.cu
-DEPS ddim)
+DEPS phi)
endif()
cc_test(
selected_rows_test
SRCS test_selected_rows.cc
-DEPS selected_rows)
+DEPS phi)
if(WITH_TESTING AND TEST selected_rows_test)
set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
endif()
@@ -63,27 +55,27 @@ endif()
cc_test(
test_string_tensor
SRCS test_string_tensor.cc
-DEPS string_tensor)
+DEPS phi)
cc_test(unroll_array_ops_test SRCS unroll_array_ops_test.cc)
cc_test(
test_tensor_array
SRCS test_tensor_array.cc
-DEPS tensor_array)
+DEPS phi)
if(WITH_GPU)
nv_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
-DEPS mixed_vector place memory phi_backends tensor)
+DEPS place memory phi tensor)
elseif(WITH_ROCM)
hip_test(
test_mixed_vector
SRCS test_mixed_vector.cc test_mixed_vector.cu
-DEPS mixed_vector place memory phi_backends tensor)
+DEPS place memory phi tensor)
else()
cc_test(
test_mixed_vector
SRCS test_mixed_vector.cc
-DEPS mixed_vector place memory phi_backends tensor)
+DEPS place memory phi tensor)
endif()
@@ -17,6 +17,11 @@ limitations under the License. */
#include "paddle/phi/core/utils/type_registry.h"
namespace phi {
+template <typename BaseT, typename DerivedT>
+const TypeInfo<BaseT> TypeInfoTraits<BaseT, DerivedT>::kType =
+    RegisterStaticType<BaseT>(DerivedT::name());
namespace tests {
template <typename T>
...
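The hunk above adds an in-test definition of TypeInfoTraits<BaseT, DerivedT>::kType, so the test no longer relies on the definition that previously came from phi's statically linked objects. The C++ rule behind it is general: a class template's static data member needs a definition that the using translation unit can instantiate, otherwise linking fails. A self-contained illustration of that rule with toy names (this is not the phi API):

```cpp
#include <iostream>
#include <string>

// Toy stand-in for a type-registration traits template: each derived type
// contributes its name to a static data member of the traits class.
template <typename DerivedT>
struct TypeInfoTraitsDemo {
  static const std::string kName;  // declaration only
};

// Out-of-class definition template; without it, any odr-use of kName
// (as in main below) fails at link time with an undefined reference.
template <typename DerivedT>
const std::string TypeInfoTraitsDemo<DerivedT>::kName = DerivedT::name();

struct DenseTensorLike {
  static std::string name() { return "DenseTensorLike"; }
};

int main() {
  // Implicitly instantiates TypeInfoTraitsDemo<DenseTensorLike>::kName.
  std::cout << TypeInfoTraitsDemo<DenseTensorLike>::kName << "\n";
  return 0;
}
```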
cc_test(
test_math_function
SRCS test_math_function.cc
-DEPS math_function)
+DEPS phi)
if(WITH_GPU)
nv_test(
test_math_function_gpu
SRCS test_math_function.cu
-DEPS math_function)
+DEPS phi)
nv_test(
test_broadcast_gpu
SRCS test_ternary_broadcast.cu
@@ -16,13 +16,13 @@ if(WITH_ROCM)
hip_test(
test_math_function_gpu
SRCS test_math_function.cu
-DEPS math_function)
+DEPS phi)
endif()
cc_test(
test_cpu_vec
SRCS test_cpu_vec.cc
-DEPS blas phi_backends)
+DEPS phi)
# For String Kernels
cc_test(
@@ -94,19 +94,19 @@ endif()
cc_test(
test_cache
SRCS test_cache.cc
-DEPS gtest cache)
+DEPS gtest phi)
cc_test(
strided_memcpy_test
SRCS strided_memcpy_test.cc
-DEPS phi_backends memory)
+DEPS phi memory)
cc_test(
sequence_padding_test
SRCS sequence_padding_test.cc
-DEPS sequence_padding)
+DEPS phi)
cc_test(
sequence_pooling_test
SRCS sequence_pooling_test.cc
-DEPS sequence_pooling)
+DEPS phi)
cc_test(
test_op_signature
SRCS test_op_signature.cc
-DEPS op_utils)
+DEPS phi)
set(prim_eager_deps
-phi_api
-phi_dygraph_api
+phi
hook_utils
tensor_utils
utils
global_utils
backward
-phi_tensor
tracer
layer
autograd_meta
@@ -33,20 +31,16 @@ cc_test_old(
elementwise_pow_op
fill_constant_op
activation_op
-phi_api
-phi_dygraph_api
+phi
static_global_utils
static_tensor_operants
-tensor_api
-operants_manager
generated_static_op)
if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
init_env_utils
SRCS init_env_utils.cc
-DEPS operants_manager tensor_api eager_tensor_operants
-static_tensor_operants)
+DEPS phi eager_tensor_operants static_tensor_operants)
cc_test_old(
test_comp_eager
...
@@ -2221,7 +2221,6 @@ CPU_PARALLEL_JOB = [
'test_egr_ds_grad_tensor_holder',
'test_egr_ds_auotgrad_meta',
'test_egr_ds_accumulation_node',
-'test_resnet50_with_cinn',
'test_parallel_dygraph_sync_batch_norm',
'test_monitor',
'test_mkldnn_quantizer',
...