Unverified · Commit 67c69dca · Authored by 6clc · Committed by GitHub

Code merge | Merge CINN into Paddle (#54749)

* feat(cmake): add cmake of cinn

* feat(cmake): add cmake of cinn python test

* feat(cmake): add jit

* feat(cmake): test/CMakeList.txt

* feat(cmake): rebase to develop

* feat(cmake): remove some flags

* fix(cmake): fix cinn's gflags depends

* feat(cmake): add ci scripts of cinn

* feat(cmake): copy code of cinn

* fix(cmake): fix cinn third_party model path

* gflags dynamic dependence

* fix ci build_demo

* tmp update to c++17 of cinn-only test

* fix cinn only with c++17
Parent 72b8c7c2

Too many changes to display. To preserve performance, only 1000 of 1000+ files are shown.
@@ -63,6 +63,17 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
option(WITH_SETUP_INSTALL "Compile PaddlePaddle with setup.py" OFF)
option(WITH_SHARED_PHI "Compile PaddlePaddle with SHARED LIB of PHI" OFF)
option(CINN_ONLY "Compile CINN only in Paddle" OFF)
option(CINN_WITH_CUDNN "Compile CINN with CUDNN support" ON)
find_package(Git REQUIRED)
# Configure GIT_URL with a GitHub mirror to speed up cloning dependent repos
option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
if(NOT GIT_URL)
set(GIT_URL "https://github.com")
endif()
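# Illustration only: a mirror can be passed at configure time, e.g.
#   cmake .. -DGIT_URL=https://gitee.com   (hypothetical mirror URL)
# so the ${GIT_URL}/<org>/<repo>.git clone URLs used by dependent repos
# resolve against the mirror instead of github.com.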
# Note(zhouwei): It uses the options above, so put it here
include(init)
include(generic) # simplify cmake module
@@ -229,13 +240,6 @@ else()
  )
endif()
find_package(Threads REQUIRED)
@@ -569,6 +573,37 @@ include(third_party
include(flags) # set paddle compile flags
#------------- cinn cmake config start --------------
set(WITH_MKL_CBLAS ${WITH_MKL})
set(WITH_CUDA ${WITH_GPU})
set(WITH_CUDNN ${WITH_GPU})
if(WITH_CINN)
message(STATUS "Compile Paddle with CINN.")
include(cmake/cinn.cmake)
add_definitions(-DPADDLE_WITH_CINN)
# TODO(6clc): Use CINN_WITH_CUDNN to completely replace WITH_CUDNN in CINN.
# Use WITH_GPU to completely replace WITH_CUDA in CINN.
if(WITH_GPU)
set(WITH_CUDA ${WITH_GPU})
add_definitions(-DCINN_WITH_CUDA)
set(WITH_CUDNN ${CINN_WITH_CUDNN})
if(WITH_CUDNN)
add_definitions(-DCINN_WITH_CUDNN)
endif()
endif()
if(CINN_ONLY)
if(WITH_PYTHON)
add_subdirectory(python)
endif()
add_subdirectory(test)
return()
endif()
endif()
#------------- cinn cmake config end --------------
if(WITH_PROFILER)
  find_package(Gperftools REQUIRED)
  include_directories(${GPERFTOOLS_INCLUDE_DIR})
......
set(CINN_THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party")
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(DOWNLOAD_MODEL_DIR "${CINN_THIRD_PARTY_PATH}/model")
string(REGEX MATCH "-std=(c\\+\\+[^ ]+)" STD_FLAG "${CMAKE_CXX_FLAGS}")
if (NOT STD_FLAG)
if (NOT CMAKE_CXX_STANDARD)
message(STATUS "STD_FLAG and CMAKE_CXX_STANDARD not found, using default flag: -std=c++17")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
set(CMAKE_CXX_STANDARD 17)
else()
message(STATUS "Got CMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}, append -std=c++${CMAKE_CXX_STANDARD} to CMAKE_CXX_FLAGS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${CMAKE_CXX_STANDARD}")
endif()
else()
string(REGEX MATCH "([0-9]+)" STD_VALUE "${STD_FLAG}")
message(STATUS "Got STD_FLAG=${STD_FLAG}, set CMAKE_CXX_STANDARD=${STD_VALUE}")
set(CMAKE_CXX_STANDARD ${STD_VALUE})
endif()
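# Illustration of the detection above (hypothetical flags): with
# CMAKE_CXX_FLAGS="-O2 -std=c++14", the first REGEX MATCH yields
# STD_FLAG="-std=c++14", the second yields STD_VALUE="14", and
# CMAKE_CXX_STANDARD is set to 14.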
if(NOT DEFINED ENV{runtime_include_dir})
message(
STATUS
"set runtime_include_dir: ${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
set(ENV{runtime_include_dir} "${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
add_definitions(
-DRUNTIME_INCLUDE_DIR="${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
endif()
if(WITH_TESTING)
add_definitions(-DCINN_WITH_TEST)
endif()
if(WITH_DEBUG)
add_definitions(-DCINN_WITH_DEBUG)
endif()
# TODO(zhhsplendid): CINN produces lots of warnings during early development.
# Under Paddle they would be treated as errors, so we disable warnings for now
# and will clean up the code in the future.
add_definitions(-w)
include(cmake/cinn/version.cmake)
# include the customized configures
if(NOT EXISTS ${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
file(COPY ${PROJECT_SOURCE_DIR}/cmake/cinn/config.cmake DESTINATION ${CMAKE_BINARY_DIR}/cmake/cinn)
endif()
include(${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
if(WITH_MKL)
generate_dummy_static_lib(LIB_NAME "cinn_mklml" GENERATOR "mklml.cmake")
target_link_libraries(cinn_mklml ${MKLML_LIB} ${MKLML_IOMP_LIB})
add_definitions(-DCINN_WITH_MKL_CBLAS)
endif()
if(WITH_MKLDNN)
add_definitions(-DCINN_WITH_MKLDNN)
endif()
if(WITH_GPU)
message(STATUS "Enable CINN CUDA")
add_definitions(-DCINN_WITH_CUDA)
message(STATUS "Enable CINN CUDNN")
add_definitions(-DCINN_WITH_CUDNN)
enable_language(CUDA)
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
include_directories(${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda)
include_directories(/usr/lib/x86_64-linux-gnu)
set(CUDA_SEPARABLE_COMPILATION ON)
cuda_select_nvcc_arch_flags(ARCH_FLAGS Auto)
list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
message(
STATUS
"copy paddle/cinn/common/float16.h paddle/cinn/common/bfloat16.h to $ENV{runtime_include_dir}"
)
file(COPY paddle/cinn/common/float16.h paddle/cinn/common/bfloat16.h
DESTINATION $ENV{runtime_include_dir})
find_library(CUDASTUB libcuda.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/
REQUIRED)
find_library(CUBLAS libcublas.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib /usr/lib64 REQUIRED)
find_library(CUDNN libcudnn.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib /usr/lib64 REQUIRED)
find_library(CURAND libcurand.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib /usr/lib64 REQUIRED)
find_library(CUSOLVER libcusolver.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib /usr/lib64 REQUIRED)
endif()
set(cinnapi_src CACHE INTERNAL "" FORCE)
set(core_src CACHE INTERNAL "" FORCE)
set(core_includes CACHE INTERNAL "" FORCE)
set(core_proto_includes CACHE INTERNAL "" FORCE)
include_directories(${CMAKE_SOURCE_DIR})
include_directories(${CMAKE_BINARY_DIR})
include(cmake/generic.cmake)
include(cmake/cinn/system.cmake)
include(cmake/cinn/core.cmake)
include(cmake/cinn/external/absl.cmake)
include(cmake/cinn/nvrtc.cmake)
include(cmake/cinn/nvtx.cmake)
include(cmake/cinn/external/llvm.cmake)
include(cmake/cinn/external/isl.cmake)
include(cmake/cinn/external/ginac.cmake)
include(cmake/cinn/external/openmp.cmake)
include(cmake/cinn/external/jitify.cmake)
if(CINN_ONLY)
LINK_LIBRARIES(gflags)
endif()
set(LINK_FLAGS
"-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cinn/export.map"
CACHE INTERNAL "")
set(global_test_args
"--cinn_x86_builtin_code_root=${CMAKE_SOURCE_DIR}/paddle/cinn/backends")
set(Python_VIRTUALENV FIRST)
if(NOT PYTHON_EXECUTABLE)
find_package(PythonInterp ${PY_VERSION} REQUIRED)
endif()
if(NOT PYTHON_LIBRARIES)
find_package(PythonLibs ${PY_VERSION} REQUIRED)
endif()
message(STATUS "PYTHON_LIBRARIES: ${PYTHON_LIBRARIES}")
message(STATUS "PYTHON_INCLUDE_DIR: ${PYTHON_INCLUDE_DIR}")
include_directories(${PYTHON_INCLUDE_DIR})
set(core_deps CACHE INTERNAL "" FORCE)
set(hlir_src CACHE INTERNAL "" FORCE)
# TODO(chenweihang): The logic below depends on adding the cinn subdirectory
# here, but it would be better to move this to paddle/CMakeLists.txt
add_subdirectory(paddle/cinn)
set(core_src "${cinnapi_src}")
cinn_cc_library(
cinnapi
SHARED
SRCS
${cinnapi_src}
DEPS
glog
${llvm_libs}
cinn_framework_proto
param_proto
auto_schedule_proto
schedule_desc_proto
absl
isl
ginac
pybind
${jitify_deps})
add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
target_link_libraries(cinnapi ${PYTHON_LIBRARIES})
if(WITH_MKL)
target_link_libraries(cinnapi cinn_mklml)
add_dependencies(cinnapi cinn_mklml)
if(WITH_MKLDNN)
target_link_libraries(cinnapi mkldnn)
add_dependencies(cinnapi mkldnn)
endif()
endif()
if(WITH_GPU)
target_link_libraries(
cinnapi
${CUDA_NVRTC_LIB}
${CUDA_LIBRARIES}
${CUDASTUB}
${CUBLAS}
${CUDNN}
${CURAND}
${CUSOLVER})
if(NVTX_FOUND)
target_link_libraries(cinnapi ${CUDA_NVTX_LIB})
endif()
endif()
function(gen_cinncore LINKTYPE)
set(CINNCORE_TARGET cinncore)
if(${LINKTYPE} STREQUAL "STATIC")
set(CINNCORE_TARGET cinncore_static)
endif()
cinn_cc_library(
${CINNCORE_TARGET}
${LINKTYPE}
SRCS
${core_src}
DEPS
glog
${llvm_libs}
cinn_framework_proto
param_proto
auto_schedule_proto
schedule_desc_proto
absl
isl
ginac)
add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
add_dependencies(${CINNCORE_TARGET} pybind)
target_link_libraries(${CINNCORE_TARGET} ${PYTHON_LIBRARIES})
if(WITH_MKL)
target_link_libraries(${CINNCORE_TARGET} cinn_mklml)
add_dependencies(${CINNCORE_TARGET} cinn_mklml)
if(WITH_MKLDNN)
target_link_libraries(${CINNCORE_TARGET} mkldnn)
add_dependencies(${CINNCORE_TARGET} mkldnn)
endif()
endif()
if(WITH_GPU)
target_link_libraries(
${CINNCORE_TARGET}
${CUDA_NVRTC_LIB}
${CUDA_LIBRARIES}
${CUDASTUB}
${CUBLAS}
${CUDNN}
${CURAND}
${CUSOLVER}
${jitify_deps})
if(NVTX_FOUND)
target_link_libraries(${CINNCORE_TARGET} ${CUDA_NVTX_LIB})
endif()
endif()
endfunction()
gen_cinncore(STATIC)
gen_cinncore(SHARED)
# --------distribute cinncore lib and include begin--------
set(PUBLISH_LIBS ON)
if(PUBLISH_LIBS)
set(core_includes
"${core_includes};paddle/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh")
foreach(header ${core_includes})
get_filename_component(prefix ${header} DIRECTORY)
file(COPY ${header}
DESTINATION ${CMAKE_BINARY_DIR}/dist/cinn/include/${prefix})
endforeach()
foreach(proto_header ${core_proto_includes})
string(REPLACE ${CMAKE_BINARY_DIR}/ "" relname ${proto_header})
get_filename_component(prefix ${relname} DIRECTORY)
set(target_name ${CMAKE_BINARY_DIR}/dist/cinn/include/${relname})
add_custom_command(
TARGET cinnapi
POST_BUILD
COMMENT "copy generated proto header '${relname}' to dist"
COMMAND cmake -E copy ${proto_header} ${target_name} DEPENDS cinnapi)
endforeach()
add_custom_command(
TARGET cinnapi
POST_BUILD
COMMAND cmake -E copy ${CMAKE_BINARY_DIR}/libcinnapi.so
${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinnapi.so
COMMAND cmake -E copy_directory ${CINN_THIRD_PARTY_PATH}/install
${CMAKE_BINARY_DIR}/dist/third_party DEPENDS cinnapi)
add_custom_command(
TARGET cinncore_static
POST_BUILD
COMMAND cmake -E copy ${PROJECT_SOURCE_DIR}/tools/cinn/tutorials_demo/demo.cc
${CMAKE_BINARY_DIR}/dist/demo.cc
COMMAND cmake -E copy ${PROJECT_SOURCE_DIR}/tools/cinn/tutorials_demo/build_demo.sh
${CMAKE_BINARY_DIR}/dist/build_demo.sh
COMMAND cmake -E copy ${CMAKE_BINARY_DIR}/libcinncore_static.a
${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinncore_static.a
COMMAND
cmake -E copy
${CMAKE_BINARY_DIR}/paddle/cinn/frontend/paddle/libcinn_framework_proto.a
${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinn_framework_proto.a
COMMAND
cmake -E copy ${CMAKE_BINARY_DIR}/paddle/cinn/hlir/pe/libparam_proto.a
${CMAKE_BINARY_DIR}/dist/cinn/lib/libparam_proto.a
COMMAND
cmake -E copy
${CMAKE_BINARY_DIR}/paddle/cinn/auto_schedule/libauto_schedule_proto.a
${CMAKE_BINARY_DIR}/dist/cinn/lib/libauto_schedule_proto.a
COMMAND
cmake -E copy ${CMAKE_BINARY_DIR}/paddle/cinn/ir/libschedule_desc_proto.a
${CMAKE_BINARY_DIR}/dist/cinn/lib/libschedule_desc_proto.a
COMMENT "distribute libcinncore_static.a and related header files." DEPENDS
cinncore_static)
endif()
# --------distribute cinncore lib and include end--------
set(CINN_LIB_NAME "libcinnapi.so")
set(CINN_LIB_LOCATION "${CMAKE_BINARY_DIR}/dist/cinn/lib")
set(CINN_LIB "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
######################################
# Add CINN's dependencies header files
######################################
# Add absl
set(ABSL_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/third_party/absl/include")
include_directories(${ABSL_INCLUDE_DIR})
# Add isl
set(ISL_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/third_party/isl/include")
include_directories(${ISL_INCLUDE_DIR})
# Add LLVM
set(LLVM_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/third_party/llvm/include")
include_directories(${LLVM_INCLUDE_DIR})
######################################################
# Put external_cinn and dependencies together as a lib
######################################################
set(CINN_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/cinn/include")
include_directories(${CINN_INCLUDE_DIR})
# The home path of ISL
# Required!
set(ISL_HOME "")
set(USE_OPENMP "intel")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -fPIC -mavx -mfma -Wno-write-strings -Wno-psabi")
set(PADDLE_RESOURCE_URL
"http://paddle-inference-dist.bj.bcebos.com"
CACHE STRING "inference download url")
function(cinn_cc_library TARGET_NAME)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cinn_cc_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
if(cinn_cc_library_SRCS)
if(cinn_cc_library_SHARED OR cinn_cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cinn_cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cinn_cc_library_SRCS})
endif()
if(cinn_cc_library_DEPS)
# No need to link libwarpctc.so
target_link_libraries(${TARGET_NAME} ${cinn_cc_library_DEPS})
add_dependencies(${TARGET_NAME} ${cinn_cc_library_DEPS})
endif()
# cpplint code style
foreach(source_file ${cinn_cc_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND cinn_cc_library_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
else(cinn_cc_library_SRCS)
if(cinn_cc_library_DEPS)
cinn_merge_static_libs(${TARGET_NAME} ${cinn_cc_library_DEPS})
else()
message(
FATAL_ERROR
"Please specify source files or libraries in cinn_cc_library(${TARGET_NAME} ...)."
)
endif()
endif(cinn_cc_library_SRCS)
if((NOT ("${TARGET_NAME}" STREQUAL "cinn_gtest_main"))
AND (NOT ("${TARGET_NAME}" STREQUAL "utils"))
AND (NOT ("${TARGET_NAME}" STREQUAL "lib")))
target_link_libraries(${TARGET_NAME} Threads::Threads)
endif(
(NOT ("${TARGET_NAME}" STREQUAL "cinn_gtest_main"))
AND (NOT ("${TARGET_NAME}" STREQUAL "utils"))
AND (NOT ("${TARGET_NAME}" STREQUAL "lib")))
endfunction(cinn_cc_library)
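# Example usage of cinn_cc_library (hypothetical target/source names):
#   cinn_cc_library(my_pass SRCS my_pass.cc DEPS glog)           # static lib
#   cinn_cc_library(my_pass_so SHARED SRCS my_pass.cc DEPS glog) # shared lib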
list(APPEND CMAKE_CTEST_ARGUMENTS)
function(remove_gflags TARGET_NAME)
get_target_property(TARGET_LIBRARIES ${TARGET_NAME} LINK_LIBRARIES)
list(REMOVE_ITEM TARGET_LIBRARIES glog)
list(REMOVE_ITEM TARGET_LIBRARIES gflags)
set_property(TARGET ${TARGET_NAME} PROPERTY LINK_LIBRARIES
${TARGET_LIBRARIES})
endfunction()
function(cinn_cc_test TARGET_NAME)
if(WITH_TESTING AND CINN_ONLY)
set(options SERIAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cinn_cc_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cinn_cc_test_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules}
cinn_gtest_main gtest glog ${cinn_cc_test_DEPS})
add_dependencies(${TARGET_NAME} cinn_gtest_main gtest glog
${cinn_cc_test_DEPS})
add_test(
NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} "${cinn_cc_test_ARGS}"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(${cinn_cc_test_SERIAL})
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
endif()
# No unit test should exceed 6000 seconds (100 minutes).
set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 6000)
remove_gflags(${TARGET_NAME})
endif()
endfunction()
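# Example usage of cinn_cc_test (hypothetical test name and path; the test is
# only registered when WITH_TESTING and CINN_ONLY are both ON):
#   cinn_cc_test(test_my_pass SRCS my_pass_test.cc DEPS cinncore
#                ARGS --cinn_x86_builtin_code_root=/path/to/backends)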
function(cinn_nv_library TARGET_NAME)
if(WITH_GPU)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cinn_nv_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
if(cinn_nv_library_SRCS)
if(cinn_nv_library_SHARED OR cinn_nv_library_shared) # build *.so
cuda_add_library(${TARGET_NAME} SHARED ${cinn_nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${cinn_nv_library_SRCS})
endif()
if(cinn_nv_library_DEPS)
add_dependencies(${TARGET_NAME} ${cinn_nv_library_DEPS})
target_link_libraries(${TARGET_NAME} ${cinn_nv_library_DEPS})
endif()
# cpplint code style
foreach(source_file ${cinn_nv_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND cinn_nv_library_HEADERS
${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
else(cinn_nv_library_SRCS)
if(cinn_nv_library_DEPS)
cinn_merge_static_libs(${TARGET_NAME} ${cinn_nv_library_DEPS})
else()
message(FATAL_ERROR
        "Please specify source file or library in cinn_nv_library.")
endif()
endif(cinn_nv_library_SRCS)
target_link_libraries(${TARGET_NAME} Threads::Threads)
endif()
endfunction(cinn_nv_library)
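# Example usage of cinn_nv_library (hypothetical names; sources are compiled
# with nvcc, and the call is a no-op unless WITH_GPU is ON):
#   cinn_nv_library(my_kernels SRCS my_kernels.cu DEPS my_host_lib)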
function(cinn_nv_binary TARGET_NAME)
if(WITH_GPU)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cinn_nv_binary "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${cinn_nv_binary_SRCS})
if(cinn_nv_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${cinn_nv_binary_DEPS})
add_dependencies(${TARGET_NAME} ${cinn_nv_binary_DEPS})
common_link(${TARGET_NAME})
endif()
endif()
endfunction(cinn_nv_binary)
function(cinn_nv_test TARGET_NAME)
if(WITH_GPU AND WITH_TESTING AND CINN_ONLY)
set(options SERIAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cinn_nv_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${cinn_nv_test_SRCS} OPTIONS "-std=c++${CMAKE_CUDA_STANDARD}")
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(
${TARGET_NAME}
${cinn_nv_test_DEPS}
cinn_gtest_main
gtest
${os_dependency_modules}
${CUDNN_LIBRARY}
${CUBLAS_LIBRARIES}
${CUDA_LIBRARIES})
add_dependencies(${TARGET_NAME} ${cinn_nv_test_DEPS} cinn_gtest_main gtest)
common_link(${TARGET_NAME})
# add_test(${TARGET_NAME} ${TARGET_NAME})
add_test(
NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} "${cinn_nv_test_ARGS}"
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if(cinn_nv_test_SERIAL)
set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
endif()
target_link_libraries(
${TARGET_NAME} Threads::Threads ${CUDA_NVRTC_LIB} ${CUDA_LIBRARIES}
${CUDA_cudart_static_LIBRARY}
${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libcuda.so)
if(NVTX_FOUND)
target_link_libraries(${TARGET_NAME} ${CUDA_NVTX_LIB})
endif()
remove_gflags(${TARGET_NAME})
endif()
endfunction(cinn_nv_test)
# Add a dependency so that TARGET depends on the test result of DEP; this function executes DEP during make.
function(add_run_test_dependency TARGET_NAME DEP_NAME)
if(WITH_TESTING AND CINN_ONLY)
set(custom_target_name ${TARGET_NAME}_TEST_OUTPUT_DEPENDENCY_ON_${DEP_NAME})
add_custom_target(
${custom_target_name}
COMMAND
cd ${CMAKE_CURRENT_BINARY_DIR} && ./${DEP_NAME}
--cinn_x86_builtin_code_root=${CMAKE_SOURCE_DIR}/paddle/cinn/backends
COMMAND cd ${CMAKE_BINARY_DIR}
DEPENDS ${DEP_NAME})
add_dependencies(${TARGET_NAME} ${DEP_NAME} ${custom_target_name})
endif(WITH_TESTING AND CINN_ONLY)
endfunction(add_run_test_dependency)
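# Example usage (hypothetical targets): make test_b depend on test_a having
# run successfully, e.g. when test_a produces data that test_b consumes:
#   add_run_test_dependency(test_b test_a)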
# Find all third_party modules used by the paddle static library,
# to reduce the dependencies when building the inference libs.
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)
function(find_fluid_thirdparties TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path
${__target_path})
string(FIND "${__target_path}" "third_party" pos)
if(pos GREATER 1)
    get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THIRD_PARTY)
set(fluid_third_partys ${fluid_third_partys} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}")
endif()
endfunction(find_fluid_thirdparties)
function(cinn_merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
# Get all propagated dependencies from the merged libraries
foreach(lib ${libs})
list(APPEND libs_deps ${${lib}_LIB_DEPENDS})
endforeach()
if(libs_deps)
list(REMOVE_DUPLICATES libs_deps)
endif()
# To produce a library we need at least one source file.
# It is created by the add_custom_command below and also helps
# to track dependencies.
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
if(APPLE) # Use OSX's libtool to merge archives
# Make the generated dummy source file depend on all static input
# libs. If an input lib changes, the source file is touched,
# which causes the desired effect (relink).
add_custom_command(
OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy static lib
file(WRITE ${target_SRCS}
"const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
add_custom_command(
TARGET ${TARGET_NAME}
POST_BUILD
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o
"${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
foreach(lib ${libs})
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
set(objdir ${target_DIR}/${lib}.objdir)
add_custom_command(
OUTPUT ${objdir}
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
DEPENDS ${lib})
add_custom_command(
OUTPUT ${objlistfile}
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
DEPENDS ${lib} ${objdir}
WORKING_DIRECTORY ${objdir})
list(APPEND target_OBJS "${objlistfile}")
endforeach()
# Make the generated dummy source file depend on all static input
# libs. If an input lib changes, the source file is touched,
# which causes the desired effect (relink).
add_custom_command(
OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs} ${target_OBJS})
# Generate dummy static lib
file(WRITE ${target_SRCS}
"const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
# Get the file name of the generated library
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
add_custom_command(
TARGET ${TARGET_NAME}
POST_BUILD
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR})
endif(LINUX)
if(WIN32)
# Windows does not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depend on all static input
# libs. If an input lib changes, the source file is touched,
# which causes the desired effect (relink).
add_custom_command(
OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy static lib
file(WRITE ${target_SRCS}
"const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
foreach(lib ${libs})
# Get the file names of the libraries to be merged
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
endforeach()
# MSVC puts the library in a "/Release/xxxlib" directory by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(
TARGET ${TARGET_NAME}
POST_BUILD
COMMAND cmake -E make_directory
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
COMMAND
lib
/OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib
${libfiles})
endif(WIN32)
endfunction(cinn_merge_static_libs)
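# Example usage of cinn_merge_static_libs (hypothetical libs): merge the
# object files of two static libs into a single archive libmy_bundle.a:
#   cinn_merge_static_libs(my_bundle my_lib_a my_lib_b)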
# Modification of standard 'protobuf_generate_cpp()' with protobuf-lite support
# Usage:
# paddle_protobuf_generate_cpp(<proto_srcs> <proto_hdrs> <proto_files>)
function(paddle_protobuf_generate_cpp SRCS HDRS)
if(NOT ARGN)
message(
SEND_ERROR
"Error: paddle_protobuf_generate_cpp() called without any proto files")
return()
endif()
set(${SRCS})
set(${HDRS})
foreach(FIL ${ARGN})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
get_filename_component(FIL_WE ${FIL} NAME_WE)
set(_protobuf_protoc_src "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
set(_protobuf_protoc_hdr "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
list(APPEND ${SRCS} "${_protobuf_protoc_src}")
list(APPEND ${HDRS} "${_protobuf_protoc_hdr}")
add_custom_command(
OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}"
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${CMAKE_SOURCE_DIR} --cpp_out
"${CMAKE_BINARY_DIR}" ${ABS_FIL}
DEPENDS ${ABS_FIL} protoc
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS}
${${SRCS}}
PARENT_SCOPE)
set(${HDRS}
${${HDRS}}
PARENT_SCOPE)
endfunction()
function(cinn_proto_library TARGET_NAME)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cinn_proto_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(proto_srcs)
set(proto_hdrs)
paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${cinn_proto_library_SRCS})
cinn_cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS
${cinn_proto_library_DEPS} protobuf)
set("${TARGET_NAME}_HDRS"
${proto_hdrs}
PARENT_SCOPE)
set("${TARGET_NAME}_SRCS"
${proto_srcs}
PARENT_SCOPE)
endfunction()
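# Example usage of cinn_proto_library (hypothetical my.proto): compiles the
# generated .pb.cc into a library and exposes the generated files:
#   cinn_proto_library(my_proto SRCS my.proto)
#   # ${my_proto_HDRS} / ${my_proto_SRCS} now hold the generated files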
function(common_link TARGET_NAME)
if(WITH_PROFILER)
target_link_libraries(${TARGET_NAME} gperftools::profiler)
endif()
if(WITH_JEMALLOC)
target_link_libraries(${TARGET_NAME} jemalloc::jemalloc)
endif()
endfunction()
# This method is borrowed from Paddle-Lite.
function(download_and_uncompress INSTALL_DIR URL FILENAME)
message(STATUS "Download inference test stuff from ${URL}/${FILENAME}")
string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME})
set(EXTERNAL_PROJECT_NAME "extern_lite_download_${FILENAME_EX}")
set(UNPACK_DIR "${INSTALL_DIR}/src/${EXTERNAL_PROJECT_NAME}")
ExternalProject_Add(
${EXTERNAL_PROJECT_NAME}
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${INSTALL_DIR}
DOWNLOAD_COMMAND
wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME}
${URL}/${FILENAME} && ${CMAKE_COMMAND} -E tar xzf
${INSTALL_DIR}/${FILENAME}
DOWNLOAD_DIR ${INSTALL_DIR}
DOWNLOAD_NO_PROGRESS 1
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND "")
endfunction()
function(gather_srcs SRC_GROUP)
set(options)
set(oneValueArgs)
set(multiValueArgs "SRCS")
cmake_parse_arguments(prefix "" "" "${multiValueArgs}" ${ARGN})
foreach(cpp ${prefix_SRCS})
set(${SRC_GROUP}
"${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${cpp}"
CACHE INTERNAL "")
endforeach()
endfunction()
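# Example usage of gather_srcs (hypothetical sources): appends the absolute
# paths of foo.cc and bar.cc to the cinnapi_src cache variable:
#   gather_srcs(cinnapi_src SRCS foo.cc bar.cc)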
function(core_gather_headers)
file(
GLOB includes
LIST_DIRECTORIES false
RELATIVE ${CMAKE_SOURCE_DIR}
*.h)
foreach(header ${includes})
set(core_includes
"${core_includes};${header}"
CACHE INTERNAL "")
endforeach()
endfunction()
{
global:
RegisterKernels;
local:
*;
};
include(ExternalProject)
set(ABSL_SOURCES_DIR ${CINN_THIRD_PARTY_PATH}/absl)
set(ABSL_INSTALL_DIR ${CINN_THIRD_PARTY_PATH}/install/absl)
set(ABSL_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
set(ABSL_REPOSITORY "https://github.com/abseil/abseil-cpp.git")
set(ABSL_TAG "20210324.2")
set(OPTIONAL_ARGS
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
"-DABSL_CXX_STANDARD=17"
"-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
"-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
"-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
"-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
"-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")
ExternalProject_Add(
external_absl
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS gflags
GIT_REPOSITORY ${ABSL_REPOSITORY}
GIT_TAG ${ABSL_TAG}
PREFIX ${ABSL_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS ${OPTIONAL_ARGS}
-DCMAKE_INSTALL_PREFIX=${ABSL_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${ABSL_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DWITH_GFLAGS=ON
-Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${ABSL_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${ABSL_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_base.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_hash.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_wyhash.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_city.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_strings.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_throw_delegate.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_bad_any_cast_impl.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_bad_optional_access.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_bad_variant_access.a
BUILD_BYPRODUCTS ${ABSL_INSTALL_DIR}/lib/libabsl_raw_hash_set.a)
# It may be more convenient if we just include all absl libs
set(ABSL_LIB_NAMES
hash
wyhash
city
strings
throw_delegate
bad_any_cast_impl
bad_optional_access
bad_variant_access
raw_hash_set)
set(ABSL_LIBS "")
add_library(absl STATIC IMPORTED GLOBAL)
set_property(TARGET absl PROPERTY IMPORTED_LOCATION
${ABSL_INSTALL_DIR}/lib/libabsl_base.a)
if(NOT USE_PREBUILD_EXTERNAL)
add_dependencies(absl external_absl)
endif()
foreach(lib_name ${ABSL_LIB_NAMES})
target_link_libraries(absl
INTERFACE ${ABSL_INSTALL_DIR}/lib/libabsl_${lib_name}.a)
endforeach()
include_directories(${ABSL_INSTALL_DIR}/include)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
set(BOOST_PROJECT "extern_boost")
# To release PaddlePaddle as a pip package, we have to follow the
# manylinux1 standard, which features Linux kernels and compilers
# as old as possible and recommends CentOS 5. Indeed, the earliest
# CentOS version that works with NVIDIA CUDA is CentOS 6. And a new
# version of boost, say, 1.66.0, doesn't build on CentOS 6. We
# checked that the devtools package of CentOS 6 installs boost 1.41.0.
# So we use 1.41.0 here.
set(BOOST_VER "1.41.0")
set(BOOST_TAR
"boost_1_41_0"
CACHE STRING "" FORCE)
set(BOOST_URL
"http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz"
CACHE STRING "" FORCE)
message(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
set(BOOST_SOURCES_DIR ${CINN_THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR
"${BOOST_DOWNLOAD_DIR}"
CACHE PATH "boost include directory." FORCE)
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
include_directories(${BOOST_INCLUDE_DIR})
ExternalProject_Add(
${BOOST_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS}
DOWNLOAD_DIR ${BOOST_DOWNLOAD_DIR}
URL ${BOOST_URL}
DOWNLOAD_NO_PROGRESS 1
PREFIX ${BOOST_SOURCES_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
UPDATE_COMMAND "")
if(${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
add_library(boost STATIC ${dummyfile})
else()
add_library(boost INTERFACE)
endif()
add_dependencies(boost ${BOOST_PROJECT})
set(Boost_INCLUDE_DIR ${BOOST_INCLUDE_DIR})
include(ExternalProject)
# gmp-6.2.1 https://gmplib.org/download/gmp/gmp-6.2.1.tar.xz
# cln-1.3.6 https://www.ginac.de/CLN/cln-1.3.6.tar.bz2
# ginac-1.8.1 https://www.ginac.de/ginac-1.8.1.tar.bz2
# all build with CFLAGS="-fPIC -DPIC" CXXFLAGS="-fPIC -DPIC" --enable-static=yes
set(GINAC_DOWNLOAD_URL
https://paddle-inference-dist.bj.bcebos.com/CINN/ginac-1.8.1_cln-1.3.6_gmp-6.2.1.tar.gz
)
set(GINAC_MD5 ebc3e4b7770dd604777ac3f01bfc8b06)
ExternalProject_Add(
external_ginac
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${GINAC_DOWNLOAD_URL}
URL_MD5 ${GINAC_MD5}
PREFIX ${CINN_THIRD_PARTY_PATH}/ginac
SOURCE_DIR ${CINN_THIRD_PARTY_PATH}/install/ginac
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_BYPRODUCTS ${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libginac.a
BUILD_BYPRODUCTS ${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libcln.a
BUILD_BYPRODUCTS ${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libgmp.a)
add_library(ginac STATIC IMPORTED GLOBAL)
add_dependencies(ginac external_ginac)
set_property(
TARGET ginac PROPERTY IMPORTED_LOCATION
${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libginac.a)
target_link_libraries(
ginac INTERFACE ${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libcln.a
${CINN_THIRD_PARTY_PATH}/install/ginac/lib/libgmp.a)
include_directories(${CINN_THIRD_PARTY_PATH}/install/ginac/include)
include(ExternalProject)
# isl https://github.com/inducer/ISL
# commit-id 6a1760fe46967cda2a06387793a6b7d4a0876581
# depends on llvm f9dc2b7079350d0fed3bb3775f496b90483c9e42
# depends on gmp-6.2.1
# static build
# CPPFLAGS="-fPIC -DPIC" ./configure --with-gmp-prefix=<gmp-install-path> --with-clang-prefix=<llvm-install-path> --enable-shared=no --enable-static=yes
set(ISL_DOWNLOAD_URL
https://paddle-inference-dist.bj.bcebos.com/CINN/isl-6a1760fe.tar.gz)
set(ISL_MD5 fff10083fb79d394b8a7b7b2089f6183)
ExternalProject_Add(
external_isl
${EXTERNAL_PROJECT_LOG_ARGS}
URL ${ISL_DOWNLOAD_URL}
URL_MD5 ${ISL_MD5}
PREFIX ${CINN_THIRD_PARTY_PATH}/isl
SOURCE_DIR ${CINN_THIRD_PARTY_PATH}/install/isl
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND ""
BUILD_BYPRODUCTS ${CINN_THIRD_PARTY_PATH}/install/isl/lib/libisl.a)
add_library(isl STATIC IMPORTED GLOBAL)
set_property(
TARGET isl PROPERTY IMPORTED_LOCATION
${CINN_THIRD_PARTY_PATH}/install/isl/lib/libisl.a)
add_dependencies(isl external_isl)
include_directories(${CINN_THIRD_PARTY_PATH}/install/isl/include)
if(NOT WITH_GPU)
set(JITIFY_FOUND OFF)
return()
endif()
include(ExternalProject)
set(JITIFY_SOURCE_PATH ${CINN_THIRD_PARTY_PATH}/install/jitify)
ExternalProject_Add(
external_jitify
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/NVIDIA/jitify.git"
GIT_TAG 57de649139c866eb83acacfe50c92ad7c6278776
PREFIX ${CINN_THIRD_PARTY_PATH}/jitify
SOURCE_DIR ${JITIFY_SOURCE_PATH}
CONFIGURE_COMMAND ""
PATCH_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND "")
include_directories(${JITIFY_SOURCE_PATH})
add_library(extern_jitify INTERFACE)
add_dependencies(extern_jitify external_jitify)
set(jitify_deps extern_jitify)
include(FetchContent)
# set(LLVM_DOWNLOAD_URL https://paddle-inference-dist.bj.bcebos.com/CINN/llvm11.tar.gz)
# set(LLVM_MD5 39d32b6be466781dddf5869318dcba53)
set(LLVM_DOWNLOAD_URL
https://paddle-inference-dist.bj.bcebos.com/CINN/llvm11-glibc2.17.tar.gz)
set(LLVM_MD5 33c7d3cc6d370585381e8d90bd7c2198)
set(FETCHCONTENT_BASE_DIR ${CINN_THIRD_PARTY_PATH}/llvm)
set(FETCHCONTENT_QUIET OFF)
FetchContent_Declare(
external_llvm
URL ${LLVM_DOWNLOAD_URL}
URL_MD5 ${LLVM_MD5}
  PREFIX ${CINN_THIRD_PARTY_PATH}/llvm
  SOURCE_DIR ${CINN_THIRD_PARTY_PATH}/install/llvm)
if(NOT LLVM_PATH)
FetchContent_GetProperties(external_llvm)
if(NOT external_llvm_POPULATED)
FetchContent_Populate(external_llvm)
endif()
set(LLVM_PATH ${CINN_THIRD_PARTY_PATH}/install/llvm)
set(LLVM_DIR ${CINN_THIRD_PARTY_PATH}/install/llvm/lib/cmake/llvm)
set(MLIR_DIR ${CINN_THIRD_PARTY_PATH}/install/llvm/lib/cmake/mlir)
else()
set(LLVM_DIR ${LLVM_PATH}/lib/cmake/llvm)
set(MLIR_DIR ${LLVM_PATH}/lib/cmake/mlir)
endif()
if(${CMAKE_CXX_COMPILER} STREQUAL "clang++")
set(CMAKE_EXE_LINKER_FLAGS
"${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
endif()
message(STATUS "set LLVM_DIR: ${LLVM_DIR}")
message(STATUS "set MLIR_DIR: ${MLIR_DIR}")
find_package(LLVM REQUIRED CONFIG HINTS ${LLVM_DIR})
find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR})
find_package(ZLIB REQUIRED)
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)
include_directories(${LLVM_INCLUDE_DIRS})
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
include(AddLLVM)
include(TableGen)
include(AddMLIR)
message(STATUS "Found MLIR: ${MLIR_DIR}")
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
# To build with MLIR, LLVM is built from source with the following flags:
#[==[
cmake -G Ninja ../llvm \
-DLLVM_ENABLE_PROJECTS="mlir;clang" \
-DLLVM_BUILD_EXAMPLES=OFF \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_ZLIB=OFF \
-DLLVM_ENABLE_RTTI=ON \
-DLLVM_ENABLE_TERMINFO=OFF \
-DCMAKE_INSTALL_PREFIX=./install
#]==]
# The matched llvm-project version is f9dc2b7079350d0fed3bb3775f496b90483c9e42 (currently a temporary commit)
# Update: to build llvm in manylinux docker with glibc-2.17, and use it in manylinux and ubuntu docker,
# the patch https://gist.github.com/zhiqiu/6e8d969176dce13d98fd15338a16265e is needed.
add_definitions(${LLVM_DEFINITIONS})
llvm_map_components_to_libnames(
llvm_libs
Support
Core
irreader
X86
executionengine
orcjit
mcjit
all
codegen)
message(STATUS "LLVM libs: ${llvm_libs}")
get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS)
message(STATUS "MLIR libs: ${mlir_libs}")
add_definitions(${LLVM_DEFINITIONS})
# The minimum needed libraries for MLIR IR parse and transform.
set(MLIR_IR_LIBS
MLIRAnalysis
MLIRStandardOps
MLIRPass
MLIRParser
MLIRDialect
MLIRIR
MLIROptLib)
# td_base is the name of a xxx.td file (without the .td suffix)
function(mlir_tablegen_on td_base)
set(options)
set(oneValueArgs DIALECT)
cmake_parse_arguments(mlir_tablegen_on "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(LLVM_TARGET_DEFINITIONS ${td_base}.td)
mlir_tablegen(${td_base}.hpp.inc -gen-op-decls)
mlir_tablegen(${td_base}.cpp.inc -gen-op-defs)
if(mlir_tablegen_on_DIALECT)
mlir_tablegen(${td_base}_dialect.hpp.inc --gen-dialect-decls
-dialect=${mlir_tablegen_on_DIALECT})
endif()
add_public_tablegen_target(${td_base}_IncGen)
add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen)
endfunction()
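# Example usage (hypothetical my_ops.td): generates my_ops.hpp.inc and
# my_ops.cpp.inc, plus dialect declarations for the given dialect:
#   mlir_tablegen_on(my_ops DIALECT my_dialect)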
function(mlir_add_rewriter td_base)
set(LLVM_TARGET_DEFINITIONS ${td_base}.td)
mlir_tablegen(${td_base}.hpp.inc -gen-rewriters
"-I${CMAKE_SOURCE_DIR}/infrt/dialect/pass")
add_public_tablegen_target(${td_base}_IncGen)
add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen)
endfunction()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(USE_OPENMP STREQUAL "gnu")
find_package(OpenMP)
if(OPENMP_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
add_definitions(-DCINN_USE_OPENMP)
set(WITH_OPENMP ON)
message(STATUS "Build with OpenMP ${OpenMP_CXX_LIBRARIES}")
message(STATUS "CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS})
else()
set(WITH_OPENMP OFF)
endif()
elseif(USE_OPENMP STREQUAL "intel")
find_package(OpenMP)
if(OPENMP_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
message(STATUS "CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS})
add_definitions(-DCINN_USE_OPENMP)
set(WITH_OPENMP ON)
message(STATUS "Build with OpenMP " ${MKLML_IOMP_LIB})
else()
set(WITH_OPENMP OFF)
endif()
endif()
if(${CMAKE_CXX_COMPILER} STREQUAL "clang++")
set(CMAKE_EXE_LINKER_FLAGS
"${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
endif()
message(STATUS "set LLVM_DIR: ${LLVM_DIR}")
message(STATUS "set MLIR_DIR: ${MLIR_DIR}")
find_package(LLVM REQUIRED CONFIG HINTS ${LLVM_DIR})
find_package(MLIR REQUIRED CONFIG HINTS ${MLIR_DIR})
find_package(ZLIB REQUIRED)
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
include(AddLLVM)
include_directories(${LLVM_INCLUDE_DIRS})
list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}")
include(AddLLVM)
include(TableGen)
include(AddMLIR)
message(STATUS "Found MLIR: ${MLIR_DIR}")
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
# To build with MLIR, LLVM is built from source with the following flags:
#[==[
cmake -G Ninja ../llvm \
-DLLVM_ENABLE_PROJECTS=mlir \
-DLLVM_BUILD_EXAMPLES=OFF \
-DLLVM_TARGETS_TO_BUILD="X86" \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_ENABLE_ASSERTIONS=ON \
-DLLVM_ENABLE_ZLIB=OFF \
-DLLVM_ENABLE_RTTI=ON \
#]==]
# The matched llvm-project version is f9dc2b7079350d0fed3bb3775f496b90483c9e42 (currently a temporary commit)
add_definitions(${LLVM_DEFINITIONS})
llvm_map_components_to_libnames(
llvm_libs
Support
Core
irreader
X86
executionengine
orcjit
mcjit
all
codegen)
message(STATUS "LLVM libs: ${llvm_libs}")
get_property(mlir_libs GLOBAL PROPERTY MLIR_ALL_LIBS)
message(STATUS "MLIR libs: ${mlir_libs}")
add_definitions(${LLVM_DEFINITIONS})
# The minimum needed libraries for MLIR IR parse and transform.
set(MLIR_IR_LIBS
MLIRAnalysis
MLIRStandardOps
MLIRPass
MLIRParser
MLIRDialect
MLIRIR
MLIROptLib)
# td_base is the name of a xxx.td file (without the .td suffix)
function(mlir_tablegen_on td_base)
set(options)
set(oneValueArgs DIALECT)
cmake_parse_arguments(mlir_tablegen_on "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(LLVM_TARGET_DEFINITIONS ${td_base}.td)
mlir_tablegen(${td_base}.hpp.inc -gen-op-decls)
mlir_tablegen(${td_base}.cpp.inc -gen-op-defs)
if(mlir_tablegen_on_DIALECT)
mlir_tablegen(${td_base}_dialect.hpp.inc --gen-dialect-decls
-dialect=${mlir_tablegen_on_DIALECT})
endif()
add_public_tablegen_target(${td_base}_IncGen)
add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen)
endfunction()
if(NOT WITH_GPU)
return()
endif()
find_package(PkgConfig)
find_library(
CUDA_NVRTC_LIB libnvrtc nvrtc
HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}"
"${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(LibNVRTC DEFAULT_MSG CUDA_NVRTC_LIB)
message(STATUS "found NVRTC: ${CUDA_NVRTC_LIB}")
mark_as_advanced(CUDA_NVRTC_LIB)
if(NOT LIBNVRTC_FOUND)
message(
FATAL_ERROR
"Cuda NVRTC Library not found: Specify the LIBNVRTC_LIBRARY_DIR where libnvrtc is located"
)
endif()
if((NOT WITH_GPU)
OR WIN32
OR APPLE)
set(NVTX_FOUND OFF)
return()
endif()
set(NVTX_ROOT
"/usr"
CACHE PATH "NVTX ROOT")
find_path(
NVTX_INCLUDE_DIR nvToolsExt.h
PATHS ${NVTX_ROOT} ${NVTX_ROOT}/include $ENV{NVTX_ROOT}
$ENV{NVTX_ROOT}/include ${CUDA_TOOLKIT_INCLUDE}
NO_DEFAULT_PATH)
get_filename_component(__libpath_hint ${CUDA_CUDART_LIBRARY} PATH)
set(TARGET_ARCH "x86_64")
if(NOT ${CMAKE_SYSTEM_PROCESSOR})
set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()
list(
APPEND
NVTX_CHECK_LIBRARY_DIRS
${NVTX_ROOT}
${NVTX_ROOT}/lib64
${NVTX_ROOT}/lib
${NVTX_ROOT}/lib/${TARGET_ARCH}-linux-gnu
$ENV{NVTX_ROOT}
$ENV{NVTX_ROOT}/lib64
$ENV{NVTX_ROOT}/lib
${CUDA_TOOLKIT_ROOT_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/targets/${TARGET_ARCH}-linux/lib)
find_library(
CUDA_NVTX_LIB
NAMES libnvToolsExt.so
PATHS ${NVTX_CHECK_LIBRARY_DIRS} ${NVTX_INCLUDE_DIR} ${__libpath_hint}
NO_DEFAULT_PATH
DOC "Path to the NVTX library.")
if(NVTX_INCLUDE_DIR AND CUDA_NVTX_LIB)
set(NVTX_FOUND ON)
else()
set(NVTX_FOUND OFF)
endif()
if(NVTX_FOUND)
include_directories(${NVTX_INCLUDE_DIR})
add_definitions(-DCINN_WITH_NVTX)
endif()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only gives us a coarse-grained name of the OS CMake is
# building for, but a finer-grained host system name like centos is necessary
# in some scenarios to distinguish systems for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
if(UNIX AND NOT APPLE)
# exclude Apple from the *nix OS family
set(LINUX TRUE)
endif()
if(WIN32)
set(HOST_SYSTEM "win32")
else()
if(APPLE)
set(HOST_SYSTEM "macosx")
exec_program(
sw_vers ARGS
-productVersion
OUTPUT_VARIABLE HOST_SYSTEM_VERSION)
string(REGEX MATCH "[0-9]+.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}")
if(NOT DEFINED $ENV{MACOSX_DEPLOYMENT_TARGET})
# Set cache variable - end user may change this during ccmake or cmake-gui configure.
set(CMAKE_OSX_DEPLOYMENT_TARGET
${MACOS_VERSION}
CACHE
STRING
"Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value."
)
endif()
set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
else()
if(EXISTS "/etc/issue")
file(READ "/etc/issue" LINUX_ISSUE)
if(LINUX_ISSUE MATCHES "CentOS")
set(HOST_SYSTEM "centos")
elseif(LINUX_ISSUE MATCHES "Debian")
set(HOST_SYSTEM "debian")
elseif(LINUX_ISSUE MATCHES "Ubuntu")
set(HOST_SYSTEM "ubuntu")
elseif(LINUX_ISSUE MATCHES "Red Hat")
set(HOST_SYSTEM "redhat")
elseif(LINUX_ISSUE MATCHES "Fedora")
set(HOST_SYSTEM "fedora")
endif()
string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION
"${LINUX_ISSUE}")
endif()
if(EXISTS "/etc/redhat-release")
file(READ "/etc/redhat-release" LINUX_ISSUE)
if(LINUX_ISSUE MATCHES "CentOS")
set(HOST_SYSTEM "centos")
endif()
endif()
if(NOT HOST_SYSTEM)
set(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
endif()
endif()
endif()
# query number of logical cores
cmake_host_system_information(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
mark_as_advanced(HOST_SYSTEM CPU_CORES)
message(
STATUS
"Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}")
message(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# external dependencies log output
set(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD
0 # Wrap download in script to log output
LOG_UPDATE
1 # Wrap update in script to log output
LOG_CONFIGURE
1 # Wrap configure in script to log output
LOG_BUILD
0 # Wrap build in script to log output
LOG_TEST
1 # Wrap test in script to log output
LOG_INSTALL
0 # Wrap install in script to log output
)
# Get the latest git tag.
set(CINN_VERSION $ENV{CINN_VERSION})
set(tmp_version "HEAD")
set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+")
while("${CINN_VERSION}" STREQUAL "")
# Check current branch name
execute_process(
COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref ${tmp_version}
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH_NAME
RESULT_VARIABLE GIT_BRANCH_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${GIT_BRANCH_RESULT})
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always
${tmp_version}
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_TAG_NAME
RESULT_VARIABLE GIT_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${GIT_RESULT})
# Check if current branch is release branch
if(${GIT_BRANCH_NAME} MATCHES "release/${TAG_VERSION_REGEX}")
# Check the tag is a correct version
if(${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}")
# if no tag was found, set CINN_VERSION to 0.0.0 to represent latest
set(CINN_VERSION "0.0.0")
elseif(${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}")
string(REPLACE "v" "" CINN_VERSION ${GIT_TAG_NAME})
else() # otherwise, get the previous git tag name.
set(tmp_version "${GIT_TAG_NAME}~1")
endif()
else()
execute_process(
COMMAND ${GIT_EXECUTABLE} describe --exact-match --tags ${tmp_version}
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_EXACT_TAG_NAME
RESULT_VARIABLE GIT_EXACT_TAG_RESULT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT ${GIT_EXACT_TAG_RESULT})
# Check if current branch is tag branch
if(${GIT_EXACT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}")
string(REPLACE "v" "" CINN_VERSION ${GIT_EXACT_TAG_NAME})
else()
set(CINN_VERSION "0.0.0")
endif()
else()
# otherwise, we always set CINN_VERSION to 0.0.0 to represent latest
set(CINN_VERSION "0.0.0")
endif()
endif()
else()
set(CINN_VERSION "0.0.0")
message(WARNING "Cannot add CINN version from git tag")
endif()
else()
set(CINN_VERSION "0.0.0")
message(WARNING "Cannot add CINN version for wrong git branch result")
endif()
endwhile()
string(REPLACE "-" "." CINN_VER_LIST ${CINN_VERSION})
string(REPLACE "." ";" CINN_VER_LIST ${CINN_VER_LIST})
list(GET CINN_VER_LIST 0 CINN_MAJOR_VER)
list(GET CINN_VER_LIST 1 CINN_MINOR_VER)
list(GET CINN_VER_LIST 2 CINN_PATCH_VER)
math(EXPR CINN_VERSION_INTEGER "${CINN_MAJOR_VER} * 1000000
+ ${CINN_MINOR_VER} * 1000 + ${CINN_PATCH_VER}")
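# For illustration: CINN_VERSION "1.2.3" gives
# CINN_VERSION_INTEGER = 1*1000000 + 2*1000 + 3 = 1002003.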
add_definitions(-DCINN_VERSION=${CINN_VERSION})
add_definitions(-DCINN_VERSION_INTEGER=${CINN_VERSION_INTEGER})
message(
STATUS
"CINN version is ${CINN_VERSION} (major: ${CINN_MAJOR_VER}, minor: ${CINN_MINOR_VER}, patch: ${CINN_PATCH_VER})"
)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_CINN)
return()
endif()
if(NOT CINN_GIT_TAG)
set(CINN_GIT_TAG develop)
endif()
message(STATUS "CINN version: " ${CINN_GIT_TAG})
# TODO(zhhsplendid): CINN has lots of warnings during early development.
# They will be treated as errors under paddle. We set no-error now and we will
# clean the code in the future.
add_definitions(-w)
######################################
# Build CINN from Git External Project
######################################
include(ExternalProject)
set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
set(CINN_OPTIONAL_ARGS
-DPY_VERSION=${PY_VERSION}
-DWITH_CUDA=${WITH_GPU}
-DWITH_CUDNN=${WITH_GPU}
-DWITH_MKL_CBLAS=${WITH_MKL}
-DWITH_MKLDNN=${WITH_MKL}
-DPUBLISH_LIBS=ON
-DWITH_TESTING=ON
-DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}
-DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR}
-DPYTHON_LIBRARIES=${PYTHON_LIBRARIES})
set(CINN_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cinnapi -j)
set(CINN_BINARY_DIR ${CINN_PREFIX_DIR}/src/external_cinn-build)
set(CINN_LIB_NAME "libcinnapi.so")
set(CINN_LIB_LOCATION "${CINN_BINARY_DIR}/dist/cinn/lib")
set(CINN_LIB "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
ExternalProject_Add(
external_cinn
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "${GIT_URL}/PaddlePaddle/CINN.git"
GIT_TAG ${CINN_GIT_TAG}
PREFIX ${CINN_PREFIX_DIR}
BUILD_COMMAND ${CINN_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS ${CINN_OPTIONAL_ARGS}
BUILD_BYPRODUCTS ${CINN_LIB})
ExternalProject_Get_Property(external_cinn BINARY_DIR)
ExternalProject_Get_Property(external_cinn SOURCE_DIR)
set(CINN_SOURCE_DIR ${SOURCE_DIR})
message(STATUS "CINN BINARY_DIR: ${CINN_BINARY_DIR}")
message(STATUS "CINN SOURCE_DIR: ${CINN_SOURCE_DIR}")
######################################
# Add CINN's dependencies header files
######################################
# Add absl
set(ABSL_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/absl/include")
include_directories(${ABSL_INCLUDE_DIR})
# Add isl
set(ISL_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/isl/include")
include_directories(${ISL_INCLUDE_DIR})
# Add LLVM
set(LLVM_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/third_party/llvm/include")
include_directories(${LLVM_INCLUDE_DIR})
######################################################
# Put external_cinn and dependencies together as a lib
######################################################
set(CINN_INCLUDE_DIR "${CINN_BINARY_DIR}/dist/cinn/include")
add_library(cinn SHARED IMPORTED GLOBAL)
set_target_properties(cinn PROPERTIES IMPORTED_LOCATION
"${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
include_directories(${CINN_INCLUDE_DIR})
add_dependencies(cinn external_cinn)
@@ -56,9 +56,14 @@ else()
    "${GTEST_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libgmock.a"
    CACHE FILEPATH "gmock libraries." FORCE)
  set(GTEST_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
  if(CINN_ONLY)
    set(GTEST_CMAKE_CXX_FLAGS "-std=c++17 ${CMAKE_CXX_FLAGS}")
  else()
    set(GTEST_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  endif()
endif()
if(WITH_MKLML)
  # wait for mklml downloading completed
  set(GTEST_DEPENDS ${MKLML_PROJECT})
......
@@ -24,7 +24,7 @@ set(SOURCE_INCLUDE_DIR ${SOURCE_DIR}/include)
include_directories(${PYBIND_INCLUDE_DIR})
set(PYBIND_PATCH_COMMAND "")
if(NOT WIN32 AND NOT CINN_ONLY)
  file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/pybind/cast.h.patch
       native_dst)
  # Note: [Why calling some `git` commands before `patch`?]
......
@@ -260,6 +260,36 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2")
endif()
########################### include third_party according to flags ###############################
# cinn_only includes third-party libraries separately
if(CINN_ONLY)
include(external/zlib)
include(external/gflags)
include(external/glog)
include(external/gtest)
include(external/protobuf)
if(WITH_PYTHON)
include(external/pybind11)
endif()
if(WITH_MKL)
include(external/mklml)
endif()
if(WITH_MKLDNN)
include(external/mkldnn)
endif()
return()
endif()
if(WITH_CINN)
if(WITH_MKL)
add_definitions(-DCINN_WITH_MKL_CBLAS)
endif()
if(WITH_MKLDNN)
add_definitions(-DCINN_WITH_MKLDNN)
endif()
endif()
include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog
@@ -474,20 +504,6 @@ if(WITH_LITE)
  include(external/lite)
endif()
if(WITH_CINN)
message(STATUS "Compile Paddle with CINN.")
include(external/cinn)
add_definitions(-DPADDLE_WITH_CINN)
if(WITH_GPU)
add_definitions(-DCINN_WITH_CUDA)
add_definitions(-DCINN_WITH_CUDNN)
endif()
if(WITH_MKL)
add_definitions(-DCINN_WITH_MKL_CBLAS)
add_definitions(-DCINN_WITH_MKLDNN)
endif()
endif()
if(WITH_CRYPTO)
  include(external/cryptopp) # download, build, install cryptopp
  list(APPEND third_party_deps extern_cryptopp)
......
if(WITH_TESTING)
cinn_cc_library(cinn_gtest_main SRCS gtest_main.cc DEPS gtest gflags)
endif()
add_subdirectory(auto_schedule)
add_subdirectory(common)
add_subdirectory(utils)
add_subdirectory(poly)
add_subdirectory(runtime)
add_subdirectory(ir)
add_subdirectory(backends)
add_subdirectory(lang)
add_subdirectory(optim)
add_subdirectory(hlir)
if(CINN_ONLY)
add_subdirectory(pybind)
endif()
add_subdirectory(frontend)
# Download a model
download_and_uncompress("${DOWNLOAD_MODEL_DIR}" "${PADDLE_RESOURCE_URL}" "lite_naive_model.tar.gz")
core_gather_headers()
add_subdirectory(analysis)
add_subdirectory(cost_model)
add_subdirectory(database)
add_subdirectory(measure)
add_subdirectory(post_schedule_rule)
add_subdirectory(search_space)
add_subdirectory(search_strategy)
add_subdirectory(task)
add_subdirectory(task_scheduler)
add_subdirectory(tests)
cinn_proto_library(auto_schedule_proto SRCS auto_schedule.proto DEPS schedule_desc_proto)
core_gather_headers()
gather_srcs(cinnapi_src SRCS auto_tuner.cc)
#cinn_cc_test(test_auto_tuner SRCS auto_tuner_test.cc DEPS cinncore)
foreach(header ${auto_schedule_proto_HDRS})
set(core_proto_includes "${core_proto_includes};${header}" CACHE INTERNAL "")
endforeach()
core_gather_headers()
gather_srcs(cinnapi_src SRCS analyze_ir.cc)
cinn_cc_test(test_analyze_ir SRCS analyze_ir_test.cc DEPS cinncore)
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/analysis/analyze_ir.h"
#include <glog/logging.h>
#include <algorithm>
#include <string>
#include <unordered_set>
#include "paddle/cinn/ir/buffer.h"
#include "paddle/cinn/ir/collect_ir_nodes.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/ir/lowered_func.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/lang/lower.h"
#include "paddle/cinn/optim/ir_copy.h"
#include "paddle/cinn/optim/optimize.h"
#include "paddle/cinn/optim/transform_gpu_forloop.h"
namespace cinn {
namespace auto_schedule {
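// Extracts the Vars among the given index expressions; indices that are not
// plain variables (e.g. constants or compound expressions) are skipped.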
std::vector<ir::Var> IndicesToVars(const std::vector<ir::Expr>& indices) {
std::vector<ir::Var> result;
for (const ir::Expr& e : indices) {
// TODO: do we need to convert other types, such as constant numbers, to Var?
if (e.As<ir::_Var_>() != nullptr) {
ir::Expr copy_e = optim::IRCopy(e);
ir::_Var_* var_ref = copy_e.As<ir::_Var_>();
result.emplace_back(ir::Var(var_ref));
}
}
return result;
}
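// Fills the read/write buffer ranges of a ScheduleBlock by collecting the
// Load/Store nodes in its body; does nothing if they were already set.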
void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block) {
if (!sche_block->read_buffers.empty() || !sche_block->write_buffers.empty()) {
return;
}
ir::CollectIRNodesWithoutTensor(sche_block->body, [&](const Expr* x) {
const ir::Load* load_expr = x->As<ir::Load>();
if (load_expr != nullptr) {
const ir::Tensor t = load_expr->tensor.as_tensor_ref();
sche_block->read_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(load_expr->indices)));
return false;
}
const ir::Store* store_expr = x->As<ir::Store>();
if (store_expr != nullptr) {
const ir::Tensor t = store_expr->tensor.as_tensor_ref();
sche_block->write_buffers.emplace_back(ir::BufferRange(t->buffer, IndicesToVars(store_expr->indices)));
return false;
}
return false;
});
}
bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types) {
std::set<ir::Expr> collection = ir::CollectIRNodesWithoutTensor(
expr, [&](const Expr* x) { return node_types.find(x->node_type()) != node_types.end(); });
return !collection.empty();
}
std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs) {
std::unordered_set<std::string> result;
for (const ir::LoweredFunc& func : lowered_funcs) {
for (const ir::Argument& arg : func->args) {
if (arg.is_output()) {
result.insert(arg.name());
}
}
}
return result;
}
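// Heuristic: a block is considered worth multi-level tiling when it writes
// exactly one buffer and at least one non-reduce block iter var does not
// index some non-reduction read buffer (the typical matmul-like pattern).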
bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
const ir::ScheduleBlock* sche_block = sche_block_realize.schedule_block.As<ir::ScheduleBlock>();
if (sche_block->write_buffers.size() != 1 || sche_block->read_buffers.empty()) {
return false;
}
const ir::Expr& write_buffer = sche_block->write_buffers[0].As<ir::_BufferRange_>()->buffer;
// Enumerate each read region, get the number of schedule block iter vars
// which are not used to index the read region
int total_unused_iter_vars = 0;
for (const ir::Expr& read_buffer_expr : sche_block->read_buffers) {
const ir::_BufferRange_* read_buffer = read_buffer_expr.As<ir::_BufferRange_>();
// Skip the reduction buffer
if (read_buffer->buffer == write_buffer) {
continue;
}
// Collect the vars in schedule block that are used to index the read region
std::unordered_set<std::string> vars_index_read;
for (const Var& range : read_buffer->ranges) {
vars_index_read.insert(range->name);
}
// Check the block iter vars are not used to index the read region
int n_unused_block_vars = 0;
for (const ir::Var& block_iter_var : sche_block->iter_vars) {
if (!block_iter_var->is_reduce_axis) {
bool iter_var_in_read = false;
for (const std::string& var : vars_index_read) {
if (var == block_iter_var->name) {
iter_var_in_read = true;
break;
}
}
if (!iter_var_in_read) {
++n_unused_block_vars;
}
}
}
total_unused_iter_vars += n_unused_block_vars;
}
return total_unused_iter_vars >= 1;
}
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body) {
ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
ir::IRSchedule ir_sch(mod_expr);
// temp_bufs may be deleted during auto-tuning (e.g. by auto-inline),
// so we have to check the old temp bufs and set them as local buffers.
for (const ir::Buffer& buf : old_func->temp_bufs) {
const std::string& buf_name = buf->name;
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
for (ir::Expr& e : all_block_realizes) {
const ir::ScheduleBlockRealize* sche_block_realize = e.As<ir::ScheduleBlockRealize>();
const std::string& sche_name = sche_block_realize->schedule_block.As<ir::ScheduleBlock>()->name;
if (buf_name == "_" + sche_name) {
VLOG(6) << "Set local buffer for temp buffer " << buf_name;
ir_sch.SetBuffer(e, "local", true);
break;
}
}
}
ir::Expr updated_body = ir_sch.GetModule().GetExprs()[0];
#ifdef CINN_WITH_CUDA
optim::OptimizeExprGPU(&updated_body);
#endif
// Get new temp bufs by analyzing.
std::vector<ir::Buffer> new_temp_bufs = lang::GetTempBuffers(old_func->args, updated_body);
ir::LoweredFunc new_func = ir::_LoweredFunc_::Make(old_func->name, old_func->args, updated_body, new_temp_bufs);
#ifdef CINN_WITH_CUDA
if (target == common::DefaultNVGPUTarget()) {
new_func->PrepareCudaAxisInfoFromBody();
}
#endif
new_func = optim::Optimize(Expr(new_func), target, false).as_lowered_func_ref();
new_func->PrepareBufferCastExprs(/*with_expr_gen_tensor = */ false);
return new_func;
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <unordered_set>
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/ir/lowered_func.h"
namespace cinn {
namespace auto_schedule {
void AnalyzeScheduleBlockReadWriteBuffer(ir::ScheduleBlock* sche_block);
bool ContainsNodeType(ir::Expr expr, const std::unordered_set<ir::IrNodeTy>& node_types);
/**
 * Collects all input lowered_funcs and returns the names of all output arguments.
*/
std::unordered_set<std::string> GetOutputNamesFromLoweredFunc(const std::vector<ir::LoweredFunc>& lowered_funcs);
/**
 * Determines whether a schedule block needs multi-level tiling.
*/
bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
/**
 * Updates a LoweredFunc by regenerating its related fields from a new function body.
*/
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, ir::Expr& body);
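// A minimal usage sketch (hypothetical `old_func` and `tuned_body` values,
// e.g. produced by lang::LowerVec and a tuned ir::IRSchedule respectively):
//   ir::Expr tuned_body = ir_sch.GetModule().GetExprs()[0];
//   ir::LoweredFunc new_func =
//       UpdateFuncWithNewBody(common::DefaultHostTarget(), old_func, tuned_body);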
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/analysis/analyze_ir.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <sstream>
#include <vector>
#include "paddle/cinn/common/context.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/ir/tensor.h"
#include "paddle/cinn/lang/compute.h"
#include "paddle/cinn/lang/lower.h"
#include "paddle/cinn/lang/placeholder.h"
#include "paddle/cinn/poly/stage.h"
#include "paddle/cinn/utils/string.h"
namespace cinn {
namespace auto_schedule {
TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_SimpleAssign) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
ir::Expr M(32);
ir::Expr N(32);
lang::Placeholder<float> A("A", {M, N});
ir::Tensor B = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i, j); }, "B");
poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
ASSERT_FALSE(funcs.empty());
ir::Expr ast_expr = funcs[0]->body;
VLOG(6) << "Analyzing for Expr:";
VLOG(6) << ast_expr;
std::vector<Expr> vec_ast{ast_expr};
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
ASSERT_EQ(all_block_realizes.size(), 1UL);
ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
AnalyzeScheduleBlockReadWriteBuffer(sche_block);
/*
* the sche_block_realize will be:
* ScheduleBlock(B)
* {
* i0, i1 = axis.bind(i, j)
* read_buffers(_A[i0(undefined:undefined), i1(undefined:undefined)])
* write_buffers(_B[i0(undefined:undefined), i1(undefined:undefined)])
* B[i0, i1] = A[i0, i1]
* }
*/
VLOG(6) << "ScheduleBlockRealize: ";
VLOG(6) << all_block_realizes[0];
ASSERT_EQ(sche_block->read_buffers.size(), 1UL);
std::stringstream read_ss;
read_ss << sche_block->read_buffers[0];
ASSERT_EQ(read_ss.str(), "_A[i0(0:32), i1(0:32)]");
ASSERT_EQ(sche_block->write_buffers.size(), 1UL);
std::stringstream write_ss;
write_ss << sche_block->write_buffers[0];
ASSERT_EQ(write_ss.str(), "_B[i0(0:32), i1(0:32)]");
}
TEST(AnalyzeIr, AnalyzeScheduleBlockReadWriteBuffer_AddDiffShape) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
ir::Expr M(32);
ir::Expr N(128);
lang::Placeholder<float> A("A", {M});
lang::Placeholder<float> B("B", {N});
ir::Tensor C = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i) + B(j); }, "C");
poly::StageMap stages = poly::CreateStages({C});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("AddDiffShape", stages, {C}, {}, {}, nullptr, target, true);
ir::Expr ast_expr = funcs[0]->body;
VLOG(6) << "Expr before MultiLevelTiling: ";
VLOG(6) << ast_expr;
std::vector<Expr> vec_ast{ast_expr};
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
std::vector<ir::Expr> all_block_realizes = ir_sch.GetAllBlocks();
ASSERT_EQ(all_block_realizes.size(), 1UL);
ir::ScheduleBlockRealize* sche_block_realize = all_block_realizes[0].As<ir::ScheduleBlockRealize>();
ir::ScheduleBlock* sche_block = sche_block_realize->schedule_block.As<ir::ScheduleBlock>();
AnalyzeScheduleBlockReadWriteBuffer(sche_block);
VLOG(6) << "ScheduleBlockRealize: ";
VLOG(6) << all_block_realizes[0];
ASSERT_EQ(sche_block->read_buffers.size(), 2UL);
std::vector<std::string> expect_read = {"_A[i0(0:32)]", "_B[i1(0:128)]"};
ASSERT_EQ(sche_block->read_buffers.size(), expect_read.size());
for (size_t i = 0; i < expect_read.size(); ++i) {
std::stringstream read_ss;
read_ss << sche_block->read_buffers[i];
ASSERT_EQ(read_ss.str(), expect_read[i]);
}
ASSERT_EQ(sche_block->write_buffers.size(), 1UL);
std::stringstream write_ss;
write_ss << sche_block->write_buffers[0];
ASSERT_EQ(write_ss.str(), "_C[i0(0:32), i1(0:128)]");
}
TEST(AnalyzeIr, ContainsNodeType) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
ir::Expr M(32);
ir::Expr N(32);
lang::Placeholder<float> A("A", {M, N});
ir::Tensor B = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i, j); }, "B");
poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
ASSERT_FALSE(funcs.empty());
ir::Expr ast_expr = funcs[0]->body;
VLOG(6) << "Analyzing for Expr:";
VLOG(6) << ast_expr;
ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::Store}));
ASSERT_TRUE(ContainsNodeType(ast_expr, {ir::IrNodeTy::Load, ir::IrNodeTy::IfThenElse}));
ASSERT_FALSE(ContainsNodeType(ast_expr, {ir::IrNodeTy::IfThenElse, ir::IrNodeTy::Sum}));
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax ="proto3";
package cinn.auto_schedule.proto;
import "paddle/cinn/ir/schedule_desc.proto";
message TuningRecord {
string task_key = 1;
double execution_cost = 2;
double predicted_cost = 3;
cinn.ir.proto.ScheduleDesc trace = 4;
}
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/auto_tuner.h"
#include <glog/logging.h>
#include <pybind11/embed.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "paddle/cinn/auto_schedule/database/jsonfile_database.h"
#include "paddle/cinn/auto_schedule/measure/schedule_measurer.h"
#include "paddle/cinn/auto_schedule/measure/simple_builder.h"
#include "paddle/cinn/auto_schedule/measure/simple_runner.h"
#include "paddle/cinn/auto_schedule/task/task_creator.h"
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/auto_schedule/task/tune_task.h"
#include "paddle/cinn/auto_schedule/task_scheduler/task_scheduler.h"
#include "paddle/cinn/common/context.h"
#include "paddle/cinn/common/type.h"
#include "paddle/cinn/hlir/framework/op.h"
#include "paddle/cinn/hlir/framework/visualize_helper.h"
#include "paddle/cinn/utils/string.h"
namespace cinn {
namespace auto_schedule {
AutoTuner::AutoTuner(const common::Target& target, hlir::framework::Graph* graph) : target_(target), graph_(graph) {}
void AutoTuner::Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler) {
// create builder, runner, and schedule measurer
builder_ = std::make_unique<SimpleBuilder>(graph_compiler);
runner_ = std::make_unique<SimpleRunner>(config.runner_repeat_times);
schedule_measurer_ = std::make_unique<ScheduleMeasurer>(builder_.get(), runner_.get());
// initialize database
database_ = Database::Make(config.database_config);
// create tasks
TaskCreator task_creator;
tasks_ = task_creator.CreateTuneTaskOpLevel(graph_);
const auto& dtype_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, common::Type>>("inferdtype");
const auto& shape_dict = graph_->GetAttrs<absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
op_lowerer_ = std::make_unique<hlir::framework::OpLowerer>(dtype_dict, shape_dict, target_);
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
for (auto i = 0; i < tasks_.size(); ++i) {
auto&& task = tasks_[i];
task.Initialize(shape_dict, dtype_dict, op_lowerer_.get());
// Register the initial ModuleExpr corresponding to the task
task_registry->Regist(task.serialized_key, ir::ModuleExpr(task.GetLoweredFuncBodyExprs()));
VLOG(3) << "Add a task, id:" << i << ", serialized_key:\n" << task.serialized_key;
}
// create task optimizers
utils::LinearRandomEngine::StateType initial_seed = utils::LinearRandomEngine::GetDeviceRandomValue();
task_optimizers_.resize(tasks_.size());
std::transform(tasks_.begin(), tasks_.end(), task_optimizers_.begin(), [&](TuneTask& task) {
return std::make_unique<TaskOptimizer>(
&task, schedule_measurer_.get(), database_.get(), utils::ForkRandomState(&initial_seed));
});
// create task scheduler
task_scheduler_ = TaskScheduler::Make(tasks_, config.task_schedule_config, config.task_schedule_strategy);
}
void PrintResult(std::shared_ptr<hlir::framework::Graph::Group> group) {
if (!VLOG_IS_ON(3)) {
return;
}
auto nodes = group->CollectNodes();
VLOG(3) << "Node size:" << nodes.size();
VLOG(3) << "Group {";
for (auto* node : nodes) {
VLOG(3) << " " << hlir::framework::DebugString(node);
}
VLOG(3) << "}";
}
void PrintResult(const FunctionGroup& functions) {
if (!VLOG_IS_ON(3)) {
return;
}
VLOG(3) << "Function size:" << functions.size();
for (auto i = 0; i < functions.size(); ++i) {
const ir::LoweredFunc& func = functions.at(i);
VLOG(3) << "LoweredFunc-" << i << " detail:\n" << func;
}
}
void PrintResult(const TuningResult& result) {
if (!VLOG_IS_ON(3)) {
return;
}
VLOG(3) << "###### Debug TuningResult ######\n";
VLOG(3) << "Tuned SubGraph num:" << result.subgraphs.size();
for (auto i = 0; i < result.subgraphs.size(); ++i) {
VLOG(3) << "****** SubGraph-" << i << " Detail ******\n";
PrintResult(result.subgraphs.at(i));
VLOG(3) << "****** SubGraph End ******";
}
VLOG(3) << "Tuned FunctionGroup num:" << result.function_groups.size();
for (auto i = 0; i < result.function_groups.size(); ++i) {
VLOG(3) << "****** FunctionGroup-" << i << " Detail ******\n";
PrintResult(result.function_groups.at(i));
VLOG(3) << "****** FunctionGroup End ******";
}
VLOG(3) << "###### TuningResult End ######";
}
TuningResult AutoTuner::Tune(const TuningOptions& options) {
CHECK_GT(options.num_tuning_rounds, 0) << "Invalid config";
VLOG(3) << "Begin tuning with round num=" << options.num_tuning_rounds << ", tasks size=" << tasks_.size();
TuningResult result;
result.subgraphs.resize(tasks_.size());
result.function_groups.resize(tasks_.size());
// A task only tunes the schedule for now, so we populate its sub_graph
// as the default result of graph tuning; this should be updated once
// graph tuning is supported.
for (auto i = 0; i < tasks_.size(); ++i) {
auto&& task = tasks_.at(i);
result.subgraphs[i] = task.subgraph;
}
for (int r = 0; r < options.num_tuning_rounds; ++r) {
VLOG(3) << "<<<<<< Round " << r << " >>>>>>";
int run_id = -1;
task_scheduler_->Reset();
while ((run_id = task_scheduler_->NextTaskId()) != -1) {
VLOG(3) << "Start tuning Task-" << run_id;
auto* opt = task_optimizers_.at(run_id).get();
auto function_group = opt->Optimize(options);
VLOG(3) << "Task-" << run_id << " finished, print optimized functions:\n";
PrintResult(function_group);
// update the best schedules searched so far.
result.function_groups.at(run_id) = std::move(function_group);
}
}
PrintResult(result);
return result;
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/cinn/auto_schedule/measure/schedule_measurer.h"
#include "paddle/cinn/auto_schedule/task/task_optimizer.h"
#include "paddle/cinn/auto_schedule/task/tune_task.h"
#include "paddle/cinn/auto_schedule/task_scheduler/task_scheduler.h"
#include "paddle/cinn/auto_schedule/tuning.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/hlir/framework/graph.h"
#include "paddle/cinn/hlir/framework/graph_compiler.h"
#include "paddle/cinn/hlir/framework/op_lowering.h"
namespace cinn {
namespace auto_schedule {
// This class is the entrance of auto-tuning. Users can use it to tune a
// graph (not supported yet) and to search for a series of schedules that
// are likely to achieve better performance.
// Internally, it creates the necessary components and uses them to perform tuning.
class AutoTuner {
public:
// Configures how to perform auto-tuning, such as the way tasks are
// created, the task scheduling strategy, and so on.
struct Config {
std::string task_schedule_strategy = "round_robin";
TaskScheduler::Config task_schedule_config;
int runner_repeat_times = 1;
DatabaseConfig database_config;
};
AutoTuner(const common::Target& target, hlir::framework::Graph* graph);
// Initialize tuner with specific config and auxiliary objects.
void Initialize(const Config& config, hlir::framework::GraphCompiler* graph_compiler);
// Perform the tuning process and return the final result
TuningResult Tune(const TuningOptions& options);
private:
const common::Target& target_;
hlir::framework::Graph* graph_;
std::unique_ptr<hlir::framework::OpLowerer> op_lowerer_;
// Tasks to tune
std::vector<TuneTask> tasks_;
// Scheduler that select a task to tune at every turn.
std::unique_ptr<TaskScheduler> task_scheduler_;
// The actors that perform auto-tuning; each optimizer takes one task.
std::vector<std::unique_ptr<TaskOptimizer>> task_optimizers_;
// Classes used to measure AutoTune samples
std::unique_ptr<ScheduleBuilder> builder_;
std::unique_ptr<ScheduleRunner> runner_;
std::unique_ptr<ScheduleMeasurer> schedule_measurer_;
// The database to store tuning record
std::unique_ptr<Database> database_;
};
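// A minimal usage sketch (assuming `graph` and `graph_compiler` were built
// elsewhere, as in the tests):
//   AutoTuner tuner(common::DefaultHostTarget(), graph.get());
//   tuner.Initialize(AutoTuner::Config(), graph_compiler.get());
//   TuningResult result = tuner.Tune(TuningOptions());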
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/auto_tuner.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstdlib>
#include <iostream>
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/frontend/net_builder.h"
#include "paddle/cinn/frontend/optimize.h"
#include "paddle/cinn/frontend/syntax.h"
#include "paddle/cinn/hlir/framework/graph.h"
#include "paddle/cinn/hlir/framework/graph_compiler.h"
#include "paddle/cinn/hlir/framework/node.h"
#include "paddle/cinn/hlir/framework/pass.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/runtime/flags.h"
DECLARE_bool(auto_schedule_use_cost_model);
DECLARE_bool(cinn_ir_schedule);
namespace cinn {
namespace auto_schedule {
using ::cinn::hlir::framework::BuildScope;
using ::cinn::hlir::framework::Graph;
using ::cinn::hlir::framework::GraphCompiler;
using ::cinn::hlir::framework::Instruction;
using ::cinn::hlir::framework::Node;
using ::cinn::hlir::framework::Scope;
class TestAutoTuner : public ::testing::Test {
public:
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
std::shared_ptr<Graph> graph;
std::shared_ptr<Scope> compiled_scope;
std::unique_ptr<GraphCompiler> graph_compiler;
std::unique_ptr<AutoTuner> tuner;
frontend::Program CreateAddReluProgram() {
frontend::NetBuilder builder("test");
auto a = builder.CreateInput(Float(32), {1, 64, 112, 112}, "A");
auto b = builder.CreateInput(Float(32), {64}, "B");
auto c = builder.Add(a, b, 1);
auto d = builder.Relu(c);
return builder.Build();
}
void SetUp() override {
srand(0);
// AutoTuner is combined with new IR Schedule
FLAGS_cinn_ir_schedule = true;
std::unordered_set<std::string> fetch_ids;
auto program = CreateAddReluProgram();
auto graph = cinn::frontend::Optimize(&program, fetch_ids, target);
compiled_scope = BuildScope(target, graph);
graph_compiler = std::make_unique<GraphCompiler>(target, compiled_scope, graph);
tuner = std::make_unique<AutoTuner>(target, graph.get());
}
TuningResult InitializeAndTune(const AutoTuner::Config& config, const TuningOptions& options) {
tuner->Initialize(config, graph_compiler.get());
return tuner->Tune(options);
}
virtual void BasicCheckResult(const TuningResult& result) {
ASSERT_EQ(1, result.subgraphs.size());
auto nodes = result.subgraphs.front()->CollectNodes();
ASSERT_EQ(nodes.size(), 4UL);
ASSERT_EQ(nodes[0]->op()->name, "broadcast_to");
ASSERT_EQ(nodes[1]->op()->name, "fill_constant");
ASSERT_EQ(nodes[2]->op()->name, "elementwise_add");
ASSERT_EQ(nodes[3]->op()->name, "max");
ASSERT_EQ(result.function_groups.size(), 1UL);
ASSERT_EQ(result.function_groups[0].size(), 1UL);
}
virtual void ApplyTunedAndRun(const TuningResult& result) {
// build runtime program with tuning result
GraphCompiler::CompileOptions compile_options;
compile_options.with_instantiate_variables = true;
compile_options.Apply(result);
ASSERT_EQ(1, compile_options.groups.size());
ASSERT_EQ(1, compile_options.lowered_funcs.size());
VLOG(6) << "Print lowered_funcs before building";
VLOG(6) << compile_options.lowered_funcs[0][0];  // only one group is compiled
auto runtime_program = graph_compiler->Build(compile_options).runtime_program;
ASSERT_EQ(1, runtime_program->size());
runtime_program->Execute();
}
void ZeroMeasure() {
// set config and options
AutoTuner::Config tuning_config;
tuning_config.task_schedule_strategy = "round_robin";
TuningOptions tuning_options;
tuning_options.num_measure_trials = 0;
auto result = InitializeAndTune(tuning_config, tuning_options);
BasicCheckResult(result);
ApplyTunedAndRun(result);
}
void NonZeroMeasure() {
// set config and options
AutoTuner::Config tuning_config;
tuning_config.task_schedule_strategy = "round_robin";
TuningOptions tuning_options;
tuning_options.num_measure_trials = 4;
tuning_options.num_samples_per_iteration = 2;
auto result = InitializeAndTune(tuning_config, tuning_options);
BasicCheckResult(result);
ApplyTunedAndRun(result);
}
};
TEST_F(TestAutoTuner, ZeroMeasure_DisableCostModel) {
FLAGS_auto_schedule_use_cost_model = false;
ZeroMeasure();
}
TEST_F(TestAutoTuner, ZeroMeasure_EnableCostModel) {
FLAGS_auto_schedule_use_cost_model = true;
ZeroMeasure();
}
TEST_F(TestAutoTuner, NonZeroMeasure_DisableCostModel) {
FLAGS_auto_schedule_use_cost_model = false;
NonZeroMeasure();
}
TEST_F(TestAutoTuner, NonZeroMeasure_EnableCostModel) {
FLAGS_auto_schedule_use_cost_model = true;
NonZeroMeasure();
}
} // namespace auto_schedule
} // namespace cinn
core_gather_headers()
gather_srcs(cinnapi_src SRCS xgb_cost_model.cc expr_cost_model.cc feature.cc feature_extractor.cc)
cinn_cc_test(test_xgb_cost_model SRCS xgb_cost_model_test.cc DEPS cinncore)
cinn_cc_test(test_feature_extractor SRCS feature_extractor_test.cc DEPS cinncore)
cinn_cc_test(test_feature SRCS feature_test.cc DEPS cinncore)
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/expr_cost_model.h"
#include <glog/logging.h>
#include <atomic>
#include <vector>
#include "paddle/cinn/auto_schedule/cost_model/feature.h"
#include "paddle/cinn/auto_schedule/cost_model/feature_extractor.h"
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/ir/ir_schedule.h"
namespace cinn {
namespace auto_schedule {
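// Predict returns SearchState::NOT_INIT_COST until the model has been
// trained at least once (Train() stores 1 into trained_times_).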
float ExprCostModel::Predict(const ir::ModuleExpr& sample, const common::Target& target) const {
if (trained_times_.load() == 0) {
return SearchState::NOT_INIT_COST;
}
FeatureExtractor extractor;
Feature feature = extractor.Extract(sample, target);
std::vector<float> feature_numbers = feature.ToFixedSizeVector();
std::vector<float> pred = XgbCostModel::Predict({feature_numbers});
return pred[0];
}
void ExprCostModel::Train(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target) {
trained_times_.store(1);
size_t total_size = samples.size();
CHECK_EQ(total_size, labels.size()) << "Samples must have same size as labels";
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
CHECK(samples[i] != nullptr) << "Train samples cannot be nullptr";
Feature feature = extractor.Extract(*samples[i], target);
train_feature_numbers[i] = feature.ToFixedSizeVector();
}
XgbCostModel::Train(train_feature_numbers, labels);
}
void ExprCostModel::Update(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target) {
++trained_times_;
size_t total_size = samples.size();
CHECK_EQ(total_size, labels.size()) << "Samples must have same size as labels";
std::vector<std::vector<float>> train_feature_numbers(total_size);
FeatureExtractor extractor;
for (size_t i = 0; i < total_size; ++i) {
CHECK(samples[i] != nullptr) << "Train samples cannot be nullptr";
Feature feature = extractor.Extract(*samples[i], target);
train_feature_numbers[i] = feature.ToFixedSizeVector();
}
XgbCostModel::Update(train_feature_numbers, labels);
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <atomic>
#include <vector>
#include "paddle/cinn/auto_schedule/cost_model/xgb_cost_model.h"
#include "paddle/cinn/ir/ir_schedule.h"
namespace cinn {
namespace auto_schedule {
/**
 * A C++ cost model which trains and predicts on ir::Expr.
*/
class ExprCostModel : public XgbCostModel {
public:
virtual float Predict(const ir::ModuleExpr& sample, const common::Target& target) const;
void Train(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target);
void Update(const std::vector<const ir::ModuleExpr*>& samples,
const std::vector<float>& labels,
const common::Target& target);
private:
std::atomic<int> trained_times_{0};
};
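// A minimal usage sketch (assuming `samples` and the measured `costs` are
// gathered elsewhere during tuning):
//   ExprCostModel model;
//   model.Train(samples, costs, common::DefaultHostTarget());
//   float cost = model.Predict(*samples[0], common::DefaultHostTarget());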
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/feature.h"
#include <glog/logging.h>
#include <vector>
#include "paddle/cinn/common/target.h"
namespace cinn {
namespace auto_schedule {
Feature::Feature()
: target_(common::UnkTarget()),
stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block
current_loop_block_index_(0),
parent_indices_(1, -1) {}
Feature::Feature(const common::Target& target)
: target_(target),
stack_encoded_feature_(1), // initialize a LoopBlockFeature as root block
current_loop_block_index_(0),
parent_indices_(1, -1) {}
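// Flattens the variable-length per-loop-block features into a fixed-size
// vector: slot 0 encodes the target (1 for NVGPU, 0 otherwise), each counter
// is accumulated weighted by the iteration product of its loop block, and
// every entry is slog-scaled at the end.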
std::vector<float> Feature::ToFixedSizeVector() {
std::vector<float> ret(LoopBlockFeature::kTotalSize + 1, 0); // LoopBlockFeature::kTotalSize plus 1 for target
if (target_ == common::DefaultNVGPUTarget()) {
ret[0] = 1;
} // else 0 for other cases
// the feature counts of loop[i] must be multiplied by iter_multi_num[i]
std::vector<int> iter_multi_num;
for (size_t i = 0; i < stack_encoded_feature_.size(); ++i) {
int j = 1;
const LoopBlockFeature& loop_feature = stack_encoded_feature_[i];
int loop_prod = 1;
int parent_prod = 1;
if (i != 0) {
parent_prod = iter_multi_num[parent_indices_[i]];
loop_prod = parent_prod * loop_feature.loop_length;
}
iter_multi_num.push_back(loop_prod);
ret[j] += (loop_feature.float_add_or_sub * loop_prod);
++j;
ret[j] += (loop_feature.float_mul * loop_prod);
++j;
ret[j] += (loop_feature.float_div_or_mod * loop_prod);
++j;
ret[j] += (loop_feature.float_cmp * loop_prod);
++j;
ret[j] += (loop_feature.float_math_func * loop_prod);
++j;
ret[j] += (loop_feature.float_other_call * loop_prod);
++j;
ret[j] += (loop_feature.int_add_or_sub * loop_prod);
++j;
ret[j] += (loop_feature.int_mul * loop_prod);
++j;
ret[j] += (loop_feature.int_div_or_mod * loop_prod);
++j;
ret[j] += (loop_feature.int_cmp * loop_prod);
++j;
ret[j] += (loop_feature.int_math_func * loop_prod);
++j;
ret[j] += (loop_feature.int_other_call * loop_prod);
++j;
ret[j] += (loop_feature.bool_op * loop_prod);
++j;
ret[j] += (loop_feature.select_op * loop_prod);
++j;
ret[j] += (loop_feature.mem_alloc * loop_prod);
++j;
ret[j] += (loop_feature.mem_free * loop_prod);
++j;
ret[j] += (loop_feature.mem_read * loop_prod);
++j;
ret[j] += (loop_feature.mem_write * loop_prod);
++j;
ret[j] += (loop_feature.float_reduce_sum_or_sub * loop_prod);
++j;
ret[j] += (loop_feature.float_reduce_mul * loop_prod);
++j;
ret[j] += (loop_feature.float_reduce_div * loop_prod);
++j;
ret[j] += (loop_feature.float_reduce_max_or_min * loop_prod);
++j;
ret[j] += (loop_feature.float_broadcast * loop_prod);
++j;
ret[j] += (loop_feature.int_reduce_sum_or_sub * loop_prod);
++j;
ret[j] += (loop_feature.int_reduce_mul * loop_prod);
++j;
ret[j] += (loop_feature.int_reduce_div * loop_prod);
++j;
ret[j] += (loop_feature.int_reduce_max_or_min * loop_prod);
++j;
ret[j] += (loop_feature.int_broadcast * loop_prod);
++j;
ret[j + static_cast<int>(loop_feature.loop_opt_type)] += 1;
j += LoopBlockFeature::kOptApplySize;
ret[j] += (loop_feature.len_blockIdx_x * parent_prod);
++j;
ret[j] += (loop_feature.len_blockIdx_y * parent_prod);
++j;
ret[j] += (loop_feature.len_blockIdx_z * parent_prod);
++j;
ret[j] += (loop_feature.len_threadIdx_x * parent_prod);
++j;
ret[j] += (loop_feature.len_threadIdx_y * parent_prod);
++j;
ret[j] += (loop_feature.len_threadIdx_z * parent_prod);
++j;
ret[j] += (loop_feature.len_vthread * parent_prod);
++j;
ret[j] += (loop_feature.vectorize_factor * parent_prod);
++j;
}
for (size_t i = 0; i < ret.size(); ++i) {
ret[i] = slog(ret[i]);
}
return ret;
}
void Feature::IntoLoopBlock() {
stack_encoded_feature_.emplace_back(LoopBlockFeature());
stack_encoded_feature_[current_loop_block_index_].num_sub_loops += 1;
parent_indices_.push_back(current_loop_block_index_);
current_loop_block_index_ = stack_encoded_feature_.size() - 1;
}
void Feature::ExitLoopBlock() { current_loop_block_index_ = parent_indices_[current_loop_block_index_]; }
LoopBlockFeature& Feature::CurrentLoopBlock() { return stack_encoded_feature_[current_loop_block_index_]; }
const LoopBlockFeature& Feature::CurrentLoopBlock() const { return stack_encoded_feature_[current_loop_block_index_]; }
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cmath>
#include <vector>
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/ir/ir_schedule.h"
namespace cinn {
namespace auto_schedule {
/* Loop feature enums */
enum class ForOptimizeFeatureEnum : int { kNone, kGpuBind, kParallel, kUnroll, kVectorize };
/* function to scale feature numbers */
inline float slog(float x) { return x < 0 ? std::log2(-x + 1) : std::log2(x + 1); }
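// e.g. slog(0) == 0, slog(1) == 1, slog(3) == 2, and slog(-3) == 2; the sign
// is discarded so that scaled features stay non-negative.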
class LoopBlockFeature {
public:
// TODO(zhhsplendid): distinguish more types such as float16, float32,
// float64, etc. However, the speed gap between float and int is larger
// than the gap between different bit widths, so we only distinguish int
// and float here.
/* Arithmetic features */
int float_add_or_sub = 0;
int float_mul = 0;
int float_div_or_mod = 0;
int float_cmp = 0;
int float_math_func = 0;
int float_other_call = 0; // like simple assign, cast, etc.
int int_add_or_sub = 0;
int int_mul = 0;
int int_div_or_mod = 0;
int int_cmp = 0;
int int_math_func = 0;
int int_other_call = 0; // like simple assign, cast, etc.
int bool_op = 0;
int select_op = 0;
static constexpr int kArithSize = 6 * 2 + 2;
/**
 * Buffer memory features, i.e. the numbers of memory operations.
 * Note that memory operations of different sizes can run at different
 * speeds, but the difference is usually small; a meticulous TODO would be
 * to also collect operand sizes (alloc size, write size, and so on).
*/
int mem_alloc = 0;
int mem_free = 0;
int mem_read = 0;
int mem_write = 0;
static constexpr int kMemSize = 4;
/**
* Reduce and Broadcast features
*/
int float_reduce_sum_or_sub = 0;
int float_reduce_mul = 0;
int float_reduce_div = 0;
int float_reduce_max_or_min = 0;
int float_broadcast = 0;
int int_reduce_sum_or_sub = 0;
int int_reduce_mul = 0;
int int_reduce_div = 0;
int int_reduce_max_or_min = 0;
int int_broadcast = 0;
static constexpr int kReduceBroadcastSize = 10;
/* Loop type features */
// TODO: maybe add a loop position (Inner, Outer, Middle) feature
ForOptimizeFeatureEnum loop_opt_type = ForOptimizeFeatureEnum::kNone;
static constexpr int kOptApplySize = 5;
/* Thread features if loop is optimized by GPU or CPU parallelism.
* Useless in other cases.
*/
int len_blockIdx_x = 0;
int len_blockIdx_y = 0;
int len_blockIdx_z = 0;
int len_threadIdx_x = 0;
int len_threadIdx_y = 0;
int len_threadIdx_z = 0;
int len_vthread = 0; // length of virtual thread
int vectorize_factor = 0;
static constexpr int kThreadFeatureSize = 8;
static constexpr int kTotalSize = kArithSize + kMemSize + kReduceBroadcastSize + kOptApplySize + kThreadFeatureSize;
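// kTotalSize = 14 (arith) + 4 (mem) + 10 (reduce/broadcast) + 5 (opt apply)
// + 8 (thread) = 41; ToFixedSizeVector() returns kTotalSize + 1 floats, the
// extra leading slot encoding the target.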
/* Non-feature attributes, used for bookkeeping during feature extraction */
// Number of sub loop blocks directly inside the current one
int num_sub_loops = 0;
// Number of repeats of this loop, -1 represents unknown
int loop_length = 1;
};
/**
* Feature of Expr. It is used in CostModel
*/
class Feature {
public:
Feature();
Feature(const common::Target& target);
// Converts the variable-length loop block features to a fixed-size vector
std::vector<float> ToFixedSizeVector();
// Call when visiting into a loop block, to start collecting its LoopBlockFeature
void IntoLoopBlock();
// Call when exiting a loop block, to finish collecting its LoopBlockFeature
void ExitLoopBlock();
// The current loop block which we should collect feature on
LoopBlockFeature& CurrentLoopBlock();
// The current loop block which we should collect feature on
const LoopBlockFeature& CurrentLoopBlock() const;
private:
// We encode the computation features as a variable-length vector.
// The root compute block is not a loop, but we treat it as a size-1 loop.
// Blocks are encoded like a stack. Each LoopBlockFeature contains a
// num_sub_loops indicating how many next-level sub-loop-blocks it contains.
//
// For example, code like:
//
// some_compute_0
// loop1 {
// some_compute_1
// loop2 {
// some_compute_2
// }
// }
//
// loop3 {
// some_compute_3
// }
//
// We go through the code and push loops into stack, then the features are encoded as
// [loop_block_feature_0, loop_block_feature_1, loop_block_feature_2, loop_block_feature_3]
// where loop_block_feature_i stores the features of some_compute_i (such
// as number of arithmetic operations)
//
// loop_block_feature_0.num_sub_loops = 2
// loop_block_feature_1.num_sub_loops = 1
// loop_block_feature_2.num_sub_loops = 0
// loop_block_feature_3.num_sub_loops = 0
std::vector<LoopBlockFeature> stack_encoded_feature_;
int current_loop_block_index_;
std::vector<int> parent_indices_;
common::Target target_;
};
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/feature_extractor.h"
#include <vector>
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/common/type.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/optim/ir_copy.h"
#include "paddle/cinn/optim/transform_polyfor_to_for.h"
namespace cinn {
namespace auto_schedule {
using namespace ::cinn::ir;
FeatureExtractor::FeatureExtractor() {}
void FeatureExtractor::Visit(const Expr *x) { IRVisitor::Visit(x); }
Feature FeatureExtractor::Extract(const ir::ModuleExpr &mod_expr, const common::Target &target) {
feature_ = Feature(target);
for (const ir::Expr &e : mod_expr.GetExprs()) {
Visit(&e);
}
return feature_;
}
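// The node types below carry no feature signal themselves; the macro only
// recurses into their defined sub-expressions.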
#define VisitDoNothing(NodeType) \
void FeatureExtractor::Visit(const NodeType *x) { \
std::vector<const Expr *> sub_exprs = x->expr_fields(); \
for (const Expr *e : sub_exprs) { \
if (e->defined()) { \
Visit(e); \
} \
} \
}
VisitDoNothing(IntImm);
VisitDoNothing(UIntImm);
VisitDoNothing(FloatImm);
VisitDoNothing(StringImm);
VisitDoNothing(Block);
VisitDoNothing(_Module_);
VisitDoNothing(_Var_);
VisitDoNothing(_LoweredFunc_);
VisitDoNothing(ScheduleBlock);
VisitDoNothing(ScheduleBlockRealize);
VisitDoNothing(Ramp);
VisitDoNothing(_Buffer_);
VisitDoNothing(_BufferRange_);
#define NotVisitExprFields(NodeType) \
void FeatureExtractor::Visit(const NodeType *x) {}
NotVisitExprFields(_Tensor_)
#define VisitForDtypePattern(NodeType, member) \
void FeatureExtractor::Visit(const NodeType *x) { \
if (x->type() == common::F32() || x->type() == common::F16() || x->type() == common::F64()) { \
feature_.CurrentLoopBlock().float_##member += x->type().lanes(); \
} else { \
feature_.CurrentLoopBlock().int_##member += x->type().lanes(); \
} \
std::vector<const Expr *> sub_exprs = x->expr_fields(); \
for (const Expr *e : sub_exprs) { \
if (e->defined()) { \
Visit(e); \
} \
} \
}
VisitForDtypePattern(Add, add_or_sub);
VisitForDtypePattern(Sub, add_or_sub);
VisitForDtypePattern(Minus, add_or_sub);
VisitForDtypePattern(Mul, mul);
VisitForDtypePattern(Div, div_or_mod);
VisitForDtypePattern(Mod, div_or_mod);
VisitForDtypePattern(FracOp, div_or_mod);
VisitForDtypePattern(EQ, cmp);
VisitForDtypePattern(NE, cmp);
VisitForDtypePattern(GT, cmp);
VisitForDtypePattern(GE, cmp);
VisitForDtypePattern(LT, cmp);
VisitForDtypePattern(LE, cmp);
VisitForDtypePattern(Call, math_func);
VisitForDtypePattern(PrimitiveNode, math_func);
VisitForDtypePattern(Cast, other_call);
VisitForDtypePattern(Let, other_call);
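// Sum/Product are n-ary nodes: n operands imply n - 1 binary operations,
// hence the (operands().size() - 1) increment below.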
#define VisitForMultiOperandsDtypePattern(NodeType, member) \
void FeatureExtractor::Visit(const NodeType *x) { \
if (x->type() == common::F32() || x->type() == common::F16() || x->type() == common::F64()) { \
feature_.CurrentLoopBlock().float_##member += (x->operands().size() - 1); \
} else { \
feature_.CurrentLoopBlock().int_##member += (x->operands().size() - 1); \
} \
std::vector<const Expr *> sub_exprs = x->expr_fields(); \
for (const Expr *e : sub_exprs) { \
if (e->defined()) { \
Visit(e); \
} \
} \
}
VisitForMultiOperandsDtypePattern(Sum, add_or_sub);
VisitForMultiOperandsDtypePattern(Product, mul);
#define VisitCountMemberPattern(NodeType, member) \
void FeatureExtractor::Visit(const NodeType *x) { \
feature_.CurrentLoopBlock().member += 1; \
std::vector<const Expr *> sub_exprs = x->expr_fields(); \
for (const Expr *e : sub_exprs) { \
if (e->defined()) { \
Visit(e); \
} \
} \
}
VisitCountMemberPattern(And, bool_op);
VisitCountMemberPattern(Or, bool_op);
VisitCountMemberPattern(Not, bool_op);
VisitCountMemberPattern(Max, select_op);
VisitCountMemberPattern(Min, select_op);
VisitCountMemberPattern(IfThenElse, select_op);
VisitCountMemberPattern(Select, select_op);
VisitCountMemberPattern(Alloc, mem_alloc);
VisitCountMemberPattern(Free, mem_free);
VisitCountMemberPattern(Load, mem_read);
VisitCountMemberPattern(Store, mem_write);
/* Visit for loops */
void FeatureExtractor::Visit(const For *x) {
feature_.IntoLoopBlock();
LoopBlockFeature &loop_feature = feature_.CurrentLoopBlock();
if (x->min.is_constant() && x->extent.is_constant()) {
loop_feature.loop_length = (x->extent.get_constant() - x->min.get_constant());
} else {
loop_feature.loop_length = -1; // -1 represents unknown
}
if (x->is_parallel()) {
loop_feature.loop_opt_type = ForOptimizeFeatureEnum::kParallel;
loop_feature.len_vthread = loop_feature.loop_length;
} else if (x->is_unrolled()) {
loop_feature.loop_opt_type = ForOptimizeFeatureEnum::kUnroll;
} else if (x->is_vectorized()) {
loop_feature.loop_opt_type = ForOptimizeFeatureEnum::kVectorize;
loop_feature.vectorize_factor = x->vectorize_info().factor;
} else if (x->is_binded()) {
loop_feature.loop_opt_type = ForOptimizeFeatureEnum::kGpuBind;
const BindInfo &bind_info = x->bind_info();
int offset = bind_info.offset;
if (bind_info.for_type == ForType::GPUBlock) {
if (offset == 0) {
loop_feature.len_blockIdx_x = loop_feature.loop_length;
} else if (offset == 1) {
loop_feature.len_blockIdx_y = loop_feature.loop_length;
} else if (offset == 2) {
loop_feature.len_blockIdx_z = loop_feature.loop_length;
}
} else if (bind_info.for_type == ForType::GPUThread) {
if (offset == 0) {
loop_feature.len_threadIdx_x = loop_feature.loop_length;
} else if (offset == 1) {
loop_feature.len_threadIdx_y = loop_feature.loop_length;
} else if (offset == 2) {
loop_feature.len_threadIdx_z = loop_feature.loop_length;
}
}
}
std::vector<const Expr *> sub_exprs = x->expr_fields();
for (const Expr *e : sub_exprs) {
Visit(e);
}
feature_.ExitLoopBlock();
}
void FeatureExtractor::Visit(const PolyFor *x) {
Expr copy = optim::IRCopy(Expr(x));
feature_.IntoLoopBlock();
optim::TransformPolyForToFor(&copy);
ir::For *loop = copy.As<For>();
CHECK(loop != nullptr);
Visit(loop);
feature_.ExitLoopBlock();
}
/* Visit for Reduce and Broadcast */
void FeatureExtractor::Visit(const Reduce *x) {
if (x->type() == common::F32() || x->type() == common::F16() || x->type() == common::F64()) {
switch (x->reduce_type) {
case Reduce::ReduceType::kSum:
feature_.CurrentLoopBlock().float_reduce_sum_or_sub += x->type().lanes();
break;
case Reduce::ReduceType::kSub:
feature_.CurrentLoopBlock().float_reduce_sum_or_sub += x->type().lanes();
break;
case Reduce::ReduceType::kDiv:
feature_.CurrentLoopBlock().float_reduce_div += x->type().lanes();
break;
case Reduce::ReduceType::kMul:
feature_.CurrentLoopBlock().float_reduce_mul += x->type().lanes();
break;
case Reduce::ReduceType::kMax:
feature_.CurrentLoopBlock().float_reduce_max_or_min += x->type().lanes();
break;
case Reduce::ReduceType::kMin:
feature_.CurrentLoopBlock().float_reduce_max_or_min += x->type().lanes();
break;
}
} else {
switch (x->reduce_type) {
case Reduce::ReduceType::kSum:
feature_.CurrentLoopBlock().int_reduce_sum_or_sub += x->type().lanes();
break;
case Reduce::ReduceType::kSub:
feature_.CurrentLoopBlock().int_reduce_sum_or_sub += x->type().lanes();
break;
case Reduce::ReduceType::kDiv:
feature_.CurrentLoopBlock().int_reduce_div += x->type().lanes();
break;
case Reduce::ReduceType::kMul:
feature_.CurrentLoopBlock().int_reduce_mul += x->type().lanes();
break;
case Reduce::ReduceType::kMax:
feature_.CurrentLoopBlock().int_reduce_max_or_min += x->type().lanes();
break;
case Reduce::ReduceType::kMin:
feature_.CurrentLoopBlock().int_reduce_max_or_min += x->type().lanes();
break;
}
}
std::vector<const Expr *> sub_exprs = x->expr_fields();
for (const Expr *e : sub_exprs) {
Visit(e);
}
}
VisitForDtypePattern(Broadcast, broadcast);
/* Visit for IntrinsicOp */
void FeatureExtractor::Visit(const IntrinsicOp *x) {
switch (x->getKind()) {
#define __(op__) \
case IntrinsicKind::k##op__: \
Visit(llvm::dyn_cast<intrinsics::op__>(x)); \
break;
INTRINSIC_KIND_FOR_EACH(__)
#undef __
}
}
VisitDoNothing(intrinsics::BufferGetDataHandle);
VisitDoNothing(intrinsics::BufferGetDataConstHandle);
VisitDoNothing(intrinsics::PodValueToX);
VisitDoNothing(intrinsics::BufferCreate);
VisitDoNothing(intrinsics::GetAddr);
VisitDoNothing(intrinsics::ArgsConstruct);
VisitForDtypePattern(intrinsics::BuiltinIntrin, other_call)
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/cinn/auto_schedule/cost_model/feature.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/ir/ir_visitor.h"
namespace cinn {
namespace auto_schedule {
class FeatureExtractor : public ir::IRVisitor {
public:
FeatureExtractor();
Feature Extract(const ir::ModuleExpr& mod_expr, const common::Target& target);
void Visit(const Expr* x) override;
#define __(op__) void Visit(const ir::op__* x) override;
NODETY_FORALL(__)
#undef __
#define __(op__) virtual void Visit(const ir::intrinsics::op__* x);
INTRINSIC_KIND_FOR_EACH(__)
#undef __
private:
Feature feature_;
};
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/feature_extractor.h"
#include <gtest/gtest.h>
#include <pybind11/embed.h>
#include <cmath>
#include <unordered_set>
#include <vector>
#include "paddle/cinn/common/context.h"
#include "paddle/cinn/ir/ir.h"
#include "paddle/cinn/ir/ir_base.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/lang/builtin.h"
#include "paddle/cinn/lang/compute.h"
#include "paddle/cinn/lang/lower.h"
#include "paddle/cinn/lang/placeholder.h"
#include "paddle/cinn/poly/stage.h"
namespace cinn {
namespace auto_schedule {
TEST(FeatureExtractor, SimpleAssign) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
ir::Expr M(32);
ir::Expr N(32);
lang::Placeholder<float> A("A", {M, N});
ir::Tensor B = lang::Compute(
{M, N}, [&](Var i, Var j) { return A(i, j); }, "B");
poly::StageMap stages = poly::CreateStages({A, B});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("SimpleAssign", stages, {A, B}, {}, {}, nullptr, target, true);
ir::Expr ast_expr = funcs[0]->body;
VLOG(6) << "Expr to test: " << ast_expr;
std::vector<Expr> vec_ast{ast_expr};
ir::ModuleExpr mod_expr(vec_ast);
FeatureExtractor extractor;
Feature feature = extractor.Extract(mod_expr, target);
std::vector<float> to_check = feature.ToFixedSizeVector();
ASSERT_EQ(to_check.size(), static_cast<size_t>(LoopBlockFeature::kTotalSize + 1));
VLOG(6) << "Feature data before slog:";
for (size_t i = 0; i < to_check.size(); ++i) {
VLOG(6) << i << " " << (std::pow(2, to_check[i]) - 1);
if (i != 0 && i != 17 && i != 18 && i != 29) {
ASSERT_EQ(to_check[i], 0);
}
}
// target
#ifdef CINN_WITH_CUDA
ASSERT_EQ(to_check[0], 1);
#else
ASSERT_EQ(to_check[0], 0);
#endif
  // mem_read
  ASSERT_EQ(to_check[17], slog(M.get_constant() * N.get_constant()));
  // mem_write
  ASSERT_EQ(to_check[18], slog(M.get_constant() * N.get_constant()));
// non-opt loops, including root block
ASSERT_EQ(to_check[29], slog(3));
}
TEST(FeatureExtractor, MatrixMultiply) {
Context::Global().ResetNameId();
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
ir::Expr M(2);
ir::Expr N(2);
ir::Expr K(4);
lang::Placeholder<float> A("A", {M, K});
lang::Placeholder<float> B("B", {K, N});
ir::Var k(K.as_int32(), "reduce_axis_k");
ir::Tensor C = lang::Compute(
{M, N}, [&](Var i, Var j) { return lang::ReduceSum(A(i, k) * B(k, j), {k}); }, "C");
poly::StageMap stages = poly::CreateStages({C});
std::vector<ir::LoweredFunc> funcs = lang::LowerVec("MatrixMultiply", stages, {C}, {}, {}, nullptr, target, true);
std::vector<Expr> vec_ast{funcs[0]->body};
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
std::vector<ir::Expr> blocks = ir_sch.GetAllBlocks();
std::vector<ir::Expr> loops = ir_sch.GetLoops(blocks[0]);
ir_sch.Bind(loops.back(), "threadIdx.x");
ir::Expr ast_expr = mod_expr.GetExprs()[0];
VLOG(6) << "Expr to test: " << ast_expr;
FeatureExtractor extractor;
Feature feature = extractor.Extract(mod_expr, target);
std::vector<float> to_check = feature.ToFixedSizeVector();
ASSERT_EQ(to_check.size(), static_cast<size_t>(LoopBlockFeature::kTotalSize + 1));
  std::unordered_set<size_t> non_zero_indices = {0, 1, 2, 17, 18, 29, 30, 37};
for (size_t i = 0; i < to_check.size(); ++i) {
VLOG(6) << i << " " << (std::pow(2, to_check[i]) - 1);
    if (!non_zero_indices.count(i)) {
ASSERT_EQ(to_check[i], 0);
}
}
// target
#ifdef CINN_WITH_CUDA
ASSERT_EQ(to_check[0], 1);
#else
ASSERT_EQ(to_check[0], 0);
#endif
float out_loop = M.get_constant() * N.get_constant();
float total_loop = out_loop * K.get_constant();
// float_mul
ASSERT_EQ(to_check[1], slog(total_loop));
// float_add_or_sub
ASSERT_EQ(to_check[2], slog(total_loop));
// mem_read
ASSERT_EQ(to_check[17], slog(total_loop * 3));
// mem_write
ASSERT_EQ(to_check[18], slog(total_loop + out_loop));
// non-opt loops, including root block
ASSERT_EQ(to_check[29], slog(3));
// GpuBind loop
ASSERT_EQ(to_check[30], slog(1));
// GpuBind loop
ASSERT_EQ(to_check[37], slog(out_loop));
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/feature.h"
#include <gtest/gtest.h>
#include <pybind11/embed.h>
namespace cinn {
namespace auto_schedule {
TEST(Feature, Basic) {
// TODO(zhhsplendid): add some basic tests
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/xgb_cost_model.h"
#include <dirent.h>
#include <glog/logging.h>
#include <pybind11/embed.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <atomic>
#include <cassert>
#include <cstring>
#include <iostream>
#include <memory>
#include <mutex>
#include <regex>
#include <string>
#include <utility>
#include <vector>
#include "paddle/cinn/common/python_interpreter_guard.h"
namespace cinn {
namespace auto_schedule {
std::atomic<int> XgbCostModel::xgb_cost_model_count_(0);
// Convert 1D vector to py numpy
template <typename Dtype>
pybind11::array VectorToNumpy(const std::vector<Dtype>& vec) {
return pybind11::array(pybind11::cast(vec));
}
// Convert 2D vector to py numpy
template <typename Dtype>
pybind11::array VectorToNumpy(const std::vector<std::vector<Dtype>>& vec) {
if (vec.size() == 0) {
return pybind11::array(pybind11::dtype::of<Dtype>(), {0, 0});
}
std::vector<size_t> shape{vec.size(), vec[0].size()};
pybind11::array ret(pybind11::dtype::of<Dtype>(), shape);
Dtype* py_data = static_cast<Dtype*>(ret.mutable_data());
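  // copy the source rows one by one into the contiguous 2-D numpy buffer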
for (size_t i = 0; i < vec.size(); ++i) {
assert(vec[i].size() == shape[1] && "Sub vectors must have same size in VectorToNumpy");
memcpy(py_data + (shape[1] * i), vec[i].data(), shape[1] * sizeof(Dtype));
}
return ret;
}
// The Pybind default Python interpreter doesn't contain some paths in
// sys.path, so we have to add them.
//
// Note: the Pybind default Python interpreter only uses the default Python.
// Something may go wrong when users use a virtual Python environment.
void AddDistPkgToPythonSysPath() {
pybind11::module sys_py_mod = pybind11::module::import("sys");
  // short version such as "3.7", "3.8", "3.10", ...; cut at the second dot so
  // that double-digit minor versions are handled correctly
  std::string py_version = sys_py_mod.attr("version").cast<std::string>();
  std::string py_short_version = py_version.substr(0, py_version.find('.', py_version.find('.') + 1));
std::string site_pkg_str = "/usr/local/lib/python" + py_short_version + "/dist-packages";
sys_py_mod.attr("path").attr("append")(site_pkg_str);
// TODO(zhhsplendid): warning to users if setuptools hasn't been installed
DIR* site_pkg_dir = opendir(site_pkg_str.c_str());
if (site_pkg_dir != nullptr) {
std::regex setuptool_regex("setuptools-.*-py" + py_short_version + "\\.egg");
struct dirent* entry = nullptr;
while ((entry = readdir(site_pkg_dir)) != nullptr) {
if (std::regex_match(entry->d_name, setuptool_regex)) {
sys_py_mod.attr("path").attr("append")(site_pkg_str + "/" + entry->d_name);
}
}
closedir(site_pkg_dir);
}
}
XgbCostModel::XgbCostModel() {
common::PythonInterpreterGuard::Guard();
int previous = xgb_cost_model_count_.fetch_add(1);
if (previous == 0) {
AddDistPkgToPythonSysPath();
}
xgb_module_ = pybind11::module::import("xgboost");
xgb_booster_ = xgb_module_.attr("Booster")();
}
void XgbCostModel::Train(const std::vector<std::vector<float>>& samples, const std::vector<float>& labels) {
update_samples_ = samples;
update_labels_ = labels;
pybind11::array np_samples = VectorToNumpy<float>(samples);
pybind11::array np_labels = VectorToNumpy<float>(labels);
pybind11::object dmatrix = xgb_module_.attr("DMatrix")(np_samples, np_labels);
xgb_booster_ = xgb_module_.attr("train")(pybind11::dict(), dmatrix, pybind11::int_(kTrainRound_));
}
std::vector<float> XgbCostModel::Predict(const std::vector<std::vector<float>>& samples) const {
pybind11::array np_samples = VectorToNumpy<float>(samples);
pybind11::object dmatrix = xgb_module_.attr("DMatrix")(np_samples);
pybind11::array py_result = xgb_booster_.attr("predict")(dmatrix);
return py_result.cast<std::vector<float>>();
}
void XgbCostModel::Update(const std::vector<std::vector<float>>& samples, const std::vector<float>& labels) {
update_samples_.insert(update_samples_.end(), samples.begin(), samples.end());
update_labels_.insert(update_labels_.end(), labels.begin(), labels.end());
pybind11::array np_samples = VectorToNumpy<float>(update_samples_);
pybind11::array np_labels = VectorToNumpy<float>(update_labels_);
pybind11::object dmatrix = xgb_module_.attr("DMatrix")(np_samples, np_labels);
xgb_booster_ = xgb_module_.attr("train")(pybind11::dict(), dmatrix, pybind11::int_(kTrainRound_));
}
void XgbCostModel::Save(const std::string& path) { xgb_booster_.attr("save_model")(pybind11::str(path)); }
void XgbCostModel::Load(const std::string& path) { xgb_booster_.attr("load_model")(pybind11::str(path)); }
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/embed.h>
#include <atomic>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
#include "paddle/cinn/common/cost_model.h"
namespace cinn {
namespace auto_schedule {
/**
* A C++ cost model which calls Python xgboost via pybind
*
 * Note: this class manages the Python interpreter lifetime internally.
 * If you have to call other Python functions outside of this class and run
 * into interpreter lifetime conflicts, you can check
 * cinn::common::PythonInterpreterGuard.
 *
 * For cinn::common::PythonInterpreterGuard, see:
 * cinn/common/python_interpreter_guard.h and .cc
*
* For pybind interpreter lifetime management, see:
*
* https://pybind11.readthedocs.io/en/stable/advanced/embedding.html#interpreter-lifetime
* https://pybind11.readthedocs.io/en/stable/reference.html#_CPPv422initialize_interpreterbiPPCKcb
*/
class XgbCostModel : public CostModel {
public:
XgbCostModel();
~XgbCostModel() = default;
void Train(const std::vector<std::vector<float>>& samples, const std::vector<float>& labels) override;
std::vector<float> Predict(const std::vector<std::vector<float>>& samples) const override;
void Update(const std::vector<std::vector<float>>& samples, const std::vector<float>& labels) override;
void Save(const std::string& path) override;
void Load(const std::string& path) override;
private:
// Python xgboost module
pybind11::module xgb_module_;
// Object points to Python xgb.Booster()
pybind11::object xgb_booster_;
// atomic int to handle python interpreter lifetime and package dependency
static std::atomic<int> xgb_cost_model_count_;
// Default train rounds
static constexpr int kTrainRound_ = 10;
std::vector<std::vector<float>> update_samples_;
std::vector<float> update_labels_;
};
} // namespace auto_schedule
} // namespace cinn
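// Usage sketch (added for illustration, not part of the original sources):
// how the XgbCostModel declared above might be driven end to end. The function
// name and file path are hypothetical; it assumes the xgboost Python package
// is importable from the embedded interpreter, as XgbCostModel itself requires.
inline void XgbCostModelUsageSketch() {
  cinn::auto_schedule::XgbCostModel model;
  std::vector<std::vector<float>> samples = {{1.f, 2.f}, {3.f, 4.f}};
  std::vector<float> labels = {0.5f, 1.5f};
  model.Train(samples, labels);                       // runs kTrainRound_ boosting rounds
  std::vector<float> preds = model.Predict(samples);  // one score per sample
  model.Save("/tmp/xgb_cost_model.bin");              // delegates to xgboost's Booster.save_model
}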
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/cost_model/xgb_cost_model.h"
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <pybind11/embed.h>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <vector>
namespace cinn {
namespace auto_schedule {
TEST(CostModel, Basic) {
XgbCostModel cost_model;
srand(time(NULL));
int batch_size = 16;
int feature_size = 8;
std::vector<float> labels(batch_size, 1.0);
std::vector<std::vector<float>> samples(batch_size, std::vector<float>(feature_size));
for (int i = 0; i < batch_size; ++i) {
for (int j = 0; j < feature_size; ++j) {
samples[i][j] = rand() % 10;
}
}
cost_model.Train(samples, labels);
std::vector<float> pred = cost_model.Predict(samples);
std::string path = "./test_cost_model.cpp_save_model";
cost_model.Save(path);
XgbCostModel load_cost_model;
load_cost_model.Load(path);
std::vector<float> load_pred = load_cost_model.Predict(samples);
ASSERT_EQ(pred.size(), load_pred.size());
for (size_t i = 0; i < pred.size(); ++i) {
ASSERT_FLOAT_EQ(pred[i], load_pred[i]);
VLOG(6) << "pred[" << i << "] = " << pred[i];
}
std::remove(path.c_str());
cost_model.Update(samples, labels);
pred = cost_model.Predict(samples);
for (size_t i = 0; i < pred.size(); ++i) {
VLOG(6) << "pred[" << i << "] = " << pred[i];
}
}
} // namespace auto_schedule
} // namespace cinn
core_gather_headers()
gather_srcs(cinnapi_src SRCS database.cc jsonfile_database.cc)
cinn_cc_test(test_database SRCS database_test.cc DEPS cinncore)
cinn_cc_test(test_jsonfile_database SRCS jsonfile_database_test.cc DEPS cinncore)
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/database/database.h"
#include <numeric>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
#include "paddle/cinn/auto_schedule/database/jsonfile_database.h"
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/ir/schedule_desc.h"
namespace cinn {
namespace auto_schedule {
bool TuningRecord::Compare::operator()(const TuningRecord& lhs, const TuningRecord& rhs) const {
return lhs.execution_cost < rhs.execution_cost;
}
proto::TuningRecord TuningRecord::ToProto() const {
proto::TuningRecord record_proto;
record_proto.set_task_key(task_key);
record_proto.set_execution_cost(execution_cost);
record_proto.set_predicted_cost(predicted_cost);
record_proto.mutable_trace()->CopyFrom(trace);
return record_proto;
}
Database::Database(int capacity_per_task) : capacity_per_task_(capacity_per_task) {
CHECK_GT(capacity_per_task_, 0) << "capacity_per_task_ should be greater than 0";
}
std::unique_ptr<Database> Database::Make(const DatabaseConfig& config) {
if (config.type == DatabaseType::kMemory) {
return std::make_unique<Database>(config.capacity_per_task);
} else if (config.type == DatabaseType::kJSONFile) {
return std::make_unique<JSONFileDatabase>(config.capacity_per_task, config.record_file_path, true);
}
LOG(FATAL) << "Unimplemented database type.";
return nullptr;
}
void Database::Insert(const TuningRecord& record) {
auto& records = key2record_[record.task_key];
records.emplace(record);
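  // records are kept in ascending order of execution_cost (TuningRecord::Compare),
  // so evicting the last element drops the worst record once capacity is exceeded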
if (records.size() > capacity_per_task_) {
records.erase(std::prev(records.end()));
}
}
bool Database::AddRecord(const TuningRecord& record) {
CHECK(!record.task_key.empty()) << "task_key of TuningRecord can't be empty";
Insert(record);
return Commit(record);
}
std::vector<TuningRecord> Database::LookUp(const std::string& task_key) {
auto fit = key2record_.find(task_key);
if (fit == key2record_.end()) {
return {};
}
std::vector<TuningRecord> results;
results.reserve(fit->second.size());
results.assign(fit->second.begin(), fit->second.end());
return results;
}
std::vector<TuningRecord> Database::GetTopK(const std::string& task_key, int k) {
auto fit = key2record_.find(task_key);
if (fit == key2record_.end() || k <= 0) {
return {};
}
if (k > capacity_per_task_) {
LOG(WARNING) << "Top k=" << k << " is greater than the capacity, will adjust k=" << capacity_per_task_;
k = capacity_per_task_;
}
std::vector<TuningRecord> results;
results.reserve(k);
for (const TuningRecord& record : fit->second) {
results.emplace_back(record);
if (results.size() == k) {
break;
}
}
return results;
}
size_t Database::Size() {
  return std::accumulate(key2record_.begin(), key2record_.end(), size_t(0),
                         [](size_t res, const auto& kv) -> size_t { return res + kv.second.size(); });
}
size_t Database::Count(const std::string& task_key) {
auto fit = key2record_.find(task_key);
if (fit == key2record_.end()) {
return 0;
}
return fit->second.size();
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/cinn/auto_schedule/auto_schedule.pb.h"
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/ir/schedule_desc.pb.h"
namespace cinn {
namespace auto_schedule {
// Record related data about tuning process of a measure candidate
struct TuningRecord {
// the unique key to identify a task
std::string task_key;
// the predicted cost of CostModel
float predicted_cost; // unit: us
// the ScheduleDesc of this tuning process
ir::proto::ScheduleDesc trace;
// the cost time of the candidate executed during measure
double execution_cost; // unit: us
TuningRecord() = default;
TuningRecord(const proto::TuningRecord& record)
: task_key(record.task_key()),
predicted_cost(record.predicted_cost()),
trace(record.trace()),
execution_cost(record.execution_cost()) {}
TuningRecord(const std::string& task_key, const SearchState& state, double execution_cost)
: task_key(task_key),
predicted_cost(state->predicted_cost),
trace(state->ir_schedule.GetTraceDesc().ToProto()),
execution_cost(execution_cost) {}
// convert to proto object
proto::TuningRecord ToProto() const;
// a binary compare function that denotes when the left
// will be sorted in the front of the right
struct Compare {
bool operator()(const TuningRecord& lhs, const TuningRecord& rhs) const;
};
};
enum class DatabaseType : int { kMemory, kJSONFile };
struct DatabaseConfig {
DatabaseType type = DatabaseType::kMemory;
int capacity_per_task = 2;
std::string record_file_path = "/tmp/tuning_record.json";
};
// A database that supports inserting and looking up historical tuning results
// with specified traits. It can be implemented with a concrete storage to
// save/load the underlying data, such as memory, a file or a database server;
// this base class can be regarded as one using memory as its underlying
// storage medium.
class Database {
public:
explicit Database(int capacity_per_task);
~Database() = default;
// Create a Database with the specific config
static std::unique_ptr<Database> Make(const DatabaseConfig& config);
// add a record into the database
bool AddRecord(const TuningRecord& record);
// return all records whose task_keys are equal to the specified key
std::vector<TuningRecord> LookUp(const std::string& task_key);
// return the states of the top k in sorted candidates
std::vector<TuningRecord> GetTopK(const std::string& task_key, int k);
// return the total number of stored candidates
size_t Size();
// return the number of stored candidates with specified key
size_t Count(const std::string& task_key);
protected:
// commit the newly added record into underlying storage
virtual bool Commit(const TuningRecord& record) { return true; }
// insert a newly added record into memory storage
void Insert(const TuningRecord& record);
// map task_key to its records
std::unordered_map<std::string, std::multiset<TuningRecord, TuningRecord::Compare>> key2record_;
// the max number of candidates stored
const int capacity_per_task_;
};
} // namespace auto_schedule
} // namespace cinn
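// Usage sketch (added for illustration, not part of the original sources):
// wiring the DatabaseConfig/Database::Make factory declared above. The
// function name, task key and config values are hypothetical.
inline void DatabaseUsageSketch() {
  using cinn::auto_schedule::Database;
  using cinn::auto_schedule::DatabaseConfig;
  using cinn::auto_schedule::DatabaseType;
  using cinn::auto_schedule::TuningRecord;
  DatabaseConfig config;
  config.type = DatabaseType::kJSONFile;  // persist each record as one JSON line
  config.capacity_per_task = 4;           // keep at most 4 records per task
  config.record_file_path = "/tmp/tuning_record.json";
  std::unique_ptr<Database> db = Database::Make(config);
  // After records are added via db->AddRecord(...), the two best
  // (lowest execution_cost) candidates of a task can be queried:
  std::vector<TuningRecord> best = db->GetTopK("hypothetical_task_key", 2);
  (void)best;
}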
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/database/database.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/cinn/auto_schedule/auto_schedule.pb.h"
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/ir/ir_schedule.h"
namespace cinn {
namespace auto_schedule {
class TestDatabase : public ::testing::Test {
public:
TestDatabase() : test_db(2) {
auto state = SearchState(ir::IRSchedule());
test_db.AddRecord(TuningRecord("k1", state, 1.0));
test_db.AddRecord(TuningRecord("k2", state, 2.0));
test_db.AddRecord(TuningRecord("k2", state, 3.0));
test_db.AddRecord(TuningRecord("k3", state, 3.0));
test_db.AddRecord(TuningRecord("k3", state, 4.0));
test_db.AddRecord(TuningRecord("k3", state, 5.0));
test_db.AddRecord(TuningRecord("k4", state, 4.0));
}
void SetUp() override {}
Database test_db;
};
TEST_F(TestDatabase, Basic) {
ASSERT_EQ(test_db.Size(), 6);
auto records = test_db.LookUp("k3");
// check the max number of stored candidates will
// be restricted to capacity_per_task
ASSERT_EQ(test_db.Count("k3"), 2);
ASSERT_EQ(records.size(), 2);
EXPECT_EQ(records[0].execution_cost, 3.0);
EXPECT_EQ(records[1].execution_cost, 4.0);
}
TEST_F(TestDatabase, GetTopK) {
ASSERT_TRUE(test_db.GetTopK("k5", 2).empty());
ASSERT_EQ(test_db.GetTopK("k4", 3).size(), 1);
test_db.AddRecord(TuningRecord("k4", SearchState(ir::IRSchedule(), 1.2), 2.0));
test_db.AddRecord(TuningRecord("k4", SearchState(ir::IRSchedule(), 1.0), 3.0));
auto records = test_db.GetTopK("k4", 3);
ASSERT_EQ(records.size(), 2);
EXPECT_FLOAT_EQ(records[0].predicted_cost, 1.2);
EXPECT_FLOAT_EQ(records[1].predicted_cost, 1.0);
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/database/jsonfile_database.h"
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#include <google/protobuf/util/json_util.h>
#include <fstream>
#include "paddle/cinn/auto_schedule/auto_schedule.pb.h"
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/utils/multi_threading.h"
namespace cinn {
namespace auto_schedule {
// append a line to file
void AppendLineToFile(const std::string& file_path, const std::string& line) {
std::ofstream os(file_path, std::ofstream::app);
CHECK(os.good()) << "Cannot open the file to write: " << file_path;
os << line << std::endl;
}
// read lines from a json file
std::vector<std::string> ReadLinesFromFile(const std::string& file_path, bool allow_new_file) {
std::ifstream is(file_path);
if (is.good()) {
std::vector<std::string> json_strs;
for (std::string str; std::getline(is, str);) {
json_strs.push_back(str);
}
return json_strs;
}
CHECK(allow_new_file) << "File doesn't exist: " << file_path;
std::ofstream os(file_path);
CHECK(os.good()) << "Cannot create new file: " << file_path;
return {};
}
JSONFileDatabase::JSONFileDatabase(int capacity_per_task, const std::string& record_file_path, bool allow_new_file)
: Database(capacity_per_task), record_file_path_(record_file_path) {
VLOG(3) << "Auto schedule will save/load tuning records on file:" << record_file_path;
auto json_lines = ReadLinesFromFile(record_file_path_, allow_new_file);
std::vector<cinn::auto_schedule::proto::TuningRecord> all_records_proto(json_lines.size());
// convert JSON string to proto object
auto worker_fn = [this, &json_lines, &all_records_proto](int index) {
cinn::auto_schedule::proto::TuningRecord record_proto;
auto status = google::protobuf::util::JsonStringToMessage(json_lines[index], &record_proto);
CHECK(status.ok()) << "Failed to parse JSON: " << json_lines[index];
all_records_proto[index].Swap(&record_proto);
};
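  // a non-positive thread count (-1 here) lets parallel_run pick the number of
  // worker threads itself (see cinn/utils/multi_threading.h)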
utils::parallel_run(worker_fn, utils::SequenceDispatcher(0, json_lines.size()), -1);
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
for (const auto& record_proto : all_records_proto) {
std::string task_key = record_proto.task_key();
if (task_registry->Has(task_key)) {
VLOG(4) << "Add a measured TuningRecord with task_key=" << task_key;
Insert(TuningRecord(record_proto));
}
}
}
// convert a TuningRecord object to string in JSON format
std::string JSONFileDatabase::RecordToJSON(const TuningRecord& record) {
proto::TuningRecord record_proto = record.ToProto();
std::string json_string;
auto status = google::protobuf::util::MessageToJsonString(record_proto, &json_string);
CHECK(status.ok()) << "Failed to serialize record to JSON, task key = " << record.task_key;
VLOG(4) << "json_string = \n" << json_string;
return json_string;
}
bool JSONFileDatabase::Commit(const TuningRecord& record) {
std::string json_string = RecordToJSON(record);
AppendLineToFile(record_file_path_, json_string);
return true;
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/cinn/auto_schedule/database/database.h"
namespace cinn {
namespace auto_schedule {
// JSONFileDatabase is a database implemented on top of a JSON file to save/load the underlying data.
class JSONFileDatabase : public Database {
public:
/*!
* \brief Build a JSONFileDatabase object from a json file.
* \param capacity_per_task The max number of candidates stored.
* \param record_file_path The path of the json file.
* \param allow_new_file Whether to create new file when the given path is not found.
*/
JSONFileDatabase(int capacity_per_task, const std::string& record_file_path, bool allow_new_file);
~JSONFileDatabase() = default;
// convert a TuningRecord object to string in JSON format
std::string RecordToJSON(const TuningRecord& record);
protected:
// commit the newly added record into json file
bool Commit(const TuningRecord& record) override;
  // the path of the json file used to save tuning records.
std::string record_file_path_;
};
// append a line to file
void AppendLineToFile(const std::string& file_path, const std::string& line);
// read lines from a json file
std::vector<std::string> ReadLinesFromFile(const std::string& file_path, bool allow_new_file = true);
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/database/jsonfile_database.h"
#include <google/protobuf/util/message_differencer.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "paddle/cinn/auto_schedule/search_space/search_state.h"
#include "paddle/cinn/auto_schedule/task/task_registry.h"
#include "paddle/cinn/cinn.h"
#include "paddle/cinn/ir/ir_printer.h"
#include "paddle/cinn/ir/ir_schedule.h"
#include "paddle/cinn/optim/ir_copy.h"
namespace cinn {
namespace auto_schedule {
// Return lowered IR ASTs for the example functions used in this test
std::vector<ir::LoweredFunc> LowerCompute(const std::vector<int>& shape, const Target& target) {
  CHECK(shape.size() == 2) << "shape should be 2-D";
std::vector<Expr> domain;
for (auto i = 0; i < shape.size(); ++i) {
domain.emplace_back(shape[i]);
}
Placeholder<float> A("A", domain);
ir::Tensor B, C;
B = Compute(
domain, [&A](Var i, Var j) { return A(i, j); }, "B");
C = Compute(
domain, [&B](Var i, Var j) { return B(i, j); }, "C");
return cinn::lang::LowerVec("test_func", CreateStages({A, B}), {A, B}, {}, {}, nullptr, target, true);
}
// Create a new IRSchedule with copied ir::LoweredFunc AST
ir::IRSchedule MakeIRSchedule(const std::vector<ir::LoweredFunc>& lowered_funcs, const std::string& task_key) {
std::vector<Expr> exprs;
for (auto&& func : lowered_funcs) {
exprs.emplace_back(optim::IRCopy(func->body));
}
InitialTaskRegistry* task_registry = InitialTaskRegistry::Global();
task_registry->Regist(task_key, ir::ModuleExpr(exprs));
return ir::IRSchedule(ir::ModuleExpr(exprs));
}
class TestJSONFileDatabase : public ::testing::Test {
public:
TestJSONFileDatabase() : record_file_path("/tmp/test_record.json"), test_db(2, record_file_path, true) {}
void SetUp() override { lowered_funcs = LowerCompute({32, 32}, target); }
void TearDown() override {
auto isFileExists = [](const std::string& file_path) -> bool {
std::ifstream f(file_path.c_str());
return f.good();
};
if (isFileExists(record_file_path)) {
if (remove(record_file_path.c_str()) == 0) {
LOG(INFO) << "Successfully deleted file: " << record_file_path;
} else {
LOG(INFO) << "failed to delete file: " << record_file_path;
}
} else {
LOG(INFO) << "file: " << record_file_path << "does not exist.";
}
}
std::string record_file_path;
JSONFileDatabase test_db;
std::vector<ir::LoweredFunc> lowered_funcs;
Target target = common::DefaultHostTarget();
};
TEST_F(TestJSONFileDatabase, Serialize) {
ir::IRSchedule ir_sch = MakeIRSchedule(lowered_funcs, "test");
auto fused = ir_sch.Fuse("B", {0, 1});
VLOG(3) << "after Fuse, Expr: " << fused;
TuningRecord record1("test", SearchState(std::move(ir_sch), 2.0), 1.0);
std::string str = test_db.RecordToJSON(record1);
VLOG(3) << "RecordToJSON: " << str;
  // Because protobuf serialization does not guarantee field order, we list all possible results.
std::string case1 =
"{\"taskKey\":\"test\",\"executionCost\":1,\"predictedCost\":2,\"trace\":{\"steps\":[{\"type\":\"FuseWithName\","
"\"outputs\":[\"e0\"],\"attrs\":[{\"name\":\"loops_index\",\"dtype\":\"INTS\",\"ints\":[0,1]},{\"name\":\"block_"
"name\",\"dtype\":\"STRING\",\"s\":\"B\"}]}]}}";
std::string case2 =
"{\"taskKey\":\"test\",\"executionCost\":1,\"predictedCost\":2,\"trace\":{\"steps\":[{\"type\":\"FuseWithName\","
"\"outputs\":[\"e0\"],\"attrs\":[{\"name\":\"block_name\",\"dtype\":\"STRING\",\"s\":\"B\"},{\"name\":\"loops_"
"index\",\"dtype\":\"INTS\",\"ints\":[0,1]}]}]}}";
EXPECT_EQ(true, str == case1 || str == case2);
}
TEST_F(TestJSONFileDatabase, SaveLoad) {
ir::IRSchedule ir_sch1 = MakeIRSchedule(lowered_funcs, "k1");
auto fused1 = ir_sch1.Fuse("B", {0, 1});
ir::IRSchedule ir_sch2 = MakeIRSchedule(lowered_funcs, "k2");
test_db.AddRecord(TuningRecord("k1", SearchState(std::move(ir_sch1), 1.5), 1.0));
test_db.AddRecord(TuningRecord("k2", SearchState(std::move(ir_sch2), 3.5), 3.0));
std::vector<std::string> strs = ReadLinesFromFile(record_file_path);
ASSERT_EQ(strs.size(), 2);
  // Because protobuf serialization does not guarantee field order, we list all possible results.
std::string case1 =
"{\"taskKey\":\"k1\",\"executionCost\":1,\"predictedCost\":1.5,\"trace\":{\"steps\":[{\"type\":\"FuseWithName\","
"\"outputs\":[\"e0\"],\"attrs\":[{\"name\":\"loops_index\",\"dtype\":\"INTS\",\"ints\":[0,1]},{\"name\":\"block_"
"name\",\"dtype\":\"STRING\",\"s\":\"B\"}]}]}}";
std::string case2 =
"{\"taskKey\":\"k1\",\"executionCost\":1,\"predictedCost\":1.5,\"trace\":{\"steps\":[{\"type\":\"FuseWithName\","
"\"outputs\":[\"e0\"],\"attrs\":[{\"name\":\"block_name\",\"dtype\":\"STRING\",\"s\":\"B\"},{\"name\":\"loops_"
"index\",\"dtype\":\"INTS\",\"ints\":[0,1]}]}]}}";
EXPECT_EQ(true, strs[0] == case1 || strs[0] == case2);
EXPECT_EQ(strs[1], "{\"taskKey\":\"k2\",\"executionCost\":3,\"predictedCost\":3.5,\"trace\":{}}");
}
TEST_F(TestJSONFileDatabase, Basic) {
test_db.AddRecord(TuningRecord("k1", SearchState(MakeIRSchedule(lowered_funcs, "k1"), 1.0), 1.0));
test_db.AddRecord(TuningRecord("k2", SearchState(MakeIRSchedule(lowered_funcs, "k2"), 1.0), 2.0));
test_db.AddRecord(TuningRecord("k2", SearchState(MakeIRSchedule(lowered_funcs, "k2"), 1.0), 3.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 8.0), 3.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 7.0), 4.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 6.0), 5.0));
test_db.AddRecord(TuningRecord("k4", SearchState(MakeIRSchedule(lowered_funcs, "k4"), 1.0), 4.0));
ASSERT_EQ(test_db.Size(), 6);
auto records = test_db.LookUp("k3");
// check the max number of stored candidates will
// be restricted to capacity_per_task
ASSERT_EQ(test_db.Count("k3"), 2);
ASSERT_EQ(records.size(), 2);
EXPECT_EQ(records[0].execution_cost, 3.0);
EXPECT_EQ(records[1].execution_cost, 4.0);
}
TEST_F(TestJSONFileDatabase, GetTopK) {
test_db.AddRecord(TuningRecord("k1", SearchState(MakeIRSchedule(lowered_funcs, "k1"), 1.0), 1.0));
test_db.AddRecord(TuningRecord("k2", SearchState(MakeIRSchedule(lowered_funcs, "k2"), 1.0), 2.0));
test_db.AddRecord(TuningRecord("k2", SearchState(MakeIRSchedule(lowered_funcs, "k2"), 1.0), 3.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 1.0), 3.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 1.0), 4.0));
test_db.AddRecord(TuningRecord("k3", SearchState(MakeIRSchedule(lowered_funcs, "k3"), 1.0), 5.0));
test_db.AddRecord(TuningRecord("k4", SearchState(MakeIRSchedule(lowered_funcs, "k4"), 2.0), 4.0));
test_db.AddRecord(TuningRecord("k4", SearchState(MakeIRSchedule(lowered_funcs, "k4"), 1.2), 2.0));
test_db.AddRecord(TuningRecord("k4", SearchState(MakeIRSchedule(lowered_funcs, "k4"), 1.0), 3.0));
auto records = test_db.GetTopK("k4", 3);
ASSERT_EQ(records.size(), 2);
EXPECT_FLOAT_EQ(records[0].predicted_cost, 1.2);
EXPECT_FLOAT_EQ(records[1].predicted_cost, 1.0);
}
TEST_F(TestJSONFileDatabase, Reload) {
ir::IRSchedule ir_sch = MakeIRSchedule(lowered_funcs, "k1");
auto fused = ir_sch.Fuse("B", {0, 1});
test_db.AddRecord(TuningRecord("k1", SearchState(std::move(ir_sch), 1.0), 1.0));
test_db.AddRecord(TuningRecord("k2", SearchState(MakeIRSchedule(lowered_funcs, "k2"), 1.0), 2.0));
auto records = test_db.LookUp("k1");
ASSERT_EQ(records.size(), 1);
JSONFileDatabase new_db(2, record_file_path, false);
ASSERT_EQ(new_db.Size(), 2);
auto loaded_records = new_db.LookUp("k1");
ASSERT_EQ(records.size(), loaded_records.size());
EXPECT_EQ(records[0].task_key, loaded_records[0].task_key);
EXPECT_EQ(records[0].execution_cost, loaded_records[0].execution_cost);
EXPECT_EQ(records[0].predicted_cost, loaded_records[0].predicted_cost);
// check the equality of trace info between original TuningRecord and the loaded TuningRecord
const auto& lhs_trace = records[0].trace;
const auto& rhs_trace = loaded_records[0].trace;
google::protobuf::util::MessageDifferencer dif;
static const google::protobuf::Descriptor* descriptor = cinn::ir::proto::ScheduleDesc_Step::descriptor();
dif.TreatAsSet(descriptor->FindFieldByName("attrs"));
EXPECT_TRUE(dif.Compare(lhs_trace, rhs_trace));
// check the equality of module expr between original TuningRecord
// and the loaded TuningRecord by replaying with tracing ScheduleDesc
ir::IRSchedule lhs_sch = MakeIRSchedule(lowered_funcs, "k1");
ir::IRSchedule rhs_sch = MakeIRSchedule(lowered_funcs, "k1");
ir::ScheduleDesc::ReplayWithProto(lhs_trace, &lhs_sch);
ir::ScheduleDesc::ReplayWithProto(rhs_trace, &rhs_sch);
auto lhs_exprs = lhs_sch.GetModule().GetExprs();
auto rhs_exprs = rhs_sch.GetModule().GetExprs();
ASSERT_EQ(lhs_exprs.size(), rhs_exprs.size());
for (auto i = 0; i < lhs_exprs.size(); ++i) {
std::string lhs = utils::GetStreamCnt(lhs_exprs.at(i));
std::string rhs = utils::GetStreamCnt(rhs_exprs.at(i));
size_t remove_prefix_len = 28;
ASSERT_EQ(lhs.erase(0, remove_prefix_len), rhs.erase(0, remove_prefix_len));
}
}
} // namespace auto_schedule
} // namespace cinn
core_gather_headers()
gather_srcs(cinnapi_src SRCS schedule_measurer.cc simple_builder.cc simple_runner.cc)
cinn_cc_test(test_simple_runner SRCS simple_runner_test.cc DEPS cinncore)
cinn_cc_test(test_measurer SRCS measurer_test.cc DEPS cinncore)
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/cinn/auto_schedule/task/tune_task.h"
#include "paddle/cinn/hlir/framework/graph_compiler.h"
#include "paddle/cinn/hlir/framework/instruction.h"
#include "paddle/cinn/runtime/cinn_runtime.h"
namespace cinn {
namespace auto_schedule {
// The input to a measurer
struct MeasureInput {
// The task object related to this measurement.
const TuneTask* task;
// lowered Exprs to be measured
std::vector<ir::LoweredFunc> lowered_funcs;
  // Used to pass arguments whose values may be specified in advance;
  // defaults to null.
const std::map<std::string, cinn_pod_value_t>* execution_args = nullptr;
};
// The result of a measurement
struct MeasureResult {
  // The average execution time over a specified number of repeated runs.
double execution_cost = 0.0; // unit: us
// The time cost of the whole measurement process including
// building and running
double elapsed_time = 0.0; // unit: us
  // used to return detailed messages once an error occurs during measurement;
  // empty if nothing goes wrong
std::string error_msg;
};
// The result of building with input schedule
struct BuildResult {
  // The scope that owns detailed compilation info of parameters in the runtime program
const hlir::framework::Scope* compiled_scope;
// The executable program
std::unique_ptr<hlir::framework::Program> runtime_program;
};
// This interface defines how to generate executable objects
// from an input schedule. A builder should not hold stateful data
// related to any task, so that it can be called in parallel across
// multiple task-tuning processes.
class ScheduleBuilder {
public:
virtual BuildResult Build(const MeasureInput& input) = 0;
};
// This interface defines how to run the built result. Like ScheduleBuilder above,
// a runner should be implemented without being bound to a specific task.
class ScheduleRunner {
public:
virtual MeasureResult Run(const MeasureInput& input, const BuildResult& build_result) = 0;
};
} // namespace auto_schedule
} // namespace cinn
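// Illustrative stub (added for illustration, not part of the original
// sources): the smallest ScheduleRunner satisfying the interface above. Real
// implementations, such as the SimpleRunner used by the tests, execute the
// built runtime_program and time it; this one only shows the contract.
namespace cinn {
namespace auto_schedule {
class NoopRunner : public ScheduleRunner {
 public:
  MeasureResult Run(const MeasureInput& input, const BuildResult& build_result) override {
    (void)input;
    (void)build_result;
    MeasureResult result;
    result.execution_cost = 0.0;  // pretend execution is free
    return result;
  }
};
}  // namespace auto_schedule
}  // namespace cinn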
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <memory>
#include "paddle/cinn/auto_schedule/measure/schedule_measurer.h"
#include "paddle/cinn/auto_schedule/measure/simple_builder.h"
#include "paddle/cinn/auto_schedule/measure/simple_runner.h"
#include "paddle/cinn/auto_schedule/task/task_creator.h"
#include "paddle/cinn/common/target.h"
#include "paddle/cinn/frontend/net_builder.h"
#include "paddle/cinn/frontend/optimize.h"
#include "paddle/cinn/frontend/syntax.h"
#include "paddle/cinn/hlir/framework/graph_compiler.h"
#include "paddle/cinn/runtime/flags.h"
DECLARE_bool(cinn_ir_schedule);
namespace cinn {
namespace auto_schedule {
using ::cinn::hlir::framework::BuildScope;
using ::cinn::hlir::framework::Graph;
using ::cinn::hlir::framework::GraphCompiler;
frontend::Program CreateAddReluProgram() {
constexpr int M = 32;
constexpr int N = 24;
frontend::NetBuilder builder("test");
auto a = builder.CreateInput(Float(32), {M, N}, "A");
auto b = builder.CreateInput(Float(32), {M, N}, "B");
auto c = builder.Add(a, b);
auto d = builder.Relu(c);
return builder.Build();
}
class TestMeasurer : public ::testing::Test {
public:
std::unique_ptr<GraphCompiler> graph_compiler;
std::vector<TuneTask> tasks;
std::vector<MeasureInput> inputs;
void SetUp() override {
FLAGS_cinn_ir_schedule = true;
#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
std::unordered_set<std::string> fetch_ids;
auto program = CreateAddReluProgram();
auto graph = cinn::frontend::Optimize(&program, fetch_ids, target);
auto scope = BuildScope(target, graph);
graph_compiler = std::make_unique<GraphCompiler>(target, scope, graph);
TaskCreator task_creator;
tasks = task_creator.CreateTuneTaskOpLevel(graph.get());
const auto& dtype_dict = graph->GetAttrs<absl::flat_hash_map<std::string, common::Type>>("inferdtype");
const auto& shape_dict = graph->GetAttrs<absl::flat_hash_map<std::string, hlir::framework::shape_t>>("infershape");
auto op_lowerer = std::make_unique<hlir::framework::OpLowerer>(dtype_dict, shape_dict, target);
inputs.reserve(tasks.size());
for (int i = 0; i < tasks.size(); ++i) {
auto* task = &tasks[i];
task->Initialize(shape_dict, dtype_dict, op_lowerer.get());
MeasureInput input;
input.task = task;
input.lowered_funcs = task->lowered_funcs;
inputs.emplace_back(input);
}
}
};
class ThrowExceptionBuilder : public ScheduleBuilder {
struct Exception : public std::exception {
    const char* what() const noexcept override { return "BuildError"; }
};
BuildResult Build(const MeasureInput& input) override { throw Exception(); }
};
class ThrowExceptionRunner : public ScheduleRunner {
struct Exception : public std::exception {
    const char* what() const noexcept override { return "RunError"; }
};
MeasureResult Run(const MeasureInput& input, const BuildResult& build_result) override { throw Exception(); }
};
TEST_F(TestMeasurer, Basic) {
auto builder = std::make_unique<SimpleBuilder>(graph_compiler.get());
auto runner = std::make_unique<SimpleRunner>(1);
auto measurer = std::make_unique<ScheduleMeasurer>(builder.get(), runner.get());
std::vector<MeasureResult> results = measurer->Measure(inputs);
ASSERT_EQ(inputs.size(), results.size());
}
TEST_F(TestMeasurer, CatchException) {
auto builder = std::make_unique<SimpleBuilder>(graph_compiler.get());
auto runner = std::make_unique<SimpleRunner>(1);
auto throw_builder = std::make_unique<ThrowExceptionBuilder>();
auto throw_runner = std::make_unique<ThrowExceptionRunner>();
auto measurer_with_build_error = std::make_unique<ScheduleMeasurer>(throw_builder.get(), runner.get(), 2);
std::vector<MeasureResult> results = measurer_with_build_error->Measure(inputs);
ASSERT_EQ(inputs.size(), results.size());
EXPECT_EQ(results[0].error_msg, "Build failed, error: BuildError\n");
// TODO(CtfGo): test parallel build after we support thread-safe compilation
auto measurer_with_run_error = std::make_unique<ScheduleMeasurer>(builder.get(), throw_runner.get(), 1);
results = measurer_with_run_error->Measure(inputs);
ASSERT_EQ(inputs.size(), results.size());
EXPECT_EQ(results[0].error_msg, "Run failed, error: RunError\n");
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/cinn/auto_schedule/measure/schedule_measurer.h"
#include <exception>
#include "paddle/cinn/utils/multi_threading.h"
namespace cinn {
namespace auto_schedule {
ScheduleMeasurer::ScheduleMeasurer(ScheduleBuilder* builder, ScheduleRunner* runner, int num_threads)
: builder_(builder), runner_(runner), num_threads_(num_threads) {}
std::vector<MeasureResult> ScheduleMeasurer::Measure(const std::vector<MeasureInput>& inputs) {
if (inputs.empty()) {
LOG(WARNING) << "inputs is empty";
return {};
}
std::vector<BuildResult> build_results(inputs.size());
std::vector<MeasureResult> results(inputs.size());
// define how to build a candidate with the specified index
auto build_fn = [builder = builder_, &inputs, &build_results, &results](int index) {
VLOG(6) << "Build candidate index: " << index;
auto m_start = std::chrono::steady_clock::now();
try {
build_results[index] = builder->Build(inputs[index]);
} catch (std::exception& e) {
results[index].error_msg = utils::StringFormat("Build failed, error: %s\n", e.what());
}
auto time_span = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - m_start);
results[index].elapsed_time += static_cast<double>(time_span.count());
};
// define how to run a candidate with the specified index
auto run_fn = [runner = runner_, &inputs, &build_results, &results](int index) {
VLOG(6) << "Run candidate index: " << index;
auto m_start = std::chrono::steady_clock::now();
try {
// if error occurred in building, then skip running
if (results[index].error_msg.empty()) {
results[index] = runner->Run(inputs[index], build_results[index]);
}
} catch (std::exception& e) {
results[index].error_msg = utils::StringFormat("Run failed, error: %s\n", e.what());
}
auto time_span = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - m_start);
results[index].elapsed_time += static_cast<double>(time_span.count());
};
// measure a candidate by calling build and run successively
auto measure_fn = [&build_fn, &run_fn](int index) {
build_fn(index);
run_fn(index);
};
  // the default num_threads_ is 1, in which case all measurements are performed sequentially in place.
utils::parallel_run(measure_fn, utils::SequenceDispatcher(0, inputs.size()), num_threads_);
VLOG(4) << "Measure " << inputs.size() << " candidates";
return results;
}
} // namespace auto_schedule
} // namespace cinn
// Copyright (c) 2022 CINN Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/cinn/auto_schedule/measure/measure.h"
namespace cinn {
namespace auto_schedule {
// Entrance of schedule measurement. It mainly includes two processes:
// building the input schedules and running the generated code.
class ScheduleMeasurer {
public:
ScheduleMeasurer(ScheduleBuilder* builder, ScheduleRunner* runner, int num_threads = 1);
// Measure a batch of inputs and return all results once.
std::vector<MeasureResult> Measure(const std::vector<MeasureInput>& inputs);
private:
// The handle to implemented ScheduleBuilder
ScheduleBuilder* builder_;
// The handle to implemented ScheduleRunner
ScheduleRunner* runner_;
// The number of threads used to perform measurement,
// if it is greater than 1 that means parallel measurement.
const int num_threads_;
};
} // namespace auto_schedule
} // namespace cinn
core_gather_headers()
gather_srcs(cinnapi_src SRCS cooperative_process.cc)
if(WITH_CUDA)
  cinn_nv_test(test_cooperative_process SRCS cooperative_process_test.cc DEPS
               cinncore auto_gen_rule_test_helper test_program_builder)
endif()
add_subdirectory(mutate_rule)
core_gather_headers()
gather_srcs(cinnapi_src SRCS evolutionary_search.cc)
cinn_cc_test(test_evolutionary_search SRCS evolutionary_search_test.cc DEPS cinncore test_program_builder)