未验证 提交 1f5841c2 编写于 作者: Q Qi Li 提交者: GitHub

[ROCM] update cmake and dockerfile, test=develop (#30598)

上级 cf9bdb94
...@@ -148,7 +148,8 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF) ...@@ -148,7 +148,8 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF) option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF)
################################ Internal Configurations ####################################### ################################ Internal Configurations #######################################
option(WITH_ROCM_PLATFORM "Compile PaddlePaddle with ROCM platform" OFF) option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_RCCL "Compile PaddlePaddle with RCCL support" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
...@@ -278,19 +279,25 @@ include(configure) # add paddle env configuration ...@@ -278,19 +279,25 @@ include(configure) # add paddle env configuration
include_directories("${PADDLE_SOURCE_DIR}") include_directories("${PADDLE_SOURCE_DIR}")
if(NOT DEFINED ENV{ROCM_PATH}) if(WITH_ROCM)
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed") include(hip)
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") endif(WITH_ROCM)
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") if (NOT WITH_ROCM AND WITH_RCCL)
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed") MESSAGE(WARNING
"Disable RCCL when compiling without GPU. Force WITH_RCCL=OFF.")
# Force the RCCL option off (the enclosing condition is WITH_RCCL requested
# without WITH_ROCM). The variable must be WITH_RCCL, matching the warning
# text above; forcing WITH_NCCL here would leave WITH_RCCL still ON.
set(WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without GPU" FORCE)
endif() endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
if(WITH_ROCM_PLATFORM) if(WITH_RCCL)
find_package(HIP) add_definitions("-DPADDLE_WITH_RCCL")
include(hip) include(rccl)
endif(WITH_ROCM_PLATFORM) else()
if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
endif()
endif()
if(WITH_NV_JETSON) if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE) set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
......
...@@ -130,14 +130,10 @@ if(WITH_GPU) ...@@ -130,14 +130,10 @@ if(WITH_GPU)
endif() endif()
include_directories(${TENSORRT_INCLUDE_DIR}) include_directories(${TENSORRT_INCLUDE_DIR})
endif() endif()
elseif(WITH_ROCM_PLATFORM) elseif(WITH_ROCM)
add_definitions(-DPADDLE_WITH_HIP) add_definitions(-DPADDLE_WITH_HIP)
add_definitions(-DEIGEN_USE_GPU)
add_definitions(-DEIGEN_USE_HIP) add_definitions(-DEIGEN_USE_HIP)
add_definitions(-D__HIP_PLATFORM_HCC__)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
else() else()
add_definitions(-DHPPL_STUB_FUNC) add_definitions(-DHPPL_STUB_FUNC)
list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
......
...@@ -155,7 +155,7 @@ set(COMMON_FLAGS ...@@ -155,7 +155,7 @@ set(COMMON_FLAGS
) )
if(NOT APPLE) if(NOT APPLE)
if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM_PLATFORM AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 7.3)) if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM))
set(COMMON_FLAGS set(COMMON_FLAGS
${COMMON_FLAGS} ${COMMON_FLAGS}
-Wno-format-truncation # Warning in boost gcc 8.2 -Wno-format-truncation # Warning in boost gcc 8.2
...@@ -213,5 +213,17 @@ foreach(flag ${GPU_COMMON_FLAGS}) ...@@ -213,5 +213,17 @@ foreach(flag ${GPU_COMMON_FLAGS})
safe_set_nvflag(${flag}) safe_set_nvflag(${flag})
endforeach() endforeach()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}") if(WITH_GPU)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
endif()
# For ROCm builds, extend HIP_HIPCC_FLAGS with the space-separated
# SAFE_GPU_COMMON_FLAGS string.
if(WITH_ROCM)
  string(APPEND HIP_HIPCC_FLAGS " ${SAFE_GPU_COMMON_FLAGS}")
endif()
# Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex.
# The flag variables are expanded inside quotes so that string(REPLACE)
# always receives exactly one input argument: an empty flag string would
# otherwise leave the command short of arguments, and a value containing
# semicolons would be split and concatenated without separators.
if(WITH_ROCM)
  string(REPLACE "-Werror" "-Wno-error" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  string(REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif()
...@@ -382,6 +382,9 @@ function(cc_binary TARGET_NAME) ...@@ -382,6 +382,9 @@ function(cc_binary TARGET_NAME)
endif() endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${os_dependency_modules}) target_link_libraries(${TARGET_NAME} ${os_dependency_modules})
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
check_coverage_opt(${TARGET_NAME} ${cc_binary_SRCS}) check_coverage_opt(${TARGET_NAME} ${cc_binary_SRCS})
...@@ -403,6 +406,9 @@ function(cc_test_build TARGET_NAME) ...@@ -403,6 +406,9 @@ function(cc_test_build TARGET_NAME)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog) target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${os_dependency_modules} paddle_gtest_main lod_tensor memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
endif()
endif() endif()
check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS}) check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS})
...@@ -538,33 +544,24 @@ function(nv_test TARGET_NAME) ...@@ -538,33 +544,24 @@ function(nv_test TARGET_NAME)
endfunction(nv_test) endfunction(nv_test)
function(hip_library TARGET_NAME) function(hip_library TARGET_NAME)
if (WITH_ROCM_PLATFORM) if (WITH_ROCM)
set(options STATIC static SHARED shared) set(options STATIC static SHARED shared)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_library_SRCS})
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if(hip_library_SRCS) if(hip_library_SRCS)
# FindHIP.cmake defined hip_add_library, HIP_SOURCE_PROPERTY_FORMAT is requried if no .cu files found
if(NOT ${CMAKE_CURRENT_SOURCE_DIR} MATCHES ".*/operators")
set_source_files_properties(${hip_library_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
endif()
if (hip_library_SHARED OR hip_library_shared) # build *.so if (hip_library_SHARED OR hip_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources}) hip_add_library(${TARGET_NAME} SHARED ${hip_library_SRCS})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
else() else()
add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources}) hip_add_library(${TARGET_NAME} STATIC ${hip_library_SRCS})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(${TARGET_NAME} ${ROCM_PATH}/hip/lib/libhip_hcc.so)
find_fluid_modules(${TARGET_NAME}) find_fluid_modules(${TARGET_NAME})
endif() endif()
if("${hip_library_DEPS}" MATCHES "ARCHIVE_START") if (hip_library_DEPS)
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS). add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
list(REMOVE_ITEM hip_library_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${hip_library_DEPS}) target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
endif() endif()
# cpplint code style # cpplint code style
...@@ -574,72 +571,27 @@ function(hip_library TARGET_NAME) ...@@ -574,72 +571,27 @@ function(hip_library TARGET_NAME)
list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif() endif()
endforeach() endforeach()
check_coverage_opt(${TARGET_NAME} ${hip_library_SRCS})
else(hip_library_SRCS) else(hip_library_SRCS)
if (hip_library_DEPS) if (hip_library_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_DEPS}) list(REMOVE_DUPLICATES hip_library_DEPS)
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:hip_library")
target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
else() else()
message(FATAL "Please specify source file or library in nv_library.") message(FATAL "Please specify source file or library in hip_library.")
endif() endif()
endif(hip_library_SRCS) endif(hip_library_SRCS)
endif() endif()
endfunction(hip_library) endfunction(hip_library)
function(hip_library_ops TARGET_NAME)
if (WITH_ROCM_PLATFORM)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_library_ops "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_library_ops_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
if(_source_files)
list(REMOVE_ITEM _sources ${_source_files})
endif()
if(hip_library_ops_SRCS)
if (hip_library_ops_SHARED OR hip_library_ops_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
else()
add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(${TARGET_NAME} ${ROCM_PATH}/hip/lib/libhip_hcc.so)
find_fluid_modules(${TARGET_NAME})
endif()
if("${hip_library_ops_DEPS}" MATCHES "ARCHIVE_START")
# Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
# WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
target_circle_link_libraries(${TARGET_NAME} ${hip_library_ops_DEPS})
list(REMOVE_ITEM hip_library_ops_DEPS ARCHIVE_START ARCHIVE_END)
else()
target_link_libraries(${TARGET_NAME} ${hip_library_ops_DEPS})
endif()
# cpplint code style
foreach(source_file ${hip_library_ops_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND hip_library_ops_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
else(hip_library_ops_SRCS)
if (hip_library_ops_DEPS)
merge_static_libs(${TARGET_NAME} ${hip_library_ops_DEPS})
else()
message(FATAL "Please specify source file or library in nv_library.")
endif()
endif(hip_library_ops_SRCS)
endif()
endfunction(hip_library_ops)
function(hip_binary TARGET_NAME) function(hip_binary TARGET_NAME)
if (WITH_ROCM_PLATFORM) if (WITH_ROCM)
set(options "") set(options "")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is requried for .cc files
hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS}) hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS})
if(hip_binary_DEPS) if(hip_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS}) target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
...@@ -647,34 +599,29 @@ function(hip_binary TARGET_NAME) ...@@ -647,34 +599,29 @@ function(hip_binary TARGET_NAME)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
endif() endif()
endif() endif()
check_coverage_opt(${TARGET_NAME} ${hip_binary_SRCS})
endfunction(hip_binary) endfunction(hip_binary)
function(hip_test TARGET_NAME) function(hip_test TARGET_NAME)
if (WITH_ROCM_PLATFORM AND WITH_TESTING) # The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation
set(options "") # and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files
# other than *.py are modified.
if (WITH_ROCM AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(_sources ${hip_test_SRCS}) # FindHIP.cmake defined hip_add_executable, HIP_SOURCE_PROPERTY_FORMAT is requried for .cc files
set_source_files_properties(${_sources} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) hip_add_executable(${TARGET_NAME} ${hip_test_SRCS})
HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) # "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE
if(_source_files) target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt)
list(REMOVE_ITEM _sources ${_source_files})
endif()
add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags ${os_dependency_modules}) target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main memory gtest gflags) add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif() endif()
check_coverage_opt(${TARGET_NAME} ${hip_test_SRCS})
endfunction(hip_test) endfunction(hip_test)
function(go_library TARGET_NAME) function(go_library TARGET_NAME)
......
if(NOT WITH_ROCM_PLATFORM) if(NOT WITH_ROCM)
return() return()
endif() endif()
include_directories("${ROCM_PATH}/include") if(NOT DEFINED ENV{ROCM_PATH})
include_directories("${ROCM_PATH}/hip/include") set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
include_directories("${ROCM_PATH}/miopen/include") set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
include_directories("${ROCM_PATH}/hipblas/include") set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
include_directories("${ROCM_PATH}/rocblas/include") else()
include_directories("${ROCM_PATH}/hiprand/include") set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
include_directories("${ROCM_PATH}/rocrand/include") set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
include_directories("${ROCM_PATH}/rccl/include") set(HIP_CLANG_PATH ${ROCM_PATH}/llvm/bin CACHE PATH "Path to which clang has been installed")
include_directories("${ROCM_PATH}/rocthrust/include/")
include_directories("${ROCM_PATH}/hipcub/include/")
include_directories("${ROCM_PATH}/rocprim/include/")
include_directories("${ROCM_PATH}/hipsparse/include/")
include_directories("${ROCM_PATH}/rocsparse/include/")
include_directories("${ROCM_PATH}/rocfft/include/")
set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "")
set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "")
# now default is clang
set(HIP_COMPILER "clang")
list(APPEND EXTERNAL_LIBS "-L${ROCM_PATH}/lib/ -lhip_hcc")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -DEIGEN_USE_HIP -DEIGEN_USE_GPU -D__HIP_NO_HALF_CONVERSIONS__ -std=c++11 --amdgpu-target=gfx906" )
if(WITH_RCCL)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_RCCL")
endif() endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
if(NOT WITH_PYTHON)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_NO_PYTHON") find_package(HIP REQUIRED)
endif(NOT WITH_PYTHON) include_directories(${ROCM_PATH}/include)
message(STATUS "HIP version: ${HIP_VERSION}")
if(WITH_DSO) message(STATUS "HIP_CLANG_PATH: ${HIP_CLANG_PATH}")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_USE_DSO")
endif(WITH_DSO) macro(find_package_and_include PACKAGE_NAME)
find_package("${PACKAGE_NAME}" REQUIRED)
if(WITH_TESTING) include_directories("${ROCM_PATH}/${PACKAGE_NAME}/include")
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_TESTING") message(STATUS "${PACKAGE_NAME} version: ${${PACKAGE_NAME}_VERSION}")
endif(WITH_TESTING) endmacro()
if(WITH_DISTRIBUTE) find_package_and_include(miopen)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_DISTRIBUTE") find_package_and_include(rocblas)
endif(WITH_DISTRIBUTE) find_package_and_include(hiprand)
find_package_and_include(rocrand)
if(WITH_GRPC) find_package_and_include(rccl)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_GRPC") find_package_and_include(rocthrust)
endif(WITH_GRPC) find_package_and_include(hipcub)
find_package_and_include(rocprim)
if(WITH_MKLDNN) find_package_and_include(hipsparse)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DPADDLE_WITH_MKLDNN") find_package_and_include(rocsparse)
endif(WITH_MKLDNN) find_package_and_include(rocfft)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} -DANY_IMPL_ANY_CAST_MOVEABLE") # set CXX flags for HIP
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
if(CMAKE_BUILD_TYPE STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP")
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") set(THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") # define HIP_CXX_FLAGS
list(APPEND HIP_HIPCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL}) list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-pass-failed)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14)
# Extra flags when CMAKE_BUILD_TYPE matches "Debug": debug info level 2 and
# no optimisation go into HIP_CXX_FLAGS, and hipcc additionally gets
# -fdebug-info-for-profiling.
if(CMAKE_BUILD_TYPE MATCHES Debug)
  list(APPEND HIP_CXX_FLAGS -g2 -O0)
  list(APPEND HIP_HIPCC_FLAGS -fdebug-info-for-profiling)
endif()
# Both the HCC and the HIP-Clang flag lists start from the shared
# HIP_CXX_FLAGS. Each then gets -fno-gpu-rdc, asking the compiler to generate
# device code during compilation so the host linker can be used to link,
# plus the gfx906 target.
set(HIP_HCC_FLAGS ${HIP_CXX_FLAGS})
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc --amdgpu-target=gfx906)

set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc --amdgpu-target=gfx906)
if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
set(hip_library_name hip_hcc)
endif() endif()
message(STATUS "HIP library name: ${hip_library_name}")
if("${HIP_COMPILER}" STREQUAL "hcc") # set HIP link libs
if("x${HCC_HOME}" STREQUAL "x") find_library(ROCM_HIPRTC_LIB ${hip_library_name} HINTS ${HIP_PATH}/lib)
set(HCC_HOME "${ROCM_PATH}/hcc") message(STATUS "ROCM_HIPRTC_LIB: ${ROCM_HIPRTC_LIB}")
endif()
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -ldl --amdgpu-target=gfx906 ")
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906")
elseif("${HIP_COMPILER}" STREQUAL "clang")
if("x${HIP_CLANG_PATH}" STREQUAL "x")
set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin")
endif()
#Number of parallel jobs by default is 1
if(NOT DEFINED HIP_CLANG_NUM_PARALLEL_JOBS)
set(HIP_CLANG_NUM_PARALLEL_JOBS 1)
endif()
#Add support for parallel build and link
if(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
check_cxx_compiler_flag("-parallel-jobs=1" HIP_CLANG_SUPPORTS_PARALLEL_JOBS)
endif()
if(HIP_CLANG_NUM_PARALLEL_JOBS GREATER 1)
if(${HIP_CLANG_SUPPORTS_PARALLEL_JOBS})
set(HIP_CLANG_PARALLEL_BUILD_COMPILE_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS} -Wno-format-nonliteral")
set(HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS "-parallel-jobs=${HIP_CLANG_NUM_PARALLEL_JOBS}")
else()
message("clang compiler doesn't support parallel jobs")
endif()
endif()
# Set the CMake Flags to use the HIP-Clang Compiler.
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <CMAKE_SHARED_LIBRARY_CXX_FLAGS> <LANGUAGE_COMPILE_FLAGS> <LINK_FLAGS> <CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <OBJECTS> <LINK_LIBRARIES> --amdgpu-target=gfx906")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> <SONAME_FLAG><TARGET_SONAME> -o <TARGET> <LINK_LIBRARIES> -shared --amdgpu-target=gfx906" )
set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HIP_CLANG_PATH} ${HIP_CLANG_PARALLEL_BUILD_LINK_OPTIONS} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -ldl --amdgpu-target=gfx906")
endif()
...@@ -7,13 +7,16 @@ function(op_library TARGET) ...@@ -7,13 +7,16 @@ function(op_library TARGET)
# for ops. # for ops.
set(cc_srcs) set(cc_srcs)
set(cu_srcs) set(cu_srcs)
set(hip_cu_srcs) set(hip_srcs)
set(miopen_hip_cc_srcs)
set(cu_cc_srcs) set(cu_cc_srcs)
set(hip_cc_srcs)
set(xpu_cc_srcs) set(xpu_cc_srcs)
set(cudnn_cu_cc_srcs) set(cudnn_cu_cc_srcs)
set(miopen_cu_cc_srcs)
set(cudnn_cu_srcs) set(cudnn_cu_srcs)
set(miopen_cu_srcs)
set(CUDNN_FILE) set(CUDNN_FILE)
set(MIOPEN_FILE)
set(mkldnn_cc_srcs) set(mkldnn_cc_srcs)
set(MKLDNN_FILE) set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer common_infer_shape_functions) set(op_common_deps operator op_registry math_function layer common_infer_shape_functions)
...@@ -30,6 +33,7 @@ function(op_library TARGET) ...@@ -30,6 +33,7 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
list(APPEND cc_srcs ${TARGET}.cc) list(APPEND cc_srcs ${TARGET}.cc)
endif() endif()
if(WITH_GPU)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
list(APPEND cu_cc_srcs ${TARGET}.cu.cc) list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
endif() endif()
...@@ -41,10 +45,6 @@ function(op_library TARGET) ...@@ -41,10 +45,6 @@ function(op_library TARGET)
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
endif()
string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}") string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc) list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
...@@ -52,24 +52,25 @@ function(op_library TARGET) ...@@ -52,24 +52,25 @@ function(op_library TARGET)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu) list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
endif() endif()
if(WITH_ROCM_PLATFORM)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu.cc)
list(APPEND hip_cu_cc_srcs ${TARGET}.hip.cu.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) if(WITH_ROCM)
list(APPEND hip_cu_srcs ${TARGET}.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
list(APPEND hip_cc_srcs ${TARGET}.cu.cc)
endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
list(APPEND hip_srcs ${TARGET}.cu)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu set(PART_CUDA_KERNEL_FILES ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
${PART_CUDA_KERNEL_FILES} PARENT_SCOPE) ${PART_CUDA_KERNEL_FILES} PARENT_SCOPE)
list(APPEND hip_cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.hip.cu) list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
endif() endif()
string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}") string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu.cc) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc)
list(APPEND miopen_hip_cu_cc_srcs ${MIOPEN_FILE}.hip.cu.cc) list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc)
endif() endif()
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cu) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu)
list(APPEND miopen_hip_cu_srcs ${MIOPEN_FILE}.hip.cu) list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
endif() endif()
endif() endif()
if(WITH_MKLDNN) if(WITH_MKLDNN)
...@@ -86,20 +87,20 @@ function(op_library TARGET) ...@@ -86,20 +87,20 @@ function(op_library TARGET)
endif() endif()
else() else()
foreach(src ${op_library_SRCS}) foreach(src ${op_library_SRCS})
if (WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu$") if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND hip_cu_srcs ${src}) list(APPEND miopen_cu_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*\\.hip.cu.cc$") elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu$")
list(APPEND hip_cu_cc_srcs ${src}) list(APPEND hip_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND miopen_cu_cc_srcs ${src})
elseif(WITH_ROCM AND ${src} MATCHES ".*\\.cu.cc$")
list(APPEND hip_cc_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu$") elseif(${src} MATCHES ".*_cudnn_op.cu$")
list(APPEND cudnn_cu_srcs ${src}) list(APPEND cudnn_cu_srcs ${src})
elseif (${src} MATCHES ".*\\.cu$") elseif (${src} MATCHES ".*\\.cu$")
list(APPEND cu_srcs ${src}) list(APPEND cu_srcs ${src})
elseif(${src} MATCHES ".*_cudnn_op.cu.cc$") elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
list(APPEND cudnn_cu_cc_srcs ${src}) list(APPEND cudnn_cu_cc_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cc$")
list(APPEND miopen_hip_cc_srcs ${src})
elseif(WITH_ROCM_PLATFORM AND ${src} MATCHES ".*_miopen_op.hip.cu$")
list(APPEND miopen_hip_cu_srcs ${src})
elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$") elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
list(APPEND mkldnn_cc_srcs ${src}) list(APPEND mkldnn_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cu.cc$") elseif(${src} MATCHES ".*\\.cu.cc$")
...@@ -163,8 +164,13 @@ function(op_library TARGET) ...@@ -163,8 +164,13 @@ function(op_library TARGET)
nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
endif() endif()
elseif (WITH_ROCM_PLATFORM) elseif (WITH_ROCM)
hip_library_ops(${TARGET} SRCS ${cc_srcs} ${hip_cu_cc_srcs} ${hip_cu_srcs} ${miopen_hip_cu_cc_srcs} ${miopen_hip_cu_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
list(REMOVE_ITEM hip_srcs "correlation_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps}) ${op_common_deps})
else() else()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
...@@ -227,13 +233,14 @@ function(op_library TARGET) ...@@ -227,13 +233,14 @@ function(op_library TARGET)
# pybind USE_CPU_ONLY_OP # pybind USE_CPU_ONLY_OP
list(LENGTH cu_srcs cu_srcs_len) list(LENGTH cu_srcs cu_srcs_len)
list(LENGTH hip_srcs hip_srcs_len)
list(LENGTH cu_cc_srcs cu_cc_srcs_len) list(LENGTH cu_cc_srcs cu_cc_srcs_len)
list(LENGTH hip_cc_srcs hip_cc_srcs_len)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH hip_cu_srcs hip_cu_srcs_len) list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0) ${hip_srcs_len} EQUAL 0 AND ${hip_cc_srcs_len} EQUAL 0 AND ${miopen_cu_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0)
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
set(pybind_flag 1) set(pybind_flag 1)
endif() endif()
...@@ -248,15 +255,9 @@ function(op_library TARGET) ...@@ -248,15 +255,9 @@ function(op_library TARGET)
endif() endif()
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN # pybind USE_OP_DEVICE_KERNEL for MIOPEN
list(LENGTH miopen_hip_cu_cc_srcs miopen_hip_cu_cc_srcs_len) list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_cc_srcs_len} GREATER 0) if (WITH_ROCM AND ${miopen_cu_cc_srcs_len} GREATER 0)
if(${TARGET} STREQUAL "activation") if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
else() else()
...@@ -264,9 +265,15 @@ function(op_library TARGET) ...@@ -264,9 +265,15 @@ function(op_library TARGET)
endif() endif()
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for CUDNN
list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
if (WITH_GPU AND ${cudnn_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for MIOPEN # pybind USE_OP_DEVICE_KERNEL for MIOPEN
list(LENGTH miopen_hip_cu_srcs miopen_hip_cu_srcs_len) list(LENGTH miopen_cu_srcs miopen_cu_srcs_len)
if (WITH_ROCM_PLATFORM AND ${miopen_hip_cu_srcs_len} GREATER 0) if (WITH_ROCM AND ${miopen_cu_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
endif() endif()
......
# Locate the RCCL header and report the RCCL version it declares.
#
# Inputs : WITH_ROCM, WITH_RCCL, ROCM_PATH, and optionally the RCCL_ROOT
#          cache entry / environment variable.
# Outputs: RCCL_ROOT (cache), RCCL_INCLUDE_DIR (cache, set by find_path),
#          RCCL_VERSION (the integer taken from NCCL_VERSION_CODE in rccl.h).
if(NOT WITH_ROCM)
    return()
endif()

# Now we don't support RCCL on windows
if(WIN32)
    return()
endif()

if(WITH_RCCL)
    set(RCCL_ROOT ${ROCM_PATH}/rccl CACHE PATH "RCCL ROOT")
    # Only the RCCL_ROOT locations are searched (NO_DEFAULT_PATH).
    find_path(RCCL_INCLUDE_DIR rccl.h
        PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include
        $ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include
        NO_DEFAULT_PATH
    )

    # find_path leaves RCCL_INCLUDE_DIR as "<var>-NOTFOUND" on failure; stop
    # here with an actionable message instead of letting file(READ) fail on a
    # bogus path.
    if(NOT RCCL_INCLUDE_DIR)
        message(FATAL_ERROR
            "WITH_RCCL is ON but rccl.h was not found under RCCL_ROOT "
            "(${RCCL_ROOT}) or \$ENV{RCCL_ROOT}. "
            "Install RCCL or set RCCL_ROOT to its install prefix.")
    endif()

    # Quote the path so an install prefix containing spaces is read correctly.
    file(READ "${RCCL_INCLUDE_DIR}/rccl.h" RCCL_VERSION_FILE_CONTENTS)

    # rccl.h exposes its version through the NCCL_VERSION_CODE macro.
    string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)"
        RCCL_VERSION "${RCCL_VERSION_FILE_CONTENTS}")
    string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1"
        RCCL_VERSION "${RCCL_VERSION}")

    # 2604 for ROCM3.5 and 2708 for ROCM 3.9
    message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. "
        "Current RCCL version is v${RCCL_VERSION}. ")
endif()
...@@ -6,6 +6,8 @@ set(PY_FILES paddle/__init__.py ...@@ -6,6 +6,8 @@ set(PY_FILES paddle/__init__.py
if(WITH_GPU) if(WITH_GPU)
SET(PACKAGE_NAME "paddlepaddle-gpu") SET(PACKAGE_NAME "paddlepaddle-gpu")
elseif(WITH_ROCM)
SET(PACKAGE_NAME "paddlepaddle-rocm")
else() else()
SET(PACKAGE_NAME "paddlepaddle") SET(PACKAGE_NAME "paddlepaddle")
endif() endif()
......
if (WITH_GPU) if(WITH_ROCM)
hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
elseif(WITH_GPU)
nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared) nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
else() else()
cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared) cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)
......
...@@ -5,7 +5,7 @@ set(dist_ENVS http_proxy="" https_proxy="") ...@@ -5,7 +5,7 @@ set(dist_ENVS http_proxy="" https_proxy="")
file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py") file(GLOB DIST_TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_dist_*.py")
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_op")
if(NOT WITH_NCCL) if ((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_dgc_nccl")
endif() endif()
string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}") string(REPLACE ".py" "" DIST_TEST_OPS "${DIST_TEST_OPS}")
...@@ -63,7 +63,7 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS}) ...@@ -63,7 +63,7 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
list(REMOVE_ITEM TEST_OPS ${TEST_OP}) list(REMOVE_ITEM TEST_OPS ${TEST_OP})
endforeach() endforeach()
if(NOT WITH_GPU OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op) LIST(REMOVE_ITEM TEST_OPS test_c_comm_init_all_op)
LIST(REMOVE_ITEM TEST_OPS test_allgather) LIST(REMOVE_ITEM TEST_OPS test_allgather)
LIST(REMOVE_ITEM TEST_OPS test_allreduce) LIST(REMOVE_ITEM TEST_OPS test_allreduce)
...@@ -146,7 +146,7 @@ if(APPLE OR WIN32) ...@@ -146,7 +146,7 @@ if(APPLE OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_fleet_metric) LIST(REMOVE_ITEM TEST_OPS test_fleet_metric)
endif() endif()
if (NOT ${WITH_GPU}) if ((NOT WITH_GPU) AND (NOT WITH_ROCM))
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
LIST(REMOVE_ITEM TEST_OPS test_rank_attention_op) # TODO(shenliang03): rank_attention_op support CPU device in future LIST(REMOVE_ITEM TEST_OPS test_rank_attention_op) # TODO(shenliang03): rank_attention_op support CPU device in future
LIST(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future LIST(REMOVE_ITEM TEST_OPS test_batch_fc_op) # TODO(shenliang03): batch_fc_op support CPU device in future
...@@ -159,9 +159,10 @@ if (NOT ${WITH_GPU}) ...@@ -159,9 +159,10 @@ if (NOT ${WITH_GPU})
LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm) LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm)
LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision) LIST(REMOVE_ITEM TEST_OPS test_imperative_auto_mixed_precision)
LIST(REMOVE_ITEM TEST_OPS test_fleet_base_single) LIST(REMOVE_ITEM TEST_OPS test_fleet_base_single)
elseif(WITH_GPU)
elseif(${CUDNN_VERSION} VERSION_LESS 7100) if (${CUDNN_VERSION} VERSION_LESS 7100)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif()
endif() endif()
if (WITH_NCCL) if (WITH_NCCL)
...@@ -172,11 +173,11 @@ if (WITH_NCCL) ...@@ -172,11 +173,11 @@ if (WITH_NCCL)
endif() endif()
endif() endif()
if(NOT WITH_NCCL) if ((NOT WITH_NCCL) AND (NOT WITH_RCCL))
list(REMOVE_ITEM TEST_OPS test_imperative_group) list(REMOVE_ITEM TEST_OPS test_imperative_group)
endif() endif()
if(NOT WITH_GPU OR WIN32) if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_boxps) LIST(REMOVE_ITEM TEST_OPS test_boxps)
endif() endif()
list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290
...@@ -213,7 +214,7 @@ endif() ...@@ -213,7 +214,7 @@ endif()
list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash) list(REMOVE_ITEM TEST_OPS test_fleet_pyramid_hash)
if(WITH_GPU OR NOT WITH_MKLML) if((WITH_ROCM OR WITH_GPU) OR NOT WITH_MKLML)
# matmul with multiple heads need MKL support # matmul with multiple heads need MKL support
LIST(REMOVE_ITEM TEST_OPS test_matmul_op_with_head) LIST(REMOVE_ITEM TEST_OPS test_matmul_op_with_head)
endif() endif()
...@@ -510,7 +511,7 @@ if(WITH_DISTRIBUTE) ...@@ -510,7 +511,7 @@ if(WITH_DISTRIBUTE)
list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_se_resnext_dgc")
endif() endif()
if(NOT APPLE) if(NOT APPLE)
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR}) bash_test_modules(test_c_comm_init_op START_BASH test_c_comm_init_op.sh ENVS PADDLE_BINARY_DIR=${PADDLE_BINARY_DIR})
py_test_modules(test_launch_coverage MODULES test_launch_coverage) py_test_modules(test_launch_coverage MODULES test_launch_coverage)
endif() endif()
...@@ -667,7 +668,7 @@ if (WITH_DISTRIBUTE) ...@@ -667,7 +668,7 @@ if (WITH_DISTRIBUTE)
endif() endif()
if (WITH_DISTRIBUTE AND NOT APPLE) if (WITH_DISTRIBUTE AND NOT APPLE)
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120) set_tests_properties(test_c_comm_init_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120) set_tests_properties(test_dist_mnist_gradient_merge PROPERTIES TIMEOUT 120)
endif() endif()
...@@ -821,7 +822,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL) ...@@ -821,7 +822,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 120)
endif() endif()
endif() endif()
if(WITH_GPU AND NOT WIN32) if((WITH_ROCM OR WITH_GPU) AND NOT WIN32)
set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allgather_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_broadcast_api PROPERTIES TIMEOUT 120)
set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120) set_tests_properties(test_collective_allreduce_api PROPERTIES TIMEOUT 120)
...@@ -851,7 +852,7 @@ if(WITH_GPU AND NOT WIN32) ...@@ -851,7 +852,7 @@ if(WITH_GPU AND NOT WIN32)
test_collective_allgather_api test_collective_allgather_api
PROPERTIES LABELS "RUN_TYPE=DIST") PROPERTIES LABELS "RUN_TYPE=DIST")
endif() endif()
if(WITH_GPU) if(WITH_GPU OR WITH_ROCM)
set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_auto_mixed_precision PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_dygraph_sync_batch_norm PROPERTIES TIMEOUT 120)
set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120) set_tests_properties(test_rank_attention_op PROPERTIES TIMEOUT 120)
......
file(GLOB TEST_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") file(GLOB TEST_IR_PASSES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}") string(REPLACE ".py" "" TEST_IR_PASSES "${TEST_IR_PASSES}")
if(NOT WITH_GPU OR WIN32 OR APPLE) if(((NOT WITH_GPU) AND (NOT WITH_ROCM)) OR WIN32 OR APPLE)
LIST(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass) LIST(REMOVE_ITEM TEST_IR_PASSES test_ir_fusion_group_pass)
endif() endif()
......
...@@ -55,7 +55,7 @@ function test_launch_ps_heter(){ ...@@ -55,7 +55,7 @@ function test_launch_ps_heter(){
fi fi
} }
if [[ ${WITH_GPU} == "OFF" ]]; then if [[ ${WITH_GPU} == "OFF" && ("${WITH_ROCM}x" == "x" || ${WITH_ROCM} == "OFF") ]]; then
echo "in cpu test mode" echo "in cpu test mode"
test_launch_ps test_launch_ps
exit 0 exit 0
......
...@@ -19,7 +19,7 @@ function(py_dist_test TARGET_NAME) ...@@ -19,7 +19,7 @@ function(py_dist_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS ENVS) set(multiValueArgs SRCS DEPS ARGS ENVS)
cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_dist_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WITH_COVERAGE AND WITH_GPU AND WITH_NCCL AND NOT WIN32) if(WITH_COVERAGE AND (WITH_GPU OR WITH_ROCM) AND (WITH_NCCL OR WITH_RCCL) AND NOT WIN32)
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1 FLAGS_cpu_deterministic=true NCCL_P2P_DISABLE=1 NCCL_SHM_DISABLE=1
......
...@@ -401,7 +401,7 @@ headers = ( ...@@ -401,7 +401,7 @@ headers = (
if '${WITH_MKLDNN}' == 'ON': if '${WITH_MKLDNN}' == 'ON':
headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn headers += list(find_files('*', '${MKLDNN_INSTALL_DIR}/include')) # mkldnn
if '${WITH_GPU}' == 'ON': if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
headers += list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage headers += list(find_files('*.pb', '${cudaerror_INCLUDE_DIR}')) # errorMessage.pb for errormessage
class InstallCommand(InstallCommandBase): class InstallCommand(InstallCommandBase):
...@@ -462,7 +462,7 @@ class InstallHeaders(Command): ...@@ -462,7 +462,7 @@ class InstallHeaders(Command):
def run(self): def run(self):
# only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
if os.name == 'nt' or sys.platform == 'darwin': if os.name == 'nt' or sys.platform == 'darwin':
if '${WITH_GPU}' == 'ON': if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb') self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
return return
hdrs = self.distribution.headers hdrs = self.distribution.headers
......
# A image for building paddle binaries # A image for building paddle binaries
# Use rocm-terminal base image for both rocm environment # Use rocm-terminal base image for both rocm environment
# When you modify it, please be aware of rocm version # When you modify it, please be aware of rocm version
FROM ubuntu:18.04 #
# Build: ROCM 3.5.1
# cd Paddle/tools/dockerfile
# docker build -f Dockerfile.rocm \
# --build-arg ROCM_VERSION=3.5.1 \
# --build-arg CENTOS_VERSION=7.7.1908 \
# -t paddlepaddle/paddle-centos-rocm35-dev:latest .
#
# Build: ROCM 3.9.1
# cd Paddle/tools/dockerfile
# docker build -f Dockerfile.rocm \
# --build-arg ROCM_VERSION=3.9.1 \
# --build-arg CENTOS_VERSION=7.8.2003 \
# -t paddlepaddle/paddle-centos-rocm39-dev:latest .
#
# Run: ROCM 3.5.1
# docker run -it --device=/dev/kfd --device=/dev/dri \
# --security-opt seccomp=unconfined --group-add video \
# paddlepaddle/paddle-centos-rocm35-dev:latest /bin/bash
ARG CENTOS_VERSION
FROM centos:${CENTOS_VERSION}
ARG CENTOS_VERSION
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com> MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
# ENV variables ENV LC_ALL en_US.UTF-8
ARG WITH_GPU ENV LANG en_US.UTF-8
ARG WITH_ROCM_PLATFORM ENV LANGUAGE en_US.UTF-8
ENV WITH_GPU=${WITH_GPU:-OFF} RUN yum install -y epel-release deltarpm sudo openssh-server openssl-devel gettext-devel sqlite-devel \
ENV WITH_ROCM_PLATFORM=${WITH_ROCM_PLATFORM:-ON} zlib-devel openssl-devel pcre-devel vim tk-devel tkinter libtool xz graphviz wget curl-devel \
make bzip2 git patch unzip bison yasm diffutils automake which file kernel-headers kernel-devel
ENV HOME /root
ENV DEBIAN_FRONTEND=noninteractive # Install devtoolset-7 for ROCM 3.5/3.9
RUN yum install -y yum-utils centos-release-scl && \
# Add bash enhancements yum-config-manager --enable rhel-server-rhscl-7-rpms && \
COPY paddle/scripts/docker/root/ /root/ yum-config-manager --enable rhel-7-server-rpms && \
yum-config-manager --enable rhel-7-server-optional-rpms && \
# Update Environment INSTALL_PKGS="devtoolset-7-binutils devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-gdb" && \
RUN apt-get update && apt-get upgrade -y yum install -y --setopt=tsflags=nodocs $INSTALL_PKGS && \
RUN apt-get update && apt-get install -y apt-utils sudo rpm -V $INSTALL_PKGS && \
yum -y clean all --enablerepo='*'
# Update Timezone ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
RUN apt install tzdata && \ ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo 'Asia/Shanghai' > /etc/timezone && \ RUN echo "source scl_source enable devtoolset-7" > "/etc/profile.d/devtoolset-7.sh"
dpkg-reconfigure -f noninteractive tzdata
# cmake 3.16.0
# Location
RUN apt-get update && apt-get install -y locales && locale-gen en_US.UTF-8
ENV LANG="en_US.UTF-8"
ENV LANGUAGE="en_US.UTF-8"
ENV LC_ALL="en_US.UTF-8"
RUN apt-get update && \
apt-get install -y make cmake build-essential libssl-dev zlib1g-dev libbz2-dev \
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
xz-utils tk-dev libffi-dev liblzma-dev openmpi-bin openmpi-doc libopenmpi-dev \
git vim texinfo patchelf openssl unzip pciutils net-tools python-pip python-dev \
python-opencv python-matplotlib
# Downgrade gcc&&g++
WORKDIR /usr/bin
COPY tools/dockerfile/build_scripts /build_scripts
RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++ && \
ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc && \
ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ && \
ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc && \
ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++
ENV PATH=/usr/local/gcc-8.2/bin:$PATH
# install cmake
WORKDIR /opt WORKDIR /opt
RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && tar -zxf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && \
ENV PATH=/opt/cmake-3.16.0-Linux-x86_64/bin:$PATH tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz && \
RUN echo "export PATH=/opt/cmake-3.16.0-Linux-x86_64/bin:\${PATH}" >> ~/.bashrc mv cmake-3.16.0-Linux-x86_64 cmake-3.16
ENV PATH=/opt/cmake-3.16/bin:${PATH}
# ROCM
ARG ROCM_VERSION
RUN yum install -y kmod wget openblas-devel epel-release
RUN echo "[ROCm]" > /etc/yum.repos.d/rocm.repo && \
echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo && \
echo "baseurl=http://repo.radeon.com/rocm/yum/${ROCM_VERSION}" >> /etc/yum.repos.d/rocm.repo && \
echo "enabled=1" >> /etc/yum.repos.d/rocm.repo && \
echo "gpgcheck=0" >> /etc/yum.repos.d/rocm.repo
RUN yum install -y rocm-dev rocm-utils rocfft miopen-hip rocblas hipsparse rocrand rccl hipcub rocthrust rocprofiler-dev roctracer-dev
# fix rocthrust
RUN sed -i '21 a #include <thrust/system/hip/config.h>' /opt/rocm/include/thrust/system/hip/detail/error.inl
# git 2.17.1
RUN cd /opt && wget -q https://paddle-ci.gz.bcebos.com/git-2.17.1.tar.gz && \
tar -xvf git-2.17.1.tar.gz && \
cd git-2.17.1 && \
./configure --with-openssl --prefix=/usr/local && \
make -j8 && make install && \
cd .. && rm -rf git-2.17.1.tar.gz && rm -rf git-2.17.1
ENV GOROOT=/usr/local/go
ENV GOPATH=/root/gopath
ENV PATH=${GOROOT}/bin:${GOPATH}/bin:${PATH}
# Install Go and glide # go 1.8.1
RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.8.1.linux-amd64.tar.gz | \ RUN wget --no-check-certificate -qO- https://storage.googleapis.com/golang/go1.8.1.linux-amd64.tar.gz | \
tar -xz -C /usr/local && \ tar -xz -C /usr/local && \
mkdir /root/gopath && \ mkdir /root/gopath && \
mkdir /root/gopath/bin && \ mkdir /root/gopath/bin && \
mkdir /root/gopath/src mkdir /root/gopath/src
ENV GOROOT=/usr/local/go GOPATH=/root/gopath
# should not be in the same line with GOROOT definition, otherwise docker build could not find GOROOT. # protobuf 3.6.1
ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin RUN cd /opt && wget -q --no-check-certificate https://paddle-ci.cdn.bcebos.com/protobuf-cpp-3.6.1.tar.gz && \
RUN echo "GOROOT=/usr/local/go" >> ~/.bashrc && \ tar xzf protobuf-cpp-3.6.1.tar.gz && \
echo "GOPATH=/root/gopath" >> ~/.bashrc && \ cd protobuf-3.6.1 && ./configure && make -j4 && make install && \
echo "export PATH=\${PATH}:\${GOROOT}/bin:\${GOPATH}/bin" >> ~/.bashrc cd .. && rm -f protobuf-cpp-3.6.1.tar.gz && rm -rf protobuf-3.6.1
# install glide # conda
RUN curl -s -q https://glide.sh/get | sh RUN cd /opt && wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && chmod +x Miniconda3-latest-Linux-x86_64.sh
RUN mkdir /opt/conda && ./Miniconda3-latest-Linux-x86_64.sh -b -f -p "/opt/conda" && rm -rf Miniconda3-latest-Linux-x86_64.sh
# git credential to skip password typing ENV PATH=/opt/conda/bin:${PATH}
RUN git config --global credential.helper store RUN conda init bash && \
conda create -n python2.7 python=2.7 && \
# Fix locales to en_US.UTF-8 conda create -n python3.7 python=3.7
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# install paddle requirement
RUN apt-get update && \ RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/requirements.txt -O /root/requirements.txt
apt-get install -y python2.7 python2.7-dev \ RUN /opt/conda/bin/pip install -r /root/requirements.txt && \
python3.6 python3.6-dev \ /opt/conda/envs/python2.7/bin/pip install -r /root/requirements.txt && \
python3.7 python3.7-dev \ /opt/conda/envs/python3.7/bin/pip install -r /root/requirements.txt && \
python3.8 python3.8-dev \ rm -rf /root/requirements.txt
python3-distutils && \
curl https://bootstrap.pypa.io/get-pip.py -o - | python2.7 && \ RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/unittest_py/requirements.txt -O /root/requirements.txt
curl https://bootstrap.pypa.io/get-pip.py -o - | python3.6 && \ RUN /opt/conda/bin/pip install -r /root/requirements.txt && \
curl https://bootstrap.pypa.io/get-pip.py -o - | python3.7 && \ /opt/conda/envs/python2.7/bin/pip install -r /root/requirements.txt && \
curl https://bootstrap.pypa.io/get-pip.py -o - | python3.8 && \ /opt/conda/envs/python3.7/bin/pip install -r /root/requirements.txt && \
rm /usr/bin/python && ln -s /usr/bin/python2.7 /usr/bin/python && \ rm -rf /root/requirements.txt
rm /usr/bin/python3 && ln -s /usr/bin/python3.7 /usr/bin/python3 && \
rm /usr/local/bin/pip && ln -s /usr/local/bin/pip2.7 /usr/local/bin/pip && \ # configure ssh
rm /usr/local/bin/pip3 && ln -s /usr/local/bin/pip3.7 /usr/local/bin/pip3 RUN sed -i "s/^#PermitRootLogin/PermitRootLogin/" /etc/ssh/sshd_config && \
sed -i "s/^#PubkeyAuthentication/PubkeyAuthentication/" /etc/ssh/sshd_config && \
RUN pip3 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ sed -i "s/^#RSAAuthentication/RSAAuthentication/" /etc/ssh/sshd_config
pip3 --no-cache-dir install ipykernel==4.6.0 wheel && \
pip3.6 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ # swig 2.0.12
pip3.6 --no-cache-dir install ipykernel==4.6.0 wheel && \ RUN wget -O /opt/swig-2.0.12.tar.gz https://sourceforge.net/projects/swig/files/swig/swig-2.0.12/swig-2.0.12.tar.gz/download && \
pip3.8 --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \ cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && \
pip3.8 --no-cache-dir install ipykernel==4.6.0 wheel && \ cd /opt && rm swig-2.0.12.tar.gz && rm -rf swig-2.0.12
pip --no-cache-dir install pre-commit==1.10.4 ipython==5.3.0 && \
pip --no-cache-dir install ipykernel==4.6.0 wheel
#For docstring checker
RUN pip3 --no-cache-dir install pylint pytest astroid isort && \
pip3.6 --no-cache-dir install pylint pytest astroid isort && \
pip3.8 --no-cache-dir install pylint pytest astroid isort && \
pip --no-cache-dir install pylint pytest astroid isort
COPY ./python/requirements.txt /root/
RUN pip3 --no-cache-dir install -r /root/requirements.txt && \
pip3.6 --no-cache-dir install -r /root/requirements.txt && \
pip3.8 --no-cache-dir install -r /root/requirements.txt && \
pip --no-cache-dir install -r /root/requirements.txt
RUN apt-get install libprotobuf-dev -y
# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN apt-get update && apt-get install -y openssh-server
RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && \
sed -ri 's/^#?PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && \
sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
CMD source ~/.bashrc
# ccache 3.7.9 # ccache 3.7.9
RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \ RUN cd /opt && wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \ tar xf ccache-3.7.9.tar.gz && mkdir /usr/local/ccache-3.7.9 && cd ccache-3.7.9 && \
./configure -prefix=/usr/local/ccache-3.7.9 && \ ./configure -prefix=/usr/local/ccache-3.7.9 && \
make -j8 && make install && \ make -j8 && make install && \
ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache && \
cd .. && rm -rf ccache-3.7.9.tar.gz && rm -rf ccache-3.7.9
# Install ROCM Package
RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
RUN echo 'deb [arch=amd64] https://repo.radeon.com/rocm/apt/<rocm_repo_version>/ xenial main' | tee /etc/apt/sources.list.d/rocm.list
RUN apt-get update && apt install rocm-dkms -y
# Install ROCM Libs
RUN apt-get update && apt-get install rocblas miopen-hip rocrand rccl -y
# rocPRIM
RUN wget https://github.com/ROCmSoftwarePlatform/rocPRIM/archive/rocm-<rocprim_version>.tar.gz && tar zxf rocm-<rocprim_version>.tar.gz && rm -rf rocm-<rocprim_version>.tar.gz && \
cd rocPRIM-rocm-<rocprim_version> && mkdir build && cd build && \
CXX=/opt/rocm/hip/bin/hipcc cmake .. && \
make -j8 && make install && \
cd .. && rm -rf rocPRIM-rocm-<rocprim_version>/
# rocThrust
RUN wget https://github.com/ROCmSoftwarePlatform/rocThrust/archive/rocm-<rocthrust_version>.tar.gz && tar zxf rocm-<rocthrust_version>.tar.gz && rm -rf rocm-<rocthrust_version>.tar.gz && \
cd rocThrust-rocm-<rocthrust_version> && mkdir build && cd build && \
CXX=/opt/rocm/hip/bin/hipcc cmake .. && \
make -j8 && make install && \
cd .. && rm -rf rocThrust-rocm-<rocthrust_version>/
# hipCUB
RUN wget https://github.com/ROCmSoftwarePlatform/hipCUB/archive/rocm-<hipcub_version>.tar.gz && tar zxf rocm-<hipcub_version>.tar.gz && rm -rf rocm-<hipcub_version>.tar.gz && \
cd hipCUB-rocm-<hipcub_version> && mkdir build && cd build && \
CXX=/opt/rocm/hip/bin/hipcc cmake .. && \
make -j8 && make install && \
cd .. && rm -rf hipCUB-rocm-<hipcub_version>/
ENV PATH=/opt/rocm/bin:$PATH
RUN echo "export PATH=/opt/rocm/bin:\${PATH}" >> ~/.bashrc
EXPOSE 22 EXPOSE 22
...@@ -65,7 +65,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \ ...@@ -65,7 +65,7 @@ yum -y install bzip2 make git patch unzip bison yasm diffutils \
wget -q https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz && tar xzf cmake-3.16.0.tar.gz && \ wget -q https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz && tar xzf cmake-3.16.0.tar.gz && \
cd cmake-3.16.0 && ./bootstrap && \ cd cmake-3.16.0 && ./bootstrap && \
make -j8 && make install && cd .. && rm cmake-3.16.0.tar.gz make -j8 && make install && cd .. && rm cmake-3.16.0.tar.gz && rm -rf cmake-3.16.0
# Install newest autoconf # Install newest autoconf
build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
...@@ -160,3 +160,4 @@ LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}" ...@@ -160,3 +160,4 @@ LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}"
wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz wget https://ftp.gnu.org/gnu/binutils/binutils-2.27.tar.gz
tar xzf binutils-2.27.tar.gz && cd binutils-2.27 tar xzf binutils-2.27.tar.gz && cd binutils-2.27
./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install ./configure --prefix=/opt/rh/devtoolset-2/root/usr/ --enable-64-bit-archive && make -j `nproc` && make install
cd .. && rm binutils-2.27.tar.gz && rm -rf binutils-2.27
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
function rocm() {
  # Generate one concrete dockerfile per supported ROCm release by filling in
  # the version placeholders of the Dockerfile.rocm template.
  #
  # Each spec is: <file tag> <repo version> <rocPRIM> <rocThrust> <hipCUB>
  # ROCM 3.3 is deliberately not generated - not work as rocthrust build fail
  # without AMD GPU.
  local spec tag repo_ver rocprim_ver rocthrust_ver hipcub_ver
  for spec in \
      "35 3.5.1 3.5.1 3.5.0 3.5.0" \
      "39 3.9.1 3.9.0 3.9.0 3.9.0"; do
    read -r tag repo_ver rocprim_ver rocthrust_ver hipcub_ver <<< "${spec}"
    # All four placeholders are substituted in a single pass over the template.
    sed -e "s#<rocm_repo_version>#${repo_ver}#g" \
        -e "s#<rocprim_version>#${rocprim_ver}#g" \
        -e "s#<rocthrust_version>#${rocthrust_ver}#g" \
        -e "s#<hipcub_version>#${hipcub_ver}#g" \
        Dockerfile.rocm >"test/rocm${tag}.dockerfile"
  done
}
function main() {
  # The generated dockerfiles are written into ./test; create it on first use.
  [ -d "test" ] || mkdir test
  rocm
}

# Script entry point.
main
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册