diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ec65bac84b0b0d89123473a8941f80c90f1b339..1e11f86d0ee836f65e69c8398fb26c3b6a1070f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,6 +36,7 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) +option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND}) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) @@ -180,6 +181,11 @@ if(WITH_GPU) include(cuda) endif(WITH_GPU) +if(WITH_AMD_GPU) + find_package(HIP) + include(hip) +endif(WITH_AMD_GPU) + if(WITH_MKLML) list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) endif() diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 0f76f55270592c5625a9624b33f4c0f82efdc627..f726405c4773994f6ca6509e5218750805b03995 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -57,11 +57,7 @@ if(NOT WITH_GOLANG) add_definitions(-DPADDLE_WITHOUT_GOLANG) endif(NOT WITH_GOLANG) -if(NOT WITH_GPU) - add_definitions(-DHPPL_STUB_FUNC) - - list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) -else() +if(WITH_GPU) add_definitions(-DPADDLE_WITH_CUDA) FIND_PACKAGE(CUDA REQUIRED) @@ -84,7 +80,14 @@ else() # Include cuda and cudnn include_directories(${CUDNN_INCLUDE_DIR}) include_directories(${CUDA_TOOLKIT_INCLUDE}) -endif(NOT WITH_GPU) +elseif(WITH_AMD_GPU) + add_definitions(-DPADDLE_WITH_HIP) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__") +else() + add_definitions(-DHPPL_STUB_FUNC) + list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu) +endif() if (WITH_MKLML AND MKLML_IOMP_LIB) message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}") diff --git a/cmake/external/eigen.cmake b/cmake/external/eigen.cmake index 6a701e076c95372f903a09d35d4208ee73bd584c..73d70c34dce8bedd9e62519c207e5be3dcf7dba3 100644 --- a/cmake/external/eigen.cmake +++ b/cmake/external/eigen.cmake @@ -4,18 +4,33 @@ SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3) SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3) INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR}) -ExternalProject_Add( - extern_eigen3 - ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" - GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10 - PREFIX ${EIGEN_SOURCE_DIR} - UPDATE_COMMAND "" - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" - TEST_COMMAND "" -) +if(WITH_AMD_GPU) + ExternalProject_Add( + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/sabreshao/hipeigen.git" + GIT_TAG 0cba03ff9f8f9f70bbd92ac5857b031aa8fed6f9 + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) +else() + ExternalProject_Add( + extern_eigen3 + ${EXTERNAL_PROJECT_LOG_ARGS} + GIT_REPOSITORY "https://github.com/RLovelett/eigen.git" + GIT_TAG 70661066beef694cadf6c304d0d07e0758825c10 + PREFIX ${EIGEN_SOURCE_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND "" + BUILD_COMMAND "" + INSTALL_COMMAND "" + TEST_COMMAND "" + ) +endif() if (${CMAKE_VERSION} VERSION_LESS "3.3.0") set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 471e3929069d0d28105404b4f0f6baa303faf0e0..c749c97f13649fe8432091414b56f7d0ea8ace8b 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -317,6 +317,82 @@ function(nv_test TARGET_NAME) endif() endfunction(nv_test) +function(hip_library TARGET_NAME) + if (WITH_AMD_GPU) + set(options STATIC static SHARED shared) + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(_sources ${hip_library_SRCS}) + HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + if(hip_library_SRCS) + if (hip_library_SHARED OR hip_library_shared) # build *.so + add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) + else() + add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX) + target_link_libraries(${TARGET_NAME} /opt/rocm/hip/lib/libhip_hcc.so /opt/rocm/hip/lib/libhip_device.a) + find_fluid_modules(${TARGET_NAME}) + endif() + if (hip_library_DEPS) + add_dependencies(${TARGET_NAME} ${hip_library_DEPS}) + target_link_libraries(${TARGET_NAME} ${hip_library_DEPS}) + endif() + # cpplint code style + foreach(source_file ${hip_library_SRCS}) + string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file}) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h) + endif() + endforeach() + add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS}) + else(hip_library_SRCS) + if (hip_library_DEPS) + merge_static_libs(${TARGET_NAME} ${hip_library_DEPS}) + else() + message(FATAL "Please specify source file or library in nv_library.") + endif() + endif(hip_library_SRCS) + endif() +endfunction(hip_library) + +function(hip_binary TARGET_NAME) + if (WITH_AMD_GPU) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS}) + if(hip_binary_DEPS) + target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS}) + add_dependencies(${TARGET_NAME} ${hip_binary_DEPS}) + endif() + endif() +endfunction(hip_binary) + +function(hip_test TARGET_NAME) + if (WITH_AMD_GPU AND WITH_TESTING) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS DEPS) + cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(_sources ${hip_test_SRCS}) + HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) + if(_source_files) + list(REMOVE_ITEM _sources ${_source_files}) + endif() + add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources}) + set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP) + target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags) + add_test(${TARGET_NAME} ${TARGET_NAME}) + endif() +endfunction(hip_test) + function(go_library TARGET_NAME) set(options STATIC static SHARED shared) set(oneValueArgs "") diff --git a/cmake/hip.cmake b/cmake/hip.cmake new file mode 100644 index 0000000000000000000000000000000000000000..bfe491bd6b7602959d3dd60bd06c67993593cc9b --- /dev/null +++ b/cmake/hip.cmake @@ -0,0 +1,43 @@ +if(NOT WITH_AMD_GPU) + return() +endif() + +include_directories("/opt/rocm/include") +include_directories("/opt/rocm/hipblas/include") +include_directories("/opt/rocm/hiprand/include") +include_directories("/opt/rocm/rocrand/include") +include_directories("/opt/rocm/rccl/include") +include_directories("/opt/rocm/thrust") + +list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc") + +set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++14" ) + +if(WITH_DSO) + set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_USE_DSO") +endif(WITH_DSO) + +if(WITH_DOUBLE) + set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_TYPE_DOUBLE") +endif(WITH_DOUBLE) + +if(WITH_TESTING) + set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_TESTING") +endif(WITH_TESTING) + +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG}) +elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") + list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) +elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel") + list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL}) +endif() + +if("x${HCC_HOME}" STREQUAL "x") + set(HCC_HOME "/opt/rocm/hcc") +endif() + +set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") +set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o -shared") +set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o -shared") + diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index d30124d4a3b89b802a4abaae07a33b76526f163d..c0245379ac481d922ee936c75bfc6b63a81be5fd 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -12,6 +12,8 @@ function(op_library TARGET) set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE) set(cc_srcs) set(cu_srcs) + set(hip_cu_srcs) + set(miopen_hip_cc_srcs) set(cu_cc_srcs) set(cudnn_cu_cc_srcs) set(CUDNN_FILE) @@ -36,10 +38,19 @@ function(op_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu) endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) + list(APPEND hip_cu_srcs ${TARGET}.hip.cu) + endif() string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}") if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc) list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc) endif() + if(WITH_AMD_GPU) + string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}") + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cc) + list(APPEND miopen_hip_cc_srcs ${MIOPEN_FILE}.hip.cc) + endif() + endif() if(WITH_MKLDNN) string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}") if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_FILE}.cc) @@ -48,10 +59,14 @@ function(op_library TARGET) endif() else() foreach(src ${op_library_SRCS}) - if (${src} MATCHES ".*\\.cu$") + if (${src} MATCHES ".*\\.hip.cu$") + list(APPEND hip_cu_srcs ${src}) + elseif (${src} MATCHES ".*\\.cu$") list(APPEND cu_srcs ${src}) elseif(${src} MATCHES ".*_cudnn_op.cu.cc$") list(APPEND cudnn_cu_cc_srcs ${src}) + elseif(WITH_AMD_GPU AND ${src} MATCHES ".*_miopen_op.hip.cc$") + list(APPEND miopen_hip_cc_srcs ${src}) elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$") list(APPEND mkldnn_cc_srcs ${src}) elseif(${src} MATCHES ".*\\.cu.cc$") @@ -76,6 +91,9 @@ function(op_library TARGET) if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) + elseif (WITH_AMD_GPU) + hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} + ${op_common_deps}) else() cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps}) @@ -88,7 +106,7 @@ function(op_library TARGET) endif() endforeach() - # The registration of USE_OP, please refer to paddle/framework/op_registry.h. + # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h. # Note that it's enough to just adding one operator to pybind in a *_op.cc file. # And for detail pybind information, please see generated paddle/pybind/pybind.h. file(READ ${TARGET}.cc TARGET_CONTENT) @@ -114,7 +132,10 @@ function(op_library TARGET) list(LENGTH cu_srcs cu_srcs_len) list(LENGTH cu_cc_srcs cu_cc_srcs_len) list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) - if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0) + list(LENGTH hip_cu_srcs hip_cu_srcs_len) + list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len) + if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND + ${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0) file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n") set(pybind_flag 1) endif() @@ -125,6 +146,11 @@ function(op_library TARGET) file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n") endif() + # pybind USE_OP_DEVICE_KERNEL for MIOPEN + if (WITH_AMD_GPU AND ${miopen_hip_cc_srcs_len} GREATER 0) + file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n") + endif() + # pybind USE_OP_DEVICE_KERNEL for MKLDNN if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0) file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n") diff --git a/paddle/fluid/operators/box_coder_op.cc b/paddle/fluid/operators/box_coder_op.cc index eccdd408a17a07a541480705242b137f8207c139..ec416f725e75fae57484751ee8a066c0b9da8a70 100644 --- a/paddle/fluid/operators/box_coder_op.cc +++ b/paddle/fluid/operators/box_coder_op.cc @@ -126,6 +126,7 @@ width and height. } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker); +REGISTER_OPERATOR(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker, + paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(box_coder, ops::BoxCoderKernel, ops::BoxCoderKernel); diff --git a/paddle/fluid/operators/detail/grpc_server.h b/paddle/fluid/operators/detail/grpc_server.h index 10e6dd45a901d36de4a6577db4da05551645eb73..f891c75dbc81a5cdb5274bbae84e9e85f42464fb 100644 --- a/paddle/fluid/operators/detail/grpc_server.h +++ b/paddle/fluid/operators/detail/grpc_server.h @@ -22,10 +22,9 @@ limitations under the License. */ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/detail/grpc_service.h" +#include "paddle/fluid/operators/detail/grpc_service.h" #include "paddle/fluid/operators/detail/send_recv.grpc.pb.h" #include "paddle/fluid/operators/detail/send_recv.pb.h" -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" -#include "paddle/fluid/operators/detail/simple_block_queue.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/detail/test_serde.cc b/paddle/fluid/operators/detail/test_serde.cc index 494ac1d67948e4a1d1c2a05f4838f80a42620064..99c1577223c4fe8001b4ce651b10e9e1f0024296 100644 --- a/paddle/fluid/operators/detail/test_serde.cc +++ b/paddle/fluid/operators/detail/test_serde.cc @@ -194,24 +194,23 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) { for (int i = 0; i < tensor_numel; ++i) EXPECT_FLOAT_EQ(tensor_data2[i], 31.9); } -TEST(LodTensor, GPU) { - platform::CUDAPlace place; +TEST(LodTensor, Run) { + platform::CPUPlace place; RunTestLodTensor(place); RunTestLodTensor(place, 1); -} - -TEST(LodTensor, CPU) { - platform::CPUPlace place; +#ifdef PADDLE_WITH_CUDA + platform::CUDAPlace place; RunTestLodTensor(place); RunTestLodTensor(place, 1); +#endif } -TEST(SelectedRows, CPU) { +TEST(SelectedRows, Run) { platform::CPUPlace place; RunSerdeTestSelectedRows(place); -} -TEST(SelectedRows, GPU) { +#ifdef PADDLE_WITH_CUDA platform::CUDAPlace place; RunSerdeTestSelectedRows(place); +#endif } diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 73c84c2fe0155d21d7059938330e44fa3668c6df..93ef15b9332168a9c62abfd4d0827207173ece45 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -188,8 +188,8 @@ The general steps are as follows. First, calculate the true positive and } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(detection_map, ops::DetectionMAPOp, - ops::DetectionMAPOpMaker); +REGISTER_OPERATOR(detection_map, ops::DetectionMAPOp, ops::DetectionMAPOpMaker, + paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( detection_map, ops::DetectionMAPOpKernel, ops::DetectionMAPOpKernel); diff --git a/paddle/fluid/operators/iou_similarity_op.cc b/paddle/fluid/operators/iou_similarity_op.cc index ffbd7c7814c3fdec9fef0580ccd1ea3661ac0012..4b78ec510d1fb73592ee8af9a641622f4d713f8d 100755 --- a/paddle/fluid/operators/iou_similarity_op.cc +++ b/paddle/fluid/operators/iou_similarity_op.cc @@ -87,8 +87,9 @@ $$ } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(iou_similarity, ops::IOUSimilarityOp, - ops::IOUSimilarityOpMaker); +REGISTER_OPERATOR(iou_similarity, ops::IOUSimilarityOp, + ops::IOUSimilarityOpMaker, + paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( iou_similarity, diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index 547d081006f1c28ba73cb02d38e36bb612cea494..ee0e91132bce52998e9c45b37335618e4354e1cd 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -6,6 +6,7 @@ function(math_library TARGET) # But it handle split GPU/CPU code and link some common library. set(cc_srcs) set(cu_srcs) + set(hip_srcs) set(math_common_deps device_context framework_proto) set(multiValueArgs DEPS) cmake_parse_arguments(math_library "${options}" "${oneValueArgs}" @@ -17,10 +18,15 @@ function(math_library TARGET) if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu) list(APPEND cu_srcs ${TARGET}.cu) endif() + if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu) + list(APPEND hip_srcs ${TARGET}.hip.cu) + endif() list(LENGTH cc_srcs cc_srcs_len) if (WITH_GPU) nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) + elseif (WITH_AMD_GPU) + hip_library(${TARGET} SRCS ${cc_srcs} ${hip_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) elseif(${cc_srcs_len} GREATER 0) cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps}) endif() diff --git a/paddle/fluid/operators/math/concat.hip.cu b/paddle/fluid/operators/math/concat.hip.cu new file mode 100644 index 0000000000000000000000000000000000000000..eacef0438883891671fec6e4001f862f619723cb --- /dev/null +++ b/paddle/fluid/operators/math/concat.hip.cu @@ -0,0 +1,15 @@ +/* Copyright (c) 2018 paddlepaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include diff --git a/paddle/fluid/operators/mine_hard_examples_op.cc b/paddle/fluid/operators/mine_hard_examples_op.cc index 0e81d60878dce747b047abbe4641b71462373b2b..277901cff493445e1e85e92e22ea0ada0e1cba43 100644 --- a/paddle/fluid/operators/mine_hard_examples_op.cc +++ b/paddle/fluid/operators/mine_hard_examples_op.cc @@ -324,8 +324,9 @@ MatchIndices elements with value -1. } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp, - ops::MineHardExamplesOpMaker); +REGISTER_OPERATOR(mine_hard_examples, ops::MineHardExamplesOp, + ops::MineHardExamplesOpMaker, + paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( mine_hard_examples, diff --git a/paddle/fluid/operators/prior_box_op.cc b/paddle/fluid/operators/prior_box_op.cc index 7ba55437cb20f802cc12ceea7777d7d78bba62a6..c22a55bce263423d5c17fffdb06b7ece02ae26da 100644 --- a/paddle/fluid/operators/prior_box_op.cc +++ b/paddle/fluid/operators/prior_box_op.cc @@ -168,7 +168,9 @@ https://arxiv.org/abs/1512.02325. } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_WITHOUT_GRADIENT(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker); +REGISTER_OPERATOR(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker, + paddle::framework::EmptyGradOpMaker); + REGISTER_OP_CPU_KERNEL( prior_box, ops::PriorBoxOpKernel, ops::PriorBoxOpKernel); diff --git a/paddle/fluid/operators/target_assign_op.cc b/paddle/fluid/operators/target_assign_op.cc index a894b12fa35a121eff0b8f9d2d0eecc5ae5185f3..33ff967e5e8f5afbaa62ba39ce596687ae0a71cd 100644 --- a/paddle/fluid/operators/target_assign_op.cc +++ b/paddle/fluid/operators/target_assign_op.cc @@ -153,8 +153,8 @@ template struct NegTargetAssignFunctor, diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 8942b5c9430ffa4e499b0ad1d2b5acf6d18ec0ab..fe991033dfc2a6ccc66b0ca5588fe8f808d1eb43 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -1,9 +1,16 @@ if(WITH_PYTHON) - cc_library(paddle_pybind SHARED - SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method - ${GLOB_OP_LIB}) - if(NOT APPLE AND NOT ANDROID) - target_link_libraries(paddle_pybind rt) - endif(NOT APPLE AND NOT ANDROID) + if(WITH_AMD_GPU) + hip_library(paddle_pybind SHARED + SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc + DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method + ${GLOB_OP_LIB}) + else() + cc_library(paddle_pybind SHARED + SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc + DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method + ${GLOB_OP_LIB}) + if(NOT APPLE AND NOT ANDROID) + target_link_libraries(paddle_pybind rt) + endif(NOT APPLE AND NOT ANDROID) + endif(WITH_AMD_GPU) endif(WITH_PYTHON) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh old mode 100644 new mode 100755 index 2e9b088bfa596d86c3c43c09d360da772fb2775a..322f72e4a58c7e8f2c26d994477cbb55551c595a --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -37,6 +37,7 @@ function cmake_gen() { -DWITH_DSO=ON -DWITH_DOC=${WITH_DOC:-OFF} -DWITH_GPU=${WITH_GPU:-OFF} + -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} -DWITH_MKL=${WITH_MKL:-ON} -DWITH_AVX=${WITH_AVX:-OFF} @@ -50,6 +51,7 @@ function cmake_gen() { -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} -DWITH_TESTING=${WITH_TESTING:-ON} -DWITH_FAST_BUNDLE_TEST=ON + -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ======================================== EOF @@ -62,6 +64,7 @@ EOF -DWITH_DSO=ON \ -DWITH_DOC=${WITH_DOC:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \ + -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \ -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \ -DWITH_MKL=${WITH_MKL:-ON} \ -DWITH_AVX=${WITH_AVX:-OFF} \ @@ -74,6 +77,7 @@ EOF -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \ -DWITH_TESTING=${WITH_TESTING:-ON} \ -DWITH_FAST_BUNDLE_TEST=ON \ + -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON } diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index a889ab6bdc6ac9494ef992a97292b7a2536c41c4..cd519e1ee082d27ccadc6247c149701fac31e812 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -129,13 +129,11 @@ def detection_output(loc, prior_box_var=prior_box_var, target_box=loc, code_type='decode_center_size') - old_shape = scores.shape scores = ops.reshape(x=scores, shape=(-1, old_shape[-1])) scores = nn.softmax(input=scores) scores = ops.reshape(x=scores, shape=old_shape) scores = nn.transpose(scores, perm=[0, 2, 1]) - nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype) helper.append_op( type="multiclass_nms", @@ -475,6 +473,7 @@ def ssd_loss(location, # 2. Compute confidence for mining hard examples # 2.1. Get the target label based on matched indices gt_label = ops.reshape(x=gt_label, shape=gt_label.shape + (1, )) + gt_label.stop_gradient = True target_label, _ = target_assign( gt_label, matched_indices, mismatch_value=background_label) # 2.2. Compute confidence loss. @@ -482,10 +481,12 @@ def ssd_loss(location, confidence = __reshape_to_2d(confidence) target_label = tensor.cast(x=target_label, dtype='int64') target_label = __reshape_to_2d(target_label) + target_label.stop_gradient = True conf_loss = nn.softmax_with_cross_entropy(confidence, target_label) # 3. Mining hard examples conf_loss = ops.reshape(x=conf_loss, shape=(num, num_prior)) + conf_loss.stop_gradient = True neg_indices = helper.create_tmp_variable(dtype='int32') dtype = matched_indices.dtype updated_matched_indices = helper.create_tmp_variable(dtype=dtype) @@ -695,6 +696,8 @@ def multi_box_head(inputs, outputs={"Boxes": box, "Variances": var}, attrs=attrs, ) + box.stop_gradient = True + var.stop_gradient = True return box, var def _reshape_with_axis_(input, axis=1):