Commit 9367f11e authored by typhoonzero

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pserver_prepare_before_run

...@@ -36,6 +36,7 @@ include(simd)
################################ Configurations #######################################
option(WITH_GPU      "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
+option(WITH_AMD_GPU  "Compile PaddlePaddle with AMD GPU" OFF)
option(WITH_AVX      "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_MKL      "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_DSO      "Compile PaddlePaddle with dynamic linked CUDA" ON)
...@@ -180,6 +181,11 @@ if(WITH_GPU)
  include(cuda)
endif(WITH_GPU)

+if(WITH_AMD_GPU)
+  find_package(HIP)
+  include(hip)
+endif(WITH_AMD_GPU)
+
if(WITH_MKLML)
  list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB})
endif()
......
...@@ -57,11 +57,7 @@ if(NOT WITH_GOLANG)
  add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG)

-if(NOT WITH_GPU)
-  add_definitions(-DHPPL_STUB_FUNC)
-  list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
-else()
+if(WITH_GPU)
  add_definitions(-DPADDLE_WITH_CUDA)
  FIND_PACKAGE(CUDA REQUIRED)
...@@ -84,7 +80,14 @@ else()
  # Include cuda and cudnn
  include_directories(${CUDNN_INCLUDE_DIR})
  include_directories(${CUDA_TOOLKIT_INCLUDE})
-endif(NOT WITH_GPU)
+elseif(WITH_AMD_GPU)
+  add_definitions(-DPADDLE_WITH_HIP)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_HCC__")
+else()
+  add_definitions(-DHPPL_STUB_FUNC)
+  list(APPEND CMAKE_CXX_SOURCE_FILE_EXTENSIONS cu)
+endif()

if (WITH_MKLML AND MKLML_IOMP_LIB)
  message(STATUS "Enable Intel OpenMP with ${MKLML_IOMP_LIB}")
......
...@@ -4,18 +4,33 @@ SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})

-ExternalProject_Add(
-    extern_eigen3
-    ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY  "https://github.com/RLovelett/eigen.git"
-    GIT_TAG         70661066beef694cadf6c304d0d07e0758825c10
-    PREFIX          ${EIGEN_SOURCE_DIR}
-    UPDATE_COMMAND  ""
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND     ""
-    INSTALL_COMMAND   ""
-    TEST_COMMAND      ""
-)
+if(WITH_AMD_GPU)
+    ExternalProject_Add(
+        extern_eigen3
+        ${EXTERNAL_PROJECT_LOG_ARGS}
+        GIT_REPOSITORY  "https://github.com/sabreshao/hipeigen.git"
+        GIT_TAG         0cba03ff9f8f9f70bbd92ac5857b031aa8fed6f9
+        PREFIX          ${EIGEN_SOURCE_DIR}
+        UPDATE_COMMAND  ""
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND     ""
+        INSTALL_COMMAND   ""
+        TEST_COMMAND      ""
+    )
+else()
+    ExternalProject_Add(
+        extern_eigen3
+        ${EXTERNAL_PROJECT_LOG_ARGS}
+        GIT_REPOSITORY  "https://github.com/RLovelett/eigen.git"
+        GIT_TAG         70661066beef694cadf6c304d0d07e0758825c10
+        PREFIX          ${EIGEN_SOURCE_DIR}
+        UPDATE_COMMAND  ""
+        CONFIGURE_COMMAND ""
+        BUILD_COMMAND     ""
+        INSTALL_COMMAND   ""
+        TEST_COMMAND      ""
+    )
+endif()

if (${CMAKE_VERSION} VERSION_LESS "3.3.0")
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/eigen3_dummy.c)
......
...@@ -317,6 +317,82 @@ function(nv_test TARGET_NAME)
  endif()
endfunction(nv_test)

+function(hip_library TARGET_NAME)
+  if (WITH_AMD_GPU)
+    set(options STATIC static SHARED shared)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(hip_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    set(_sources ${hip_library_SRCS})
+    HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
+    if(_source_files)
+      list(REMOVE_ITEM _sources ${_source_files})
+    endif()
+    if(hip_library_SRCS)
+      if (hip_library_SHARED OR hip_library_shared) # build *.so
+        add_library(${TARGET_NAME} SHARED ${_cmake_options} ${_generated_files} ${_sources})
+        set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
+      else()
+        add_library(${TARGET_NAME} STATIC ${_cmake_options} ${_generated_files} ${_sources})
+        set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE CXX)
+        target_link_libraries(${TARGET_NAME} /opt/rocm/hip/lib/libhip_hcc.so /opt/rocm/hip/lib/libhip_device.a)
+        find_fluid_modules(${TARGET_NAME})
+      endif()
+      if (hip_library_DEPS)
+        add_dependencies(${TARGET_NAME} ${hip_library_DEPS})
+        target_link_libraries(${TARGET_NAME} ${hip_library_DEPS})
+      endif()
+      # cpplint code style
+      foreach(source_file ${hip_library_SRCS})
+        string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
+        if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+          list(APPEND hip_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
+        endif()
+      endforeach()
+      add_style_check_target(${TARGET_NAME} ${hip_library_SRCS} ${hip_library_HEADERS})
+    else(hip_library_SRCS)
+      if (hip_library_DEPS)
+        merge_static_libs(${TARGET_NAME} ${hip_library_DEPS})
+      else()
+        message(FATAL "Please specify source file or library in nv_library.")
+      endif()
+    endif(hip_library_SRCS)
+  endif()
+endfunction(hip_library)
+
+function(hip_binary TARGET_NAME)
+  if (WITH_AMD_GPU)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(hip_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    hip_add_executable(${TARGET_NAME} ${hip_binary_SRCS})
+    if(hip_binary_DEPS)
+      target_link_libraries(${TARGET_NAME} ${hip_binary_DEPS})
+      add_dependencies(${TARGET_NAME} ${hip_binary_DEPS})
+    endif()
+  endif()
+endfunction(hip_binary)
+
+function(hip_test TARGET_NAME)
+  if (WITH_AMD_GPU AND WITH_TESTING)
+    set(options "")
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(hip_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    set(_sources ${hip_test_SRCS})
+    HIP_PREPARE_TARGET_COMMANDS(${TARGET_NAME} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options})
+    if(_source_files)
+      list(REMOVE_ITEM _sources ${_source_files})
+    endif()
+    add_executable(${TARGET_NAME} ${_cmake_options} ${_generated_files} ${_sources})
+    set_target_properties(${TARGET_NAME} PROPERTIES LINKER_LANGUAGE HIP)
+    target_link_libraries(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    add_dependencies(${TARGET_NAME} ${hip_test_DEPS} paddle_gtest_main paddle_memory gtest gflags)
+    add_test(${TARGET_NAME} ${TARGET_NAME})
+  endif()
+endfunction(hip_test)
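A minimal usage sketch of the new helpers, mirroring how the existing nv_library/nv_test helpers are called elsewhere in the tree. The target names, source files, and DEPS below (dummy_op, dummy_op.hip.cu, dummy_op_test.cc, tensor, device_context) are hypothetical and assume a build configured with WITH_AMD_GPU=ON:

# Hypothetical targets; not part of this commit, shown only to illustrate the API above.
hip_library(dummy_op SRCS dummy_op.cc dummy_op.hip.cu DEPS tensor device_context)
hip_binary(dummy_tool SRCS dummy_tool.cc DEPS dummy_op)
hip_test(dummy_op_test SRCS dummy_op_test.cc DEPS dummy_op)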
function(go_library TARGET_NAME)
  set(options STATIC static SHARED shared)
  set(oneValueArgs "")
......
if(NOT WITH_AMD_GPU)
  return()
endif()

include_directories("/opt/rocm/include")
include_directories("/opt/rocm/hipblas/include")
include_directories("/opt/rocm/hiprand/include")
include_directories("/opt/rocm/rocrand/include")
include_directories("/opt/rocm/rccl/include")
include_directories("/opt/rocm/thrust")

list(APPEND EXTERNAL_LIBS "-L/opt/rocm/lib/ -lhip_hcc")

set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -fPIC -DPADDLE_WITH_HIP -std=c++14" )

if(WITH_DSO)
  set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_USE_DSO")
endif(WITH_DSO)

if(WITH_DOUBLE)
  set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_TYPE_DOUBLE")
endif(WITH_DOUBLE)

if(WITH_TESTING)
  set(HIP_HCC_FLAGS "${HIP_HCC_FLAGS} -DPADDLE_WITH_TESTING")
endif(WITH_TESTING)

if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
  list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
  list(APPEND HIP_HCC_FLAGS ${CMAKE_CXX_FLAGS_MINSIZEREL})
endif()

if("x${HCC_HOME}" STREQUAL "x")
  set(HCC_HOME "/opt/rocm/hcc")
endif()

set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>")
set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared")
set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES> -shared")
...@@ -12,6 +12,8 @@ function(op_library TARGET)
  set(OP_LIBRARY ${TARGET} ${OP_LIBRARY} PARENT_SCOPE)
  set(cc_srcs)
  set(cu_srcs)
+  set(hip_cu_srcs)
+  set(miopen_hip_cc_srcs)
  set(cu_cc_srcs)
  set(cudnn_cu_cc_srcs)
  set(CUDNN_FILE)
...@@ -36,10 +38,19 @@ function(op_library TARGET)
    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
      list(APPEND cu_srcs ${TARGET}.cu)
    endif()
+    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
+      list(APPEND hip_cu_srcs ${TARGET}.hip.cu)
+    endif()
    string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
    if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
      list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
    endif()
+    if(WITH_AMD_GPU)
+      string(REPLACE "_op" "_miopen_op" MIOPEN_FILE "${TARGET}")
+      if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.hip.cc)
+        list(APPEND miopen_hip_cc_srcs ${MIOPEN_FILE}.hip.cc)
+      endif()
+    endif()
    if(WITH_MKLDNN)
      string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
      if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MKLDNN_FILE}.cc)
...@@ -48,10 +59,14 @@ function(op_library TARGET)
    endif()
  else()
    foreach(src ${op_library_SRCS})
-      if (${src} MATCHES ".*\\.cu$")
+      if (${src} MATCHES ".*\\.hip.cu$")
+        list(APPEND hip_cu_srcs ${src})
+      elseif (${src} MATCHES ".*\\.cu$")
        list(APPEND cu_srcs ${src})
      elseif(${src} MATCHES ".*_cudnn_op.cu.cc$")
        list(APPEND cudnn_cu_cc_srcs ${src})
+      elseif(WITH_AMD_GPU AND ${src} MATCHES ".*_miopen_op.hip.cc$")
+        list(APPEND miopen_hip_cc_srcs ${src})
      elseif(WITH_MKLDNN AND ${src} MATCHES ".*_mkldnn_op.cc$")
        list(APPEND mkldnn_cc_srcs ${src})
      elseif(${src} MATCHES ".*\\.cu.cc$")
...@@ -76,6 +91,9 @@ function(op_library TARGET)
  if (WITH_GPU)
    nv_library(${TARGET} SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs} ${cu_srcs} DEPS ${op_library_DEPS}
      ${op_common_deps})
+  elseif (WITH_AMD_GPU)
+    hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
+      ${op_common_deps})
  else()
    cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
      ${op_common_deps})
...@@ -88,7 +106,7 @@ function(op_library TARGET)
    endif()
  endforeach()

-  # The registration of USE_OP, please refer to paddle/framework/op_registry.h.
+  # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
  # Note that it's enough to just adding one operator to pybind in a *_op.cc file.
  # And for detail pybind information, please see generated paddle/pybind/pybind.h.
  file(READ ${TARGET}.cc TARGET_CONTENT)
...@@ -114,7 +132,10 @@ function(op_library TARGET)
  list(LENGTH cu_srcs cu_srcs_len)
  list(LENGTH cu_cc_srcs cu_cc_srcs_len)
  list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
-  if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0)
+  list(LENGTH hip_cu_srcs hip_cu_srcs_len)
+  list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
+  if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
+      ${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0)
    file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
    set(pybind_flag 1)
  endif()
...@@ -125,6 +146,11 @@ function(op_library TARGET)
    file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, CUDNN);\n")
  endif()

+  # pybind USE_OP_DEVICE_KERNEL for MIOPEN
+  if (WITH_AMD_GPU AND ${miopen_hip_cc_srcs_len} GREATER 0)
+    file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n")
+  endif()
+
  # pybind USE_OP_DEVICE_KERNEL for MKLDNN
  if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
    file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MKLDNN);\n")
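For illustration, the source-selection convention above lets an operator ship per-backend kernels purely by file naming. The operator name dummy_op and its files are hypothetical; the mapping of files to source lists follows the branches shown in this hunk:

# Hypothetical operator target; op_library() picks up sources by convention:
#   dummy_op.cc            -> cc_srcs             (always)
#   dummy_op.cu            -> cu_srcs             (WITH_GPU)
#   dummy_cudnn_op.cu.cc   -> cudnn_cu_cc_srcs    (WITH_GPU)
#   dummy_op.hip.cu        -> hip_cu_srcs         (WITH_AMD_GPU)
#   dummy_miopen_op.hip.cc -> miopen_hip_cc_srcs  (WITH_AMD_GPU)
#   dummy_mkldnn_op.cc     -> mkldnn_cc_srcs      (WITH_MKLDNN)
op_library(dummy_op)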
......
...@@ -126,6 +126,7 @@ width and height.
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker);
+REGISTER_OPERATOR(box_coder, ops::BoxCoderOp, ops::BoxCoderOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(box_coder, ops::BoxCoderKernel<float>,
                       ops::BoxCoderKernel<double>);
...@@ -22,10 +22,9 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/operators/detail/grpc_service.h"
-#include "paddle/fluid/operators/detail/grpc_service.h"
#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h"
#include "paddle/fluid/operators/detail/send_recv.pb.h"
-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
-#include "paddle/fluid/operators/detail/simple_block_queue.h"

namespace paddle {
namespace operators {
......
...@@ -194,24 +194,23 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) {
  for (int i = 0; i < tensor_numel; ++i) EXPECT_FLOAT_EQ(tensor_data2[i], 31.9);
}

-TEST(LodTensor, GPU) {
-  platform::CUDAPlace place;
+TEST(LodTensor, Run) {
+  platform::CPUPlace place;
  RunTestLodTensor(place);
  RunTestLodTensor(place, 1);
-}
-
-TEST(LodTensor, CPU) {
-  platform::CPUPlace place;
+#ifdef PADDLE_WITH_CUDA
+  platform::CUDAPlace place;
  RunTestLodTensor(place);
  RunTestLodTensor(place, 1);
+#endif
}

-TEST(SelectedRows, CPU) {
+TEST(SelectedRows, Run) {
  platform::CPUPlace place;
  RunSerdeTestSelectedRows(place);
-}
-
-TEST(SelectedRows, GPU) {
+#ifdef PADDLE_WITH_CUDA
  platform::CUDAPlace place;
  RunSerdeTestSelectedRows(place);
+#endif
}
...@@ -188,8 +188,8 @@ The general steps are as follows. First, calculate the true positive and
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(detection_map, ops::DetectionMAPOp,
-                             ops::DetectionMAPOpMaker);
+REGISTER_OPERATOR(detection_map, ops::DetectionMAPOp, ops::DetectionMAPOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    detection_map, ops::DetectionMAPOpKernel<paddle::platform::CPUPlace, float>,
    ops::DetectionMAPOpKernel<paddle::platform::CPUPlace, double>);
...@@ -87,8 +87,9 @@ $$
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(iou_similarity, ops::IOUSimilarityOp,
-                             ops::IOUSimilarityOpMaker);
+REGISTER_OPERATOR(iou_similarity, ops::IOUSimilarityOp,
+                  ops::IOUSimilarityOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    iou_similarity,
......
...@@ -6,6 +6,7 @@ function(math_library TARGET)
  # But it handle split GPU/CPU code and link some common library.
  set(cc_srcs)
  set(cu_srcs)
+  set(hip_srcs)
  set(math_common_deps device_context framework_proto)
  set(multiValueArgs DEPS)
  cmake_parse_arguments(math_library "${options}" "${oneValueArgs}"
...@@ -17,10 +18,15 @@ function(math_library TARGET)
  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
    list(APPEND cu_srcs ${TARGET}.cu)
  endif()
+  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.hip.cu)
+    list(APPEND hip_srcs ${TARGET}.hip.cu)
+  endif()

  list(LENGTH cc_srcs cc_srcs_len)
  if (WITH_GPU)
    nv_library(${TARGET} SRCS ${cc_srcs} ${cu_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
+  elseif (WITH_AMD_GPU)
+    hip_library(${TARGET} SRCS ${cc_srcs} ${hip_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
  elseif(${cc_srcs_len} GREATER 0)
    cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
  endif()
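The same file-naming convention applies to math kernels; dummy_math below is a hypothetical target name used only to illustrate the branch added above:

# Hypothetical target: if dummy_math.hip.cu sits next to dummy_math.cc, the
# WITH_AMD_GPU branch above builds it with hip_library instead of nv_library.
math_library(dummy_math)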
......
/* Copyright (c) 2018 paddlepaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <hip/hip_runtime.h>
...@@ -324,8 +324,9 @@ MatchIndices elements with value -1.
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(mine_hard_examples, ops::MineHardExamplesOp,
-                             ops::MineHardExamplesOpMaker);
+REGISTER_OPERATOR(mine_hard_examples, ops::MineHardExamplesOp,
+                  ops::MineHardExamplesOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    mine_hard_examples,
......
...@@ -168,7 +168,9 @@ https://arxiv.org/abs/1512.02325.
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker);
+REGISTER_OPERATOR(prior_box, ops::PriorBoxOp, ops::PriorBoxOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    prior_box, ops::PriorBoxOpKernel<paddle::platform::CPUPlace, float>,
    ops::PriorBoxOpKernel<paddle::platform::CPUPlace, double>);
...@@ -153,8 +153,8 @@ template struct NegTargetAssignFunctor<platform::CPUDeviceContext, float,
}  // namespace paddle

namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(target_assign, ops::TargetAssignOp,
-                             ops::TargetAssignOpMaker);
+REGISTER_OPERATOR(target_assign, ops::TargetAssignOp, ops::TargetAssignOpMaker,
+                  paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL(
    target_assign,
    ops::TargetAssignKernel<paddle::platform::CPUDeviceContext, int, float>,
......
if(WITH_PYTHON)
-  cc_library(paddle_pybind SHARED
-    SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
-    DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
-    ${GLOB_OP_LIB})
-  if(NOT APPLE AND NOT ANDROID)
-    target_link_libraries(paddle_pybind rt)
-  endif(NOT APPLE AND NOT ANDROID)
+  if(WITH_AMD_GPU)
+    hip_library(paddle_pybind SHARED
+      SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
+      DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
+      ${GLOB_OP_LIB})
+  else()
+    cc_library(paddle_pybind SHARED
+      SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc
+      DEPS pybind python backward proto_desc paddle_memory executor prune init profiler feed_fetch_method
+      ${GLOB_OP_LIB})
+    if(NOT APPLE AND NOT ANDROID)
+      target_link_libraries(paddle_pybind rt)
+    endif(NOT APPLE AND NOT ANDROID)
+  endif(WITH_AMD_GPU)
endif(WITH_PYTHON)
...@@ -37,6 +37,7 @@ function cmake_gen() {
        -DWITH_DSO=ON
        -DWITH_DOC=${WITH_DOC:-OFF}
        -DWITH_GPU=${WITH_GPU:-OFF}
+        -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF}
        -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF}
        -DWITH_MKL=${WITH_MKL:-ON}
        -DWITH_AVX=${WITH_AVX:-OFF}
...@@ -50,6 +51,7 @@ function cmake_gen() {
        -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON}
        -DWITH_TESTING=${WITH_TESTING:-ON}
        -DWITH_FAST_BUNDLE_TEST=ON
+        -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    ========================================
EOF
...@@ -62,6 +64,7 @@ EOF
        -DWITH_DSO=ON \
        -DWITH_DOC=${WITH_DOC:-OFF} \
        -DWITH_GPU=${WITH_GPU:-OFF} \
+        -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \
        -DWITH_DISTRIBUTE=${WITH_DISTRIBUTE:-OFF} \
        -DWITH_MKL=${WITH_MKL:-ON} \
        -DWITH_AVX=${WITH_AVX:-OFF} \
...@@ -74,6 +77,7 @@ EOF
        -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-ON} \
        -DWITH_TESTING=${WITH_TESTING:-ON} \
        -DWITH_FAST_BUNDLE_TEST=ON \
+        -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
}
......
...@@ -129,13 +129,11 @@ def detection_output(loc,
        prior_box_var=prior_box_var,
        target_box=loc,
        code_type='decode_center_size')
-
    old_shape = scores.shape
    scores = ops.reshape(x=scores, shape=(-1, old_shape[-1]))
    scores = nn.softmax(input=scores)
    scores = ops.reshape(x=scores, shape=old_shape)
    scores = nn.transpose(scores, perm=[0, 2, 1])
-
    nmsed_outs = helper.create_tmp_variable(dtype=decoded_box.dtype)
    helper.append_op(
        type="multiclass_nms",
...@@ -475,6 +473,7 @@ def ssd_loss(location,
    # 2. Compute confidence for mining hard examples
    # 2.1. Get the target label based on matched indices
    gt_label = ops.reshape(x=gt_label, shape=gt_label.shape + (1, ))
+    gt_label.stop_gradient = True
    target_label, _ = target_assign(
        gt_label, matched_indices, mismatch_value=background_label)
    # 2.2. Compute confidence loss.
...@@ -482,10 +481,12 @@ def ssd_loss(location,
    confidence = __reshape_to_2d(confidence)
    target_label = tensor.cast(x=target_label, dtype='int64')
    target_label = __reshape_to_2d(target_label)
+    target_label.stop_gradient = True
    conf_loss = nn.softmax_with_cross_entropy(confidence, target_label)
    # 3. Mining hard examples
    conf_loss = ops.reshape(x=conf_loss, shape=(num, num_prior))
+    conf_loss.stop_gradient = True
    neg_indices = helper.create_tmp_variable(dtype='int32')
    dtype = matched_indices.dtype
    updated_matched_indices = helper.create_tmp_variable(dtype=dtype)
...@@ -695,6 +696,8 @@ def multi_box_head(inputs,
        outputs={"Boxes": box,
                 "Variances": var},
        attrs=attrs, )
+    box.stop_gradient = True
+    var.stop_gradient = True
    return box, var


def _reshape_with_axis_(input, axis=1):
......