提交 c488ee96 编写于 作者: T tensor-tang

Merge remote-tracking branch 'ups/develop' into refine/op/fusion_lstm

...@@ -24,6 +24,9 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " ...@@ -24,6 +24,9 @@ message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: " message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
"${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
if(WIN32)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)
endif(WIN32)
if(NOT CMAKE_CROSSCOMPILING) if(NOT CMAKE_CROSSCOMPILING)
find_package(CUDA QUIET) find_package(CUDA QUIET)
...@@ -165,7 +168,6 @@ include(external/python) # download, build, install python ...@@ -165,7 +168,6 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas include(external/openblas) # download, build, install openblas
include(external/mkldnn) # download, build, install mkldnn include(external/mkldnn) # download, build, install mkldnn
include(external/swig) # download, build, install swig include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/boost) # download boost include(external/boost) # download boost
include(external/any) # download libn::any include(external/any) # download libn::any
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
...@@ -173,6 +175,14 @@ include(external/pybind11) # download pybind11 ...@@ -173,6 +175,14 @@ include(external/pybind11) # download pybind11
include(external/cares) include(external/cares)
include(external/cub) include(external/cub)
if (NOT WIN32)
# there is no official support of snappystream, warpctc, nccl, cupti in windows
include(external/snappy) # download snappy
include(external/snappystream) # download snappystream
include(external/warpctc) # download, build, install warpctc
include(cupti)
endif (NOT WIN32)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
if(WITH_GRPC) if(WITH_GRPC)
include(external/grpc) include(external/grpc)
...@@ -194,13 +204,10 @@ if(WITH_BRPC_RDMA) ...@@ -194,13 +204,10 @@ if(WITH_BRPC_RDMA)
endif() endif()
endif() endif()
include(external/snappy) # download snappy
include(external/snappystream)
include(external/threadpool)
include(external/threadpool)
include(flags) # set paddle compile flags include(flags) # set paddle compile flags
include(cudnn) # set cudnn libraries, must before configure include(cudnn) # set cudnn libraries, must before configure
include(cupti)
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
if(WITH_GPU) if(WITH_GPU)
......
...@@ -61,6 +61,11 @@ if(NOT CMAKE_CROSSCOMPILING) ...@@ -61,6 +61,11 @@ if(NOT CMAKE_CROSSCOMPILING)
endif() endif()
endif() endif()
if(WIN32)
# windows stupid compile option for all targets.
add_definitions(-D_XKEYCHECK_H)
endif(WIN32)
if(NOT WITH_GOLANG) if(NOT WITH_GOLANG)
add_definitions(-DPADDLE_WITHOUT_GOLANG) add_definitions(-DPADDLE_WITHOUT_GOLANG)
endif(NOT WITH_GOLANG) endif(NOT WITH_GOLANG)
......
...@@ -28,7 +28,12 @@ if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL)) ...@@ -28,7 +28,12 @@ if((NOT DEFINED BOOST_TAR) OR (NOT DEFINED BOOST_URL))
set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE) set(BOOST_TAR "boost_1_41_0" CACHE STRING "" FORCE)
set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE) set(BOOST_URL "http://paddlepaddledeps.cdn.bcebos.com/${BOOST_TAR}.tar.gz" CACHE STRING "" FORCE)
endif() endif()
MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}") IF (WIN32)
MESSAGE(WARNING, "In windows, boost can not be downloaded automaticlly, please build it manually and put it at " ${THIRD_PARTY_PATH}install/boost)
else()
MESSAGE(STATUS "BOOST_TAR: ${BOOST_TAR}, BOOST_URL: ${BOOST_URL}")
ENDIF(WIN32)
set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost) set(BOOST_SOURCES_DIR ${THIRD_PARTY_PATH}/boost)
set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}") set(BOOST_DOWNLOAD_DIR "${BOOST_SOURCES_DIR}/src/${BOOST_PROJECT}")
set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE) set(BOOST_INCLUDE_DIR "${BOOST_DOWNLOAD_DIR}/${BOOST_TAR}" CACHE PATH "boost include directory." FORCE)
...@@ -36,12 +41,13 @@ set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1) ...@@ -36,12 +41,13 @@ set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
include_directories(${BOOST_INCLUDE_DIR}) include_directories(${BOOST_INCLUDE_DIR})
if (NOT WIN32)
ExternalProject_Add( ExternalProject_Add(
${BOOST_PROJECT} ${BOOST_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
DOWNLOAD_DIR ${BOOST_DOWNLOAD_DIR} DOWNLOAD_DIR ${BOOST_DOWNLOAD_DIR}
DOWNLOAD_COMMAND wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz DOWNLOAD_COMMAND wget --no-check-certificate ${BOOST_URL} -c -q -O ${BOOST_TAR}.tar.gz
&& tar zxf ${BOOST_TAR}.tar.gz && tar zxf ${BOOST_TAR}.tar.gz
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
PREFIX ${BOOST_SOURCES_DIR} PREFIX ${BOOST_SOURCES_DIR}
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
...@@ -49,8 +55,9 @@ ExternalProject_Add( ...@@ -49,8 +55,9 @@ ExternalProject_Add(
INSTALL_COMMAND "" INSTALL_COMMAND ""
UPDATE_COMMAND "" UPDATE_COMMAND ""
) )
endif(NOT WIN32)
if (${CMAKE_VERSION} VERSION_LESS "3.3.0") if (${CMAKE_VERSION} VERSION_LESS "3.3.0" OR NOT WIN32)
set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c) set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/boost_dummy.c)
file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";") file(WRITE ${dummyfile} "const char *dummy = \"${dummyfile}\";")
add_library(boost STATIC ${dummyfile}) add_library(boost STATIC ${dummyfile})
......
...@@ -18,7 +18,7 @@ SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags) ...@@ -18,7 +18,7 @@ SET(GFLAGS_SOURCES_DIR ${THIRD_PARTY_PATH}/gflags)
SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags) SET(GFLAGS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/gflags)
SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE) SET(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
IF(WIN32) IF(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ELSE(WIN32) ELSE(WIN32)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
ENDIF(WIN32) ENDIF(WIN32)
...@@ -45,7 +45,13 @@ ExternalProject_Add( ...@@ -45,7 +45,13 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
IF(WIN32)
IF(NOT EXISTS "${GFLAGS_INSTALL_DIR}/lib/libgflags.lib")
add_custom_command(TARGET extern_gflags POST_BUILD
COMMAND cmake -E rename ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib ${GFLAGS_INSTALL_DIR}/lib/libgflags.lib
)
ENDIF()
ENDIF(WIN32)
ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL) ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
ADD_DEPENDENCIES(gflags extern_gflags) ADD_DEPENDENCIES(gflags extern_gflags)
...@@ -60,3 +66,4 @@ IF(WITH_C_API) ...@@ -60,3 +66,4 @@ IF(WITH_C_API)
INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib) INSTALL(FILES ${GFLAGS_LIBRARIES} DESTINATION third_party/gflags/lib)
ENDIF() ENDIF()
ENDIF() ENDIF()
...@@ -60,6 +60,13 @@ ExternalProject_Add( ...@@ -60,6 +60,13 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
) )
IF(WIN32)
IF(NOT EXISTS "${GLOG_INSTALL_DIR}/lib/libglog.lib")
add_custom_command(TARGET extern_glog POST_BUILD
COMMAND cmake -E rename ${GLOG_INSTALL_DIR}/lib/glog.lib ${GLOG_INSTALL_DIR}/lib/libglog.lib
)
ENDIF()
ENDIF(WIN32)
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL) ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}) SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
......
...@@ -17,20 +17,29 @@ IF(USE_EIGEN_FOR_BLAS) ...@@ -17,20 +17,29 @@ IF(USE_EIGEN_FOR_BLAS)
ENDIF(USE_EIGEN_FOR_BLAS) ENDIF(USE_EIGEN_FOR_BLAS)
INCLUDE(cblas) INCLUDE(cblas)
# IF(WIN32 AND NOT ${CBLAS_FOUND})
IF(NOT ${CBLAS_FOUND}) IF(NOT ${CBLAS_FOUND})
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas) SET(CBLAS_SOURCES_DIR ${THIRD_PARTY_PATH}/openblas)
SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas) SET(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
SET(CBLAS_INC_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE) SET(CBLAS_INCLUDE_DIR "${CBLAS_INSTALL_DIR}/include" CACHE PATH "openblas include directory." FORCE)
SET(CBLAS_LIBRARIES SET(CBLAS_LIBRARIES
"${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}" "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
CACHE FILEPATH "openblas library." FORCE) CACHE FILEPATH "openblas library." FORCE)
ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS) ADD_DEFINITIONS(-DPADDLE_USE_OPENBLAS)
IF (WIN32)
SET(CBLAS_FOUND true)
MESSAGE(WARNING, "In windows, openblas only support msvc build, please build it manually and put it at " ${CBLAS_INSTALL_DIR})
ENDIF(WIN32)
IF (NOT WIN32)
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable") SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
SET(OPENBLAS_COMMIT "v0.2.20") SET(OPENBLAS_COMMIT "v0.2.20")
...@@ -69,7 +78,6 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -69,7 +78,6 @@ IF(NOT ${CBLAS_FOUND})
ENDIF() ENDIF()
SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs) SET(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1 NO_LAPACK=1 libs)
ExternalProject_Add( ExternalProject_Add(
extern_openblas extern_openblas
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -84,9 +92,11 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -84,9 +92,11 @@ IF(NOT ${CBLAS_FOUND})
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
) )
ELSE()
ENDIF(NOT WIN32)
SET(CBLAS_PROVIDER openblas) SET(CBLAS_PROVIDER openblas)
IF(WITH_C_API) IF(WITH_C_API)
INSTALL(DIRECTORY ${CBLAS_INC_DIR} DESTINATION third_party/openblas) INSTALL(DIRECTORY ${CBLAS_INCLUDE_DIR} DESTINATION third_party/openblas)
# Because libopenblas.a is a symbolic link of another library, thus need to # Because libopenblas.a is a symbolic link of another library, thus need to
# install the whole directory. # install the whole directory.
IF(ANDROID) IF(ANDROID)
...@@ -107,7 +117,8 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -107,7 +117,8 @@ IF(NOT ${CBLAS_FOUND})
ENDIF(NOT ${CBLAS_FOUND}) ENDIF(NOT ${CBLAS_FOUND})
MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}") MESSAGE(STATUS "BLAS library: ${CBLAS_LIBRARIES}")
INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) MESSAGE(STATUS "BLAS Include: ${CBLAS_INCLUDE_DIR}")
INCLUDE_DIRECTORIES(${CBLAS_INCLUDE_DIR})
# FIXME(gangliao): generate cblas target to track all high performance # FIXME(gangliao): generate cblas target to track all high performance
# linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas) # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas)
......
...@@ -14,11 +14,14 @@ ...@@ -14,11 +14,14 @@
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
# Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp # Always invoke `FIND_PACKAGE(Protobuf)` for importing function protobuf_generate_cpp
IF(NOT WIN32)
FIND_PACKAGE(Protobuf QUIET) FIND_PACKAGE(Protobuf QUIET)
ENDIF(NOT WIN32)
macro(UNSET_VAR VAR_NAME) macro(UNSET_VAR VAR_NAME)
UNSET(${VAR_NAME} CACHE) UNSET(${VAR_NAME} CACHE)
UNSET(${VAR_NAME}) UNSET(${VAR_NAME})
endmacro() endmacro()
UNSET_VAR(PROTOBUF_INCLUDE_DIR) UNSET_VAR(PROTOBUF_INCLUDE_DIR)
UNSET_VAR(PROTOBUF_FOUND) UNSET_VAR(PROTOBUF_FOUND)
UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE) UNSET_VAR(PROTOBUF_PROTOC_EXECUTABLE)
...@@ -94,12 +97,14 @@ macro(PROMPT_PROTOBUF_LIB) ...@@ -94,12 +97,14 @@ macro(PROMPT_PROTOBUF_LIB)
SET(protobuf_DEPS ${ARGN}) SET(protobuf_DEPS ${ARGN})
MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}") MESSAGE(STATUS "Protobuf protoc executable: ${PROTOBUF_PROTOC_EXECUTABLE}")
MESSAGE(STATUS "Protobuf-lite library: ${PROTOBUF_LITE_LIBRARY}")
MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}") MESSAGE(STATUS "Protobuf library: ${PROTOBUF_LIBRARY}")
MESSAGE(STATUS "Protoc library: ${PROTOBUF_PROTOC_LIBRARY}")
MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}") MESSAGE(STATUS "Protobuf version: ${PROTOBUF_VERSION}")
INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${PROTOBUF_INCLUDE_DIR})
# Assuming that all the protobuf libraries are of the same type. # Assuming that all the protobuf libraries are of the same type.
IF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_STATIC_LIBRARY_SUFFIX}$") IF(${PROTOBUF_LIBRARY} MATCHES ${CMAKE_STATIC_LIBRARY_SUFFIX})
SET(protobuf_LIBTYPE STATIC) SET(protobuf_LIBTYPE STATIC)
ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$") ELSEIF(${PROTOBUF_LIBRARY} MATCHES "${CMAKE_SHARED_LIBRARY_SUFFIX}$")
SET(protobuf_LIBTYPE SHARED) SET(protobuf_LIBTYPE SHARED)
...@@ -137,18 +142,25 @@ macro(SET_PROTOBUF_VERSION) ...@@ -137,18 +142,25 @@ macro(SET_PROTOBUF_VERSION)
endmacro() endmacro()
set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf") set(PROTOBUF_ROOT "" CACHE PATH "Folder contains protobuf")
IF (WIN32)
SET(PROTOBUF_ROOT ${THIRD_PARTY_PATH}/install/protobuf)
MESSAGE(WARNING, "In windows, protobuf only support msvc build, please build it manually and put it at " ${PROTOBUF_ROOT})
ENDIF(WIN32)
if (NOT "${PROTOBUF_ROOT}" STREQUAL "") if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH) find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH)
find_library(PROTOBUF_LIBRARY protobuf PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_library(PROTOBUF_LITE_LIBRARY protobuf-lite PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_library(PROTOBUF_PROTOC_LIBRARY protoc PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_PROTOC_LIBRARY protoc libprotoc.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH) find_program(PROTOBUF_PROTOC_EXECUTABLE protoc PATHS ${PROTOBUF_ROOT}/bin NO_DEFAULT_PATH)
if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE) if (PROTOBUF_INCLUDE_DIR AND PROTOBUF_LIBRARY AND PROTOBUF_LITE_LIBRARY AND PROTOBUF_PROTOC_LIBRARY AND PROTOBUF_PROTOC_EXECUTABLE)
message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.") message(STATUS "Using custom protobuf library in ${PROTOBUF_ROOT}.")
SET(PROTOBUF_FOUND true)
SET_PROTOBUF_VERSION() SET_PROTOBUF_VERSION()
PROMPT_PROTOBUF_LIB() PROMPT_PROTOBUF_LIB()
else() else()
message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}.") message(WARNING "Cannot find protobuf library in ${PROTOBUF_ROOT}")
endif() endif()
endif() endif()
...@@ -239,6 +251,7 @@ IF(CMAKE_CROSSCOMPILING) ...@@ -239,6 +251,7 @@ IF(CMAKE_CROSSCOMPILING)
CACHE FILEPATH "protobuf executable." FORCE) CACHE FILEPATH "protobuf executable." FORCE)
ENDIF() ENDIF()
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE) build_protobuf(extern_protobuf FALSE)
......
...@@ -148,7 +148,8 @@ function(merge_static_libs TARGET_NAME) ...@@ -148,7 +148,8 @@ function(merge_static_libs TARGET_NAME)
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles} COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
) )
else() # general UNIX: use "ar" to extract objects and re-add to a common lib endif(APPLE)
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir) set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
foreach(lib ${libs}) foreach(lib ${libs})
...@@ -187,7 +188,36 @@ function(merge_static_libs TARGET_NAME) ...@@ -187,7 +188,36 @@ function(merge_static_libs TARGET_NAME)
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'` COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME} COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
WORKING_DIRECTORY ${target_DIR}) WORKING_DIRECTORY ${target_DIR})
endif() endif(LINUX)
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
# Make the generated dummy source file depended on all static input
# libs. If input lib changes,the source file is touched
# which causes the desired effect (relink).
add_custom_command(OUTPUT ${target_SRCS}
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
DEPENDS ${libs})
# Generate dummy staic lib
file(WRITE ${target_SRCS} "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
add_library(${TARGET_NAME} STATIC ${target_SRCS})
target_link_libraries(${TARGET_NAME} ${libs_deps})
foreach(lib ${libs})
# Get the file names of the libraries to be merged
#if(NOT $<TARGET_FILE:${lib}> MATCHES "lib.*\\.lib")
# message("library" ${lib})
# set(libfiles ${libfiles} lib$<TARGET_FILE:${lib}>)
#else()
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
#endif()
endforeach()
# windows cmd return error in clean env.
# COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs) endfunction(merge_static_libs)
function(cc_library TARGET_NAME) function(cc_library TARGET_NAME)
...@@ -195,6 +225,10 @@ function(cc_library TARGET_NAME) ...@@ -195,6 +225,10 @@ function(cc_library TARGET_NAME)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS) set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(WIN32)
# add libxxx.lib prefix in windows
set(${TARGET_NAME}_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}${TARGET_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}" CACHE STRING "output library name for target ${TARGET_NAME}")
endif(WIN32)
if(cc_library_SRCS) if(cc_library_SRCS)
if(cc_library_SHARED OR cc_library_shared) # build *.so if(cc_library_SHARED OR cc_library_shared) # build *.so
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS}) add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
......
...@@ -101,6 +101,7 @@ if(WITH_MKLDNN) ...@@ -101,6 +101,7 @@ if(WITH_MKLDNN)
) )
endif() endif()
if (NOT WIN32)
if(NOT MOBILE_INFERENCE AND NOT RPI) if(NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy") set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")
copy(snappy_lib copy(snappy_lib
...@@ -120,15 +121,23 @@ if(NOT MOBILE_INFERENCE AND NOT RPI) ...@@ -120,15 +121,23 @@ if(NOT MOBILE_INFERENCE AND NOT RPI)
DSTS ${dst_dir} ${dst_dir}/lib DSTS ${dst_dir} ${dst_dir}/lib
DEPS zlib) DEPS zlib)
endif() endif()
endif(NOT WIN32)
# paddle fluid module # paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid") set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid") set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
set(module "framework") set(module "framework")
if (NOT WIN32)
copy(framework_lib DEPS framework_py_proto copy(framework_lib DEPS framework_py_proto
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
) )
else()
copy(framework_lib
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module}
)
endif(NOT WIN32)
set(module "memory") set(module "memory")
copy(memory_lib copy(memory_lib
......
...@@ -2,9 +2,13 @@ add_subdirectory(memory) ...@@ -2,9 +2,13 @@ add_subdirectory(memory)
add_subdirectory(platform) add_subdirectory(platform)
add_subdirectory(framework) add_subdirectory(framework)
add_subdirectory(operators) add_subdirectory(operators)
add_subdirectory(pybind)
add_subdirectory(string) add_subdirectory(string)
if (NOT WIN32)
add_subdirectory(pybind)
add_subdirectory(recordio) add_subdirectory(recordio)
endif(NOT WIN32)
if(WITH_INFERENCE) if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last. # NOTE: please add subdirectory inference at last.
add_subdirectory(inference) add_subdirectory(inference)
......
add_subdirectory(details)
add_subdirectory(ir) add_subdirectory(ir)
if (NOT WIN32)
add_subdirectory(details)
endif (NOT WIN32)
# ddim lib # ddim lib
proto_library(framework_proto SRCS framework.proto) proto_library(framework_proto SRCS framework.proto)
...@@ -28,8 +30,12 @@ if(WITH_GPU) ...@@ -28,8 +30,12 @@ if(WITH_GPU)
else() else()
cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor) cc_test(mixed_vector_test SRCS mixed_vector_test.cc DEPS place memory device_context tensor)
endif() endif()
if (NOT WIN32)
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio)
else()
cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
endif (NOT WIN32)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
...@@ -69,14 +75,22 @@ cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute ...@@ -69,14 +75,22 @@ cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
if (NOT WIN32)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler) shape_inference data_transform lod_tensor profiler)
else()
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
shape_inference data_transform lod_tensor)
endif(NOT WIN32)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
if (NOT WIN32)
py_proto_compile(framework_py_proto SRCS framework.proto) py_proto_compile(framework_py_proto SRCS framework.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module. # Generate an empty __init__.py to make framework_py_proto as a valid python module.
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
...@@ -86,6 +100,7 @@ add_custom_command(TARGET framework_py_proto POST_BUILD ...@@ -86,6 +100,7 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/ COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
COMMENT "Copy generated python proto into directory paddle/fluid/proto." COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif(NOT WIN32)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor) cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
...@@ -120,7 +135,9 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) ...@@ -120,7 +135,9 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
# cc_test(channel_test SRCS channel_test.cc) # cc_test(channel_test SRCS channel_test.cc)
cc_test(tuple_test SRCS tuple_test.cc ) cc_test(tuple_test SRCS tuple_test.cc )
if (NOT WIN32)
cc_test(rw_lock_test SRCS rw_lock_test.cc) cc_test(rw_lock_test SRCS rw_lock_test.cc)
endif (NOT WIN32)
# disable test temporarily. # disable test temporarily.
# TODO https://github.com/PaddlePaddle/Paddle/issues/11971 # TODO https://github.com/PaddlePaddle/Paddle/issues/11971
......
...@@ -26,6 +26,7 @@ namespace framework { ...@@ -26,6 +26,7 @@ namespace framework {
extern proto::VarType::Type ToDataType(std::type_index type); extern proto::VarType::Type ToDataType(std::type_index type);
extern std::type_index ToTypeIndex(proto::VarType::Type type); extern std::type_index ToTypeIndex(proto::VarType::Type type);
#if !defined(_WIN32)
template <typename Visitor> template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) { inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
switch (type) { switch (type) {
...@@ -57,6 +58,40 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) { ...@@ -57,6 +58,40 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
PADDLE_THROW("Not supported %d", type); PADDLE_THROW("Not supported %d", type);
} }
} }
#else
// the msvc compiler do not implement two-stage name lookup correctly.
template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
switch (type) {
case proto::VarType::FP16:
visitor.operator()<platform::float16>();
break;
case proto::VarType::FP32:
visitor.operator()<float>();
break;
case proto::VarType::FP64:
visitor.operator()<double>();
break;
case proto::VarType::INT32:
visitor.operator()<int>();
break;
case proto::VarType::INT64:
visitor.operator()<int64_t>();
break;
case proto::VarType::BOOL:
visitor.operator()<bool>();
break;
case proto::VarType::UINT8:
visitor.operator()<uint8_t>();
break;
case proto::VarType::INT16:
visitor.operator()<int16_t>();
break;
default:
PADDLE_THROW("Not supported %d", type);
}
}
#endif // _WIN32
extern std::string DataTypeToString(const proto::VarType::Type type); extern std::string DataTypeToString(const proto::VarType::Type type);
extern size_t SizeOfType(std::type_index type); extern size_t SizeOfType(std::type_index type);
......
...@@ -25,8 +25,10 @@ limitations under the License. */ ...@@ -25,8 +25,10 @@ limitations under the License. */
#include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h" #include "paddle/fluid/memory/memory.h"
#if !defined(_WIN32)
#include "paddle/fluid/recordio/scanner.h" #include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h" #include "paddle/fluid/recordio/writer.h"
#endif // _WIN32
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -300,6 +302,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor, ...@@ -300,6 +302,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx); TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
} }
#if !defined(_WIN32)
void WriteToRecordIO(recordio::Writer *writer, void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor, const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) { const platform::DeviceContext &dev_ctx) {
...@@ -329,7 +332,19 @@ bool ReadFromRecordIO(recordio::Scanner *scanner, ...@@ -329,7 +332,19 @@ bool ReadFromRecordIO(recordio::Scanner *scanner,
return true; return true;
} }
#else
class Writer {};
class Scanner {};
void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {}
bool ReadFromRecordIO(recordio::Scanner *scanner,
const platform::DeviceContext &dev_ctx,
std::vector<LoDTensor> *result_ptr) {
PADDLE_ENFORCE("windows didn't supported recordio!.");
return true;
}
#endif // _WIN32
std::vector<LoDTensor> LoDTensor::SplitLoDTensor( std::vector<LoDTensor> LoDTensor::SplitLoDTensor(
const std::vector<platform::Place> places) const { const std::vector<platform::Place> places) const {
check_memory_size(); check_memory_size();
......
...@@ -274,6 +274,7 @@ TEST(LoD, ConvertToOffsetBasedLoD) { ...@@ -274,6 +274,7 @@ TEST(LoD, ConvertToOffsetBasedLoD) {
EXPECT_EQ(offset_lod, expected); EXPECT_EQ(offset_lod, expected);
} }
#if !defined(_WIN32)
template <typename T> template <typename T>
static void TestRecordIO() { static void TestRecordIO() {
LoDTensor tensor; LoDTensor tensor;
...@@ -320,6 +321,7 @@ TEST(LoDTensor, RecordIO) { ...@@ -320,6 +321,7 @@ TEST(LoDTensor, RecordIO) {
TestRecordIO<float>(); TestRecordIO<float>();
TestRecordIO<double>(); TestRecordIO<double>();
} }
#endif // !defined(_WIN32)
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -74,6 +74,12 @@ static DDim GetDims(const Scope& scope, const std::string& name, ...@@ -74,6 +74,12 @@ static DDim GetDims(const Scope& scope, const std::string& name,
} }
} }
static bool VarInited(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name);
if (var == nullptr) return false;
return var->IsInitialized();
}
static std::string GetDtype(const Scope& scope, const std::string& name) { static std::string GetDtype(const Scope& scope, const std::string& name) {
Variable* var = scope.FindVar(name); Variable* var = scope.FindVar(name);
if (var == nullptr) { if (var == nullptr) {
...@@ -87,8 +93,12 @@ static std::string GetDtype(const Scope& scope, const std::string& name) { ...@@ -87,8 +93,12 @@ static std::string GetDtype(const Scope& scope, const std::string& name) {
} }
return DataTypeToString(ToDataType(tensor.type())); return DataTypeToString(ToDataType(tensor.type()));
} else if (var->IsType<SelectedRows>()) { } else if (var->IsType<SelectedRows>()) {
return DataTypeToString( auto tensor = var->Get<SelectedRows>().value();
ToDataType(var->Get<SelectedRows>().value().type())); if (UNLIKELY(!tensor.IsInitialized())) {
return "uninited";
} else {
return DataTypeToString(ToDataType(tensor.type()));
}
} else { } else {
return ""; return "";
} }
...@@ -197,16 +207,21 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -197,16 +207,21 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
auto& input = *it; auto& input = *it;
ss << input.first << "["; ss << input.first << "[";
for (size_t i = 0; i < input.second.size(); ++i) { for (size_t i = 0; i < input.second.size(); ++i) {
ss << input.second[i]; auto var_name = input.second[i];
ss << var_name;
if (scope) { if (scope) {
int row_size = GetRowSize(*scope, input.second[i]); if (!VarInited(*scope, var_name)) {
if (row_size >= 0) { ss << "[uninited]";
ss << "[row_size=" << row_size << "]"; } else {
int row_size = GetRowSize(*scope, var_name);
if (row_size >= 0) {
ss << "[row_size=" << row_size << "]";
}
std::string dtype = GetDtype(*scope, var_name);
ss << ":" << dtype;
ss << "[" << GetDims(*scope, var_name, true) << "]";
ss << "(" << GetLoD(*scope, var_name) << ")";
} }
std::string dtype = GetDtype(*scope, input.second[i]);
ss << ":" << dtype;
ss << "[" << GetDims(*scope, input.second[i], true) << "]";
ss << "(" << GetLoD(*scope, input.second[i]) << ")";
} }
if (i != input.second.size() - 1) { if (i != input.second.size() - 1) {
ss << ", "; ss << ", ";
...@@ -223,14 +238,19 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const { ...@@ -223,14 +238,19 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
auto& output = *it; auto& output = *it;
ss << output.first << "["; ss << output.first << "[";
for (size_t i = 0; i < output.second.size(); ++i) { for (size_t i = 0; i < output.second.size(); ++i) {
ss << output.second[i]; auto var_name = output.second[i];
ss << var_name;
if (scope) { if (scope) {
int row_size = GetRowSize(*scope, output.second[i]); if (!VarInited(*scope, var_name)) {
if (row_size >= 0) { ss << "[uninited]";
ss << "[row_size=" << row_size << "]"; } else {
int row_size = GetRowSize(*scope, output.second[i]);
if (row_size >= 0) {
ss << "[row_size=" << row_size << "]";
}
ss << "[" << GetDims(*scope, var_name, true) << "]";
ss << "(" << GetLoD(*scope, var_name) << ")";
} }
ss << "[" << GetDims(*scope, output.second[i], true) << "]";
ss << "(" << GetLoD(*scope, output.second[i]) << ")";
} }
if (i != output.second.size() - 1) { if (i != output.second.size() - 1) {
ss << ", "; ss << ", ";
......
...@@ -14,13 +14,16 @@ limitations under the License. */ ...@@ -14,13 +14,16 @@ limitations under the License. */
#pragma once #pragma once
#if !defined(_WIN32)
#include <pthread.h> #include <pthread.h>
#endif // !_WIN32
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
#if !defined(_WIN32)
struct RWLock { struct RWLock {
RWLock() { pthread_rwlock_init(&lock_, nullptr); } RWLock() { pthread_rwlock_init(&lock_, nullptr); }
...@@ -43,6 +46,15 @@ struct RWLock { ...@@ -43,6 +46,15 @@ struct RWLock {
private: private:
pthread_rwlock_t lock_; pthread_rwlock_t lock_;
}; };
#else
// https://stackoverflow.com/questions/7125250/making-pthread-rwlock-wrlock-recursive
// In windows, rw_lock seems like a hack. Use empty object and do nothing.
struct RWLock {
void RDLock() {}
void WRLock() {}
void UNLock() {}
};
#endif
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -23,9 +23,11 @@ include_directories("${PADDLE_LIB}") ...@@ -23,9 +23,11 @@ include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include") include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include") include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories("${PADDLE_LIB}/third_party/install/gflags/include") include_directories("${PADDLE_LIB}/third_party/install/gflags/include")
if (NOT WIN32)
include_directories("${PADDLE_LIB}/third_party/install/snappy/include") include_directories("${PADDLE_LIB}/third_party/install/snappy/include")
include_directories("${PADDLE_LIB}/third_party/install/snappystream/include") include_directories("${PADDLE_LIB}/third_party/install/snappystream/include")
include_directories("${PADDLE_LIB}/third_party/install/zlib/include") include_directories("${PADDLE_LIB}/third_party/install/zlib/include")
endif(NOT WIN32)
include_directories("${PADDLE_LIB}/third_party/boost") include_directories("${PADDLE_LIB}/third_party/boost")
include_directories("${PADDLE_LIB}/third_party/eigen3") include_directories("${PADDLE_LIB}/third_party/eigen3")
......
...@@ -11,12 +11,18 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,12 +11,18 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/memory/detail/system_allocator.h"
#include <stdlib.h> // for malloc and free #ifdef _WIN32
#include <malloc.h>
#include <windows.h> // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h> // for mlock and munlock #include <sys/mman.h> // for mlock and munlock
#include <algorithm> // for std::max #endif
#include <stdlib.h> // for malloc and free
#include <algorithm> // for std::max
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "paddle/fluid/platform/assert.h" #include "paddle/fluid/platform/assert.h"
...@@ -35,31 +41,42 @@ namespace paddle { ...@@ -35,31 +41,42 @@ namespace paddle {
namespace memory { namespace memory {
namespace detail { namespace detail {
void* CPUAllocator::Alloc(size_t* index, size_t size) { void* AlignedMalloc(size_t size) {
// According to http://www.cplusplus.com/reference/cstdlib/malloc/,
// malloc might not return nullptr if size is zero, but the returned
// pointer shall not be dereferenced -- so we make it nullptr.
if (size <= 0) return nullptr;
*index = 0; // unlock memory
void* p = nullptr; void* p = nullptr;
size_t alignment = 32ul;
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp
// memory alignment // memory alignment
PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0, "Alloc %ld error!", alignment = 4096ul;
size); #endif
#ifdef _WIN32
p = _aligned_malloc(size, alignment);
#else #else
PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0, "Alloc %ld error!", PADDLE_ENFORCE_EQ(posix_memalign(&p, alignment, size), 0, "Alloc %ld error!",
size); size);
#endif #endif
PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size); PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size);
return p;
}
void* CPUAllocator::Alloc(size_t* index, size_t size) {
// According to http://www.cplusplus.com/reference/cstdlib/malloc/,
// malloc might not return nullptr if size is zero, but the returned
// pointer shall not be dereferenced -- so we make it nullptr.
if (size <= 0) return nullptr;
*index = 0; // unlock memory
void* p = AlignedMalloc(size);
if (p != nullptr) { if (p != nullptr) {
if (FLAGS_use_pinned_memory) { if (FLAGS_use_pinned_memory) {
*index = 1; *index = 1;
#ifdef _WIN32
VirtualLock(p, size);
#else
mlock(p, size); // lock memory mlock(p, size); // lock memory
#endif
} }
} }
...@@ -68,7 +85,11 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) { ...@@ -68,7 +85,11 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) {
void CPUAllocator::Free(void* p, size_t size, size_t index) { void CPUAllocator::Free(void* p, size_t size, size_t index) {
if (p != nullptr && index == 1) { if (p != nullptr && index == 1) {
#ifdef _WIN32
VirtualUnlock(p, size);
#else
munlock(p, size); munlock(p, size);
#endif
} }
free(p); free(p);
} }
......
...@@ -85,7 +85,7 @@ function(op_library TARGET) ...@@ -85,7 +85,7 @@ function(op_library TARGET)
#remove windows unsupported op #remove windows unsupported op
if (WIN32) if (WIN32)
foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op") foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op")
if ("${TARGET}" STREQUAL "${windows_unsupport_op}") if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
return() return()
endif() endif()
...@@ -319,8 +319,9 @@ foreach(src ${GENERAL_OPS}) ...@@ -319,8 +319,9 @@ foreach(src ${GENERAL_OPS})
endforeach() endforeach()
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n") file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
if (NOT WIN32)
add_subdirectory(reader) add_subdirectory(reader)
endif(NOT WIN32)
foreach(src ${READER_LIBRARY}) foreach(src ${READER_LIBRARY})
set(OP_LIBRARY ${src} ${OP_LIBRARY}) set(OP_LIBRARY ${src} ${OP_LIBRARY})
endforeach() endforeach()
......
...@@ -60,6 +60,20 @@ class AucKernel : public framework::OpKernel<T> { ...@@ -60,6 +60,20 @@ class AucKernel : public framework::OpKernel<T> {
const T* inference_data = predict->data<T>(); const T* inference_data = predict->data<T>();
const auto* label_data = label->data<int64_t>(); const auto* label_data = label->data<int64_t>();
// check if states are inited.
auto* tp_in = ctx.Input<Tensor>("TP");
auto* fp_in = ctx.Input<Tensor>("FP");
auto* tn_in = ctx.Input<Tensor>("TN");
auto* fn_in = ctx.Input<Tensor>("FN");
PADDLE_ENFORCE(tp_in->IsInitialized(), "true_positive is not inited!");
PADDLE_ENFORCE(fp_in->IsInitialized(), "false_negative is not inited!");
PADDLE_ENFORCE(tn_in->IsInitialized(), "true_negative is not inited!");
PADDLE_ENFORCE(fn_in->IsInitialized(), "false_positive is not inited!");
PADDLE_ENFORCE_EQ(tp_in->numel(), num_thresholds, "");
PADDLE_ENFORCE_EQ(fp_in->numel(), num_thresholds, "");
PADDLE_ENFORCE_EQ(tn_in->numel(), num_thresholds, "");
PADDLE_ENFORCE_EQ(fn_in->numel(), num_thresholds, "");
auto* tp_data = true_positive->mutable_data<int64_t>(ctx.GetPlace()); auto* tp_data = true_positive->mutable_data<int64_t>(ctx.GetPlace());
auto* fn_data = false_negative->mutable_data<int64_t>(ctx.GetPlace()); auto* fn_data = false_negative->mutable_data<int64_t>(ctx.GetPlace());
auto* tn_data = true_negative->mutable_data<int64_t>(ctx.GetPlace()); auto* tn_data = true_negative->mutable_data<int64_t>(ctx.GetPlace());
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include <sys/time.h> #include <sys/time.h>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <random>
#include <vector> #include <vector>
#include "gflags/gflags.h" #include "gflags/gflags.h"
#include "glog/logging.h" #include "glog/logging.h"
......
...@@ -19,6 +19,10 @@ limitations under the License. */ ...@@ -19,6 +19,10 @@ limitations under the License. */
#ifdef PADDLE_USE_OPENBLAS #ifdef PADDLE_USE_OPENBLAS
#include <cblas.h> #include <cblas.h>
// remove typedef in openblas
#undef FLOAT
#undef INT
#undef SIZE
#endif #endif
#include <cmath> #include <cmath>
......
...@@ -150,30 +150,17 @@ class StackKernel : public framework::OpKernel<T> { ...@@ -150,30 +150,17 @@ class StackKernel : public framework::OpKernel<T> {
int total_num = pre * n * post; int total_num = pre * n * post;
auto &dev_ctx = ctx.template device_context<DeviceContext>(); auto &dev_ctx = ctx.template device_context<DeviceContext>();
constexpr auto kMaxThreshold = 16;
if (std::is_same<DeviceContext, platform::CPUDeviceContext>::value ||
n > kMaxThreshold) {
#ifdef __NVCC__ #ifdef __NVCC__
VLOG(10) << "Stack more than " << kMaxThreshold thrust::device_vector<const T *> device_x_vec(x_datas);
<< " tensors on GPU may be slow."; auto x_data_arr = device_x_vec.data().get();
thrust::device_vector<const T *> device_x_vec(x_datas);
auto x_data_arr = device_x_vec.data().get();
#else #else
auto x_data_arr = x_datas.data(); auto x_data_arr = x_datas.data();
#endif #endif
StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post); StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post);
#ifdef __NVCC__ #ifdef __NVCC__
// Wait() must be called because device_x_vec may be destructed before // Wait() must be called because device_x_vec may be destructed before
// kernel ends // kernel ends
dev_ctx.Wait(); dev_ctx.Wait();
#endif
}
#ifdef __NVCC__
else { // NOLINT
framework::Array<const T *, kMaxThreshold> x_data_arr;
for (int i = 0; i < n; ++i) x_data_arr[i] = x_datas[i];
StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post);
}
#endif #endif
} }
}; };
...@@ -244,32 +231,17 @@ class StackGradKernel : public framework::OpKernel<T> { ...@@ -244,32 +231,17 @@ class StackGradKernel : public framework::OpKernel<T> {
int post = total_num / (n * pre); int post = total_num / (n * pre);
auto &dev_ctx = ctx.template device_context<DeviceContext>(); auto &dev_ctx = ctx.template device_context<DeviceContext>();
constexpr auto kMaxThreshold = 16;
if (std::is_same<DeviceContext, platform::CPUDeviceContext>::value ||
n > kMaxThreshold) {
#ifdef __NVCC__ #ifdef __NVCC__
VLOG(10) << "Stack more than " << kMaxThreshold thrust::device_vector<T *> device_dx_vec(dx_datas);
<< " tensors on GPU may be slow."; auto dx_data_arr = device_dx_vec.data().get();
thrust::device_vector<T *> device_dx_vec(dx_datas);
auto dx_data_arr = device_dx_vec.data().get();
#else #else
auto dx_data_arr = dx_datas.data(); auto dx_data_arr = dx_datas.data();
#endif #endif
StackGradFunctorForRange(dev_ctx, dx_data_arr, dy_data, total_num, n, StackGradFunctorForRange(dev_ctx, dx_data_arr, dy_data, total_num, n, post);
post);
#ifdef __NVCC__ #ifdef __NVCC__
// Wait() must be called because device_dx_vec may be destructed before // Wait() must be called because device_dx_vec may be destructed before
// kernel ends // kernel ends
dev_ctx.Wait(); dev_ctx.Wait();
#endif
}
#ifdef __NVCC__
else { // NOLINT
framework::Array<T *, kMaxThreshold> dx_data_arr;
for (int i = 0; i < n; ++i) dx_data_arr[i] = dx_datas[i];
StackGradFunctorForRange(dev_ctx, dx_data_arr, dy_data, total_num, n,
post);
}
#endif #endif
} }
}; };
......
...@@ -56,7 +56,11 @@ limitations under the License. */ ...@@ -56,7 +56,11 @@ limitations under the License. */
#include <immintrin.h> #include <immintrin.h>
#endif // PADDLE_ARM #endif // PADDLE_ARM
#if !defined(_WIN32)
#define PADDLE_ALIGN(x) __attribute__((aligned(x))) #define PADDLE_ALIGN(x) __attribute__((aligned(x)))
#else
#define PADDLE_ALIGN(x) /*do nothing*/
#endif
namespace paddle { namespace paddle {
namespace platform { namespace platform {
......
set(PYBIND_DEPS pybind python proto_desc memory executor prune profiler feed_fetch_method
) set(PYBIND_DEPS pybind python proto_desc memory executor prune feed_fetch_method)
set(PYBIND_SRCS pybind.cc exception.cc protobuf.cc const_value.cc)
if(NOT WIN32) if(NOT WIN32)
list(APPEND PYBIND_DEPS parallel_executor) list(APPEND PYBIND_DEPS parallel_executor profiler)
list(APPEND PYBIND_SRCS recordio.cc)
endif() endif()
if(WITH_PYTHON) if(WITH_PYTHON)
if(WITH_AMD_GPU) if(WITH_AMD_GPU)
hip_library(paddle_pybind SHARED hip_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc SRCS ${PYBIND_SRCS}
DEPS ${PYBIND_DEPS} DEPS ${PYBIND_DEPS}
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
else() else()
cc_library(paddle_pybind SHARED cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc SRCS ${PYBIND_SRCS}
DEPS ${PYBIND_DEPS} DEPS ${PYBIND_DEPS}
${GLOB_OP_LIB}) ${GLOB_OP_LIB})
if(NOT APPLE AND NOT ANDROID AND NOT WIN32) if(NOT APPLE AND NOT ANDROID AND NOT WIN32)
......
...@@ -119,10 +119,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1): ...@@ -119,10 +119,14 @@ def auc(input, label, curve='ROC', num_thresholds=200, topk=1):
helper = LayerHelper("auc", **locals()) helper = LayerHelper("auc", **locals())
auc_out = helper.create_tmp_variable(dtype="float64") auc_out = helper.create_tmp_variable(dtype="float64")
# make tp, tn, fp, fn persistable, so that can accumulate all batches. # make tp, tn, fp, fn persistable, so that can accumulate all batches.
tp = helper.create_global_variable(persistable=True, dtype='int64') tp = helper.create_global_variable(
tn = helper.create_global_variable(persistable=True, dtype='int64') persistable=True, dtype='int64', shape=[num_thresholds])
fp = helper.create_global_variable(persistable=True, dtype='int64') tn = helper.create_global_variable(
fn = helper.create_global_variable(persistable=True, dtype='int64') persistable=True, dtype='int64', shape=[num_thresholds])
fp = helper.create_global_variable(
persistable=True, dtype='int64', shape=[num_thresholds])
fn = helper.create_global_variable(
persistable=True, dtype='int64', shape=[num_thresholds])
for var in [tp, tn, fp, fn]: for var in [tp, tn, fp, fn]:
helper.set_variable_initializer( helper.set_variable_initializer(
var, Constant( var, Constant(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册