Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
5a6d7fe2
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5a6d7fe2
编写于
12月 18, 2018
作者:
P
peizhilin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add mkl,ctc support for windows
上级
0f085f0a
变更
40
显示空白变更内容
内联
并排
Showing
40 changed file
with
315 addition
and
172 deletion
+315
-172
CMakeLists.txt
CMakeLists.txt
+4
-8
cmake/cuda.cmake
cmake/cuda.cmake
+3
-0
cmake/cudnn.cmake
cmake/cudnn.cmake
+1
-0
cmake/external/cub.cmake
cmake/external/cub.cmake
+1
-1
cmake/external/dlpack.cmake
cmake/external/dlpack.cmake
+1
-1
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+31
-12
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+50
-33
cmake/external/python.cmake
cmake/external/python.cmake
+7
-1
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+25
-5
cmake/external/xbyak.cmake
cmake/external/xbyak.cmake
+2
-2
cmake/generic.cmake
cmake/generic.cmake
+5
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+8
-8
cmake/operators.cmake
cmake/operators.cmake
+1
-1
cmake/simd.cmake
cmake/simd.cmake
+35
-38
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-2
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+1
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+5
-5
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+2
-1
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+12
-3
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+0
-1
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-5
paddle/fluid/operators/cum_op.h
paddle/fluid/operators/cum_op.h
+2
-0
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+3
-0
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
+6
-0
paddle/fluid/operators/math/jit_gen.h
paddle/fluid/operators/math/jit_gen.h
+3
-0
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+5
-2
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+0
-2
paddle/fluid/platform/dynload/cudnn.cc
paddle/fluid/platform/dynload/cudnn.cc
+4
-0
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+1
-1
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+16
-0
paddle/fluid/platform/dynload/dynamic_loader.h
paddle/fluid/platform/dynload/dynamic_loader.h
+6
-0
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+1
-1
paddle/fluid/platform/dynload/tensorrt.h
paddle/fluid/platform/dynload/tensorrt.h
+1
-1
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+1
-1
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+4
-1
paddle/fluid/train/demo/CMakeLists.txt
paddle/fluid/train/demo/CMakeLists.txt
+14
-4
python/CMakeLists.txt
python/CMakeLists.txt
+11
-5
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+7
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+13
-5
python/setup.py.in
python/setup.py.in
+20
-18
未找到文件。
CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -125,16 +125,12 @@ if(ANDROID OR IOS)
add_definitions
(
-DPADDLE_MOBILE_INFERENCE
)
endif
()
if
(
APPLE
OR WIN32
)
if
(
APPLE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL for building on mac
and windows
"
FORCE
)
"Disable MKL for building on mac"
FORCE
)
endif
()
if
(
WIN32
)
set
(
WITH_DSO OFF CACHE STRING
"Disable DSO when compiling for Windows"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling for Windows"
FORCE
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
set
(
WITH_C_API OFF CACHE STRING
...
...
@@ -207,10 +203,10 @@ include(external/xxhash) # download xxhash
include
(
external/dlpack
)
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
if
(
NOT WIN32
)
# there is no official support of warpctc, nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
# there is no official support of nccl, cupti in windows
include
(
cupti
)
include
(
external/gzstream
)
endif
(
NOT WIN32
)
...
...
cmake/cuda.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -139,10 +139,12 @@ endfunction()
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
if
(
${
CUDA_VERSION
}
LESS 7.0
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs
}
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
60
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 8.0
)
# CUDA 7.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs7
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
70
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 9.0
)
# CUDA 8.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs8
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
...
...
@@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
list
(
APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
80
\"
"
)
endif
()
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
...
...
cmake/cudnn.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -89,6 +89,7 @@ if(CUDNN_FOUND)
if
(
NOT CUDNN_MAJOR_VERSION
)
set
(
CUDNN_VERSION
"???"
)
else
()
add_definitions
(
"-DPADDLE_CUDNN_BINVER=
\"
${
CUDNN_MAJOR_VERSION
}
\"
"
)
math
(
EXPR CUDNN_VERSION
"
${
CUDNN_MAJOR_VERSION
}
* 1000 +
${
CUDNN_MINOR_VERSION
}
* 100 +
${
CUDNN_PATCHLEVEL_VERSION
}
"
)
...
...
cmake/external/cub.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -32,4 +32,4 @@ endif()
add_dependencies
(
cub extern_cub
)
LIST
(
APPEND externl_project_dependencies cub
)
LIST
(
APPEND extern
a
l_project_dependencies cub
)
cmake/external/dlpack.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -28,4 +28,4 @@ endif()
add_dependencies
(
dlpack extern_dlpack
)
LIST
(
APPEND externl_project_dependencies dlpack
)
LIST
(
APPEND extern
a
l_project_dependencies dlpack
)
cmake/external/mkldnn.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET
(
MKLDNN_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/mkldnn
)
SET
(
MKLDNN_INC_DIR
"
${
MKLDNN_INSTALL_DIR
}
/include"
CACHE PATH
"mkldnn include directory."
FORCE
)
IF
(
WIN32 OR
APPLE
)
IF
(
APPLE
)
MESSAGE
(
WARNING
"
Windows or
Mac is not supported with MKLDNN in Paddle yet."
"Mac is not supported with MKLDNN in Paddle yet."
"Force WITH_MKLDNN=OFF"
)
SET
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN in
Windows and
MacOS"
FORCE
)
SET
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN in MacOS"
FORCE
)
return
()
ENDIF
()
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
MESSAGE
(
STATUS
"Set
${
MKLDNN_INSTALL_DIR
}
/lib to runtime path"
)
SET
(
CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
...
...
@@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE
()
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
ENDIF
()
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds"
)
SET
(
MKLDNN_FLAG
"
${
MKLDNN_FLAG
}
-Wno-unused-result -Wno-unused-value"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
IF
(
NOT WIN32
)
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds"
)
SET
(
MKLDNN_FLAG
"
${
MKLDNN_FLAG
}
-Wno-unused-result -Wno-unused-value"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
ENDIF
(
NOT WIN32
)
ExternalProject_Add
(
${
MKLDNN_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
...
...
@@ -58,8 +61,15 @@ ExternalProject_Add(
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
CMAKE_ARGS -DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=
${
CMAKE_BUILD_TYPE
}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DMKLROOT=
${
MKLML_ROOT
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
...
...
@@ -67,6 +77,11 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKLML_ROOT
}
)
if
(
WIN32
)
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/mkldnn.lib"
CACHE FILEPATH
"mkldnn library."
FORCE
)
else
(
WIN32
)
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
endif
(
WIN32
)
ADD_LIBRARY
(
shared_mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
...
...
@@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# copy the real so.0 lib to install dir
# it can be directly contained in wheel or capi
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
COMMAND cp
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
if
(
WIN32
)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/lib/mkldnn.dll
)
else
(
WIN32
)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
COMMAND
${
CMAKE_COMMAND
}
-E copy
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
DEPENDS mkldnn
)
endif
(
WIN32
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
IF
(
WITH_C_API
)
...
...
cmake/external/mklml.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -16,44 +16,55 @@ IF(NOT ${WITH_MKLML})
return
()
ENDIF
(
NOT
${
WITH_MKLML
}
)
IF
(
WIN32 OR
APPLE
)
IF
(
APPLE
)
MESSAGE
(
WARNING
"
Windows or
Mac is not supported with MKLML in Paddle yet."
"Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF"
)
SET
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package in Windows and MacOS"
FORCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
IF
((
NOT DEFINED MKLML_VER
)
OR
(
NOT DEFINED MKLML_URL
))
MESSAGE
(
STATUS
"use pre defined download url"
)
SET
(
MKLML_VER
"mklml_lnx_2019.0.20180710"
CACHE STRING
""
FORCE
)
SET
(
MKLML_URL
"http://paddlepaddledeps.cdn.bcebos.com/
${
MKLML_VER
}
.tgz"
CACHE STRING
""
FORCE
)
ENDIF
()
MESSAGE
(
STATUS
"MKLML_VER:
${
MKLML_VER
}
, MKLML_URL:
${
MKLML_URL
}
"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_INSTALL_ROOT
"
${
THIRD_PARTY_PATH
}
/install"
)
SET
(
MKLML_INSTALL_DIR
${
MKLML_INSTALL_ROOT
}
/
${
MKLML_DST_DIR
}
)
SET
(
MKLML_ROOT
${
MKLML_INSTALL_DIR
}
)
SET
(
MKLML_INC_DIR
${
MKLML_ROOT
}
/include
)
SET
(
MKLML_LIB_DIR
${
MKLML_ROOT
}
/lib
)
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
if
(
WIN32
)
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/mklml.lib
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5md.lib
)
SET
(
MKLML_SHARED_LIB
${
MKLML_LIB_DIR
}
/mklml.dll
)
SET
(
MKLML_SHARED_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5md.dll
)
else
()
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
SET
(
MKLML_SHARED_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
SET
(
MKLML_SHARED_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
endif
()
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLML_ROOT
}
/lib"
)
INCLUDE_DIRECTORIES
(
${
MKLML_INC_DIR
}
)
if
(
WIN32
)
MESSAGE
(
WARNING
"Please download the MKLML and and put it at "
${
THIRD_PARTY_PATH
}
/install/mklml
)
else
()
SET
(
MKLML_PROJECT
"extern_mklml"
)
IF
((
NOT DEFINED MKLML_VER
)
OR
(
NOT DEFINED MKLML_URL
))
MESSAGE
(
STATUS
"use pre defined download url"
)
SET
(
MKLML_VER
"mklml_lnx_2019.0.20180710"
CACHE STRING
""
FORCE
)
SET
(
MKLML_URL
"http://paddlepaddledeps.cdn.bcebos.com/
${
MKLML_VER
}
.tgz"
CACHE STRING
""
FORCE
)
ENDIF
()
MESSAGE
(
STATUS
"MKLML_VER:
${
MKLML_VER
}
, MKLML_URL:
${
MKLML_URL
}
"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
FILE
(
WRITE
${
MKLML_DOWNLOAD_DIR
}
/CMakeLists.txt
FILE
(
WRITE
${
MKLML_DOWNLOAD_DIR
}
/CMakeLists.txt
"PROJECT(MKLML)
\n
"
"cmake_minimum_required(VERSION 3.0)
\n
"
"install(DIRECTORY
${
MKLML_VER
}
/include
${
MKLML_VER
}
/lib
\n
"
" DESTINATION
${
MKLML_DST_DIR
}
)
\n
"
)
ExternalProject_Add
(
ExternalProject_Add
(
${
MKLML_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
PREFIX
${
MKLML_SOURCE_DIR
}
...
...
@@ -64,11 +75,17 @@ ExternalProject_Add(
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLML_INSTALL_ROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLML_INSTALL_ROOT
}
)
)
endif
()
INCLUDE_DIRECTORIES
(
${
MKLML_INC_DIR
}
)
ADD_LIBRARY
(
mklml SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mklml PROPERTY IMPORTED_LOCATION
${
MKLML_LIB
}
)
ADD_DEPENDENCIES
(
mklml
${
MKLML_PROJECT
}
)
if
(
NOT WIN32
)
ADD_DEPENDENCIES
(
mklml
${
MKLML_PROJECT
}
)
endif
()
LIST
(
APPEND external_project_dependencies mklml
)
IF
(
WITH_C_API
)
...
...
cmake/external/python.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION})
if
(
WIN32
)
execute_process
(
COMMAND
"
${
PYTHON_EXECUTABLE
}
"
"-c"
"from distutils import sysconfig as s;import sys;import struct;
"from distutils import sysconfig as s;import sys;import struct;
import sysconfig;
print(sys.prefix);
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(sysconfig.get_platform());
print(sysconfig.get_config_var('py_version_nodot'));
print(sysconfig.get_config_var('SOABI'));
"
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
...
...
@@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
string
(
REGEX REPLACE
"
\n
"
";"
_PYTHON_VALUES
${
_PYTHON_VALUES
}
)
list
(
GET _PYTHON_VALUES 0 PYTHON_PREFIX
)
list
(
GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX
)
list
(
GET _PYTHON_VALUES 2 SYS_PLATFORM
)
list
(
GET _PYTHON_VALUES 3 PYTHON_SHORT_VERSION_NODOT
)
list
(
GET _PYTHON_VALUES 4 PYTHON_SOABI
)
# Make sure all directory separators are '/'
string
(
REGEX REPLACE
"
\\\\
"
"/"
PYTHON_PREFIX
${
PYTHON_PREFIX
}
)
...
...
cmake/external/warpctc.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
# Used in unit test test_WarpCTCLayer
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
OR WIN32
)
SET
(
USE_OMP OFF
)
ELSE
()
SET
(
USE_OMP ON
)
ENDIF
()
IF
(
WIN32
)
SET
(
WARPCTC_REPOSITORY
"https://github.com/wopeizl/warp-ctc.git"
)
ELSE
()
SET
(
WARPCTC_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
)
ENDIF
()
ExternalProject_Add
(
extern_warpctc
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
GIT_REPOSITORY
${
WARPCTC_REPOSITORY
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_OMP=
${
USE_OMP
}
...
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
)
IF
(
WIN32
)
IF
(
NOT EXISTS
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
)
add_custom_command
(
TARGET extern_warpctc POST_BUILD
COMMAND cmake -E copy
${
WARPCTC_INSTALL_DIR
}
/bin/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
ENDIF
()
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
else
(
WIN32
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ENDIF
(
WIN32
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
...
...
cmake/external/xbyak.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -13,8 +13,8 @@
# limitations under the License.
set
(
WITH_XBYAK ON
)
if
(
WIN32 OR
APPLE
)
SET
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK in
Windows and
MacOS"
FORCE
)
if
(
APPLE
)
SET
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK in MacOS"
FORCE
)
return
()
endif
()
...
...
cmake/generic.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -267,7 +267,11 @@ function(cc_library TARGET_NAME)
list
(
APPEND cc_library_DEPS dynload_mklml
)
endif
()
add_dependencies
(
${
TARGET_NAME
}
mklml
)
if
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
${
MKLML_IOMP_LIB
}
)
else
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKLML_LIB_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
(
WIN32
)
endif
()
# remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
...
...
cmake/inference_lib.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32)
)
endif
()
if
(
NOT CBLAS_FOUND
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/openblas"
)
copy
(
openblas_lib
SRCS
${
CBLAS_INSTALL_DIR
}
/lib
${
CBLAS_INSTALL_DIR
}
/include
DSTS
${
dst_dir
}
${
dst_dir
}
DEPS extern_openblas
)
elseif
(
WITH_MKLML
)
if
(
WITH_MKLML
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/mklml"
)
copy
(
mklml_lib
SRCS
${
MKLML_LIB
}
${
MKLML_IOMP_LIB
}
${
MKLML_INC_DIR
}
DSTS
${
dst_dir
}
/lib
${
dst_dir
}
/lib
${
dst_dir
}
DEPS mklml
)
elseif
(
NOT CBLAS_FOUND OR WIN32
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/openblas"
)
copy
(
openblas_lib
SRCS
${
CBLAS_INSTALL_DIR
}
/lib
${
CBLAS_INSTALL_DIR
}
/include
DSTS
${
dst_dir
}
${
dst_dir
}
DEPS extern_openblas
)
endif
()
if
(
WITH_MKLDNN
)
...
...
cmake/operators.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
endif
()
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
endif
()
...
...
cmake/simd.cmake
浏览文件 @
5a6d7fe2
...
...
@@ -57,46 +57,43 @@ int main()
return 0;
}"
SSE3_FOUND
)
# disable AVX by default on windows
if
(
NOT WIN32
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
}"
AVX_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
}"
AVX2_FOUND
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
}"
AVX512F_FOUND
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -15,8 +15,7 @@ function(windows_symbolic TARGET)
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E remove
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMAND
${
CMAKE_COMMAND
}
-E copy_if_different
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
endforeach
()
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
5a6d7fe2
...
...
@@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() {
// FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
// this is a distributed or inter-process call, find a better way.
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
NoDummyInputSize
()
==
1
&&
local_scopes_
[
0
]
->
FindLocalVar
(
NCCL_ID_VARNAME
)
==
nullptr
)
{
#else
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
5a6d7fe2
...
...
@@ -215,7 +215,7 @@ class Vector {
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
->
ptr
();
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
dev_ctx
->
Wait
();
}
...
...
@@ -261,7 +261,7 @@ class Vector {
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
paddle
::
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
}
...
...
@@ -284,7 +284,7 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
memory
::
AllocationPtr
gpu_
;
mutable
paddle
::
memory
::
AllocationPtr
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
5a6d7fe2
...
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
...
...
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -89,12 +89,21 @@ endif()
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
if
(
NOT WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
else
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5md
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
endif
(
WIN32
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
if
(
WIN32
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/mkldnn.lib
)
else
(
WIN32
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
endif
(
WIN32
)
endif
()
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
5a6d7fe2
...
...
@@ -17,7 +17,6 @@ limitations under the License. */
#ifdef _WIN32
#include <malloc.h>
#include <windows.h> // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h> // for mlock and munlock
#endif
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -44,9 +44,8 @@ endif()
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
AND NOT WIN32
)
if
(
WITH_GPU
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
...
...
@@ -64,9 +63,7 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
...
...
paddle/fluid/operators/cum_op.h
浏览文件 @
5a6d7fe2
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
浏览文件 @
5a6d7fe2
...
...
@@ -19,6 +19,9 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
浏览文件 @
5a6d7fe2
...
...
@@ -17,6 +17,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#if defined(_WIN32)
#if defined(__AVX2__) || defined(__AVX__)
inline
__m256
operator
+=
(
__m256
a
,
__m256
b
)
{
return
_mm256_add_ps
(
a
,
b
);
}
#endif
#endif
namespace
paddle
{
namespace
operators
{
namespace
math
{
...
...
paddle/fluid/operators/math/jit_gen.h
浏览文件 @
5a6d7fe2
...
...
@@ -18,6 +18,9 @@ limitations under the License. */
#include <type_traits>
#include "paddle/fluid/platform/macros.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#define XBYAK_USE_MMAP_ALLOCATOR
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
5a6d7fe2
...
...
@@ -14,6 +14,10 @@ limitations under the License. */
#include "paddle/fluid/platform/cpu_info.h"
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#endif
#ifdef PADDLE_WITH_XBYAK
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
...
...
@@ -22,9 +26,8 @@ limitations under the License. */
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/types.h>
#elif defined(_WIN32)
#define
NOMINMAX // msvc max/min macro conflict with std::min/max
#define
WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <unistd.h>
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
...
...
paddle/fluid/platform/dynload/cudnn.cc
浏览文件 @
5a6d7fe2
...
...
@@ -38,6 +38,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R5
(
DEFINE_WRAP
);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R6
CUDNN_DNN_ROUTINE_EACH_R6
(
DEFINE_WRAP
);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R7
CUDNN_DNN_ROUTINE_EACH_R7
(
DEFINE_WRAP
);
#endif
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
5a6d7fe2
...
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
5a6d7fe2
...
...
@@ -53,6 +53,12 @@ namespace platform {
namespace
dynload
{
static
constexpr
char
cupti_lib_path
[]
=
CUPTI_LIB_PATH
;
#if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
static
constexpr
char
*
win_cublas_lib
=
"cublas64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_curand_lib
=
"curand64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_cudnn_lib
=
"cudnn64_"
PADDLE_CUDNN_BINVER
".dll"
;
#endif
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
// directory separator
...
...
@@ -165,6 +171,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
void
*
GetCublasDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_cublas_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
);
#endif
...
...
@@ -173,6 +181,8 @@ void* GetCublasDsoHandle() {
void
*
GetCUDNNDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
false
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
win_cudnn_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
false
);
#endif
...
...
@@ -193,6 +203,8 @@ void* GetCUPTIDsoHandle() {
void
*
GetCurandDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_curand_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
);
#endif
...
...
@@ -201,6 +213,8 @@ void* GetCurandDsoHandle() {
void
*
GetWarpCTCDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"warpctc.dll"
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
...
...
@@ -225,6 +239,8 @@ void* GetTensorRtDsoHandle() {
void
*
GetMKLMLDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"mklml.dll"
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.so"
);
#endif
...
...
paddle/fluid/platform/dynload/dynamic_loader.h
浏览文件 @
5a6d7fe2
...
...
@@ -18,6 +18,12 @@ namespace paddle {
namespace
platform
{
namespace
dynload
{
#ifndef _WIN32
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
void
*
GetCublasDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUPTIDsoHandle
();
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
5a6d7fe2
...
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
...
...
paddle/fluid/platform/dynload/tensorrt.h
浏览文件 @
5a6d7fe2
...
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(tensorrt_dso_flag, []() { \
tensorrt_dso_handle = \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
5a6d7fe2
...
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
...
...
paddle/fluid/platform/port.h
浏览文件 @
5a6d7fe2
...
...
@@ -37,6 +37,10 @@
#define GOOGLE_GLOG_DLL_DECL
#include <io.h> // _popen, _pclose
#include <stdio.h>
#ifdef _WINSOCKAPI_
/* Prevent inclusion of winsock.h in windows.h */
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <numeric> // std::accumulate in msvc
#ifndef S_ISDIR // windows port for sys/stat.h
...
...
@@ -55,7 +59,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
std
::
string
file_name
(
filename
);
file_name
.
replace
(
0
,
file_name
.
size
()
-
1
,
'/'
,
'\\'
);
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
if
(
!
hModule
)
{
throw
std
::
runtime_error
(
file_name
+
" not found."
);
...
...
paddle/fluid/train/demo/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -35,16 +35,26 @@ add_executable(demo_trainer demo_trainer.cc)
if
(
WITH_MKLDNN
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mkldnn/include"
)
if
(
WIN32
)
set
(
MKLDNN_LIB
${
PADDLE_LIB
}
/third_party/install/mkldnn/lib/mkldnn.lib
)
else
(
WIN32
)
set
(
MKLDNN_LIB
${
PADDLE_LIB
}
/third_party/install/mkldnn/lib/libmkldnn.so.0
)
endif
()
endif
(
WIN32
)
endif
(
WITH_MKLDNN
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
if
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/mklml.lib
)
else
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
)
endif
(
WIN32
)
else
()
if
(
APPLE
)
set
(
MATH_LIB cblas
)
else
(
APPLE
)
elseif
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.lib
)
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
(
APPLE
)
endif
()
...
...
python/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
...
@@ -48,12 +48,18 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
IF
(
WIN32
)
# Python would use the .pyd by default under Windows series platform
set
(
FLUID_DST_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/
)
get_filename_component
(
openblas_refpath
${
CBLAS_LIBRARIES
}
DIRECTORY
)
set
(
FLUID_CORE
${
FLUID_DST_DIR
}
/core.pyd
)
if
(
NOT WITH_MKLDNN
)
get_filename_component
(
openblas_refpath
${
CBLAS_LIBRARIES
}
DIRECTORY
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
FLUID_CORE
}
COMMAND cmake -E copy
${
openblas_refpath
}
/openblas.dll
${
FLUID_DST_DIR
}
DEPENDS paddle_pybind
)
else
(
NOT WITH_MKLDNN
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
FLUID_CORE
}
DEPENDS paddle_pybind
)
endif
(
NOT WITH_MKLDNN
)
ELSE
()
set
(
FLUID_CORE
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/core.so
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
...
...
python/paddle/fluid/__init__.py
浏览文件 @
5a6d7fe2
...
...
@@ -102,6 +102,12 @@ def __bootstrap__():
import
sys
import
os
import
platform
if
os
.
name
==
'nt'
:
third_lib_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
+
os
.
sep
+
'..'
+
os
.
sep
+
'libs'
os
.
environ
[
'path'
]
+=
';'
+
third_lib_path
sys
.
path
.
append
(
third_lib_path
)
from
.
import
core
in_test
=
'unittest'
in
sys
.
modules
...
...
@@ -128,13 +134,12 @@ def __bootstrap__():
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
]
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
if
os
.
name
!=
'nt'
:
read_env_flags
.
append
(
'warpctc_dir'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
if
core
.
is_compiled_with_dist
():
...
...
python/paddle/fluid/framework.py
浏览文件 @
5a6d7fe2
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
collections
import
contextlib
import
os
import
re
import
six
import
sys
...
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
try
:
from
.
import
core
except
ImportError
as
e
:
if
os
.
name
==
'nt'
:
raise
ImportError
(
"""NOTE: You may need to run
\"
set PATH=c:\python27\lib:%PATH%
\"
if you encounters
\"
mkldnn.dll not found
\"
errors. If you have python
installed in other directory, replace
\"
c:\python27\lib" with your own
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
))
else
:
raise
ImportError
(
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
...
...
python/setup.py.in
浏览文件 @
5a6d7fe2
...
...
@@ -158,16 +158,18 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
# put all thirdparty libraries in paddle.libs
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
if os.name != 'nt':
package_data['paddle.libs']= []
package_data['paddle.libs']=['libwarpctc' + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
package_data['paddle.libs']= []
package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
package_data['paddle.libs']+=[
'libmklml_intel' + ext_name,'libiomp5'
+ ext_name]
shutil.copy('${MKLML_
SHARED_
LIB}', libs_path)
shutil.copy('${MKLML_
SHARED_
IOMP_LIB}', libs_path)
package_data['paddle.libs']+=[
('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, ('libiomp5' if os.name != 'nt' else 'libiomp5md')
+ ext_name]
if '${WITH_MKLDNN}' == 'ON':
if '${CMAKE_BUILD_TYPE}' == 'Release':
if os.name != 'nt':
# only change rpath in Release mode.
# TODO(typhoonzero): use install_name_tool to patch mkl libs once
# we can support mkl on mac.
...
...
@@ -178,7 +180,7 @@ if '${WITH_MKLDNN}' == 'ON':
command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
if os.system(command) != 0:
raise Exception("patch libmkldnn.so failed, command: %s" % command)
package_data['paddle.libs']+=['libmkldnn.so.0']
package_data['paddle.libs']+=['libmkldnn.so.0'
if os.name != 'nt' else ('mkldnn' + ext_name)
]
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
if '${WITH_NGRAPH}' == 'ON':
# only change rpath in Release mode,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录