Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5a6d7fe2
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5a6d7fe2
编写于
12月 18, 2018
作者:
P
peizhilin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add mkl,ctc support for windows
上级
0f085f0a
变更
40
显示空白变更内容
内联
并排
Showing
40 changed file
with
315 addition
and
172 deletion
+315
-172
CMakeLists.txt
CMakeLists.txt
+4
-8
cmake/cuda.cmake
cmake/cuda.cmake
+3
-0
cmake/cudnn.cmake
cmake/cudnn.cmake
+1
-0
cmake/external/cub.cmake
cmake/external/cub.cmake
+1
-1
cmake/external/dlpack.cmake
cmake/external/dlpack.cmake
+1
-1
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+31
-12
cmake/external/mklml.cmake
cmake/external/mklml.cmake
+50
-33
cmake/external/python.cmake
cmake/external/python.cmake
+7
-1
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+25
-5
cmake/external/xbyak.cmake
cmake/external/xbyak.cmake
+2
-2
cmake/generic.cmake
cmake/generic.cmake
+5
-1
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+8
-8
cmake/operators.cmake
cmake/operators.cmake
+1
-1
cmake/simd.cmake
cmake/simd.cmake
+35
-38
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+1
-2
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+1
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+5
-5
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+2
-1
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+12
-3
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+0
-1
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-5
paddle/fluid/operators/cum_op.h
paddle/fluid/operators/cum_op.h
+2
-0
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+3
-0
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
+6
-0
paddle/fluid/operators/math/jit_gen.h
paddle/fluid/operators/math/jit_gen.h
+3
-0
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+5
-2
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+0
-2
paddle/fluid/platform/dynload/cudnn.cc
paddle/fluid/platform/dynload/cudnn.cc
+4
-0
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+1
-1
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+16
-0
paddle/fluid/platform/dynload/dynamic_loader.h
paddle/fluid/platform/dynload/dynamic_loader.h
+6
-0
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+1
-1
paddle/fluid/platform/dynload/tensorrt.h
paddle/fluid/platform/dynload/tensorrt.h
+1
-1
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+1
-1
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+4
-1
paddle/fluid/train/demo/CMakeLists.txt
paddle/fluid/train/demo/CMakeLists.txt
+14
-4
python/CMakeLists.txt
python/CMakeLists.txt
+11
-5
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+7
-2
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+13
-5
python/setup.py.in
python/setup.py.in
+20
-18
未找到文件。
CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -125,16 +125,12 @@ if(ANDROID OR IOS)
...
@@ -125,16 +125,12 @@ if(ANDROID OR IOS)
add_definitions
(
-DPADDLE_MOBILE_INFERENCE
)
add_definitions
(
-DPADDLE_MOBILE_INFERENCE
)
endif
()
endif
()
if
(
APPLE
OR WIN32
)
if
(
APPLE
)
set
(
WITH_MKL OFF CACHE STRING
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL for building on mac
and windows
"
FORCE
)
"Disable MKL for building on mac"
FORCE
)
endif
()
endif
()
if
(
WIN32
)
if
(
WIN32
)
set
(
WITH_DSO OFF CACHE STRING
"Disable DSO when compiling for Windows"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling for Windows"
FORCE
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
set
(
WITH_C_API OFF CACHE STRING
set
(
WITH_C_API OFF CACHE STRING
...
@@ -207,10 +203,10 @@ include(external/xxhash) # download xxhash
...
@@ -207,10 +203,10 @@ include(external/xxhash) # download xxhash
include
(
external/dlpack
)
include
(
external/dlpack
)
include
(
external/snappy
)
# download snappy
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
if
(
NOT WIN32
)
if
(
NOT WIN32
)
# there is no official support of warpctc, nccl, cupti in windows
# there is no official support of nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
include
(
cupti
)
include
(
cupti
)
include
(
external/gzstream
)
include
(
external/gzstream
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
...
...
cmake/cuda.cmake
浏览文件 @
5a6d7fe2
...
@@ -139,10 +139,12 @@ endfunction()
...
@@ -139,10 +139,12 @@ endfunction()
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
if
(
${
CUDA_VERSION
}
LESS 7.0
)
if
(
${
CUDA_VERSION
}
LESS 7.0
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs
}
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs
}
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
60
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 8.0
)
# CUDA 7.x
elseif
(
${
CUDA_VERSION
}
LESS 8.0
)
# CUDA 7.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs7
}
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs7
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
70
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 9.0
)
# CUDA 8.x
elseif
(
${
CUDA_VERSION
}
LESS 9.0
)
# CUDA 8.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs8
}
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs8
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
...
@@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
...
@@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
# warning for now.
list
(
APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
80
\"
"
)
endif
()
endif
()
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
...
...
cmake/cudnn.cmake
浏览文件 @
5a6d7fe2
...
@@ -89,6 +89,7 @@ if(CUDNN_FOUND)
...
@@ -89,6 +89,7 @@ if(CUDNN_FOUND)
if
(
NOT CUDNN_MAJOR_VERSION
)
if
(
NOT CUDNN_MAJOR_VERSION
)
set
(
CUDNN_VERSION
"???"
)
set
(
CUDNN_VERSION
"???"
)
else
()
else
()
add_definitions
(
"-DPADDLE_CUDNN_BINVER=
\"
${
CUDNN_MAJOR_VERSION
}
\"
"
)
math
(
EXPR CUDNN_VERSION
math
(
EXPR CUDNN_VERSION
"
${
CUDNN_MAJOR_VERSION
}
* 1000 +
"
${
CUDNN_MAJOR_VERSION
}
* 1000 +
${
CUDNN_MINOR_VERSION
}
* 100 +
${
CUDNN_PATCHLEVEL_VERSION
}
"
)
${
CUDNN_MINOR_VERSION
}
* 100 +
${
CUDNN_PATCHLEVEL_VERSION
}
"
)
...
...
cmake/external/cub.cmake
浏览文件 @
5a6d7fe2
...
@@ -32,4 +32,4 @@ endif()
...
@@ -32,4 +32,4 @@ endif()
add_dependencies
(
cub extern_cub
)
add_dependencies
(
cub extern_cub
)
LIST
(
APPEND externl_project_dependencies cub
)
LIST
(
APPEND extern
a
l_project_dependencies cub
)
cmake/external/dlpack.cmake
浏览文件 @
5a6d7fe2
...
@@ -28,4 +28,4 @@ endif()
...
@@ -28,4 +28,4 @@ endif()
add_dependencies
(
dlpack extern_dlpack
)
add_dependencies
(
dlpack extern_dlpack
)
LIST
(
APPEND externl_project_dependencies dlpack
)
LIST
(
APPEND extern
a
l_project_dependencies dlpack
)
cmake/external/mkldnn.cmake
浏览文件 @
5a6d7fe2
...
@@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
...
@@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
SET
(
MKLDNN_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/mkldnn
)
SET
(
MKLDNN_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/mkldnn
)
SET
(
MKLDNN_INC_DIR
"
${
MKLDNN_INSTALL_DIR
}
/include"
CACHE PATH
"mkldnn include directory."
FORCE
)
SET
(
MKLDNN_INC_DIR
"
${
MKLDNN_INSTALL_DIR
}
/include"
CACHE PATH
"mkldnn include directory."
FORCE
)
IF
(
WIN32 OR
APPLE
)
IF
(
APPLE
)
MESSAGE
(
WARNING
MESSAGE
(
WARNING
"
Windows or
Mac is not supported with MKLDNN in Paddle yet."
"Mac is not supported with MKLDNN in Paddle yet."
"Force WITH_MKLDNN=OFF"
)
"Force WITH_MKLDNN=OFF"
)
SET
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN in
Windows and
MacOS"
FORCE
)
SET
(
WITH_MKLDNN OFF CACHE STRING
"Disable MKLDNN in MacOS"
FORCE
)
return
()
return
()
ENDIF
()
ENDIF
()
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
MESSAGE
(
STATUS
"Set
${
MKLDNN_INSTALL_DIR
}
/lib to runtime path"
)
MESSAGE
(
STATUS
"Set
${
MKLDNN_INSTALL_DIR
}
/lib to runtime path"
)
SET
(
CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE
)
SET
(
CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLDNN_INSTALL_DIR
}
/lib"
)
...
@@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
...
@@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE
()
ELSE
()
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
ENDIF
()
ENDIF
()
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds"
)
SET
(
MKLDNN_FLAG
"
${
MKLDNN_FLAG
}
-Wno-unused-result -Wno-unused-value"
)
IF
(
NOT WIN32
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_FLAG
"
${
MKLDNN_FLAG
}
-Wno-unused-result -Wno-unused-value"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
ENDIF
(
NOT WIN32
)
ExternalProject_Add
(
ExternalProject_Add
(
${
MKLDNN_PROJECT
}
${
MKLDNN_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
...
@@ -58,8 +61,15 @@ ExternalProject_Add(
...
@@ -58,8 +61,15 @@ ExternalProject_Add(
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
CMAKE_ARGS -DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
CMAKE_ARGS -DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLDNN_INSTALL_DIR
}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=
${
CMAKE_BUILD_TYPE
}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=
${
CMAKE_BUILD_TYPE
}
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
CMAKE_ARGS -DMKLROOT=
${
MKLML_ROOT
}
CMAKE_ARGS -DMKLROOT=
${
MKLML_ROOT
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_C_FLAGS=
${
MKLDNN_CFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
CMAKE_ARGS -DCMAKE_CXX_FLAGS=
${
MKLDNN_CXXFLAG
}
...
@@ -67,6 +77,11 @@ ExternalProject_Add(
...
@@ -67,6 +77,11 @@ ExternalProject_Add(
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLDNN_INSTALL_DIR
}
-DMKLROOT:PATH=
${
MKLML_ROOT
}
-DMKLROOT:PATH=
${
MKLML_ROOT
}
)
)
if
(
WIN32
)
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/mkldnn.lib"
CACHE FILEPATH
"mkldnn library."
FORCE
)
else
(
WIN32
)
SET
(
MKLDNN_LIB
"
${
MKLDNN_INSTALL_DIR
}
/lib/libmkldnn.so"
CACHE FILEPATH
"mkldnn library."
FORCE
)
endif
(
WIN32
)
ADD_LIBRARY
(
shared_mkldnn SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
shared_mkldnn SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
SET_PROPERTY
(
TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION
${
MKLDNN_LIB
}
)
...
@@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
...
@@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
# copy the real so.0 lib to install dir
# copy the real so.0 lib to install dir
# it can be directly contained in wheel or capi
# it can be directly contained in wheel or capi
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
if
(
WIN32
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/lib/mkldnn.dll
)
COMMAND cp
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
else
(
WIN32
)
SET
(
MKLDNN_SHARED_LIB
${
MKLDNN_INSTALL_DIR
}
/libmkldnn.so.0
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
MKLDNN_SHARED_LIB
}
COMMAND
${
CMAKE_COMMAND
}
-E copy
${
MKLDNN_LIB
}
${
MKLDNN_SHARED_LIB
}
DEPENDS mkldnn
)
DEPENDS mkldnn
)
endif
(
WIN32
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
ADD_CUSTOM_TARGET
(
mkldnn_shared_lib ALL DEPENDS
${
MKLDNN_SHARED_LIB
}
)
IF
(
WITH_C_API
)
IF
(
WITH_C_API
)
...
...
cmake/external/mklml.cmake
浏览文件 @
5a6d7fe2
...
@@ -16,44 +16,55 @@ IF(NOT ${WITH_MKLML})
...
@@ -16,44 +16,55 @@ IF(NOT ${WITH_MKLML})
return
()
return
()
ENDIF
(
NOT
${
WITH_MKLML
}
)
ENDIF
(
NOT
${
WITH_MKLML
}
)
IF
(
WIN32 OR
APPLE
)
IF
(
APPLE
)
MESSAGE
(
WARNING
MESSAGE
(
WARNING
"
Windows or
Mac is not supported with MKLML in Paddle yet."
"Mac is not supported with MKLML in Paddle yet."
"Force WITH_MKLML=OFF"
)
"Force WITH_MKLML=OFF"
)
SET
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package in Windows and MacOS"
FORCE
)
SET
(
WITH_MKLML OFF CACHE STRING
"Disable MKLML package in Windows and MacOS"
FORCE
)
return
()
return
()
ENDIF
()
ENDIF
()
INCLUDE
(
ExternalProject
)
INCLUDE
(
ExternalProject
)
SET
(
MKLML_PROJECT
"extern_mklml"
)
IF
((
NOT DEFINED MKLML_VER
)
OR
(
NOT DEFINED MKLML_URL
))
MESSAGE
(
STATUS
"use pre defined download url"
)
SET
(
MKLML_VER
"mklml_lnx_2019.0.20180710"
CACHE STRING
""
FORCE
)
SET
(
MKLML_URL
"http://paddlepaddledeps.cdn.bcebos.com/
${
MKLML_VER
}
.tgz"
CACHE STRING
""
FORCE
)
ENDIF
()
MESSAGE
(
STATUS
"MKLML_VER:
${
MKLML_VER
}
, MKLML_URL:
${
MKLML_URL
}
"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_DST_DIR
"mklml"
)
SET
(
MKLML_INSTALL_ROOT
"
${
THIRD_PARTY_PATH
}
/install"
)
SET
(
MKLML_INSTALL_ROOT
"
${
THIRD_PARTY_PATH
}
/install"
)
SET
(
MKLML_INSTALL_DIR
${
MKLML_INSTALL_ROOT
}
/
${
MKLML_DST_DIR
}
)
SET
(
MKLML_INSTALL_DIR
${
MKLML_INSTALL_ROOT
}
/
${
MKLML_DST_DIR
}
)
SET
(
MKLML_ROOT
${
MKLML_INSTALL_DIR
}
)
SET
(
MKLML_ROOT
${
MKLML_INSTALL_DIR
}
)
SET
(
MKLML_INC_DIR
${
MKLML_ROOT
}
/include
)
SET
(
MKLML_INC_DIR
${
MKLML_ROOT
}
/include
)
SET
(
MKLML_LIB_DIR
${
MKLML_ROOT
}
/lib
)
SET
(
MKLML_LIB_DIR
${
MKLML_ROOT
}
/lib
)
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
if
(
WIN32
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/mklml.lib
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5md.lib
)
SET
(
MKLML_SHARED_LIB
${
MKLML_LIB_DIR
}
/mklml.dll
)
SET
(
MKLML_SHARED_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5md.dll
)
else
()
SET
(
MKLML_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
SET
(
MKLML_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
SET
(
MKLML_SHARED_LIB
${
MKLML_LIB_DIR
}
/libmklml_intel.so
)
SET
(
MKLML_SHARED_IOMP_LIB
${
MKLML_LIB_DIR
}
/libiomp5.so
)
endif
()
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLML_ROOT
}
/lib"
)
SET
(
CMAKE_INSTALL_RPATH
"
${
CMAKE_INSTALL_RPATH
}
"
"
${
MKLML_ROOT
}
/lib"
)
INCLUDE_DIRECTORIES
(
${
MKLML_INC_DIR
}
)
if
(
WIN32
)
MESSAGE
(
WARNING
"Please download the MKLML and and put it at "
${
THIRD_PARTY_PATH
}
/install/mklml
)
else
()
SET
(
MKLML_PROJECT
"extern_mklml"
)
IF
((
NOT DEFINED MKLML_VER
)
OR
(
NOT DEFINED MKLML_URL
))
MESSAGE
(
STATUS
"use pre defined download url"
)
SET
(
MKLML_VER
"mklml_lnx_2019.0.20180710"
CACHE STRING
""
FORCE
)
SET
(
MKLML_URL
"http://paddlepaddledeps.cdn.bcebos.com/
${
MKLML_VER
}
.tgz"
CACHE STRING
""
FORCE
)
ENDIF
()
MESSAGE
(
STATUS
"MKLML_VER:
${
MKLML_VER
}
, MKLML_URL:
${
MKLML_URL
}
"
)
SET
(
MKLML_SOURCE_DIR
"
${
THIRD_PARTY_PATH
}
/mklml"
)
SET
(
MKLML_DOWNLOAD_DIR
"
${
MKLML_SOURCE_DIR
}
/src/
${
MKLML_PROJECT
}
"
)
FILE
(
WRITE
${
MKLML_DOWNLOAD_DIR
}
/CMakeLists.txt
FILE
(
WRITE
${
MKLML_DOWNLOAD_DIR
}
/CMakeLists.txt
"PROJECT(MKLML)
\n
"
"PROJECT(MKLML)
\n
"
"cmake_minimum_required(VERSION 3.0)
\n
"
"cmake_minimum_required(VERSION 3.0)
\n
"
"install(DIRECTORY
${
MKLML_VER
}
/include
${
MKLML_VER
}
/lib
\n
"
"install(DIRECTORY
${
MKLML_VER
}
/include
${
MKLML_VER
}
/lib
\n
"
" DESTINATION
${
MKLML_DST_DIR
}
)
\n
"
)
" DESTINATION
${
MKLML_DST_DIR
}
)
\n
"
)
ExternalProject_Add
(
ExternalProject_Add
(
${
MKLML_PROJECT
}
${
MKLML_PROJECT
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
PREFIX
${
MKLML_SOURCE_DIR
}
PREFIX
${
MKLML_SOURCE_DIR
}
...
@@ -64,11 +75,17 @@ ExternalProject_Add(
...
@@ -64,11 +75,17 @@ ExternalProject_Add(
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLML_INSTALL_ROOT
}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
${
MKLML_INSTALL_ROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLML_INSTALL_ROOT
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
MKLML_INSTALL_ROOT
}
)
)
endif
()
INCLUDE_DIRECTORIES
(
${
MKLML_INC_DIR
}
)
ADD_LIBRARY
(
mklml SHARED IMPORTED GLOBAL
)
ADD_LIBRARY
(
mklml SHARED IMPORTED GLOBAL
)
SET_PROPERTY
(
TARGET mklml PROPERTY IMPORTED_LOCATION
${
MKLML_LIB
}
)
SET_PROPERTY
(
TARGET mklml PROPERTY IMPORTED_LOCATION
${
MKLML_LIB
}
)
ADD_DEPENDENCIES
(
mklml
${
MKLML_PROJECT
}
)
if
(
NOT WIN32
)
ADD_DEPENDENCIES
(
mklml
${
MKLML_PROJECT
}
)
endif
()
LIST
(
APPEND external_project_dependencies mklml
)
LIST
(
APPEND external_project_dependencies mklml
)
IF
(
WITH_C_API
)
IF
(
WITH_C_API
)
...
...
cmake/external/python.cmake
浏览文件 @
5a6d7fe2
...
@@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION})
...
@@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION})
if
(
WIN32
)
if
(
WIN32
)
execute_process
(
COMMAND
"
${
PYTHON_EXECUTABLE
}
"
"-c"
execute_process
(
COMMAND
"
${
PYTHON_EXECUTABLE
}
"
"-c"
"from distutils import sysconfig as s;import sys;import struct;
"from distutils import sysconfig as s;import sys;import struct;
import sysconfig;
print(sys.prefix);
print(sys.prefix);
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
print(sysconfig.get_platform());
print(sysconfig.get_config_var('py_version_nodot'));
print(sysconfig.get_config_var('SOABI'));
"
"
RESULT_VARIABLE _PYTHON_SUCCESS
RESULT_VARIABLE _PYTHON_SUCCESS
OUTPUT_VARIABLE _PYTHON_VALUES
OUTPUT_VARIABLE _PYTHON_VALUES
...
@@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
...
@@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
string
(
REGEX REPLACE
"
\n
"
";"
_PYTHON_VALUES
${
_PYTHON_VALUES
}
)
string
(
REGEX REPLACE
"
\n
"
";"
_PYTHON_VALUES
${
_PYTHON_VALUES
}
)
list
(
GET _PYTHON_VALUES 0 PYTHON_PREFIX
)
list
(
GET _PYTHON_VALUES 0 PYTHON_PREFIX
)
list
(
GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX
)
list
(
GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX
)
list
(
GET _PYTHON_VALUES 2 SYS_PLATFORM
)
list
(
GET _PYTHON_VALUES 3 PYTHON_SHORT_VERSION_NODOT
)
list
(
GET _PYTHON_VALUES 4 PYTHON_SOABI
)
# Make sure all directory separators are '/'
# Make sure all directory separators are '/'
string
(
REGEX REPLACE
"
\\\\
"
"/"
PYTHON_PREFIX
${
PYTHON_PREFIX
}
)
string
(
REGEX REPLACE
"
\\\\
"
"/"
PYTHON_PREFIX
${
PYTHON_PREFIX
}
)
...
...
cmake/external/warpctc.cmake
浏览文件 @
5a6d7fe2
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
# Used in unit test test_WarpCTCLayer
# Used in unit test test_WarpCTCLayer
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
OR WIN32
)
SET
(
USE_OMP OFF
)
SET
(
USE_OMP OFF
)
ELSE
()
ELSE
()
SET
(
USE_OMP ON
)
SET
(
USE_OMP ON
)
ENDIF
()
ENDIF
()
IF
(
WIN32
)
SET
(
WARPCTC_REPOSITORY
"https://github.com/wopeizl/warp-ctc.git"
)
ELSE
()
SET
(
WARPCTC_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
)
ENDIF
()
ExternalProject_Add
(
ExternalProject_Add
(
extern_warpctc
extern_warpctc
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
GIT_REPOSITORY
${
WARPCTC_REPOSITORY
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_OMP=
${
USE_OMP
}
-DWITH_OMP=
${
USE_OMP
}
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
)
)
IF
(
WIN32
)
IF
(
NOT EXISTS
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
)
add_custom_command
(
TARGET extern_warpctc POST_BUILD
COMMAND cmake -E copy
${
WARPCTC_INSTALL_DIR
}
/bin/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
ENDIF
()
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
else
(
WIN32
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ENDIF
(
WIN32
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
...
...
cmake/external/xbyak.cmake
浏览文件 @
5a6d7fe2
...
@@ -13,8 +13,8 @@
...
@@ -13,8 +13,8 @@
# limitations under the License.
# limitations under the License.
set
(
WITH_XBYAK ON
)
set
(
WITH_XBYAK ON
)
if
(
WIN32 OR
APPLE
)
if
(
APPLE
)
SET
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK in
Windows and
MacOS"
FORCE
)
SET
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK in MacOS"
FORCE
)
return
()
return
()
endif
()
endif
()
...
...
cmake/generic.cmake
浏览文件 @
5a6d7fe2
...
@@ -267,7 +267,11 @@ function(cc_library TARGET_NAME)
...
@@ -267,7 +267,11 @@ function(cc_library TARGET_NAME)
list
(
APPEND cc_library_DEPS dynload_mklml
)
list
(
APPEND cc_library_DEPS dynload_mklml
)
endif
()
endif
()
add_dependencies
(
${
TARGET_NAME
}
mklml
)
add_dependencies
(
${
TARGET_NAME
}
mklml
)
if
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
${
MKLML_IOMP_LIB
}
)
else
(
WIN32
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKLML_LIB_DIR
}
-liomp5 -Wl,--as-needed"
)
target_link_libraries
(
${
TARGET_NAME
}
"-L
${
MKLML_LIB_DIR
}
-liomp5 -Wl,--as-needed"
)
endif
(
WIN32
)
endif
()
endif
()
# remove link to python, see notes at:
# remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
...
...
cmake/inference_lib.cmake
浏览文件 @
5a6d7fe2
...
@@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32)
...
@@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32)
)
)
endif
()
endif
()
if
(
NOT CBLAS_FOUND
)
if
(
WITH_MKLML
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/openblas"
)
copy
(
openblas_lib
SRCS
${
CBLAS_INSTALL_DIR
}
/lib
${
CBLAS_INSTALL_DIR
}
/include
DSTS
${
dst_dir
}
${
dst_dir
}
DEPS extern_openblas
)
elseif
(
WITH_MKLML
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/mklml"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/mklml"
)
copy
(
mklml_lib
copy
(
mklml_lib
SRCS
${
MKLML_LIB
}
${
MKLML_IOMP_LIB
}
${
MKLML_INC_DIR
}
SRCS
${
MKLML_LIB
}
${
MKLML_IOMP_LIB
}
${
MKLML_INC_DIR
}
DSTS
${
dst_dir
}
/lib
${
dst_dir
}
/lib
${
dst_dir
}
DSTS
${
dst_dir
}
/lib
${
dst_dir
}
/lib
${
dst_dir
}
DEPS mklml
DEPS mklml
)
)
elseif
(
NOT CBLAS_FOUND OR WIN32
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/openblas"
)
copy
(
openblas_lib
SRCS
${
CBLAS_INSTALL_DIR
}
/lib
${
CBLAS_INSTALL_DIR
}
/include
DSTS
${
dst_dir
}
${
dst_dir
}
DEPS extern_openblas
)
endif
()
endif
()
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
...
...
cmake/operators.cmake
浏览文件 @
5a6d7fe2
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
endif
()
endif
()
if
(
WIN32
)
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
return
()
endif
()
endif
()
...
...
cmake/simd.cmake
浏览文件 @
5a6d7fe2
...
@@ -57,46 +57,43 @@ int main()
...
@@ -57,46 +57,43 @@ int main()
return 0;
return 0;
}"
SSE3_FOUND
)
}"
SSE3_FOUND
)
# disable AVX by default on windows
# Check AVX
if
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
# Check AVX
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
CHECK_CXX_SOURCE_RUNS
(
"
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
#include <immintrin.h>
CHECK_CXX_SOURCE_RUNS
(
"
int main()
#include <immintrin.h>
{
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
__m256 result = _mm256_add_ps (a, b);
return 0;
return 0;
}"
AVX_FOUND
)
}"
AVX_FOUND
)
# Check AVX 2
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
#include <immintrin.h>
int main()
int main()
{
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
__m256i result = _mm256_abs_epi32 (a);
return 0;
return 0;
}"
AVX2_FOUND
)
}"
AVX2_FOUND
)
# Check AVX512F
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
#include <immintrin.h>
int main()
int main()
{
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
__m512i result = _mm512_abs_epi32 (a);
return 0;
return 0;
}"
AVX512F_FOUND
)
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -15,8 +15,7 @@ function(windows_symbolic TARGET)
...
@@ -15,8 +15,7 @@ function(windows_symbolic TARGET)
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E remove
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy_if_different
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMAND
${
CMAKE_COMMAND
}
-E copy
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
COMMENT
"create hidden file of
${
src
}
.cu"
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
endforeach
()
endforeach
()
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
5a6d7fe2
...
@@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() {
...
@@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() {
// FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
// FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
// this is a distributed or inter-process call, find a better way.
// this is a distributed or inter-process call, find a better way.
#if
def PADDLE_WITH_CUDA
#if
defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
if
(
NoDummyInputSize
()
==
1
&&
if
(
NoDummyInputSize
()
==
1
&&
local_scopes_
[
0
]
->
FindLocalVar
(
NCCL_ID_VARNAME
)
==
nullptr
)
{
local_scopes_
[
0
]
->
FindLocalVar
(
NCCL_ID_VARNAME
)
==
nullptr
)
{
#else
#else
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
5a6d7fe2
...
@@ -215,7 +215,7 @@ class Vector {
...
@@ -215,7 +215,7 @@ class Vector {
auto
stream
=
dev_ctx
->
stream
();
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
->
ptr
();
void
*
src
=
gpu_
->
ptr
();
void
*
dst
=
cpu_
.
data
();
void
*
dst
=
cpu_
.
data
();
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
gpu_
->
size
(),
stream
);
dev_ctx
->
Wait
();
dev_ctx
->
Wait
();
}
}
...
@@ -261,7 +261,7 @@ class Vector {
...
@@ -261,7 +261,7 @@ class Vector {
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
auto
stream
=
dev_ctx
->
stream
();
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
paddle
::
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
gpu_
->
size
(),
stream
);
}
}
...
@@ -284,7 +284,7 @@ class Vector {
...
@@ -284,7 +284,7 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
std
::
vector
<
T
>
cpu_
;
mutable
memory
::
AllocationPtr
gpu_
;
mutable
paddle
::
memory
::
AllocationPtr
gpu_
;
mutable
int
flag_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
5a6d7fe2
...
@@ -23,6 +23,7 @@ limitations under the License. */
...
@@ -23,6 +23,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_map>
#include <unordered_set>
#include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
#include "paddle/fluid/framework/details/op_registry.h"
...
...
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -89,12 +89,21 @@ endif()
...
@@ -89,12 +89,21 @@ endif()
if
(
WITH_MKL
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
if
(
NOT WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
else
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5md
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
endif
(
WIN32
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
if
(
WIN32
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/mkldnn.lib
)
else
(
WIN32
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
endif
(
WIN32
)
endif
()
endif
()
else
()
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
5a6d7fe2
...
@@ -17,7 +17,6 @@ limitations under the License. */
...
@@ -17,7 +17,6 @@ limitations under the License. */
#ifdef _WIN32
#ifdef _WIN32
#include <malloc.h>
#include <malloc.h>
#include <windows.h> // VirtualLock/VirtualUnlock
#else
#else
#include <sys/mman.h> // for mlock and munlock
#include <sys/mman.h> // for mlock and munlock
#endif
#endif
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -44,9 +44,8 @@ endif()
...
@@ -44,9 +44,8 @@ endif()
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cudnn 7 above
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
AND NOT WIN32
)
if
(
WITH_GPU
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
else
()
...
@@ -64,9 +63,7 @@ endif()
...
@@ -64,9 +63,7 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
...
...
paddle/fluid/operators/cum_op.h
浏览文件 @
5a6d7fe2
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
浏览文件 @
5a6d7fe2
...
@@ -19,6 +19,9 @@ limitations under the License. */
...
@@ -19,6 +19,9 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#include "xbyak/xbyak.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
浏览文件 @
5a6d7fe2
...
@@ -17,6 +17,12 @@ limitations under the License. */
...
@@ -17,6 +17,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#if defined(_WIN32)
#if defined(__AVX2__) || defined(__AVX__)
inline
__m256
operator
+=
(
__m256
a
,
__m256
b
)
{
return
_mm256_add_ps
(
a
,
b
);
}
#endif
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
...
...
paddle/fluid/operators/math/jit_gen.h
浏览文件 @
5a6d7fe2
...
@@ -18,6 +18,9 @@ limitations under the License. */
...
@@ -18,6 +18,9 @@ limitations under the License. */
#include <type_traits>
#include <type_traits>
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/macros.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#define XBYAK_USE_MMAP_ALLOCATOR
#define XBYAK_USE_MMAP_ALLOCATOR
#include "xbyak/xbyak.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
5a6d7fe2
...
@@ -14,6 +14,10 @@ limitations under the License. */
...
@@ -14,6 +14,10 @@ limitations under the License. */
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#endif
#ifdef PADDLE_WITH_XBYAK
#ifdef PADDLE_WITH_XBYAK
#include "xbyak/xbyak.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
#include "xbyak/xbyak_util.h"
...
@@ -22,9 +26,8 @@ limitations under the License. */
...
@@ -22,9 +26,8 @@ limitations under the License. */
#ifdef __APPLE__
#ifdef __APPLE__
#include <sys/sysctl.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/types.h>
#elif defined(_WIN32)
#elif defined(_WIN32)
#define
NOMINMAX // msvc max/min macro conflict with std::min/max
#define
WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <windows.h>
#else
#else
#include <unistd.h>
#include <unistd.h>
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
endif
()
...
...
paddle/fluid/platform/dynload/cudnn.cc
浏览文件 @
5a6d7fe2
...
@@ -38,6 +38,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
...
@@ -38,6 +38,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R5
(
DEFINE_WRAP
);
CUDNN_DNN_ROUTINE_EACH_R5
(
DEFINE_WRAP
);
#endif
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R6
CUDNN_DNN_ROUTINE_EACH_R6
(
DEFINE_WRAP
);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R7
#ifdef CUDNN_DNN_ROUTINE_EACH_R7
CUDNN_DNN_ROUTINE_EACH_R7
(
DEFINE_WRAP
);
CUDNN_DNN_ROUTINE_EACH_R7
(
DEFINE_WRAP
);
#endif
#endif
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
5a6d7fe2
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using cudnn_func = decltype(&::__name); \
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
5a6d7fe2
...
@@ -53,6 +53,12 @@ namespace platform {
...
@@ -53,6 +53,12 @@ namespace platform {
namespace
dynload
{
namespace
dynload
{
static
constexpr
char
cupti_lib_path
[]
=
CUPTI_LIB_PATH
;
static
constexpr
char
cupti_lib_path
[]
=
CUPTI_LIB_PATH
;
#if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
static
constexpr
char
*
win_cublas_lib
=
"cublas64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_curand_lib
=
"curand64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_cudnn_lib
=
"cudnn64_"
PADDLE_CUDNN_BINVER
".dll"
;
#endif
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
const
std
::
string
&
part2
)
{
// directory separator
// directory separator
...
@@ -165,6 +171,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
...
@@ -165,6 +171,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
void
*
GetCublasDsoHandle
()
{
void
*
GetCublasDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_cublas_lib
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
);
#endif
#endif
...
@@ -173,6 +181,8 @@ void* GetCublasDsoHandle() {
...
@@ -173,6 +181,8 @@ void* GetCublasDsoHandle() {
void
*
GetCUDNNDsoHandle
()
{
void
*
GetCUDNNDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
false
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
false
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
win_cudnn_lib
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
false
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
false
);
#endif
#endif
...
@@ -193,6 +203,8 @@ void* GetCUPTIDsoHandle() {
...
@@ -193,6 +203,8 @@ void* GetCUPTIDsoHandle() {
void
*
GetCurandDsoHandle
()
{
void
*
GetCurandDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_curand_lib
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
);
#endif
#endif
...
@@ -201,6 +213,8 @@ void* GetCurandDsoHandle() {
...
@@ -201,6 +213,8 @@ void* GetCurandDsoHandle() {
void
*
GetWarpCTCDsoHandle
()
{
void
*
GetWarpCTCDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"warpctc.dll"
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
#endif
...
@@ -225,6 +239,8 @@ void* GetTensorRtDsoHandle() {
...
@@ -225,6 +239,8 @@ void* GetTensorRtDsoHandle() {
void
*
GetMKLMLDsoHandle
()
{
void
*
GetMKLMLDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.dylib"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"mklml.dll"
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.so"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.so"
);
#endif
#endif
...
...
paddle/fluid/platform/dynload/dynamic_loader.h
浏览文件 @
5a6d7fe2
...
@@ -18,6 +18,12 @@ namespace paddle {
...
@@ -18,6 +18,12 @@ namespace paddle {
namespace
platform
{
namespace
platform
{
namespace
dynload
{
namespace
dynload
{
#ifndef _WIN32
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
void
*
GetCublasDsoHandle
();
void
*
GetCublasDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUPTIDsoHandle
();
void
*
GetCUPTIDsoHandle
();
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
5a6d7fe2
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using mklmlFunc = decltype(&::__name); \
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
...
...
paddle/fluid/platform/dynload/tensorrt.h
浏览文件 @
5a6d7fe2
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(tensorrt_dso_flag, []() { \
std::call_once(tensorrt_dso_flag, []() { \
tensorrt_dso_handle = \
tensorrt_dso_handle = \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
5a6d7fe2
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using warpctcFunc = decltype(&::__name); \
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
...
...
paddle/fluid/platform/port.h
浏览文件 @
5a6d7fe2
...
@@ -37,6 +37,10 @@
...
@@ -37,6 +37,10 @@
#define GOOGLE_GLOG_DLL_DECL
#define GOOGLE_GLOG_DLL_DECL
#include <io.h> // _popen, _pclose
#include <io.h> // _popen, _pclose
#include <stdio.h>
#include <stdio.h>
#ifdef _WINSOCKAPI_
/* Prevent inclusion of winsock.h in windows.h */
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <windows.h>
#include <numeric> // std::accumulate in msvc
#include <numeric> // std::accumulate in msvc
#ifndef S_ISDIR // windows port for sys/stat.h
#ifndef S_ISDIR // windows port for sys/stat.h
...
@@ -55,7 +59,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
...
@@ -55,7 +59,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
std
::
string
file_name
(
filename
);
std
::
string
file_name
(
filename
);
file_name
.
replace
(
0
,
file_name
.
size
()
-
1
,
'/'
,
'\\'
);
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
if
(
!
hModule
)
{
if
(
!
hModule
)
{
throw
std
::
runtime_error
(
file_name
+
" not found."
);
throw
std
::
runtime_error
(
file_name
+
" not found."
);
...
...
paddle/fluid/train/demo/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -35,16 +35,26 @@ add_executable(demo_trainer demo_trainer.cc)
...
@@ -35,16 +35,26 @@ add_executable(demo_trainer demo_trainer.cc)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mkldnn/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mkldnn/include"
)
if
(
WIN32
)
set
(
MKLDNN_LIB
${
PADDLE_LIB
}
/third_party/install/mkldnn/lib/mkldnn.lib
)
else
(
WIN32
)
set
(
MKLDNN_LIB
${
PADDLE_LIB
}
/third_party/install/mkldnn/lib/libmkldnn.so.0
)
set
(
MKLDNN_LIB
${
PADDLE_LIB
}
/third_party/install/mkldnn/lib/libmkldnn.so.0
)
endif
()
endif
(
WIN32
)
endif
(
WITH_MKLDNN
)
if
(
WITH_MKL
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
if
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/mklml.lib
)
else
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
)
endif
(
WIN32
)
else
()
else
()
if
(
APPLE
)
if
(
APPLE
)
set
(
MATH_LIB cblas
)
set
(
MATH_LIB cblas
)
else
(
APPLE
)
elseif
(
WIN32
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.lib
)
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
(
APPLE
)
endif
(
APPLE
)
endif
()
endif
()
...
...
python/CMakeLists.txt
浏览文件 @
5a6d7fe2
...
@@ -48,12 +48,18 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
...
@@ -48,12 +48,18 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
IF
(
WIN32
)
IF
(
WIN32
)
# Python would use the .pyd by default under Windows series platform
# Python would use the .pyd by default under Windows series platform
set
(
FLUID_DST_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/
)
set
(
FLUID_DST_DIR
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/
)
get_filename_component
(
openblas_refpath
${
CBLAS_LIBRARIES
}
DIRECTORY
)
set
(
FLUID_CORE
${
FLUID_DST_DIR
}
/core.pyd
)
set
(
FLUID_CORE
${
FLUID_DST_DIR
}
/core.pyd
)
if
(
NOT WITH_MKLDNN
)
get_filename_component
(
openblas_refpath
${
CBLAS_LIBRARIES
}
DIRECTORY
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
FLUID_CORE
}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
FLUID_CORE
}
COMMAND cmake -E copy
${
openblas_refpath
}
/openblas.dll
${
FLUID_DST_DIR
}
COMMAND cmake -E copy
${
openblas_refpath
}
/openblas.dll
${
FLUID_DST_DIR
}
DEPENDS paddle_pybind
)
DEPENDS paddle_pybind
)
else
(
NOT WITH_MKLDNN
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind>
${
FLUID_CORE
}
DEPENDS paddle_pybind
)
endif
(
NOT WITH_MKLDNN
)
ELSE
()
ELSE
()
set
(
FLUID_CORE
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/core.so
)
set
(
FLUID_CORE
${
PADDLE_BINARY_DIR
}
/python/paddle/fluid/core.so
)
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
add_custom_command
(
OUTPUT
${
FLUID_CORE
}
...
...
python/paddle/fluid/__init__.py
浏览文件 @
5a6d7fe2
...
@@ -102,6 +102,12 @@ def __bootstrap__():
...
@@ -102,6 +102,12 @@ def __bootstrap__():
import
sys
import
sys
import
os
import
os
import
platform
import
platform
if
os
.
name
==
'nt'
:
third_lib_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
+
os
.
sep
+
'..'
+
os
.
sep
+
'libs'
os
.
environ
[
'path'
]
+=
';'
+
third_lib_path
sys
.
path
.
append
(
third_lib_path
)
from
.
import
core
from
.
import
core
in_test
=
'unittest'
in
sys
.
modules
in_test
=
'unittest'
in
sys
.
modules
...
@@ -128,13 +134,12 @@ def __bootstrap__():
...
@@ -128,13 +134,12 @@ def __bootstrap__():
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
]
]
if
'Darwin'
not
in
sysstr
:
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
read_env_flags
.
append
(
'use_pinned_memory'
)
if
os
.
name
!=
'nt'
:
if
os
.
name
!=
'nt'
:
read_env_flags
.
append
(
'warpctc_dir'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
if
core
.
is_compiled_with_dist
():
if
core
.
is_compiled_with_dist
():
...
...
python/paddle/fluid/framework.py
浏览文件 @
5a6d7fe2
...
@@ -16,6 +16,7 @@ from __future__ import print_function
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
collections
import
collections
import
contextlib
import
contextlib
import
os
import
re
import
re
import
six
import
six
import
sys
import
sys
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
try
:
try
:
from
.
import
core
from
.
import
core
except
ImportError
as
e
:
except
ImportError
as
e
:
if
os
.
name
==
'nt'
:
raise
ImportError
(
"""NOTE: You may need to run
\"
set PATH=c:\python27\lib:%PATH%
\"
if you encounters
\"
mkldnn.dll not found
\"
errors. If you have python
installed in other directory, replace
\"
c:\python27\lib" with your own
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
))
else
:
raise
ImportError
(
raise
ImportError
(
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
...
...
python/setup.py.in
浏览文件 @
5a6d7fe2
...
@@ -158,16 +158,18 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
...
@@ -158,16 +158,18 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
# put all thirdparty libraries in paddle.libs
# put all thirdparty libraries in paddle.libs
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
if os.name != 'nt':
package_data['paddle.libs']= []
package_data['paddle.libs']= []
package_data['paddle.libs']=['libwarpctc' + ext_name]
package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_
SHARED_
LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
shutil.copy('${MKLML_
SHARED_
IOMP_LIB}', libs_path)
package_data['paddle.libs']+=[
'libmklml_intel' + ext_name,'libiomp5'
+ ext_name]
package_data['paddle.libs']+=[
('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, ('libiomp5' if os.name != 'nt' else 'libiomp5md')
+ ext_name]
if '${WITH_MKLDNN}' == 'ON':
if '${WITH_MKLDNN}' == 'ON':
if '${CMAKE_BUILD_TYPE}' == 'Release':
if '${CMAKE_BUILD_TYPE}' == 'Release':
if os.name != 'nt':
# only change rpath in Release mode.
# only change rpath in Release mode.
# TODO(typhoonzero): use install_name_tool to patch mkl libs once
# TODO(typhoonzero): use install_name_tool to patch mkl libs once
# we can support mkl on mac.
# we can support mkl on mac.
...
@@ -178,7 +180,7 @@ if '${WITH_MKLDNN}' == 'ON':
...
@@ -178,7 +180,7 @@ if '${WITH_MKLDNN}' == 'ON':
command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
if os.system(command) != 0:
if os.system(command) != 0:
raise Exception("patch libmkldnn.so failed, command: %s" % command)
raise Exception("patch libmkldnn.so failed, command: %s" % command)
package_data['paddle.libs']+=['libmkldnn.so.0']
package_data['paddle.libs']+=['libmkldnn.so.0'
if os.name != 'nt' else ('mkldnn' + ext_name)
]
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
if '${WITH_NGRAPH}' == 'ON':
if '${WITH_NGRAPH}' == 'ON':
# only change rpath in Release mode,
# only change rpath in Release mode,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录