Commit 5a6d7fe2 (机器未来 / Paddle, forked from PaddlePaddle / Paddle)
Authored on Dec 18, 2018 by peizhilin
Parent commit: 0f085f0a

    add mkl,ctc support for windows

40 changed files with 315 additions and 172 deletions (+315 / -172).
Changed files (40):

CMakeLists.txt                                                     +4   -8
cmake/cuda.cmake                                                   +3   -0
cmake/cudnn.cmake                                                  +1   -0
cmake/external/cub.cmake                                           +1   -1
cmake/external/dlpack.cmake                                        +1   -1
cmake/external/mkldnn.cmake                                        +31  -12
cmake/external/mklml.cmake                                         +50  -33
cmake/external/python.cmake                                        +7   -1
cmake/external/warpctc.cmake                                       +25  -5
cmake/external/xbyak.cmake                                         +2   -2
cmake/generic.cmake                                                +5   -1
cmake/inference_lib.cmake                                          +8   -8
cmake/operators.cmake                                              +1   -1
cmake/simd.cmake                                                   +35  -38
paddle/fluid/framework/CMakeLists.txt                              +1   -2
paddle/fluid/framework/details/all_reduce_op_handle.cc             +1   -1
paddle/fluid/framework/mixed_vector.h                              +5   -5
paddle/fluid/framework/op_registry.h                               +2   -1
paddle/fluid/inference/api/demo_ci/CMakeLists.txt                  +12  -3
paddle/fluid/memory/detail/system_allocator.cc                     +0   -1
paddle/fluid/operators/CMakeLists.txt                              +2   -5
paddle/fluid/operators/cum_op.h                                    +2   -0
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc    +3   -0
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h               +6   -0
paddle/fluid/operators/math/jit_gen.h                              +3   -0
paddle/fluid/platform/cpu_info.cc                                  +5   -2
paddle/fluid/platform/dynload/CMakeLists.txt                       +0   -2
paddle/fluid/platform/dynload/cudnn.cc                             +4   -0
paddle/fluid/platform/dynload/cudnn.h                              +1   -1
paddle/fluid/platform/dynload/dynamic_loader.cc                    +16  -0
paddle/fluid/platform/dynload/dynamic_loader.h                     +6   -0
paddle/fluid/platform/dynload/mklml.h                              +1   -1
paddle/fluid/platform/dynload/tensorrt.h                           +1   -1
paddle/fluid/platform/dynload/warpctc.h                            +1   -1
paddle/fluid/platform/port.h                                       +4   -1
paddle/fluid/train/demo/CMakeLists.txt                             +14  -4
python/CMakeLists.txt                                              +11  -5
python/paddle/fluid/__init__.py                                    +7   -2
python/paddle/fluid/framework.py                                   +13  -5
python/setup.py.in                                                 +20  -18
CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -125,16 +125,12 @@ if(ANDROID OR IOS)
     add_definitions(-DPADDLE_MOBILE_INFERENCE)
 endif()

-if (APPLE OR WIN32)
+if (APPLE)
     set(WITH_MKL OFF CACHE STRING
-        "Disable MKL for building on mac and windows" FORCE)
+        "Disable MKL for building on mac" FORCE)
 endif()

 if (WIN32)
     set(WITH_DSO OFF CACHE STRING
             "Disable DSO when compiling for Windows" FORCE)

-    set(WITH_MKL OFF CACHE STRING
-            "Disable MKL when compiling for Windows" FORCE)
-
     set(WITH_DISTRIBUTE OFF CACHE STRING
             "Disable DISTRIBUTE when compiling for Windows" FORCE)

     set(WITH_C_API OFF CACHE STRING

@@ -207,10 +203,10 @@ include(external/xxhash)          # download xxhash
 include(external/dlpack)
 include(external/snappy)          # download snappy
 include(external/snappystream)    # download snappystream
+include(external/warpctc)         # download, build, install warpctc

 if (NOT WIN32)
-# there is no official support of warpctc, nccl, cupti in windows
-include(external/warpctc)         # download, build, install warpctc
+# there is no official support of nccl, cupti in windows
 include(cupti)
 include(external/gzstream)
 endif (NOT WIN32)
cmake/cuda.cmake  (view file @ 5a6d7fe2)

@@ -139,10 +139,12 @@ endfunction()
 message(STATUS "CUDA detected: " ${CUDA_VERSION})
 if (${CUDA_VERSION} LESS 7.0)
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
+  add_definitions("-DPADDLE_CUDA_BINVER=\"60\"")
 elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
   list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
   list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
+  add_definitions("-DPADDLE_CUDA_BINVER=\"70\"")
 elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
   list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")

@@ -150,6 +152,7 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
   # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
   # warning for now.
   list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
+  add_definitions("-DPADDLE_CUDA_BINVER=\"80\"")
 endif()

 include_directories(${CUDA_INCLUDE_DIRS})
cmake/cudnn.cmake  (view file @ 5a6d7fe2)

@@ -89,6 +89,7 @@ if(CUDNN_FOUND)
   if(NOT CUDNN_MAJOR_VERSION)
     set(CUDNN_VERSION "???")
   else()
+    add_definitions("-DPADDLE_CUDNN_BINVER=\"${CUDNN_MAJOR_VERSION}\"")
     math(EXPR CUDNN_VERSION
       "${CUDNN_MAJOR_VERSION} * 1000 +
              ${CUDNN_MINOR_VERSION} * 100 + ${CUDNN_PATCHLEVEL_VERSION}")
cmake/external/cub.cmake  (view file @ 5a6d7fe2)

@@ -32,4 +32,4 @@ endif()
 add_dependencies(cub extern_cub)

-LIST(APPEND externl_project_dependencies cub)
+LIST(APPEND external_project_dependencies cub)
cmake/external/dlpack.cmake  (view file @ 5a6d7fe2)

@@ -28,4 +28,4 @@ endif()
 add_dependencies(dlpack extern_dlpack)

-LIST(APPEND externl_project_dependencies dlpack)
+LIST(APPEND external_project_dependencies dlpack)
cmake/external/mkldnn.cmake  (view file @ 5a6d7fe2)

@@ -23,15 +23,14 @@ SET(MKLDNN_SOURCES_DIR ${THIRD_PARTY_PATH}/mkldnn)
 SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
 SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)

-IF(WIN32 OR APPLE)
+IF(APPLE)
     MESSAGE(WARNING
-        "Windows or Mac is not supported with MKLDNN in Paddle yet."
+        "Mac is not supported with MKLDNN in Paddle yet."
         "Force WITH_MKLDNN=OFF")
-    SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in Windows and MacOS" FORCE)
+    SET(WITH_MKLDNN OFF CACHE STRING "Disable MKLDNN in MacOS" FORCE)
     return()
 ENDIF()

-SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
 MESSAGE(STATUS "Set ${MKLDNN_INSTALL_DIR}/lib to runtime path")
 SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLDNN_INSTALL_DIR}/lib")

@@ -44,10 +43,14 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
 ELSE()
   MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN")
 ENDIF()

-SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds")
-SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
-SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
-SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
+IF(NOT WIN32)
+  SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-error=array-bounds")
+  SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
+  SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
+  SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
+ENDIF(NOT WIN32)

 ExternalProject_Add(
     ${MKLDNN_PROJECT}
     ${EXTERNAL_PROJECT_LOG_ARGS}

@@ -58,8 +61,15 @@ ExternalProject_Add(
     UPDATE_COMMAND    ""
     CMAKE_ARGS        -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
     CMAKE_ARGS        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+    CMAKE_ARGS        -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+    CMAKE_ARGS        -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+    CMAKE_ARGS        -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
+    CMAKE_ARGS        -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+    CMAKE_ARGS        -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+    CMAKE_ARGS        -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
     CMAKE_ARGS        -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
+    CMAKE_ARGS        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
     CMAKE_ARGS        -DCMAKE_POSITION_INDEPENDENT_CODE=ON
     CMAKE_ARGS        -DMKLROOT=${MKLML_ROOT}
     CMAKE_ARGS        -DCMAKE_C_FLAGS=${MKLDNN_CFLAG}
     CMAKE_ARGS        -DCMAKE_CXX_FLAGS=${MKLDNN_CXXFLAG}

@@ -67,6 +77,11 @@ ExternalProject_Add(
     CMAKE_CACHE_ARGS  -DCMAKE_INSTALL_PREFIX:PATH=${MKLDNN_INSTALL_DIR}
                       -DMKLROOT:PATH=${MKLML_ROOT}
 )
+if(WIN32)
+    SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/mkldnn.lib" CACHE FILEPATH "mkldnn library." FORCE)
+else(WIN32)
+    SET(MKLDNN_LIB "${MKLDNN_INSTALL_DIR}/lib/libmkldnn.so" CACHE FILEPATH "mkldnn library." FORCE)
+endif(WIN32)
 ADD_LIBRARY(shared_mkldnn SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET shared_mkldnn PROPERTY IMPORTED_LOCATION ${MKLDNN_LIB})

@@ -85,10 +100,14 @@ ADD_DEPENDENCIES(mkldnn ${MKLDNN_PROJECT})
 # copy the real so.0 lib to install dir
 # it can be directly contained in wheel or capi
-SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
-ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
-        COMMAND cp ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
-        DEPENDS mkldnn)
+if(WIN32)
+    SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/lib/mkldnn.dll)
+else(WIN32)
+    SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
+    ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
+            COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
+            DEPENDS mkldnn)
+endif(WIN32)
 ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
 IF(WITH_C_API)
cmake/external/mklml.cmake  (view file @ 5a6d7fe2)

@@ -16,59 +16,76 @@ IF(NOT ${WITH_MKLML})
   return()
 ENDIF(NOT ${WITH_MKLML})

-IF(WIN32 OR APPLE)
+IF(APPLE)
     MESSAGE(WARNING
-        "Windows or Mac is not supported with MKLML in Paddle yet."
+        "Mac is not supported with MKLML in Paddle yet."
         "Force WITH_MKLML=OFF")
     SET(WITH_MKLML OFF CACHE STRING "Disable MKLML package in Windows and MacOS" FORCE)
     return()
 ENDIF()

 INCLUDE(ExternalProject)
-SET(MKLML_PROJECT       "extern_mklml")
-IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
-  MESSAGE(STATUS "use pre defined download url")
-  SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
-  SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
-ENDIF()
-MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")
-SET(MKLML_SOURCE_DIR    "${THIRD_PARTY_PATH}/mklml")
-SET(MKLML_DOWNLOAD_DIR  "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
 SET(MKLML_DST_DIR       "mklml")
 SET(MKLML_INSTALL_ROOT  "${THIRD_PARTY_PATH}/install")
 SET(MKLML_INSTALL_DIR   ${MKLML_INSTALL_ROOT}/${MKLML_DST_DIR})
 SET(MKLML_ROOT          ${MKLML_INSTALL_DIR})
 SET(MKLML_INC_DIR       ${MKLML_ROOT}/include)
 SET(MKLML_LIB_DIR       ${MKLML_ROOT}/lib)
-SET(MKLML_LIB           ${MKLML_LIB_DIR}/libmklml_intel.so)
-SET(MKLML_IOMP_LIB      ${MKLML_LIB_DIR}/libiomp5.so)
+if(WIN32)
+  SET(MKLML_LIB             ${MKLML_LIB_DIR}/mklml.lib)
+  SET(MKLML_IOMP_LIB        ${MKLML_LIB_DIR}/libiomp5md.lib)
+  SET(MKLML_SHARED_LIB      ${MKLML_LIB_DIR}/mklml.dll)
+  SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5md.dll)
+else()
+  SET(MKLML_LIB             ${MKLML_LIB_DIR}/libmklml_intel.so)
+  SET(MKLML_IOMP_LIB        ${MKLML_LIB_DIR}/libiomp5.so)
+  SET(MKLML_SHARED_LIB      ${MKLML_LIB_DIR}/libmklml_intel.so)
+  SET(MKLML_SHARED_IOMP_LIB ${MKLML_LIB_DIR}/libiomp5.so)
+endif()
 SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${MKLML_ROOT}/lib")
-INCLUDE_DIRECTORIES(${MKLML_INC_DIR})

-FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
-  "PROJECT(MKLML)\n"
-  "cmake_minimum_required(VERSION 3.0)\n"
-  "install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n"
-  "        DESTINATION ${MKLML_DST_DIR})\n")
-
-ExternalProject_Add(
-    ${MKLML_PROJECT}
-    ${EXTERNAL_PROJECT_LOG_ARGS}
-    PREFIX                ${MKLML_SOURCE_DIR}
-    DOWNLOAD_DIR          ${MKLML_DOWNLOAD_DIR}
-    DOWNLOAD_COMMAND      wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz
-                          && tar zxf ${MKLML_VER}.tgz
-    DOWNLOAD_NO_PROGRESS  1
-    UPDATE_COMMAND        ""
-    CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
-    CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
-)
+if(WIN32)
+  MESSAGE(WARNING "Please download the MKLML and and put it at " ${THIRD_PARTY_PATH}/install/mklml)
+else()
+  SET(MKLML_PROJECT       "extern_mklml")
+  IF((NOT DEFINED MKLML_VER) OR (NOT DEFINED MKLML_URL))
+    MESSAGE(STATUS "use pre defined download url")
+    SET(MKLML_VER "mklml_lnx_2019.0.20180710" CACHE STRING "" FORCE)
+    SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
+  ENDIF()
+  MESSAGE(STATUS "MKLML_VER: ${MKLML_VER}, MKLML_URL: ${MKLML_URL}")
+  SET(MKLML_SOURCE_DIR    "${THIRD_PARTY_PATH}/mklml")
+  SET(MKLML_DOWNLOAD_DIR  "${MKLML_SOURCE_DIR}/src/${MKLML_PROJECT}")
+  FILE(WRITE ${MKLML_DOWNLOAD_DIR}/CMakeLists.txt
+    "PROJECT(MKLML)\n"
+    "cmake_minimum_required(VERSION 3.0)\n"
+    "install(DIRECTORY ${MKLML_VER}/include ${MKLML_VER}/lib \n"
+    "        DESTINATION ${MKLML_DST_DIR})\n")
+  ExternalProject_Add(
+      ${MKLML_PROJECT}
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      PREFIX                ${MKLML_SOURCE_DIR}
+      DOWNLOAD_DIR          ${MKLML_DOWNLOAD_DIR}
+      DOWNLOAD_COMMAND      wget --no-check-certificate ${MKLML_URL} -c -q -O ${MKLML_VER}.tgz
+                            && tar zxf ${MKLML_VER}.tgz
+      DOWNLOAD_NO_PROGRESS  1
+      UPDATE_COMMAND        ""
+      CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${MKLML_INSTALL_ROOT}
+      CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${MKLML_INSTALL_ROOT}
+  )
+endif()
+INCLUDE_DIRECTORIES(${MKLML_INC_DIR})

 ADD_LIBRARY(mklml SHARED IMPORTED GLOBAL)
 SET_PROPERTY(TARGET mklml PROPERTY IMPORTED_LOCATION ${MKLML_LIB})
-ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
+if(NOT WIN32)
+  ADD_DEPENDENCIES(mklml ${MKLML_PROJECT})
+endif()
 LIST(APPEND external_project_dependencies mklml)

 IF(WITH_C_API)
cmake/external/python.cmake  (view file @ 5a6d7fe2)

@@ -23,9 +23,12 @@ FIND_PACKAGE(PythonLibs ${PY_VERSION})
 if(WIN32)
   execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
     "from distutils import sysconfig as s;import sys;import struct;
+import sysconfig;
 print(sys.prefix);
 print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
+print(sysconfig.get_platform());
+print(sysconfig.get_config_var('py_version_nodot'));
+print(sysconfig.get_config_var('SOABI'));
 "
     RESULT_VARIABLE _PYTHON_SUCCESS
     OUTPUT_VARIABLE _PYTHON_VALUES

@@ -41,6 +44,9 @@ print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION'));
   string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES})
   list(GET _PYTHON_VALUES 0 PYTHON_PREFIX)
   list(GET _PYTHON_VALUES 1 PYTHON_LIBRARY_SUFFIX)
+  list(GET _PYTHON_VALUES 2 SYS_PLATFORM)
+  list(GET _PYTHON_VALUES 3 PYTHON_SHORT_VERSION_NODOT)
+  list(GET _PYTHON_VALUES 4 PYTHON_SOABI)

   # Make sure all directory separators are '/'
   string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX})
cmake/external/warpctc.cmake  (view file @ 5a6d7fe2)

@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
 # Used in unit test test_WarpCTCLayer
 SET(WARPCTC_LIB_DIR "${WARPCTC_INSTALL_DIR}/lib"
     CACHE PATH "Warp-ctc Library Directory" FORCE)
-SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
-    CACHE FILEPATH "Warp-ctc Library" FORCE)

-IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" )
+IF(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR WIN32)
     SET(USE_OMP OFF)
 ELSE()
     SET(USE_OMP ON)
 ENDIF()

+IF(WIN32)
+    SET(WARPCTC_REPOSITORY "https://github.com/wopeizl/warp-ctc.git")
+ELSE()
+    SET(WARPCTC_REPOSITORY "https://github.com/dzhwinter/warp-ctc.git")
+ENDIF()

 ExternalProject_Add(
     extern_warpctc
     ${EXTERNAL_PROJECT_LOG_ARGS}
-    GIT_REPOSITORY  "https://github.com/dzhwinter/warp-ctc.git"
+    GIT_REPOSITORY  ${WARPCTC_REPOSITORY}
     PREFIX          ${WARPCTC_SOURCES_DIR}
     UPDATE_COMMAND  ""
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                     -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
                     -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
+                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
+                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                     -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
                     -DWITH_GPU=${WITH_GPU}
                     -DWITH_OMP=${USE_OMP}

@@ -59,6 +67,18 @@ ExternalProject_Add(
         -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
         -DCMAKE_INSTALL_PREFIX:PATH=${WARPCTC_INSTALL_DIR}
 )
+IF(WIN32)
+    IF(NOT EXISTS "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}")
+        add_custom_command(TARGET extern_warpctc POST_BUILD
+            COMMAND cmake -E copy ${WARPCTC_INSTALL_DIR}/bin/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}
+                    ${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}
+        )
+    ENDIF()
+    SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/warpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
+        CACHE FILEPATH "Warp-ctc Library" FORCE)
+else(WIN32)
+    SET(WARPCTC_LIBRARIES "${WARPCTC_INSTALL_DIR}/lib/libwarpctc${CMAKE_SHARED_LIBRARY_SUFFIX}"
+        CACHE FILEPATH "Warp-ctc Library" FORCE)
+ENDIF(WIN32)

 MESSAGE(STATUS "warp-ctc library: ${WARPCTC_LIBRARIES}")
 INCLUDE_DIRECTORIES(${WARPCTC_INCLUDE_DIR})  # For warpctc code to include its headers.
cmake/external/xbyak.cmake  (view file @ 5a6d7fe2)

@@ -13,8 +13,8 @@
 # limitations under the License.

 set(WITH_XBYAK ON)
-if(WIN32 OR APPLE)
-    SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)
+if(APPLE)
+    SET(WITH_XBYAK OFF CACHE STRING "Disable XBYAK in MacOS" FORCE)
     return()
 endif()
cmake/generic.cmake  (view file @ 5a6d7fe2)

@@ -267,7 +267,11 @@ function(cc_library TARGET_NAME)
         list(APPEND cc_library_DEPS dynload_mklml)
       endif()
       add_dependencies(${TARGET_NAME} mklml)
-      target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
+      if(WIN32)
+        target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
+      else(WIN32)
+        target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
+      endif(WIN32)
     endif()
     # remove link to python, see notes at:
     # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
cmake/inference_lib.cmake  (view file @ 5a6d7fe2)

@@ -115,20 +115,20 @@ if (NOT PROTOBUF_FOUND OR WIN32)
             )
 endif()

-if (NOT CBLAS_FOUND)
-    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
-    copy(openblas_lib
-            SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
-            DSTS ${dst_dir} ${dst_dir}
-            DEPS extern_openblas)
-elseif (WITH_MKLML)
+if (WITH_MKLML)
     set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mklml")
     copy(mklml_lib
            SRCS ${MKLML_LIB} ${MKLML_IOMP_LIB} ${MKLML_INC_DIR}
            DSTS ${dst_dir}/lib ${dst_dir}/lib ${dst_dir}
            DEPS mklml)
+elseif (NOT CBLAS_FOUND OR WIN32)
+    set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/openblas")
+    copy(openblas_lib
+            SRCS ${CBLAS_INSTALL_DIR}/lib ${CBLAS_INSTALL_DIR}/include
+            DSTS ${dst_dir} ${dst_dir}
+            DEPS extern_openblas)
 endif()

 if (WITH_MKLDNN)
cmake/operators.cmake  (view file @ 5a6d7fe2)

@@ -84,7 +84,7 @@ function(op_library TARGET)
   endif()
   if (WIN32)
     # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
-    foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op" "warpctc_op")
+    foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
       if ("${TARGET}" STREQUAL "${windows_unsupport_op}")
         return()
       endif()
cmake/simd.cmake  (view file @ 5a6d7fe2)

@@ -57,46 +57,43 @@ int main()
   return 0;
 }"
 SSE3_FOUND)

-# disable AVX by default on windows
-if(NOT WIN32)
 # Check AVX
 set(CMAKE_REQUIRED_FLAGS ${AVX_FLAG})
 set(AVX_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
 {
   __m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
   __m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
   __m256 result = _mm256_add_ps (a, b);
   return 0;
 }"
 AVX_FOUND)

 # Check AVX 2
 set(CMAKE_REQUIRED_FLAGS ${AVX2_FLAG})
 set(AVX2_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
 {
   __m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
   __m256i result = _mm256_abs_epi32 (a);
   return 0;
 }"
 AVX2_FOUND)

 # Check AVX512F
 set(CMAKE_REQUIRED_FLAGS ${AVX512F_FLAG})
 set(AVX512F_FOUND_EXITCODE 1 CACHE STRING "Result from TRY_RUN" FORCE)
 CHECK_CXX_SOURCE_RUNS("
 #include <immintrin.h>
 int main()
 {
   __m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
                                 13, -5, 6, -7, 9, 2, -6, 3);
   __m512i result = _mm512_abs_epi32 (a);
   return 0;
 }"
 AVX512F_FOUND)
-endif(NOT WIN32)

 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
 mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND)
paddle/fluid/framework/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -15,8 +15,7 @@ function(windows_symbolic TARGET)
     file(GENERATE OUTPUT ${final_path}/.${src}.cu INPUT ${final_path}/${src}.cc)

     add_custom_command(OUTPUT ${final_path}/.${src}.cu
-            COMMAND ${CMAKE_COMMAND} -E remove ${final_path}/.${src}.cu
-            COMMAND ${CMAKE_COMMAND} -E copy "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different "${final_path}/${src}.cc" "${final_path}/.${src}.cu"
             COMMENT "create hidden file of ${src}.cu")
     add_custom_target(${TARGET} ALL DEPENDS .${src}.cu)
   endforeach()
paddle/fluid/framework/details/all_reduce_op_handle.cc  (view file @ 5a6d7fe2)

@@ -50,7 +50,7 @@ void AllReduceOpHandle::RunImpl() {
   // FIXME(typhoonzero): If scope0(global scope) have NCCL_ID_VAR,
   // this is a distributed or inter-process call, find a better way.
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
   if (NoDummyInputSize() == 1 &&
       local_scopes_[0]->FindLocalVar(NCCL_ID_VARNAME) == nullptr) {
 #else
paddle/fluid/framework/mixed_vector.h  (view file @ 5a6d7fe2)

@@ -215,8 +215,8 @@ class Vector {
     auto stream = dev_ctx->stream();
     void *src = gpu_->ptr();
     void *dst = cpu_.data();
-    memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
-                 gpu_->size(), stream);
+    paddle::memory::Copy(platform::CPUPlace(), dst, CUDAPlace().get(), src,
+                         gpu_->size(), stream);
     dev_ctx->Wait();
   }

@@ -261,8 +261,8 @@ class Vector {
     auto *dev_ctx = static_cast<platform::CUDADeviceContext *>(
         platform::DeviceContextPool::Instance().Get(place));
     auto stream = dev_ctx->stream();
-    memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
-                 gpu_->size(), stream);
+    paddle::memory::Copy(CUDAPlace().get(), dst, platform::CPUPlace(), src,
+                         gpu_->size(), stream);
   }

   void ImmutableCPU() const {

@@ -284,7 +284,7 @@ class Vector {
   bool IsInCPU() const { return flag_ & kDataInCPU; }

   mutable std::vector<T> cpu_;
-  mutable memory::AllocationPtr gpu_;
+  mutable paddle::memory::AllocationPtr gpu_;
   mutable int flag_;

   mutable std::mutex mtx_;
paddle/fluid/framework/op_registry.h  (view file @ 5a6d7fe2)

@@ -23,7 +23,8 @@ limitations under the License. */
 #include <unordered_map>
 #include <unordered_set>

-#include "glog/logging.h"  // For VLOG()
+#define GLOG_NO_ABBREVIATED_SEVERITIES  // msvc conflict logging with windows.h
+#include "glog/logging.h"               // For VLOG()
 #include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/details/op_registry.h"
 #include "paddle/fluid/framework/framework.pb.h"
paddle/fluid/inference/api/demo_ci/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -89,12 +89,21 @@ endif()
 if(WITH_MKL)
   include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
-  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
-               ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+  if(NOT WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
+                 ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
+  else(WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml${CMAKE_SHARED_LIBRARY_SUFFIX}
+                 ${PADDLE_LIB}/third_party/install/mklml/lib/libiomp5md${CMAKE_SHARED_LIBRARY_SUFFIX})
+  endif(WIN32)
   set(MKLDNN_PATH "${PADDLE_LIB}/third_party/install/mkldnn")
   if(EXISTS ${MKLDNN_PATH})
     include_directories("${MKLDNN_PATH}/include")
-    set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+    if(WIN32)
+      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
+    else(WIN32)
+      set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
+    endif(WIN32)
   endif()
 else()
   set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
paddle/fluid/memory/detail/system_allocator.cc  (view file @ 5a6d7fe2)

@@ -17,7 +17,6 @@ limitations under the License. */
 #ifdef _WIN32
 #include <malloc.h>
-#include <windows.h>  // VirtualLock/VirtualUnlock
 #else
 #include <sys/mman.h>  // for mlock and munlock
 #endif
paddle/fluid/operators/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -44,9 +44,8 @@ endif()
 register_operators(EXCLUDES warpctc_op conv_fusion_op DEPS ${OP_HEADER_DEPS} ${OP_PREFETCH_DEPS})

 # warpctc_op needs cudnn 7 above
-if (WITH_GPU AND NOT WIN32)
+if (WITH_GPU)
   if (${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
     op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc)
   else()

@@ -64,9 +63,7 @@ endif()
 set(COMMON_OP_DEPS ${OP_HEADER_DEPS})

 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor)
-if (NOT WIN32)
-  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
-endif()
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} dynload_warpctc)
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler)
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions)
 if (WITH_GPU)
paddle/fluid/operators/cum_op.h  (view file @ 5a6d7fe2)

@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #pragma once
+
+#include <array>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc  (view file @ 5a6d7fe2)

@@ -19,6 +19,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/mkldnn_helper.h"

 #include "paddle/fluid/operators/math/jit_kernel.h"
+#if defined(_WIN32) && defined(_WINSOCKAPI_)
+#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */
+#endif
 #include "xbyak/xbyak.h"
 #include "xbyak/xbyak_util.h"
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h  (view file @ 5a6d7fe2)

@@ -17,6 +17,12 @@ limitations under the License. */
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"

+#if defined(_WIN32)
+#if defined(__AVX2__) || defined(__AVX__)
+inline __m256 operator+=(__m256 a, __m256 b) { return _mm256_add_ps(a, b); }
+#endif
+#endif
+
 namespace paddle {
 namespace operators {
 namespace math {
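The operator+= shim added above is needed because GCC and Clang treat __m256 as a built-in vector type, so `value += x` in the LSTM CPU kernel compiles directly, while MSVC defines __m256 as a plain struct with no arithmetic operators. Below is a small, self-contained illustration of the same idea; it is not Paddle code, it uses a reference-taking variant of the operator so the accumulation is visible, and it must be compiled with AVX enabled (-mavx on GCC/Clang, /arch:AVX on MSVC) and run on an AVX-capable CPU.

    // avx_plus_equals_demo.cc -- illustrative only; compile with -mavx or /arch:AVX.
    #include <immintrin.h>
    #include <cstdio>

    #if defined(_MSC_VER)
    // MSVC: __m256 is a struct, so compound assignment needs an explicit overload.
    inline __m256& operator+=(__m256& a, __m256 b) {
      a = _mm256_add_ps(a, b);
      return a;
    }
    #endif

    int main() {
      __m256 acc = _mm256_set1_ps(1.0f);   // eight lanes of 1.0
      __m256 inc = _mm256_set1_ps(2.0f);   // eight lanes of 2.0
      acc += inc;  // overload on MSVC; built-in vector arithmetic on GCC/Clang
      float out[8];
      _mm256_storeu_ps(out, acc);
      std::printf("lane 0 = %.1f\n", out[0]);  // prints 3.0
      return 0;
    }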
paddle/fluid/operators/math/jit_gen.h  (view file @ 5a6d7fe2)

@@ -18,6 +18,9 @@ limitations under the License. */
 #include <type_traits>
 #include "paddle/fluid/platform/macros.h"

+#if defined(_WIN32) && defined(_WINSOCKAPI_)
+#define _WINSOCK2API_ /* Prevent inclusion of winsock2.h */
+#endif
 #define XBYAK_USE_MMAP_ALLOCATOR
 #include "xbyak/xbyak.h"
 #include "xbyak/xbyak_util.h"
paddle/fluid/platform/cpu_info.cc  (view file @ 5a6d7fe2)

@@ -14,6 +14,10 @@ limitations under the License. */
 #include "paddle/fluid/platform/cpu_info.h"

+#if defined(_WIN32)
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#endif
+
 #ifdef PADDLE_WITH_XBYAK
 #include "xbyak/xbyak.h"
 #include "xbyak/xbyak_util.h"

@@ -22,9 +26,8 @@ limitations under the License. */
 #ifdef __APPLE__
 #include <sys/sysctl.h>
 #include <sys/types.h>
 #elif defined(_WIN32)
-#define NOMINMAX  // msvc max/min macro conflict with std::min/max
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #else
 #include <unistd.h>
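For context on the NOMINMAX and WIN32_LEAN_AND_MEAN defines being moved around here: <windows.h> defines min and max as macros unless NOMINMAX is set, which breaks std::min/std::max, and WIN32_LEAN_AND_MEAN skips rarely used headers such as the old winsock API. A minimal sketch of the usual pattern (not Paddle code; the guards only take effect when _WIN32 is defined):

    // windows_header_hygiene_demo.cc -- illustrative only.
    #include <algorithm>
    #include <cstdio>

    #if defined(_WIN32)
    #define NOMINMAX             // keep windows.h from defining min()/max() macros
    #define WIN32_LEAN_AND_MEAN  // skip rarely-used headers (winsock, GDI extras, ...)
    #include <windows.h>
    #endif

    int main() {
      // Without NOMINMAX, "std::max(3, 7)" can fail to compile on MSVC because
      // the max token is macro-expanded before overload resolution happens.
      std::printf("%d\n", std::max(3, 7));
      return 0;
    }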
paddle/fluid/platform/dynload/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
   list(APPEND CUDA_SRCS cupti.cc)
 endif(CUPTI_FOUND)
 nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader)
-if (NOT WIN32)
 cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc)
-endif(NOT WIN32)
 if (WITH_MKLML)
   cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml)
 endif()
paddle/fluid/platform/dynload/cudnn.cc  (view file @ 5a6d7fe2)

@@ -38,6 +38,10 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
 CUDNN_DNN_ROUTINE_EACH_R5(DEFINE_WRAP);
 #endif

+#ifdef CUDNN_DNN_ROUTINE_EACH_R6
+CUDNN_DNN_ROUTINE_EACH_R6(DEFINE_WRAP);
+#endif
+
 #ifdef CUDNN_DNN_ROUTINE_EACH_R7
 CUDNN_DNN_ROUTINE_EACH_R7(DEFINE_WRAP);
 #endif
paddle/fluid/platform/dynload/cudnn.h  (view file @ 5a6d7fe2)

@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
 #define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name)                            \
   struct DynLoad__##__name {                                               \
     template <typename... Args>                                            \
-    auto operator()(Args... args) -> decltype(__name(args...)) {           \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {       \
       using cudnn_func = decltype(&::__name);                              \
       std::call_once(cudnn_dso_flag, []() {                                \
         cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
paddle/fluid/platform/dynload/dynamic_loader.cc  (view file @ 5a6d7fe2)

@@ -53,6 +53,12 @@ namespace platform {
 namespace dynload {

 static constexpr char cupti_lib_path[] = CUPTI_LIB_PATH;

+#if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
+static constexpr char* win_cublas_lib = "cublas64_" PADDLE_CUDA_BINVER ".dll";
+static constexpr char* win_curand_lib = "curand64_" PADDLE_CUDA_BINVER ".dll";
+static constexpr char* win_cudnn_lib = "cudnn64_" PADDLE_CUDNN_BINVER ".dll";
+#endif
+
 static inline std::string join(const std::string& part1,
                                const std::string& part2) {
   // directory separator

@@ -165,6 +171,8 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
 void* GetCublasDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.dylib");
+#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_cublas_lib);
 #else
   return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcublas.so");
 #endif

@@ -173,6 +181,8 @@ void* GetCublasDsoHandle() {
 void* GetCUDNNDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.dylib", false);
+#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
+  return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, win_cudnn_lib);
 #else
   return GetDsoHandleFromSearchPath(FLAGS_cudnn_dir, "libcudnn.so", false);
 #endif

@@ -193,6 +203,8 @@ void* GetCUPTIDsoHandle() {
 void* GetCurandDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.dylib");
+#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
+  return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, win_curand_lib);
 #else
   return GetDsoHandleFromSearchPath(FLAGS_cuda_dir, "libcurand.so");
 #endif

@@ -201,6 +213,8 @@ void* GetCurandDsoHandle() {
 void* GetWarpCTCDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.dylib");
+#elif defined(_WIN32)
+  return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "warpctc.dll");
 #else
   return GetDsoHandleFromSearchPath(FLAGS_warpctc_dir, "libwarpctc.so");
 #endif

@@ -225,6 +239,8 @@ void* GetTensorRtDsoHandle() {
 void* GetMKLMLDsoHandle() {
 #if defined(__APPLE__) || defined(__OSX__)
   return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.dylib");
+#elif defined(_WIN32)
+  return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "mklml.dll");
 #else
   return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.so");
 #endif
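The win_cublas_lib / win_curand_lib / win_cudnn_lib names above are assembled at compile time from the PADDLE_CUDA_BINVER and PADDLE_CUDNN_BINVER definitions added by cmake/cuda.cmake and cmake/cudnn.cmake earlier in this commit, relying on adjacent string-literal concatenation. A small stand-alone illustration follows; the fallback version numbers are assumptions for the example, not values taken from the commit.

    // dll_name_demo.cc -- illustrative only.
    #include <cstdio>

    #ifndef PADDLE_CUDA_BINVER
    #define PADDLE_CUDA_BINVER "90"   // assumed default; the build passes -DPADDLE_CUDA_BINVER="..."
    #endif
    #ifndef PADDLE_CUDNN_BINVER
    #define PADDLE_CUDNN_BINVER "7"   // assumed default; the build passes -DPADDLE_CUDNN_BINVER="..."
    #endif

    static const char* win_cublas_lib = "cublas64_" PADDLE_CUDA_BINVER ".dll";
    static const char* win_cudnn_lib = "cudnn64_" PADDLE_CUDNN_BINVER ".dll";

    int main() {
      // Adjacent string literals are concatenated, yielding e.g. cublas64_90.dll.
      std::printf("%s\n%s\n", win_cublas_lib, win_cudnn_lib);
      return 0;
    }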
paddle/fluid/platform/dynload/dynamic_loader.h  (view file @ 5a6d7fe2)

@@ -18,6 +18,12 @@ namespace paddle {
 namespace platform {
 namespace dynload {

+#ifndef _WIN32
+#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
+#else
+#define DECLARE_TYPE(__name, ...) decltype(auto)
+#endif
+
 void* GetCublasDsoHandle();
 void* GetCUDNNDsoHandle();
 void* GetCUPTIDsoHandle();
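DECLARE_TYPE is what the dynload wrapper headers in this commit (cudnn.h above, and mklml.h, tensorrt.h, warpctc.h below) switch to: on GCC/Clang the trailing return type stays decltype(__name(args...)), while MSVC gets decltype(auto), which it handles more reliably inside these macro-generated templates. A compilable sketch of the general wrapper pattern follows; it is an illustration under assumptions (a POSIX loader, an MKLML shared library on the search path, and the cblas_sasum symbol), not the exact Paddle macro.

    // dynload_wrapper_demo.cc -- illustrative sketch of the lazy dynamic-loading wrapper.
    #include <dlfcn.h>   // Paddle's port.h maps dlopen/dlsym onto the Win32 loader on Windows
    #include <cstdio>
    #include <mutex>

    #ifndef _WIN32
    #define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
    #else
    #define DECLARE_TYPE(__name, ...) decltype(auto)
    #endif

    extern "C" float cblas_sasum(int n, const float* x, int incx);  // symbol assumed to exist in libmklml

    static std::once_flag mklml_dso_flag;
    static void* mklml_dso_handle = nullptr;

    #define DYNAMIC_LOAD_WRAP(__name)                                                  \
      struct DynLoad__##__name {                                                       \
        template <typename... Args>                                                    \
        auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {               \
          using func_t = decltype(&::__name);                                          \
          std::call_once(mklml_dso_flag, [] {                                          \
            mklml_dso_handle = dlopen("libmklml_intel.so", RTLD_LAZY);                 \
          });                                                                          \
          void* sym = mklml_dso_handle ? dlsym(mklml_dso_handle, #__name) : nullptr;   \
          return reinterpret_cast<func_t>(sym)(args...);                               \
        }                                                                              \
      };                                                                               \
      static DynLoad__##__name __name##_dyn

    DYNAMIC_LOAD_WRAP(cblas_sasum);

    int main() {
      float x[3] = {1.0f, -2.0f, 3.0f};
      // Calls through the wrapper; prints 6 when libmklml_intel.so can be loaded.
      std::printf("%f\n", cblas_sasum_dyn(3, x, 1));
      return 0;
    }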
paddle/fluid/platform/dynload/mklml.h  (view file @ 5a6d7fe2)

@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
 #define DYNAMIC_LOAD_MKLML_WRAP(__name)                                    \
   struct DynLoad__##__name {                                               \
     template <typename... Args>                                            \
-    auto operator()(Args... args) -> decltype(__name(args...)) {           \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {       \
       using mklmlFunc = decltype(&::__name);                               \
       std::call_once(mklml_dso_flag, []() {                                \
         mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
paddle/fluid/platform/dynload/tensorrt.h  (view file @ 5a6d7fe2)

@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
 #define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name)                   \
   struct DynLoad__##__name {                                         \
     template <typename... Args>                                      \
-    auto operator()(Args... args) -> decltype(__name(args...)) {     \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) { \
       using tensorrt_func = decltype(__name(args...)) (*)(Args...);  \
       std::call_once(tensorrt_dso_flag, []() {                       \
         tensorrt_dso_handle =                                        \
paddle/fluid/platform/dynload/warpctc.h  (view file @ 5a6d7fe2)

@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
 #define DYNAMIC_LOAD_WARPCTC_WRAP(__name)                                      \
   struct DynLoad__##__name {                                                   \
     template <typename... Args>                                                \
-    auto operator()(Args... args) -> decltype(__name(args...)) {               \
+    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {           \
       using warpctcFunc = decltype(&::__name);                                 \
       std::call_once(warpctc_dso_flag, []() {                                  \
         warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
paddle/fluid/platform/port.h  (view file @ 5a6d7fe2)

@@ -37,6 +37,10 @@
 #define GOOGLE_GLOG_DLL_DECL
 #include <io.h>  // _popen, _pclose
 #include <stdio.h>
+#ifdef _WINSOCKAPI_
+/* Prevent inclusion of winsock.h in windows.h */
+#define WIN32_LEAN_AND_MEAN
+#endif
 #include <windows.h>
 #include <numeric>  // std::accumulate in msvc
 #ifndef S_ISDIR  // windows port for sys/stat.h

@@ -55,7 +59,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
 static void *dlopen(const char *filename, int flag) {
   std::string file_name(filename);
-  file_name.replace(0, file_name.size() - 1, '/', '\\');
   HMODULE hModule = LoadLibrary(file_name.c_str());
   if (!hModule) {
     throw std::runtime_error(file_name + " not found.");
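port.h is where the POSIX dynamic-loading calls used by dynamic_loader.cc are emulated on Windows; the hunk above also drops the '/'-to-'\\' rewriting that used to run before LoadLibrary. A hedged, self-contained sketch of that emulation idea (names and error handling are illustrative, not the exact Paddle implementation):

    // win_dl_demo.cc -- sketch of mapping dlopen/dlsym onto the Win32 loader.
    #if defined(_WIN32)
    #include <windows.h>
    #include <stdexcept>
    #include <string>

    static void* demo_dlopen(const char* filename, int /*flag*/) {
      HMODULE handle = LoadLibraryA(filename);  // Win32 counterpart of dlopen()
      if (!handle) throw std::runtime_error(std::string(filename) + " not found.");
      return reinterpret_cast<void*>(handle);
    }

    static void* demo_dlsym(void* handle, const char* symbol_name) {
      FARPROC proc = GetProcAddress(reinterpret_cast<HMODULE>(handle), symbol_name);  // counterpart of dlsym()
      if (!proc) throw std::runtime_error(std::string(symbol_name) + " not found.");
      return reinterpret_cast<void*>(proc);
    }

    int main() {
      void* h = demo_dlopen("kernel32.dll", 0);
      auto tick = reinterpret_cast<unsigned long(__stdcall*)()>(demo_dlsym(h, "GetTickCount"));
      return tick() > 0 ? 0 : 1;  // GetTickCount is essentially always non-zero after boot
    }
    #else
    int main() { return 0; }  // POSIX builds use the real <dlfcn.h> directly
    #endif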
paddle/fluid/train/demo/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -35,16 +35,26 @@ add_executable(demo_trainer demo_trainer.cc)
 if(WITH_MKLDNN)
   include_directories("${PADDLE_LIB}/third_party/install/mkldnn/include")
-  set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/libmkldnn.so.0)
-endif()
+  if(WIN32)
+    set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/mkldnn.lib)
+  else(WIN32)
+    set(MKLDNN_LIB ${PADDLE_LIB}/third_party/install/mkldnn/lib/libmkldnn.so.0)
+  endif(WIN32)
+endif(WITH_MKLDNN)

 if(WITH_MKL)
   include_directories("${PADDLE_LIB}/third_party/install/mklml/include")
-  set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so)
+  if(WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/mklml.lib)
+  else(WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/mklml/lib/libmklml_intel.so)
+  endif(WIN32)
 else()
   if(APPLE)
     set(MATH_LIB cblas)
-  else(APPLE)
+  elseif(WIN32)
+    set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.lib)
+  else()
     set(MATH_LIB ${PADDLE_LIB}/third_party/install/openblas/lib/libopenblas.a)
   endif(APPLE)
 endif()
python/CMakeLists.txt  (view file @ 5a6d7fe2)

@@ -48,12 +48,18 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
 IF(WIN32)
     # Python would use the .pyd by default under Windows series platform
     set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
-    get_filename_component(openblas_refpath ${CBLAS_LIBRARIES} DIRECTORY)
     set(FLUID_CORE ${FLUID_DST_DIR}/core.pyd)
-    add_custom_command(OUTPUT ${FLUID_CORE}
-            COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-            COMMAND cmake -E copy ${openblas_refpath}/openblas.dll ${FLUID_DST_DIR}
-            DEPENDS paddle_pybind)
+    if(NOT WITH_MKLDNN)
+        get_filename_component(openblas_refpath ${CBLAS_LIBRARIES} DIRECTORY)
+        add_custom_command(OUTPUT ${FLUID_CORE}
+                COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
+                COMMAND cmake -E copy ${openblas_refpath}/openblas.dll ${FLUID_DST_DIR}
+                DEPENDS paddle_pybind)
+    else(NOT WITH_MKLDNN)
+        add_custom_command(OUTPUT ${FLUID_CORE}
+                COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
+                DEPENDS paddle_pybind)
+    endif(NOT WITH_MKLDNN)
 ELSE()
     set(FLUID_CORE ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so)
     add_custom_command(OUTPUT ${FLUID_CORE}
python/paddle/fluid/__init__.py  (view file @ 5a6d7fe2)

@@ -102,6 +102,12 @@ def __bootstrap__():
     import sys
     import os
+    import platform
+
+    if os.name == 'nt':
+        third_lib_path = os.path.abspath(os.path.dirname(__file__)) + os.sep + '..' + os.sep + 'libs'
+        os.environ['path'] += ';' + third_lib_path
+        sys.path.append(third_lib_path)

     from . import core

     in_test = 'unittest' in sys.modules

@@ -128,13 +134,12 @@ def __bootstrap__():
         'free_idle_memory', 'paddle_num_threads', "dist_threadpool_size",
         'eager_delete_tensor_gb', 'fast_eager_deletion_mode',
         'allocator_strategy', 'reader_queue_speed_test_mode',
-        'print_sub_graph_dir', 'pe_profile_fname'
+        'print_sub_graph_dir', 'pe_profile_fname', 'warpctc_dir'
     ]
     if 'Darwin' not in sysstr:
         read_env_flags.append('use_pinned_memory')

     if os.name != 'nt':
-        read_env_flags.append('warpctc_dir')
         read_env_flags.append('cpu_deterministic')

     if core.is_compiled_with_dist():
python/paddle/fluid/framework.py  (view file @ 5a6d7fe2)

@@ -16,6 +16,7 @@ from __future__ import print_function

 import collections
 import contextlib
+import os
 import re
 import six
 import sys

@@ -27,11 +28,18 @@ from .proto import framework_pb2
 try:
     from . import core
 except ImportError as e:
-    raise ImportError(
-        """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
-    if you encounters \"libmkldnn.so not found\" errors. If you have python
-    installed in other directory, replace \"/usr/local/lib\" with your own
-    directory. The original error is: \n""" + cpt.get_exception_message(e))
+    if os.name == 'nt':
+        raise ImportError(
+            """NOTE: You may need to run \"set PATH=c:\python27\lib:%PATH%\"
+        if you encounters \"mkldnn.dll not found\" errors. If you have python
+        installed in other directory, replace \"c:\python27\lib" with your own
+        directory. The original error is: \n""" + cpt.get_exception_message(e))
+    else:
+        raise ImportError(
+            """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
+        if you encounters \"libmkldnn.so not found\" errors. If you have python
+        installed in other directory, replace \"/usr/local/lib\" with your own
+        directory. The original error is: \n""" + cpt.get_exception_message(e))
 except Exception as e:
     raise e
 from . import unique_name
python/setup.py.in  (view file @ 5a6d7fe2)

@@ -158,27 +158,29 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
 # put all thirdparty libraries in paddle.libs
 libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
-if os.name != 'nt':
-    package_data['paddle.libs']= []
-    package_data['paddle.libs']=['libwarpctc' + ext_name]
-    shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
+package_data['paddle.libs']= []
+package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
+shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
 if '${WITH_MKL}' == 'ON':
-    shutil.copy('${MKLML_LIB}', libs_path)
-    shutil.copy('${MKLML_IOMP_LIB}', libs_path)
-    package_data['paddle.libs']+=['libmklml_intel' + ext_name,'libiomp5' + ext_name]
+    shutil.copy('${MKLML_SHARED_LIB}', libs_path)
+    shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path)
+    package_data['paddle.libs']+=[('libmklml_intel' if os.name != 'nt' else 'mklml') + ext_name, ('libiomp5' if os.name != 'nt' else 'libiomp5md') + ext_name]
 if '${WITH_MKLDNN}' == 'ON':
     if '${CMAKE_BUILD_TYPE}' == 'Release':
-        # only change rpath in Release mode.
-        # TODO(typhoonzero): use install_name_tool to patch mkl libs once
-        # we can support mkl on mac.
-        #
-        # change rpath of libmkldnn.so.0, add $ORIGIN/ to it.
-        # The reason is that all thirdparty libraries in the same directory,
-        # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so.
-        command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
-        if os.system(command) != 0:
-            raise Exception("patch libmkldnn.so failed, command: %s" % command)
-    package_data['paddle.libs']+=['libmkldnn.so.0']
+        if os.name != 'nt':
+            # only change rpath in Release mode.
+            # TODO(typhoonzero): use install_name_tool to patch mkl libs once
+            # we can support mkl on mac.
+            #
+            # change rpath of libmkldnn.so.0, add $ORIGIN/ to it.
+            # The reason is that all thirdparty libraries in the same directory,
+            # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so.
+            command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}"
+            if os.system(command) != 0:
+                raise Exception("patch libmkldnn.so failed, command: %s" % command)
+    package_data['paddle.libs']+=['libmkldnn.so.0' if os.name != 'nt' else ('mkldnn' + ext_name)]
     shutil.copy('${MKLDNN_SHARED_LIB}', libs_path)
 if '${WITH_NGRAPH}' == 'ON':
     # only change rpath in Release mode,