Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b601f2de
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b601f2de
编写于
12月 18, 2018
作者:
P
peizhilin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
include the mkl fix only
test=develop
上级
001891ae
变更
18
显示空白变更内容
内联
并排
Showing
18 changed file
with
61 addition
and
91 deletion
+61
-91
CMakeLists.txt
CMakeLists.txt
+2
-2
cmake/cuda.cmake
cmake/cuda.cmake
+0
-3
cmake/cudnn.cmake
cmake/cudnn.cmake
+0
-1
cmake/operators.cmake
cmake/operators.cmake
+1
-1
cmake/simd.cmake
cmake/simd.cmake
+38
-35
paddle/fluid/framework/CMakeLists.txt
paddle/fluid/framework/CMakeLists.txt
+2
-1
paddle/fluid/framework/mixed_vector.h
paddle/fluid/framework/mixed_vector.h
+5
-5
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+1
-2
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+1
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+5
-2
paddle/fluid/operators/cum_op.h
paddle/fluid/operators/cum_op.h
+0
-2
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
.../fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
+0
-3
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
+0
-6
paddle/fluid/operators/math/jit_gen.h
paddle/fluid/operators/math/jit_gen.h
+0
-3
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+2
-0
paddle/fluid/platform/dynload/cudnn.cc
paddle/fluid/platform/dynload/cudnn.cc
+0
-4
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+0
-16
python/setup.py.in
python/setup.py.in
+4
-5
未找到文件。
CMakeLists.txt
浏览文件 @
b601f2de
...
...
@@ -203,10 +203,10 @@ include(external/xxhash) # download xxhash
include
(
external/dlpack
)
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
if
(
NOT WIN32
)
# there is no official support of nccl, cupti in windows
# there is no official support of warpctc, nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
include
(
cupti
)
include
(
external/gzstream
)
endif
(
NOT WIN32
)
...
...
cmake/cuda.cmake
浏览文件 @
b601f2de
...
...
@@ -139,12 +139,10 @@ endfunction()
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
if
(
${
CUDA_VERSION
}
LESS 7.0
)
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs
}
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
60
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 8.0
)
# CUDA 7.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs7
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D__STRICT_ANSI__"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
70
\"
"
)
elseif
(
${
CUDA_VERSION
}
LESS 9.0
)
# CUDA 8.x
set
(
paddle_known_gpu_archs
${
paddle_known_gpu_archs8
}
)
list
(
APPEND CUDA_NVCC_FLAGS
"-D_MWAITXINTRIN_H_INCLUDED"
)
...
...
@@ -152,7 +150,6 @@ elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
# CUDA 8 may complain that sm_20 is no longer supported. Suppress the
# warning for now.
list
(
APPEND CUDA_NVCC_FLAGS
"-Wno-deprecated-gpu-targets"
)
add_definitions
(
"-DPADDLE_CUDA_BINVER=
\"
80
\"
"
)
endif
()
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
...
...
cmake/cudnn.cmake
浏览文件 @
b601f2de
...
...
@@ -89,7 +89,6 @@ if(CUDNN_FOUND)
if
(
NOT CUDNN_MAJOR_VERSION
)
set
(
CUDNN_VERSION
"???"
)
else
()
add_definitions
(
"-DPADDLE_CUDNN_BINVER=
\"
${
CUDNN_MAJOR_VERSION
}
\"
"
)
math
(
EXPR CUDNN_VERSION
"
${
CUDNN_MAJOR_VERSION
}
* 1000 +
${
CUDNN_MINOR_VERSION
}
* 100 +
${
CUDNN_PATCHLEVEL_VERSION
}
"
)
...
...
cmake/operators.cmake
浏览文件 @
b601f2de
...
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
endif
()
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
endif
()
...
...
cmake/simd.cmake
浏览文件 @
b601f2de
...
...
@@ -57,43 +57,46 @@ int main()
return 0;
}"
SSE3_FOUND
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# disable AVX by default on windows
if
(
NOT WIN32
)
# Check AVX
set
(
CMAKE_REQUIRED_FLAGS
${
AVX_FLAG
}
)
set
(
AVX_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps (-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps (1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps (a, b);
return 0;
}"
AVX_FOUND
)
}"
AVX_FOUND
)
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# Check AVX 2
set
(
CMAKE_REQUIRED_FLAGS
${
AVX2_FLAG
}
)
set
(
AVX2_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}"
AVX2_FOUND
)
}"
AVX2_FOUND
)
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
# Check AVX512F
set
(
CMAKE_REQUIRED_FLAGS
${
AVX512F_FLAG
}
)
set
(
AVX512F_FOUND_EXITCODE 1 CACHE STRING
"Result from TRY_RUN"
FORCE
)
CHECK_CXX_SOURCE_RUNS
(
"
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}"
AVX512F_FOUND
)
}"
AVX512F_FOUND
)
endif
(
NOT WIN32
)
set
(
CMAKE_REQUIRED_FLAGS
${
CMAKE_REQUIRED_FLAGS_RETAINED
}
)
mark_as_advanced
(
MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND AVX512F_FOUND
)
paddle/fluid/framework/CMakeLists.txt
浏览文件 @
b601f2de
...
...
@@ -15,7 +15,8 @@ function(windows_symbolic TARGET)
file
(
GENERATE OUTPUT
${
final_path
}
/.
${
src
}
.cu INPUT
${
final_path
}
/
${
src
}
.cc
)
add_custom_command
(
OUTPUT
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy_if_different
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMAND
${
CMAKE_COMMAND
}
-E remove
${
final_path
}
/.
${
src
}
.cu
COMMAND
${
CMAKE_COMMAND
}
-E copy
"
${
final_path
}
/
${
src
}
.cc"
"
${
final_path
}
/.
${
src
}
.cu"
COMMENT
"create hidden file of
${
src
}
.cu"
)
add_custom_target
(
${
TARGET
}
ALL DEPENDS .
${
src
}
.cu
)
endforeach
()
...
...
paddle/fluid/framework/mixed_vector.h
浏览文件 @
b601f2de
...
...
@@ -215,7 +215,7 @@ class Vector {
auto
stream
=
dev_ctx
->
stream
();
void
*
src
=
gpu_
->
ptr
();
void
*
dst
=
cpu_
.
data
();
paddle
::
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
memory
::
Copy
(
platform
::
CPUPlace
(),
dst
,
CUDAPlace
().
get
(),
src
,
gpu_
->
size
(),
stream
);
dev_ctx
->
Wait
();
}
...
...
@@ -261,7 +261,7 @@ class Vector {
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
platform
::
DeviceContextPool
::
Instance
().
Get
(
place
));
auto
stream
=
dev_ctx
->
stream
();
paddle
::
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
memory
::
Copy
(
CUDAPlace
().
get
(),
dst
,
platform
::
CPUPlace
(),
src
,
gpu_
->
size
(),
stream
);
}
...
...
@@ -284,7 +284,7 @@ class Vector {
bool
IsInCPU
()
const
{
return
flag_
&
kDataInCPU
;
}
mutable
std
::
vector
<
T
>
cpu_
;
mutable
paddle
::
memory
::
AllocationPtr
gpu_
;
mutable
memory
::
AllocationPtr
gpu_
;
mutable
int
flag_
;
mutable
std
::
mutex
mtx_
;
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
b601f2de
...
...
@@ -23,7 +23,6 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
b601f2de
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#ifdef _WIN32
#include <malloc.h>
#include <windows.h> // VirtualLock/VirtualUnlock
#else
#include <sys/mman.h> // for mlock and munlock
#endif
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
b601f2de
...
...
@@ -44,8 +44,9 @@ endif()
register_operators
(
EXCLUDES warpctc_op conv_fusion_op DEPS
${
OP_HEADER_DEPS
}
${
OP_PREFETCH_DEPS
}
)
# warpctc_op needs cudnn 7 above
if
(
WITH_GPU
)
if
(
WITH_GPU
AND NOT WIN32
)
if
(
${
CUDNN_MAJOR_VERSION
}
VERSION_LESS 7
)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale SRCS warpctc_op.cc warpctc_op.cu.cc
)
else
()
...
...
@@ -63,7 +64,9 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
...
...
paddle/fluid/operators/cum_op.h
浏览文件 @
b601f2de
...
...
@@ -13,8 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <array>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
...
...
paddle/fluid/operators/elementwise/elementwise_mul_mkldnn_op.cc
浏览文件 @
b601f2de
...
...
@@ -19,9 +19,6 @@ limitations under the License. */
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/operators/math/jit_kernel.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/operators/math/detail/lstm_cpu_kernel.h
浏览文件 @
b601f2de
...
...
@@ -17,12 +17,6 @@ limitations under the License. */
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#if defined(_WIN32)
#if defined(__AVX2__) || defined(__AVX__)
inline
__m256
operator
+=
(
__m256
a
,
__m256
b
)
{
return
_mm256_add_ps
(
a
,
b
);
}
#endif
#endif
namespace
paddle
{
namespace
operators
{
namespace
math
{
...
...
paddle/fluid/operators/math/jit_gen.h
浏览文件 @
b601f2de
...
...
@@ -18,9 +18,6 @@ limitations under the License. */
#include <type_traits>
#include "paddle/fluid/platform/macros.h"
#if defined(_WIN32) && defined(_WINSOCKAPI_)
#define _WINSOCK2API_
/* Prevent inclusion of winsock2.h */
#endif
#define XBYAK_USE_MMAP_ALLOCATOR
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
b601f2de
...
...
@@ -16,7 +16,9 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
...
...
paddle/fluid/platform/dynload/cudnn.cc
浏览文件 @
b601f2de
...
...
@@ -38,10 +38,6 @@ CUDNN_DNN_ROUTINE_EACH_AFTER_R4(DEFINE_WRAP);
CUDNN_DNN_ROUTINE_EACH_R5
(
DEFINE_WRAP
);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R6
CUDNN_DNN_ROUTINE_EACH_R6
(
DEFINE_WRAP
);
#endif
#ifdef CUDNN_DNN_ROUTINE_EACH_R7
CUDNN_DNN_ROUTINE_EACH_R7
(
DEFINE_WRAP
);
#endif
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
b601f2de
...
...
@@ -53,12 +53,6 @@ namespace platform {
namespace
dynload
{
static
constexpr
char
cupti_lib_path
[]
=
CUPTI_LIB_PATH
;
#if defined(_WIN32) && defined(PADDLE_WITH_CUDA)
static
constexpr
char
*
win_cublas_lib
=
"cublas64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_curand_lib
=
"curand64_"
PADDLE_CUDA_BINVER
".dll"
;
static
constexpr
char
*
win_cudnn_lib
=
"cudnn64_"
PADDLE_CUDNN_BINVER
".dll"
;
#endif
static
inline
std
::
string
join
(
const
std
::
string
&
part1
,
const
std
::
string
&
part2
)
{
// directory separator
...
...
@@ -171,8 +165,6 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
void
*
GetCublasDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_cublas_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcublas.so"
);
#endif
...
...
@@ -181,8 +173,6 @@ void* GetCublasDsoHandle() {
void
*
GetCUDNNDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.dylib"
,
false
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
win_cudnn_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cudnn_dir
,
"libcudnn.so"
,
false
);
#endif
...
...
@@ -203,8 +193,6 @@ void* GetCUPTIDsoHandle() {
void
*
GetCurandDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.dylib"
);
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
win_curand_lib
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_cuda_dir
,
"libcurand.so"
);
#endif
...
...
@@ -213,8 +201,6 @@ void* GetCurandDsoHandle() {
void
*
GetWarpCTCDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"warpctc.dll"
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
...
...
@@ -239,8 +225,6 @@ void* GetTensorRtDsoHandle() {
void
*
GetMKLMLDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"mklml.dll"
);
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_mklml_dir
,
"libmklml_intel.so"
);
#endif
...
...
python/setup.py.in
浏览文件 @
b601f2de
...
...
@@ -158,11 +158,10 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
# put all thirdparty libraries in paddle.libs
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
package_data['paddle.libs']= []
package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if os.name != 'nt':
package_data['paddle.libs']= []
package_data['paddle.libs']=['libwarpctc' + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_SHARED_LIB}', libs_path)
shutil.copy('${MKLML_SHARED_IOMP_LIB}', libs_path)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录