Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
09fd2b2a
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
09fd2b2a
编写于
11月 03, 2020
作者:
W
Wilber
提交者:
GitHub
11月 03, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Paddle support compile on sw (#27858)
上级
953302d9
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
59 addition
and
11 deletion
+59
-11
CMakeLists.txt
CMakeLists.txt
+10
-1
cmake/cblas.cmake
cmake/cblas.cmake
+4
-0
cmake/external/eigen.cmake
cmake/external/eigen.cmake
+5
-0
cmake/flags.cmake
cmake/flags.cmake
+1
-1
paddle/fluid/operators/math/blas.h
paddle/fluid/operators/math/blas.h
+1
-1
paddle/fluid/operators/search_compute.h
paddle/fluid/operators/search_compute.h
+4
-4
paddle/fluid/platform/cpu_helper.cc
paddle/fluid/platform/cpu_helper.cc
+3
-0
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+2
-1
paddle/fluid/platform/cpu_info.h
paddle/fluid/platform/cpu_info.h
+2
-1
paddle/fluid/platform/device_tracer.cc
paddle/fluid/platform/device_tracer.cc
+24
-0
python/CMakeLists.txt
python/CMakeLists.txt
+1
-1
python/setup.py.in
python/setup.py.in
+2
-1
未找到文件。
CMakeLists.txt
浏览文件 @
09fd2b2a
...
...
@@ -138,6 +138,7 @@ option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option
(
WITH_NCCL
"Compile PaddlePaddle with NCCL support"
ON
)
option
(
WITH_CRYPTO
"Compile PaddlePaddle with crypto support"
ON
)
option
(
WITH_ARM
"Compile PaddlePaddle with arm support"
OFF
)
option
(
WITH_SW
"Compile PaddlePaddle with sw support"
OFF
)
option
(
WITH_MUSL
"Compile with musl libc instead of gblic"
OFF
)
# PY_VERSION
...
...
@@ -257,10 +258,18 @@ if(WITH_ARM)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fPIC"
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_ARM=ON"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling WITH_ARM=ON."
FORCE
)
set
(
WITH_GPU OFF CACHE STRING
"Disable GPU when compiling WITH_ARM=ON."
FORCE
)
add_definitions
(
-DPADDLE_WITH_ARM
)
endif
()
if
(
WITH_SW
)
# mieee flag solves floating-point exceptions under sw and ALPHA architectures
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-fPIC -mieee"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fPIC -mieee"
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_SW=ON"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling WITH_SW=ON."
FORCE
)
add_definitions
(
-DPADDLE_WITH_SW
)
endif
()
set
(
PADDLE_PYTHON_BUILD_DIR
"
${
CMAKE_CURRENT_BINARY_DIR
}
/python/build"
)
set
(
CMAKE_CXX_FLAGS_RELWITHDEBINFO
"-O3 -g -DNDEBUG"
)
...
...
cmake/cblas.cmake
浏览文件 @
09fd2b2a
...
...
@@ -101,6 +101,8 @@ if(NOT DEFINED CBLAS_PROVIDER AND WITH_SYSTEM_BLAS)
${
REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
}
)
find_library
(
REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${
REFERENCE_CBLAS_LIB_SEARCH_PATHS
}
)
find_library
(
REFERENCE_BLAS_LIBRARY NAMES blas PATHS
${
REFERENCE_BLAS_LIB_SEARCH_PATHS
}
)
if
(
REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY
)
set
(
CBLAS_PROVIDER REFERENCE_CBLAS
)
...
...
@@ -127,5 +129,7 @@ endif()
include_directories
(
${
CBLAS_INC_DIR
}
)
if
(
NOT
${
CBLAS_PROVIDER
}
STREQUAL MKLML
)
target_link_libraries
(
cblas
${
CBLAS_LIBRARIES
}
)
elseif
(
${
CBLAS_PROVIDER
}
STREQUAL REFERENCE_CBLAS
)
target_link_libraries
(
cblas gfortran
${
CBLAS_LIBRARIES
}
${
REFERENCE_BLAS_LIBRARY
}
)
endif
()
cmake/external/eigen.cmake
浏览文件 @
09fd2b2a
...
...
@@ -97,3 +97,8 @@ endif()
add_library
(
eigen3 INTERFACE
)
add_dependencies
(
eigen3 extern_eigen3
)
# sw does not support thread_local semantics
if
(
WITH_SW
)
add_definitions
(
-DEIGEN_AVOID_THREAD_LOCAL
)
endif
()
cmake/flags.cmake
浏览文件 @
09fd2b2a
...
...
@@ -183,7 +183,7 @@ set(GPU_COMMON_FLAGS
-Wno-error=unused-function
# Warnings in Numpy Header.
-Wno-error=array-bounds
# Warnings in Eigen::array
)
if
(
NOT WITH_NV_JETSON AND NOT WITH_ARM
)
if
(
NOT WITH_NV_JETSON AND NOT WITH_ARM
AND NOT WITH_SW
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-m64"
)
endif
()
endif
(
NOT WIN32
)
...
...
paddle/fluid/operators/math/blas.h
浏览文件 @
09fd2b2a
...
...
@@ -32,7 +32,7 @@ class Tensor;
#include <libxsmm.h>
#endif
#if
def PADDLE_USE_OPENBLAS
#if
defined(PADDLE_USE_OPENBLAS) || defined(PADDLE_USE_REFERENCE_CBLAS)
#include <cblas.h>
#endif
...
...
paddle/fluid/operators/search_compute.h
浏览文件 @
09fd2b2a
...
...
@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#if !defined(PADDLE_WITH_ARM)
#if !defined(PADDLE_WITH_ARM)
&& !defined(PADDLE_WITH_SW)
#include <immintrin.h>
#endif
#include <cfloat>
...
...
@@ -74,7 +74,7 @@ void call_gemm_batched(const framework::ExecutionContext& ctx,
}
}
#if !defined(PADDLE_WITH_ARM)
#if !defined(PADDLE_WITH_ARM)
&& !defined(PADDLE_WITH_SW)
#define __m256x __m256
...
...
@@ -114,7 +114,7 @@ inline void axpy(const T* x, T* y, size_t len, const T alpha) {
_mm256_add_px
(
_mm256_load_px
(
y
+
jjj
),
_mm256_mul_px
(
mm_alpha
,
_mm256_load_px
(
x
+
jjj
))));
}
#elif defined(PADDLE_WITH_ARM)
#elif defined(PADDLE_WITH_ARM)
|| defined(PADDLE_WITH_SW)
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"axpy is not supported"
));
#else
lll
=
len
&
~
SSE_CUT_LEN_MASK
;
...
...
@@ -143,7 +143,7 @@ inline void axpy_noadd(const T* x, T* y, size_t len, const T alpha) {
for
(
jjj
=
0
;
jjj
<
lll
;
jjj
+=
AVX_STEP_SIZE
)
{
_mm256_store_px
(
y
+
jjj
,
_mm256_mul_px
(
mm_alpha
,
_mm256_load_px
(
x
+
jjj
)));
}
#elif defined(PADDLE_WITH_ARM)
#elif defined(PADDLE_WITH_ARM)
|| defined(PADDLE_WITH_SW)
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"axpy_noadd is not supported"
));
#else
lll
=
len
&
~
SSE_CUT_LEN_MASK
;
...
...
paddle/fluid/platform/cpu_helper.cc
浏览文件 @
09fd2b2a
...
...
@@ -42,6 +42,9 @@ void SetNumThreads(int num_threads) {
int
real_num_threads
=
num_threads
>
1
?
num_threads
:
1
;
platform
::
dynload
::
MKL_Set_Num_Threads
(
real_num_threads
);
omp_set_num_threads
(
real_num_threads
);
#elif defined(PADDLE_USE_REFERENCE_CBLAS)
// cblas does not support multi-threading
return
;
#else
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"This library (except OPENBLAS, MKLML) is not supported yet, so the"
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
09fd2b2a
...
...
@@ -140,7 +140,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
if
(
cpu_isa
==
isa_any
)
{
return
true
;
}
else
{
#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM)
#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) && \
!defined(PADDLE_WITH_SW)
int
reg
[
4
];
cpuid
(
reg
,
0
);
int
nIds
=
reg
[
0
];
...
...
paddle/fluid/platform/cpu_info.h
浏览文件 @
09fd2b2a
...
...
@@ -40,7 +40,8 @@ limitations under the License. */
#ifdef _WIN32
#define cpuid(reg, x) __cpuidex(reg, x, 0)
#else
#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM)
#if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_ARM) && \
!defined(PADDLE_WITH_SW)
#include <cpuid.h>
inline
void
cpuid
(
int
reg
[
4
],
int
x
)
{
__cpuid_count
(
x
,
0
,
reg
[
0
],
reg
[
1
],
reg
[
2
],
reg
[
3
]);
...
...
paddle/fluid/platform/device_tracer.cc
浏览文件 @
09fd2b2a
...
...
@@ -37,9 +37,16 @@ namespace paddle {
namespace
platform
{
namespace
{
// Tracking the nested block stacks of each thread.
#ifdef PADDLE_WITH_SW
// sw does not support thread_local
std
::
deque
<
int
>
block_id_stack
;
std
::
deque
<
Event
*>
annotation_stack
;
#else
// Tracking the nested event stacks.
thread_local
std
::
deque
<
int
>
block_id_stack
;
// Tracking the nested event stacks.
thread_local
std
::
deque
<
Event
*>
annotation_stack
;
#endif
// stack to store events such as pe and so on
static
std
::
deque
<
Event
*>
main_thread_annotation_stack
{};
static
std
::
deque
<
std
::
string
>
main_thread_annotation_stack_name
{};
...
...
@@ -288,8 +295,13 @@ class DeviceTracerImpl : public DeviceTracer {
}
void
AddAnnotation
(
uint32_t
id
,
Event
*
event
)
{
#ifdef PADDLE_WITH_SW
std
::
forward_list
<
std
::
pair
<
uint32_t
,
Event
*>>
*
local_correlations_pairs
=
nullptr
;
#else
thread_local
std
::
forward_list
<
std
::
pair
<
uint32_t
,
Event
*>>
*
local_correlations_pairs
=
nullptr
;
#endif
if
(
local_correlations_pairs
==
nullptr
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
correlations_pairs
.
emplace_front
();
...
...
@@ -304,7 +316,11 @@ class DeviceTracerImpl : public DeviceTracer {
VLOG
(
1
)
<<
"Empty timeline annotation."
;
return
;
}
#ifdef PADDLE_WITH_SW
std
::
forward_list
<
CPURecord
>
*
local_cpu_records_
=
nullptr
;
#else
thread_local
std
::
forward_list
<
CPURecord
>
*
local_cpu_records_
=
nullptr
;
#endif
if
(
local_cpu_records_
==
nullptr
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
cpu_records_
.
emplace_front
();
...
...
@@ -335,8 +351,12 @@ class DeviceTracerImpl : public DeviceTracer {
VLOG
(
3
)
<<
alloc_in
<<
", "
<<
free_in
<<
" Cannot be traced."
;
return
;
}
#ifdef PADDLE_WITH_SW
std
::
forward_list
<
MemInfoRecord
>
*
local_mem_info_record
=
nullptr
;
#else
thread_local
std
::
forward_list
<
MemInfoRecord
>
*
local_mem_info_record
=
nullptr
;
#endif
if
(
local_mem_info_record
==
nullptr
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
mem_info_record_
.
emplace_front
();
...
...
@@ -353,8 +373,12 @@ class DeviceTracerImpl : public DeviceTracer {
VLOG
(
1
)
<<
"Empty timeline annotation."
;
return
;
}
#ifdef PADDLE_WITH_SW
std
::
forward_list
<
ActiveKindRecord
>
*
local_active_kind_records
=
nullptr
;
#else
thread_local
std
::
forward_list
<
ActiveKindRecord
>
*
local_active_kind_records
=
nullptr
;
#endif
if
(
local_active_kind_records
==
nullptr
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
trace_mu_
);
active_kind_records_
.
emplace_front
();
...
...
python/CMakeLists.txt
浏览文件 @
09fd2b2a
...
...
@@ -106,7 +106,7 @@ if(APPLE)
message
(
FATAL_ERROR
"install_name_tool not found, please check.
\n
"
)
endif
()
endif
()
if
(
LINUX
)
if
(
LINUX
AND NOT WITH_SW
)
find_program
(
PATCHELF_EXECUTABLE patchelf
)
if
(
NOT PATCHELF_EXECUTABLE
)
message
(
FATAL_ERROR
"patchelf not found, please install it.
\n
"
...
...
python/setup.py.in
浏览文件 @
09fd2b2a
...
...
@@ -349,7 +349,8 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so'
# The dynamic library compiled under aarch64 is greater than 64M,
# and an oversize error will be reported when using patchelf.
if platform.machine() != 'aarch64':
# sw_64 does not support patchelf, so we just disable it.
if platform.machine() != 'aarch64' and platform.machine() != 'sw_64':
if os.system(command) != 0:
raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录