Commit d5cab4f0 (unverified)

Fix compile on CUDA9.1 & MacOS (#6642)

Authored by Yu Yang on Dec 15, 2017; committed via GitHub on Dec 15, 2017.
Parent commit: 8a24915d
Showing 6 changed files with 35 additions and 16 deletions (+35 −16).
paddle/math/float16.h            +2   −2
paddle/platform/dynload/nccl.cc  +5   −0
paddle/platform/dynload/nccl.h   +12  −12
paddle/platform/nccl_test.cu     +2   −2
paddle/platform/variant.h        +13  −0
python/.gitignore                +1   −0
paddle/math/float16.h

@@ -79,7 +79,7 @@ public:
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline explicit float16(const half& h) {
 #if CUDA_VERSION >= 9000
-    x = reinterpret_cast<__half_raw*>(&h)->x;
+    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&h))->x;
 #else
     x = h.x;
 #endif  // CUDA_VERSION >= 9000

@@ -145,7 +145,7 @@ public:
 #ifdef PADDLE_CUDA_FP16
   HOSTDEVICE inline float16& operator=(const half& rhs) {
 #if CUDA_VERSION >= 9000
-    x = reinterpret_cast<__half_raw*>(&rhs)->x;
+    x = reinterpret_cast<__half_raw*>(const_cast<half*>(&rhs))->x;
 #else
     x = rhs.x;
 #endif
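Why the extra `const_cast`: `reinterpret_cast` cannot remove const-qualification, so casting the address of the `const half&` parameter directly to a non-const `__half_raw*` is rejected by the stricter toolchains this commit targets. A minimal standalone sketch of the pattern, with stand-in types rather than CUDA's real `half`/`__half_raw`:

```cpp
// Minimal sketch (stand-in types, not CUDA's real `half`/`__half_raw`):
// reinterpret_cast cannot cast away const, so going from `const half*`
// straight to a non-const `__half_raw*` is ill-formed; stripping const
// first with const_cast and then reinterpreting the bits is accepted.
struct half_like { unsigned short x; };      // plays the role of `half`
struct half_raw_like { unsigned short x; };  // plays the role of `__half_raw`

inline unsigned short raw_bits(const half_like& h) {
  // return reinterpret_cast<half_raw_like*>(&h)->x;  // error: drops const
  return reinterpret_cast<half_raw_like*>(const_cast<half_like*>(&h))->x;
}
```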
paddle/platform/dynload/nccl.cc

@@ -25,6 +25,11 @@ void *nccl_dso_handle;
 NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);

+void LoadNCCLDSO() {
+  platform::call_once(nccl_dso_flag,
+                      [] { GetNCCLDsoHandle(&nccl_dso_handle); });
+}
+
 }  // namespace dynload
 }  // namespace platform
 }  // namespace paddle
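The new `LoadNCCLDSO()` helper centralizes the once-only loading of the NCCL shared library in one out-of-line function. A self-contained sketch of the same idiom using `std::call_once` (the names and the dummy loader body are assumptions, not Paddle's actual implementation):

```cpp
#include <mutex>

// Assumed analogues of the globals declared in paddle/platform/dynload.
static std::once_flag nccl_dso_flag;
static void* nccl_dso_handle = nullptr;

// Placeholder for the real helper, which dlopen()s libnccl and stores the handle.
static void GetNCCLDsoHandle(void** handle) { *handle = nullptr; }

// The lambda body runs at most once, even if many wrapper calls race here.
void LoadNCCLDSO() {
  std::call_once(nccl_dso_flag, [] { GetNCCLDsoHandle(&nccl_dso_handle); });
}
```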
paddle/platform/dynload/nccl.h

@@ -28,18 +28,18 @@ extern std::once_flag nccl_dso_flag;
 extern void* nccl_dso_handle;

 #ifdef PADDLE_USE_DSO
-#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                           \
-  struct DynLoad__##__name {                                             \
-    template <typename... Args>                                          \
-    auto operator()(Args... args) -> decltype(__name(args...)) {         \
-      using nccl_func = decltype(__name(args...)) (*)(Args...);          \
-      platform::call_once(nccl_dso_flag,                                 \
-                          paddle::platform::dynload::GetNCCLDsoHandle,   \
-                          &nccl_dso_handle);                             \
-      void* p_##__name = dlsym(nccl_dso_handle, #__name);                \
-      return reinterpret_cast<nccl_func>(p_##__name)(args...);           \
-    }                                                                    \
-  };                                                                     \
+extern void LoadNCCLDSO();
+
+#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                   \
+  struct DynLoad__##__name {                                     \
+    template <typename... Args>                                  \
+    auto operator()(Args... args) -> decltype(__name(args...)) { \
+      using nccl_func = decltype(__name(args...)) (*)(Args...);  \
+      paddle::platform::dynload::LoadNCCLDSO();                  \
+      void* p_##__name = dlsym(nccl_dso_handle, #__name);        \
+      return reinterpret_cast<nccl_func>(p_##__name)(args...);   \
+    }                                                            \
+  };                                                             \
   extern DynLoad__##__name __name
 #else
 #define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name) \
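The macro change swaps the inline three-argument `platform::call_once(nccl_dso_flag, GetNCCLDsoHandle, &nccl_dso_handle)` expansion for a call to the new out-of-line `LoadNCCLDSO()` helper, so each generated wrapper only emits a plain function call plus the `dlsym` dispatch. Below is a self-contained sketch of the wrapper pattern the macro expands to, using a libm symbol instead of an NCCL one; all names here are hypothetical, and the fixed `double(double)` signature stands in for the macro's variadic template:

```cpp
// Build note: link with -ldl on older glibc.
#include <dlfcn.h>
#include <cstdio>
#include <mutex>

static std::once_flag dso_flag;
static void* dso_handle = nullptr;

// Once-only dlopen, mirroring LoadNCCLDSO() from the previous file.
static void LoadDSO(const char* soname) {
  std::call_once(dso_flag, [&] { dso_handle = dlopen(soname, RTLD_LAZY); });
}

// Wrapper object standing in for what DECLARE_DYNAMIC_LOAD_NCCL_WRAP(cos)
// would generate: load the library, resolve the symbol, forward the call.
struct DynLoad_cos {
  double operator()(double x) {
    using func_t = double (*)(double);
    LoadDSO("libm.so.6");                // assumed library name (Linux)
    void* p = dlsym(dso_handle, "cos");  // resolve by symbol name
    return reinterpret_cast<func_t>(p)(x);
  }
};

int main() {
  DynLoad_cos dyn_cos;
  std::printf("%f\n", dyn_cos(0.0));     // prints 1.000000
  return 0;
}
```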
paddle/platform/nccl_test.cu

@@ -31,7 +31,7 @@ namespace platform {
 TEST(NCCL, init) {
   std::vector<ncclComm_t> comms;
   comms.resize(dev_count);
-  PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+  dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
   for (int i = 0; i < dev_count; ++i) {
     dynload::ncclCommDestroy(comms[i]);
   }

@@ -62,7 +62,7 @@ TEST(NCCL, all_reduce) {
   std::vector<ncclComm_t> comms;
   comms.resize(dev_count);
   VLOG(1) << "Initializing ncclComm";
-  PADDLE_ENFORCE(dynload::ncclCommInitAll(comms.data(), dev_count, nullptr));
+  dynload::ncclCommInitAll(comms.data(), dev_count, nullptr);
   VLOG(1) << "ncclComm initialized";
   VLOG(1) << "Creating thread data";
   std::vector<std::unique_ptr<PerThreadData<double>>> data;
paddle/platform/variant.h

@@ -14,6 +14,19 @@
 #pragma once

+#ifdef __CUDACC__
+#ifdef __CUDACC_VER_MAJOR__
+// CUDA 9 define `__CUDACC_VER__` as a warning message, manually define
+// __CUDACC_VER__ instead.
+#undef __CUDACC_VER__
+
+#define __CUDACC_VER__                                                \
+  (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 +        \
+   __CUDACC_VER_BUILD__)
+#endif
+
+#endif  // __CUDACC__
+
 #include <boost/config.hpp>

 #ifdef PADDLE_WITH_CUDA
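With CUDA 9, `nvcc` no longer defines a numeric `__CUDACC_VER__`, so the macro is rebuilt here from the major/minor/build components just before `<boost/config.hpp>` is included (some Boost config headers still reference `__CUDACC_VER__`). A worked example of the arithmetic, using nvcc 9.1.85 as an assumed version:

```cpp
#include <cstdio>

int main() {
  // Assumed version: nvcc 9.1.85 would define
  // __CUDACC_VER_MAJOR__ = 9, __CUDACC_VER_MINOR__ = 1, __CUDACC_VER_BUILD__ = 85.
  const int major = 9, minor = 1, build = 85;
  // Same formula as the reconstructed __CUDACC_VER__ macro.
  std::printf("%d\n", major * 10000 + minor * 100 + build);  // prints 90185
  return 0;
}
```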
python/.gitignore

@@ -2,6 +2,7 @@
 build
 dist
 paddle.egg-info
+paddlepaddle_gpu.egg-info
 .idea
 paddle/proto/*.py
 paddle/proto/*.pyc