Unverified commit fb70682f
Authored by GaoWei8 on Jul 03, 2020; committed via GitHub on Jul 03, 2020.
fix PADDLE_ENFORCE (#25297)
* fix PADDLE_ENFORCE and refine the description test=develop
Parent: dc17ac91
Showing 8 changed files with 69 additions and 29 deletions (+69 / -29):
paddle/fluid/platform/collective_helper.cc        +29  -7
paddle/fluid/platform/collective_helper.h         +14  -10
paddle/fluid/platform/device_context.h             +3  -1
paddle/fluid/platform/device_memory_aligment.cc    +2  -1
paddle/fluid/platform/device_tracer.cc             +4  -2
paddle/fluid/platform/dynload/cudnn.cc             +5  -3
paddle/fluid/platform/stream_callback_manager.cc   +6  -3
paddle/fluid/platform/transform.h                  +6  -2
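Every hunk below makes the same migration: a bare PADDLE_ENFORCE_* check gains a typed error payload (platform::errors::InvalidArgument, PreconditionNotMet, or PermissionDenied) whose printf-style message states both the expectation and the received value. A rough standalone sketch of that pattern; the ENFORCE_GT macro and InvalidArgument struct here are illustrative mock-ups, not Paddle's actual enforce.h machinery:

// Mock of the enforce-with-typed-error pattern this commit migrates to.
#include <cstdio>
#include <stdexcept>
#include <string>

// A typed error pairs a category name with a formatted message, so the
// failure site reports what was expected and what was actually received.
struct InvalidArgument {
  std::string msg;
  template <typename... Args>
  explicit InvalidArgument(const char* fmt, Args... args) {
    char buf[512];
    std::snprintf(buf, sizeof(buf), fmt, args...);
    msg = std::string("InvalidArgument: ") + buf;
  }
};

// ENFORCE_GT(a, b, err): throw the typed error's message unless a > b.
#define ENFORCE_GT(a, b, err) \
  do {                        \
    if (!((a) > (b))) throw std::runtime_error((err).msg); \
  } while (0)

int main() {
  int nranks = 1;
  try {
    // Mirrors the new check in CreateNCCLComm below.
    ENFORCE_GT(nranks, 1,
               InvalidArgument(
                   "Expected nranks > 1. But received nranks is %d.", nranks));
  } catch (const std::exception& e) {
    std::printf("%s\n", e.what());  // names the offending value
  }
  return 0;
}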
paddle/fluid/platform/collective_helper.cc

@@ -57,11 +57,25 @@ class NCCLCommImpl : public NCCLComm {
 NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
                                           int rank, int dev_id, int ring_id) {
-  PADDLE_ENFORCE_NOT_NULL(nccl_id);
-  PADDLE_ENFORCE_GT(nranks, 1);
-  PADDLE_ENFORCE_GE(rank, 0);
-  PADDLE_ENFORCE_LT(rank, nranks);
-  PADDLE_ENFORCE_GE(dev_id, 0);
+  PADDLE_ENFORCE_NOT_NULL(nccl_id,
+                          platform::errors::InvalidArgument(
+                              "The nccl unique id should not be null."));
+  PADDLE_ENFORCE_GT(
+      nranks, 1,
+      platform::errors::InvalidArgument(
+          "Expected nranks > 1. But received nranks is %d.", nranks));
+  PADDLE_ENFORCE_GE(rank, 0,
+                    platform::errors::InvalidArgument(
+                        "Expected rank >= 0. But received rank is %d.", rank));
+  PADDLE_ENFORCE_LT(
+      rank, nranks,
+      platform::errors::InvalidArgument(
+          "Expected rank < nranks. But received rank is %d, nranks is %d.",
+          rank, nranks));
+  PADDLE_ENFORCE_GE(
+      dev_id, 0,
+      platform::errors::InvalidArgument(
+          "Expected dev_id >= 0. But received dev_id is %d.", dev_id));

   ncclComm_t comm = nullptr;
   PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id));

@@ -82,14 +96,22 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks,
 void NCCLCommContext::CreateAllNCCLComms(const std::vector<int>& dev_ids,
                                          int ring_id) {
-  PADDLE_ENFORCE_GT(dev_ids.size(), 0);
+  PADDLE_ENFORCE_GT(dev_ids.size(), 0,
+                    platform::errors::InvalidArgument(
+                        "Expected the size of dev_ids > 0. But "
+                        "received the size of dev_ids is %d.",
+                        dev_ids.size()));

   const int kDevices = dev_ids.size();
   ncclComm_t comms[kDevices];
   PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclCommInitAll(
       comms, dev_ids.size(), dev_ids.data()));

-  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0);
+  PADDLE_ENFORCE_EQ(comm_map_.count(ring_id), 0,
+                    platform::errors::InvalidArgument(
+                        "Expected comm_map_.count(ring_id) = 0. But received "
+                        "comm_map_.count(ring_id) is %d.",
+                        comm_map_.count(ring_id)));
   for (size_t i = 0; i < dev_ids.size(); ++i) {
     AssignNCCLComm(comms[i], dev_ids.size(), i, dev_ids[i], ring_id);
     VLOG(1) << "nccl communicator of rank " << i << " in ring " << ring_id
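The practical effect: a bad rank or device id now fails before any CUDA state is touched, with a message naming the offending value. A Paddle-free sketch of the same fail-fast validation; ValidateCommArgs is a hypothetical helper, not part of this commit:

#include <stdexcept>
#include <string>

// Same checks as CreateNCCLComm, in the same order, using
// std::invalid_argument in place of PADDLE_ENFORCE_* with
// platform::errors::InvalidArgument.
void ValidateCommArgs(const void* nccl_id, int nranks, int rank, int dev_id) {
  if (nccl_id == nullptr)
    throw std::invalid_argument("The nccl unique id should not be null.");
  if (nranks <= 1)
    throw std::invalid_argument(
        "Expected nranks > 1. But received nranks is " +
        std::to_string(nranks) + ".");
  if (rank < 0 || rank >= nranks)
    throw std::invalid_argument(
        "Expected 0 <= rank < nranks. But received rank is " +
        std::to_string(rank) + ", nranks is " + std::to_string(nranks) + ".");
  if (dev_id < 0)
    throw std::invalid_argument(
        "Expected dev_id >= 0. But received dev_id is " +
        std::to_string(dev_id) + ".");
}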
paddle/fluid/platform/collective_helper.h

@@ -78,24 +78,28 @@ class NCCLCommContext {
   // retrieve a communicator by the ring id in multiprocessing mode
   NCCLComm* Get(int ring_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator in ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator in ring id %d has not been initialized.", ring_id));
     PADDLE_ENFORCE_EQ(comm_map_.at(ring_id).size(), 1,
-                      "you should specify a device id to retrieve from "
-                      "multiple communicators");
+                      platform::errors::InvalidArgument(
+                          "One device id should be specified to retrieve from "
+                          "multiple communicators."));
     return comm_map_.at(ring_id).begin()->second.get();
   }

   // retrieve a communicator by the ring id and the device id
   NCCLComm* Get(int ring_id, int dev_id) const {
-    PADDLE_ENFORCE_GT(comm_map_.count(ring_id), 0,
-                      "comunicator of ring id %d has not been initialized",
-                      ring_id);
+    PADDLE_ENFORCE_GT(
+        comm_map_.count(ring_id), 0,
+        platform::errors::InvalidArgument(
+            "Comunicator of ring id %d has not been initialized.", ring_id));
     PADDLE_ENFORCE_GT(
         comm_map_.at(ring_id).count(dev_id), 0,
-        "comunicator at device id %d has not been initialized in ring %d",
-        dev_id, ring_id);
+        platform::errors::InvalidArgument(
+            "Comunicator at device id %d has not been initialized in ring %d.",
+            dev_id, ring_id));
     return comm_map_.at(ring_id).at(dev_id).get();
   }
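Both Get overloads walk a two-level map, ring id first and device id second, and the single-argument form additionally requires that the ring hold exactly one communicator. A plain-std::map sketch of that lookup, assuming the comm_map_ shape implied by the header:

#include <map>
#include <memory>
#include <stdexcept>

struct Comm {};  // stand-in for NCCLComm

// ring_id -> (dev_id -> communicator), mirroring comm_map_'s layout.
using CommMap = std::map<int, std::map<int, std::unique_ptr<Comm>>>;

Comm* Get(const CommMap& m, int ring_id, int dev_id) {
  if (m.count(ring_id) == 0)
    throw std::invalid_argument("ring id has not been initialized");
  const auto& ring = m.at(ring_id);
  if (ring.count(dev_id) == 0)
    throw std::invalid_argument("device id has not been initialized in ring");
  return ring.at(dev_id).get();
}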
paddle/fluid/platform/device_context.h

@@ -515,7 +515,9 @@ class DeviceContextPool {
   explicit DeviceContextPool(const std::vector<platform::Place>& places);

   static DeviceContextPool& Instance() {
-    PADDLE_ENFORCE_NOT_NULL(pool, "Need to Create DeviceContextPool first!");
+    PADDLE_ENFORCE_NOT_NULL(pool,
+                            platform::errors::PreconditionNotMet(
+                                "Need to Create DeviceContextPool firstly!"));
     return *pool;
   }
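The Instance() check is a precondition on a lazily created singleton, which is why the typed error is PreconditionNotMet rather than InvalidArgument. A stripped-down sketch of the same guard; Pool is an illustrative stand-in, not Paddle's class:

#include <stdexcept>

class Pool {
 public:
  static Pool& Instance() {
    // Mirrors PADDLE_ENFORCE_NOT_NULL(pool, PreconditionNotMet(...)):
    // using the pool before it is created is a programming error.
    if (pool_ == nullptr)
      throw std::logic_error("Need to Create DeviceContextPool firstly!");
    return *pool_;
  }
  static void Init() {
    static Pool instance;
    pool_ = &instance;
  }

 private:
  Pool() = default;
  static Pool* pool_;
};

Pool* Pool::pool_ = nullptr;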
paddle/fluid/platform/device_memory_aligment.cc

@@ -24,7 +24,8 @@ size_t Alignment(size_t size, const platform::Place &place) {
 #ifdef PADDLE_WITH_CUDA
     alignment = GpuMinChunkSize();
 #else
-    PADDLE_THROW("Fluid is not compiled with CUDA");
+    PADDLE_THROW(platform::errors::PreconditionNotMet(
+        "Fluid is not compiled with CUDA."));
 #endif
   }
   size_t remaining = size % alignment;
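For context, Alignment() rounds a byte count up to the device's minimum chunk size via the size % alignment remainder seen above. A worked standalone version; the 256-byte alignment is an assumed value for illustration, not Paddle's GpuMinChunkSize():

#include <cstddef>
#include <cstdio>

// Round size up to the next multiple of alignment.
size_t AlignUp(size_t size, size_t alignment) {
  size_t remaining = size % alignment;
  return remaining == 0 ? size : size + (alignment - remaining);
}

int main() {
  std::printf("%zu\n", AlignUp(1000, 256));  // prints 1024
  return 0;
}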
paddle/fluid/platform/device_tracer.cc

@@ -177,8 +177,10 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
   static std::thread::id cupti_thread_id(0);
   if (cupti_thread_id == std::thread::id(0))
     cupti_thread_id = std::this_thread::get_id();
-  PADDLE_ENFORCE_EQ(std::this_thread::get_id(), cupti_thread_id,
-                    "Only one thread is allowed to call bufferCompleted()");
+  PADDLE_ENFORCE_EQ(
+      std::this_thread::get_id(), cupti_thread_id,
+      platform::errors::PermissionDenied(
+          "Only one thread is allowed to call bufferCompleted()."));
   CUptiResult status;
   CUpti_Activity *record = NULL;
   if (validSize > 0) {
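The check latches the first caller's thread id into a function-local static and rejects every other thread, which is why the error category becomes PermissionDenied. A standalone sketch of the guard, using a default-constructed std::thread::id as the "unset" sentinel in place of the id(0) in the hunk:

#include <stdexcept>
#include <thread>

void BufferCompletedGuard() {
  static std::thread::id owner;  // default-constructed: no owner yet
  if (owner == std::thread::id()) owner = std::this_thread::get_id();
  // Any later call from a different thread is a permission violation.
  if (std::this_thread::get_id() != owner)
    throw std::runtime_error(
        "Only one thread is allowed to call bufferCompleted().");
}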
paddle/fluid/platform/dynload/cudnn.cc

@@ -58,9 +58,11 @@ bool HasCUDNN() {
 }

 void EnforceCUDNNLoaded(const char* fn_name) {
-  PADDLE_ENFORCE(cudnn_dso_handle != nullptr,
-                 "Cannot load cudnn shared library. Cannot invoke method %s",
-                 fn_name);
+  PADDLE_ENFORCE_NOT_NULL(
+      cudnn_dso_handle,
+      platform::errors::PreconditionNotMet(
+          "Cannot load cudnn shared library. Cannot invoke method %s.",
+          fn_name));
 }
 #else
 bool HasCUDNN() { return true; }
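Besides the typed error, this hunk swaps the boolean form PADDLE_ENFORCE(handle != nullptr, ...) for the dedicated PADDLE_ENFORCE_NOT_NULL. The guard itself is simple: before dispatching a lazily loaded symbol, verify the shared-library handle exists and name the function the caller wanted. A sketch under that assumption:

#include <stdexcept>
#include <string>

// In the real file this handle is filled in by dlopen() on first use.
void* cudnn_dso_handle = nullptr;

void EnforceLoaded(const char* fn_name) {
  if (cudnn_dso_handle == nullptr)
    throw std::runtime_error(
        std::string("Cannot load cudnn shared library. Cannot invoke method ") +
        fn_name + ".");
}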
paddle/fluid/platform/stream_callback_manager.cc

@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/fluid/platform/stream_callback_manager.h"
+#include <utility>
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {

@@ -43,14 +44,16 @@ void StreamCallbackManager::AddCallback(std::function<void()> callback) const {
     });
   });
 #if CUDA_VERSION >= 10000
-  PADDLE_ENFORCE(cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaLaunchHostFunc(stream_, StreamCallbackFunc, func));
 #else
-  PADDLE_ENFORCE(cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaStreamAddCallback(stream_, StreamCallbackFunc, func, 0));
 #endif
 }

 void StreamCallbackManager::Wait() const {
-  PADDLE_ENFORCE(cudaStreamSynchronize(stream_));
+  PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamSynchronize(stream_));
   {
     std::lock_guard<std::mutex> lock(mtx_);
     if (last_future_.valid()) {
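Here the generic PADDLE_ENFORCE gives way to PADDLE_ENFORCE_CUDA_SUCCESS, which is specific to CUDA runtime calls: cudaError_t is an enum whose success value cudaSuccess is 0, so the status should be decoded with cudaGetErrorString rather than treated as a plain boolean. A minimal sketch in that spirit; CUDA_CHECK is an illustrative name, and this compiles with nvcc:

#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

// Evaluate a CUDA runtime call once and throw a decoded error on failure.
#define CUDA_CHECK(expr)                                      \
  do {                                                        \
    cudaError_t status = (expr);                              \
    if (status != cudaSuccess) {                              \
      throw std::runtime_error(std::string("CUDA error: ") +  \
                               cudaGetErrorString(status));   \
    }                                                         \
  } while (0)

void Wait(cudaStream_t stream) {
  CUDA_CHECK(cudaStreamSynchronize(stream));  // mirrors Wait() in this file
}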
paddle/fluid/platform/transform.h

@@ -83,7 +83,9 @@ struct Transform<platform::CUDADeviceContext> {
   void operator()(const platform::CUDADeviceContext& context, InputIter first,
                   InputIter last, OutputIter result, UnaryOperation op) {
     auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
     thrust::transform(thrust::cuda::par.on(context.stream()),
                       details::CastToCUDATransformIterator(first),
                       details::CastToCUDATransformIterator(last),

@@ -96,7 +98,9 @@ struct Transform<platform::CUDADeviceContext> {
                   InputIter1 last1, InputIter2 first2, OutputIter result,
                   BinaryOperation op) {
     auto place = context.GetPlace();
-    PADDLE_ENFORCE(is_gpu_place(place), "It must use GPU place.");
+    PADDLE_ENFORCE_EQ(is_gpu_place(place), true,
+                      platform::errors::PreconditionNotMet(
+                          "The CUDA Transform must be used in GPU place."));
     thrust::transform(thrust::cuda::par.on(context.stream()),
                       details::CastToCUDATransformIterator(first1),
                       details::CastToCUDATransformIterator(last1),
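Note the pattern: PADDLE_ENFORCE_EQ(pred, true, ...) replaces the boolean PADDLE_ENFORCE(pred, ...) form this commit is retiring. The enforce change aside, both operators hand the actual work to thrust::transform pinned to the context's CUDA stream via thrust::cuda::par.on(). A small self-contained sketch of that dispatch; compile with nvcc, and the doubling functor is illustrative:

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/transform.h>

struct Doubler {
  __host__ __device__ float operator()(float x) const { return 2.0f * x; }
};

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  thrust::device_vector<float> in(4, 1.5f), out(4);
  // All work is enqueued on `stream`, matching the pattern in transform.h.
  thrust::transform(thrust::cuda::par.on(stream), in.begin(), in.end(),
                    out.begin(), Doubler{});
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
  return 0;  // out now holds {3, 3, 3, 3}
}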