Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
a710ccc0
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a710ccc0
编写于
11月 06, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
11月 06, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine error message of allocator again, test=develop (#21023)
上级
d89ca2ff
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
41 addition
and
27 deletion
+41
-27
paddle/fluid/memory/allocation/cuda_allocator.cc
paddle/fluid/memory/allocation/cuda_allocator.cc
+20
-7
paddle/fluid/memory/allocation/retry_allocator_test.cc
paddle/fluid/memory/allocation/retry_allocator_test.cc
+1
-3
paddle/fluid/memory/detail/system_allocator.cc
paddle/fluid/memory/detail/system_allocator.cc
+2
-17
paddle/fluid/platform/gpu_info.cc
paddle/fluid/platform/gpu_info.cc
+15
-0
paddle/fluid/platform/gpu_info.h
paddle/fluid/platform/gpu_info.h
+3
-0
未找到文件。
paddle/fluid/memory/allocation/cuda_allocator.cc
浏览文件 @
a710ccc0
...
...
@@ -35,14 +35,27 @@ void CUDAAllocator::FreeImpl(Allocation* allocation) {
// Allocates `size` bytes with cudaMalloc and wraps the pointer in a new
// Allocation tagged with place_.
//
// cudaMalloc is called exactly once. A second call would overwrite `ptr` and
// leak the first device allocation whenever the first call had succeeded.
//
// On failure:
//   * platform::RaiseNonOutOfMemoryError raises for any CUDA error other than
//     out-of-memory (both the returned code and the runtime's last error).
//   * Otherwise the currently available memory is queried so the message can
//     report it, and PADDLE_THROW_BAD_ALLOC raises the out-of-memory error.
Allocation* CUDAAllocator::AllocateImpl(size_t size) {
  // NOTE(review): presumably makes place_.device the current device for the
  // lifetime of `guard` -- confirm against CUDADeviceGuard.
  platform::CUDADeviceGuard guard(place_.device);
  void* ptr;
  auto result = cudaMalloc(&ptr, size);
  if (LIKELY(result == cudaSuccess)) {
    return new Allocation(ptr, size, platform::Place(place_));
  }

  // Anything that is not an out-of-memory condition is reported as-is.
  platform::RaiseNonOutOfMemoryError(&result);

  size_t avail = 0, total = 0;
  result = cudaMemGetInfo(&avail, &total);
  // If the query itself fails, report 0 rather than an unspecified value.
  if (result != cudaSuccess) avail = 0;
  platform::RaiseNonOutOfMemoryError(&result);

  PADDLE_THROW_BAD_ALLOC(
      "\n\nOut of memory error on GPU %d. "
      "Cannot allocate %s memory on GPU %d, "
      "available memory is only %s.\n\n"
      "Please check whether there is any other process using GPU %d.\n"
      "1. If yes, please stop them, or start PaddlePaddle on another GPU.\n"
      "2. If no, please decrease the batch size of your model.\n",
      place_.device, string::HumanReadableSize(size), place_.device,
      string::HumanReadableSize(avail), place_.device);
}
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/retry_allocator_test.cc
浏览文件 @
a710ccc0
...
...
@@ -133,9 +133,7 @@ TEST(RetryAllocator, RetryAllocatorLastAllocFailure) {
ASSERT_TRUE
(
false
);
allocation
.
reset
();
}
catch
(
BadAlloc
&
ex
)
{
ASSERT_TRUE
(
std
::
string
(
ex
.
what
()).
find
(
"Cannot allocate "
+
std
::
to_string
(
allocate_size
)
+
" on GPU "
+
std
::
to_string
(
p
.
device
))
!=
ASSERT_TRUE
(
std
::
string
(
ex
.
what
()).
find
(
"Cannot allocate"
)
!=
std
::
string
::
npos
);
}
}
...
...
paddle/fluid/memory/detail/system_allocator.cc
浏览文件 @
a710ccc0
...
...
@@ -105,21 +105,6 @@ bool CPUAllocator::UseGpu() const { return false; }
#ifdef PADDLE_WITH_CUDA
// Treats cudaErrorMemoryAllocation as "no error" and raises on anything else.
//
// Two codes are vetted in turn: the one passed in through `status`, and then
// the one returned by cudaGetLastError(), which is written back to `status`.
// When both checks pass, *status is cudaSuccess on return.
static void ClearCUDAOutOfMemoryError(cudaError_t* status) {
  // Stage 1: the code supplied by the caller.
  *status = (*status == cudaErrorMemoryAllocation) ? cudaSuccess : *status;
  PADDLE_ENFORCE_CUDA_SUCCESS(*status);

  // Stage 2: the error reported by cudaGetLastError().
  const cudaError_t last_error = cudaGetLastError();
  *status =
      (last_error == cudaErrorMemoryAllocation) ? cudaSuccess : last_error;
  PADDLE_ENFORCE_CUDA_SUCCESS(*status);
}
void
*
GPUAllocator
::
Alloc
(
size_t
*
index
,
size_t
size
)
{
// CUDA documentation doesn't explain if cudaMalloc returns nullptr
// if size is 0. We just make sure it does.
...
...
@@ -135,7 +120,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
gpu_alloc_size_
+=
size
;
return
p
;
}
else
{
ClearCUDA
OutOfMemoryError
(
&
result
);
platform
::
RaiseNon
OutOfMemoryError
(
&
result
);
/**
* NOTE(zjl): Sometimes cudaMemGetInfo would raise OOM error
...
...
@@ -147,7 +132,7 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
size_t
avail
=
0
,
total
=
0
;
result
=
cudaMemGetInfo
(
&
avail
,
&
total
);
if
(
result
!=
cudaSuccess
)
avail
=
0
;
ClearCUDA
OutOfMemoryError
(
&
result
);
platform
::
RaiseNon
OutOfMemoryError
(
&
result
);
PADDLE_THROW_BAD_ALLOC
(
"
\n\n
Out of memory error on GPU %d. "
...
...
paddle/fluid/platform/gpu_info.cc
浏览文件 @
a710ccc0
...
...
@@ -302,5 +302,20 @@ void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
"error code : %d, %s"
,
error_code
,
CudaErrorWebsite
());
}
// Raises (through PADDLE_ENFORCE_CUDA_SUCCESS) when a CUDA error other than
// out-of-memory is present; an out-of-memory code is reset to cudaSuccess.
// The check runs first on the incoming *status, then again after *status has
// been overwritten with cudaGetLastError().
void RaiseNonOutOfMemoryError(cudaError_t *status) {
  // Shared step: forgive out-of-memory, enforce success for everything else.
  const auto forgive_oom_then_enforce = [status]() {
    if (*status == cudaErrorMemoryAllocation) {
      *status = cudaSuccess;
    }
    PADDLE_ENFORCE_CUDA_SUCCESS(*status);
  };

  forgive_oom_then_enforce();

  *status = cudaGetLastError();
  forgive_oom_then_enforce();
}
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/gpu_info.h
浏览文件 @
a710ccc0
...
...
@@ -95,6 +95,9 @@ void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
//! Asynchronously set the memory at dst to value.
//! NOTE(review): count is presumably the number of bytes to set and stream
//! the CUDA stream the operation is issued on -- confirm against the
//! definition in gpu_info.cc.
void
GpuMemsetAsync
(
void
*
dst
,
int
value
,
size_t
count
,
cudaStream_t
stream
);
//! Raise error if status is not cudaSuccess or OOM, otherwise reset status.
//! The code in *status is checked first; *status is then overwritten with
//! the result of cudaGetLastError() and checked the same way. In each check
//! cudaErrorMemoryAllocation is replaced by cudaSuccess before the code is
//! passed to PADDLE_ENFORCE_CUDA_SUCCESS.
void
RaiseNonOutOfMemoryError
(
cudaError_t
*
status
);
}
// namespace platform
}
// namespace paddle
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录