Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a900015c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a900015c
编写于
1月 12, 2019
作者:
D
Dun Liang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add async copy and pinned place
上级
adc96e06
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
61 additions
and
4 deletions
+61
-4
paddle/fluid/operators/reader/buffered_reader.cc
paddle/fluid/operators/reader/buffered_reader.cc
+35
-1
paddle/fluid/operators/reader/buffered_reader.h
paddle/fluid/operators/reader/buffered_reader.h
+6
-0
python/paddle/fluid/layers/io.py
python/paddle/fluid/layers/io.py
+20
-3
未找到文件。
paddle/fluid/operators/reader/buffered_reader.cc
浏览文件 @
a900015c
...
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/operators/reader/buffered_reader.h"
#include <vector>
#include "paddle/fluid/framework/data_type.h"
namespace
paddle
{
namespace
operators
{
...
...
@@ -24,6 +25,12 @@ BufferedReader::~BufferedReader() {
position_
.
front
().
wait
();
position_
.
pop
();
}
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
);
PADDLE_ENFORCE
(
cudaStreamDestroy
(
stream
));
}
#endif
}
BufferedReader
::
BufferedReader
(
...
...
@@ -33,6 +40,12 @@ BufferedReader::BufferedReader(
thread_pool_
(
1
),
place_
(
place
),
buffer_size_
(
buffer_size
)
{
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
);
PADDLE_ENFORCE
(
cudaStreamCreate
(
&
stream
));
}
#endif
cpu_buffer_
.
resize
(
buffer_size
);
gpu_buffer_
.
resize
(
buffer_size
);
ReadTillBufferFullAsync
();
...
...
@@ -54,14 +67,35 @@ void BufferedReader::ReadAsync(size_t i) {
return
-
1UL
;
}
#ifdef PADDLE_WITH_CUDA
// NOTE(liangdun): using async copy instead of TensorCopySync
// TensorCopySync would block other stream
if
(
platform
::
is_gpu_place
(
place_
))
{
TensorVec
&
gpu
=
gpu_buffer_
[
i
];
gpu
.
resize
(
cpu
.
size
());
for
(
size_t
i
=
0
;
i
<
cpu
.
size
();
++
i
)
{
framework
::
TensorCopySync
(
cpu
[
i
],
place_
,
&
gpu
[
i
]);
gpu
[
i
].
Resize
(
cpu
[
i
].
dims
());
gpu
[
i
].
set_layout
(
cpu
[
i
].
layout
());
auto
cpu_place
=
cpu
[
i
].
place
();
auto
cpu_ptr
=
cpu
[
i
].
data
<
void
>
();
auto
gpu_ptr
=
gpu
[
i
].
mutable_data
(
place_
,
cpu
[
i
].
type
());
auto
size
=
cpu
[
i
].
numel
()
*
paddle
::
framework
::
SizeOfType
(
cpu
[
i
].
type
());
if
(
platform
::
is_cuda_pinned_place
(
cpu_place
))
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
gpu_ptr
,
boost
::
get
<
platform
::
CUDAPinnedPlace
>
(
cpu_place
),
cpu_ptr
,
size
,
stream
);
else
// if cpu place is not pinned, async copy is slower than sync copy,
// so we use sync copy instead.
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
gpu_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
cpu_place
),
cpu_ptr
,
size
,
0
);
gpu
[
i
].
set_lod
(
cpu
[
i
].
lod
());
}
PADDLE_ENFORCE
(
cudaStreamSynchronize
(
stream
));
}
#endif
return
i
;
}));
}
...
...
paddle/fluid/operators/reader/buffered_reader.h
浏览文件 @
a900015c
...
...
@@ -19,6 +19,9 @@
#include <vector>
#include "ThreadPool.h"
#include "paddle/fluid/framework/reader.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/gpu_info.h"
#endif
namespace
paddle
{
namespace
operators
{
...
...
@@ -59,6 +62,9 @@ class BufferedReader : public framework::DecoratedReader {
std
::
vector
<
TensorVec
>
cpu_buffer_
;
std
::
vector
<
TensorVec
>
gpu_buffer_
;
size_t
prev_pos_
{
-
1UL
};
#ifdef PADDLE_WITH_CUDA
cudaStream_t
stream
;
#endif
};
}
// namespace reader
...
...
python/paddle/fluid/layers/io.py
浏览文件 @
a900015c
...
...
@@ -483,6 +483,7 @@ def _py_reader(capacity,
lod_levels
=
None
,
name
=
None
,
use_double_buffer
=
True
,
use_cuda_pinned_place
=
False
,
feed_list
=
None
):
if
feed_list
is
not
None
:
...
...
@@ -565,7 +566,10 @@ def _py_reader(capacity,
for
item
in
tensors
:
if
not
isinstance
(
item
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
item
,
core
.
CPUPlace
())
if
use_cuda_pinned_place
:
tmp
.
set
(
item
,
core
.
CUDAPinnedPlace
())
else
:
tmp
.
set
(
item
,
core
.
CPUPlace
())
item
=
tmp
array
.
append
(
item
)
...
...
@@ -635,7 +639,8 @@ def py_reader(capacity,
dtypes
,
lod_levels
=
None
,
name
=
None
,
use_double_buffer
=
True
):
use_double_buffer
=
True
,
use_cuda_pinned_place
=
None
):
"""
Create a Python reader for data feeding in Python
...
...
@@ -659,6 +664,9 @@ def py_reader(capacity,
name(basestring): The prefix Python queue name and Reader name. None will
be generated automatically.
use_double_buffer(bool): Whether to use double buffer or not.
use_cuda_pinned_place(bool): Whether to use the CUDA pinned place or not;
this option only works when double buffer is used and CUDA is enabled.
If None, it is enabled automatically when both double buffer and CUDA are enabled.
Returns:
Variable: A Reader from which we can get feeding data.
...
...
@@ -754,13 +762,22 @@ def py_reader(capacity,
>>> except fluid.core.EOFException:
>>> test_reader.reset()
"""
if
use_double_buffer
and
core
.
is_compiled_with_cuda
():
if
use_cuda_pinned_place
==
None
:
use_cuda_pinned_place
=
True
else
:
if
use_cuda_pinned_place
:
raise
RuntimeError
(
"use_cuda_pinned_place can only be used with double buffer and cuda enabled."
)
return
_py_reader
(
capacity
=
capacity
,
shapes
=
shapes
,
dtypes
=
dtypes
,
lod_levels
=
lod_levels
,
name
=
name
,
use_double_buffer
=
use_double_buffer
)
use_double_buffer
=
use_double_buffer
,
use_cuda_pinned_place
=
use_cuda_pinned_place
)
def
create_py_reader_by_data
(
capacity
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录