Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
08773b60
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
08773b60
编写于
4月 29, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
4月 29, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix reader default stream,test=develop (#17106)
上级
aa5307ce
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
24 additions
and
16 deletions
+24
-16
paddle/fluid/memory/allocation/retry_allocator_test.cc
paddle/fluid/memory/allocation/retry_allocator_test.cc
+1
-1
paddle/fluid/operators/reader/buffered_reader.cc
paddle/fluid/operators/reader/buffered_reader.cc
+23
-15
未找到文件。
paddle/fluid/memory/allocation/retry_allocator_test.cc
浏览文件 @
08773b60
...
...
@@ -39,7 +39,7 @@ TEST(RetryAllocator, RetryAllocator) {
std
::
unique_ptr
<
LockedAllocator
>
locked_allocator
(
new
LockedAllocator
(
std
::
move
(
best_fit_allocator
)));
size_t
thread_num
=
32
;
size_t
thread_num
=
8
;
size_t
sleep_time
=
40
;
size_t
extra_time
=
10
;
...
...
paddle/fluid/operators/reader/buffered_reader.cc
浏览文件 @
08773b60
...
...
@@ -74,12 +74,6 @@ void BufferedReader::ReadTillBufferFullAsync() {
}
void
BufferedReader
::
ReadAsync
(
size_t
i
)
{
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
);
PADDLE_ENFORCE
(
cudaEventRecord
(
events_
[
i
],
compute_stream_
));
}
#endif
position_
.
emplace
(
thread_pool_
.
enqueue
([
this
,
i
]()
->
size_t
{
TensorVec
&
cpu
=
cpu_buffer_
[
i
];
reader_
->
ReadNext
(
&
cpu
);
...
...
@@ -94,17 +88,34 @@ void BufferedReader::ReadAsync(size_t i) {
// issues the copying command to the default stream, it will make two
// commands from different streams cannot run concurrently.
if
(
platform
::
is_gpu_place
(
place_
))
{
platform
::
SetDeviceId
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
);
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
stream_
,
events_
[
i
],
0
));
TensorVec
&
gpu
=
gpu_buffer_
[
i
];
gpu
.
resize
(
cpu
.
size
());
platform
::
RecordEvent
record_event
(
"BufferedReader:MemoryCopy"
);
if
(
gpu
.
empty
())
{
gpu
.
resize
(
cpu
.
size
());
}
else
{
PADDLE_ENFORCE_EQ
(
gpu
.
size
(),
cpu
.
size
(),
"Input tensor number not matched"
);
}
std
::
vector
<
void
*>
gpu_ptrs
;
gpu_ptrs
.
reserve
(
cpu
.
size
());
for
(
size_t
i
=
0
;
i
<
cpu
.
size
();
++
i
)
{
gpu
[
i
].
Resize
(
cpu
[
i
].
dims
());
gpu
[
i
].
set_layout
(
cpu
[
i
].
layout
());
gpu_ptrs
.
emplace_back
(
gpu
[
i
].
mutable_data
(
place_
,
cpu
[
i
].
type
()));
}
// NOTE(zjl): cudaStreamWaitEvent() must be called after all
// gpu[i].mutable_data() is called, since some ops release
// gpu memory immediately without waiting gpu kernel ends
platform
::
SetDeviceId
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
);
PADDLE_ENFORCE
(
cudaEventRecord
(
events_
[
i
],
compute_stream_
));
PADDLE_ENFORCE
(
cudaStreamWaitEvent
(
stream_
,
events_
[
i
],
0
));
platform
::
RecordEvent
record_event
(
"BufferedReader:MemoryCopy"
);
for
(
size_t
i
=
0
;
i
<
cpu
.
size
();
++
i
)
{
auto
cpu_place
=
cpu
[
i
].
place
();
auto
cpu_ptr
=
cpu
[
i
].
data
<
void
>
();
auto
gpu_ptr
=
gpu
[
i
].
mutable_data
(
place_
,
cpu
[
i
].
type
())
;
auto
gpu_ptr
=
gpu
_ptrs
[
i
]
;
auto
size
=
cpu
[
i
].
numel
()
*
paddle
::
framework
::
SizeOfType
(
cpu
[
i
].
type
());
if
(
platform
::
is_cuda_pinned_place
(
cpu_place
))
{
...
...
@@ -116,12 +127,9 @@ void BufferedReader::ReadAsync(size_t i) {
boost
::
get
<
platform
::
CUDAPlace
>
(
cpu_place
),
cpu_ptr
,
size
,
stream_
);
}
else
{
// if cpu place is not pinned, async copy is slower than sync copy,
// so we use sync copy instead.
// TODO(zcd): The default stream should not be used here.
memory
::
Copy
(
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
),
gpu_ptr
,
boost
::
get
<
platform
::
CPUPlace
>
(
cpu_place
),
cpu_ptr
,
size
,
0
);
stream_
);
}
gpu
[
i
].
set_lod
(
cpu
[
i
].
lod
());
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录