Commit 16146088 (unverified)
Authored Aug 17, 2021 by Kaipeng Deng; committed via GitHub on Aug 17, 2021.

fix drop_last not work on IterableDataset (#34801)

* fix drop_last not work in IterableDataset. test=develop

Parent: 181f7cec
Showing 5 changed files with 36 additions and 8 deletions (+36 −8).
python/paddle/fluid/dataloader/dataloader_iter.py  (+4 −3)
python/paddle/fluid/dataloader/worker.py  (+4 −3)
python/paddle/fluid/reader.py  (+1 −0)
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py  (+25 −0)
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py  (+2 −2)
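Based on the removed lines in the diffs below, the fetcher for an IterableDataset was previously created with drop_last hard-coded to True, so setting drop_last=False on the DataLoader had no effect; this commit forwards the user's setting instead. A minimal usage sketch of the intended behavior (the dataset class, sample counts, and prints are illustrative assumptions, not part of this commit):

import numpy as np
from paddle.io import DataLoader, IterableDataset

class RangeIterableDataset(IterableDataset):
    # Hypothetical example dataset, not from the Paddle test suite.
    def __init__(self, num_samples):
        super().__init__()
        self.num_samples = num_samples

    def __iter__(self):
        for i in range(self.num_samples):
            yield np.array([i], dtype='float32')

dataset = RangeIterableDataset(10)
for drop_last in [False, True]:
    loader = DataLoader(dataset, batch_size=3, drop_last=drop_last,
                        num_workers=0)
    # Expected after this fix: 4 batches with drop_last=False, 3 with True.
    print(drop_last, sum(1 for _ in loader))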
python/paddle/fluid/dataloader/dataloader_iter.py

@@ -59,6 +59,7 @@ class _DataLoaderIterBase(object):
         self._places = loader.places
         self._return_list = loader.return_list
         self._batch_sampler = loader.batch_sampler
+        self._drop_last = loader.drop_last
         self._auto_collate_batch = loader.auto_collate_batch
         self._num_workers = loader.num_workers
         self._use_buffer_reader = loader.use_buffer_reader

@@ -111,7 +112,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
         self._dataset_fetcher = _DatasetKind.create_fetcher(
             self._dataset_kind, self._dataset, self._auto_collate_batch,
-            self._collate_fn, True)
+            self._collate_fn, self._drop_last)

         # NOTE: _structrue_infos used to record the data structure of
         # batch to restore batch structure after reading Tensor

@@ -309,8 +310,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
                 args=(self._dataset, self._dataset_kind, indices_queue,
                       self._data_queue, self._workers_done_event,
                       self._auto_collate_batch, self._collate_fn,
-                      self._worker_init_fn, i, self._num_workers,
-                      self._use_shared_memory))
+                      self._drop_last, self._worker_init_fn, i,
+                      self._num_workers, self._use_shared_memory))
             worker.daemon = True
             worker.start()
             self._workers.append(worker)
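The drop_last flag threaded through create_fetcher above is what an iterable-dataset fetcher consults when the sample stream ends mid-batch. A minimal sketch of that decision in plain Python (assumed names and structure, not the actual _IterableDatasetFetcher implementation):

def fetch_batches(samples, batch_size, drop_last):
    # Collect samples into fixed-size batches; keep or drop the final
    # partial batch depending on drop_last.
    batch = []
    for sample in samples:
        batch.append(sample)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch and not drop_last:
        yield batch

# 10 samples, batch_size 3: 4 batches when keeping the tail, 3 when dropping it.
assert len(list(fetch_batches(range(10), 3, drop_last=False))) == 4
assert len(list(fetch_batches(range(10), 3, drop_last=True))) == 3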
python/paddle/fluid/dataloader/worker.py

@@ -253,7 +253,7 @@ def _generate_states(base_seed=0, worker_id=0):

 def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
-                 auto_collate_batch, collate_fn, init_fn, worker_id,
+                 auto_collate_batch, collate_fn, drop_last, init_fn, worker_id,
                  num_workers, use_shared_memory):
     try:
         # NOTE: [ mmap files clear ] When the child process exits unexpectedly,

@@ -282,8 +282,9 @@ def _worker_loop(dataset, dataset_kind, indices_queue, out_queue, done_event,
         try:
             if init_fn is not None:
                 init_fn(worker_id)
-            fetcher = _DatasetKind.create_fetcher(dataset_kind, dataset,
-                                                  auto_collate_batch, collate_fn, True)
+            fetcher = _DatasetKind.create_fetcher(
+                dataset_kind, dataset, auto_collate_batch, collate_fn,
+                drop_last)
         except:
             init_exception = _WorkerException(worker_id)
python/paddle/fluid/reader.py

@@ -401,6 +401,7 @@ class DataLoader(object):
                 shuffle=shuffle,
                 drop_last=drop_last)

+        self.drop_last = drop_last
         self.auto_collate_batch = self.batch_sampler is not None

         self.pin_memory = False
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_dataset.py

@@ -397,5 +397,30 @@ class TestDataLoaderGenerateStates(unittest.TestCase):
         assert out == outp

+
+class TestDatasetWithDropLast(unittest.TestCase):
+    def run_main(self, dataset, num_samples, batch_size):
+        for num_workers in [0, 1]:
+            for drop_last in [True, False]:
+                steps = (num_samples + (1 - int(drop_last)) * \
+                            (batch_size - 1)) // batch_size
+                dataloader = DataLoader(
+                    dataset,
+                    batch_size=batch_size,
+                    drop_last=drop_last,
+                    num_workers=num_workers)
+                datas = []
+                for data in dataloader:
+                    datas.append(data)
+                assert len(datas) == steps
+
+    def test_map_dataset(self):
+        dataset = RandomDataset(10)
+        self.run_main(dataset, 10, 3)
+
+    def test_iterable_dataset(self):
+        dataset = RandomIterableDataset(10)
+        self.run_main(dataset, 10, 3)
+
+
 if __name__ == '__main__':
     unittest.main()
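The steps formula in run_main can be sanity-checked by hand: with num_samples=10 and batch_size=3, drop_last=True gives (10 + 0) // 3 = 3 steps, while drop_last=False gives (10 + 2) // 3 = 4. A standalone check of that arithmetic (not part of the test file):

# Verify the step-count formula used by TestDatasetWithDropLast.
num_samples, batch_size = 10, 3
for drop_last in (True, False):
    steps = (num_samples + (1 - int(drop_last)) * (batch_size - 1)) // batch_size
    assert steps == (3 if drop_last else 4)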
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py

@@ -180,7 +180,7 @@ class TestDataLoaderWorkerLoop(unittest.TestCase):
             indices_queue.put(None)
             _worker_loop(loader._dataset, 0, indices_queue,
                          loader._data_queue, loader._workers_done_event,
-                         True, _collate_fn, _init_fn, 0, 1,
+                         True, _collate_fn, True, _init_fn, 0, 1,
                          loader._use_shared_memory)
             self.assertTrue(False)
         except AssertionError:

@@ -224,7 +224,7 @@ class TestDataLoaderWorkerLoop(unittest.TestCase):
             loader._workers_done_event.set()
             _worker_loop(loader._dataset, 0, indices_queue,
                          loader._data_queue, loader._workers_done_event,
-                         True, _collate_fn, _init_fn, 0, 1,
+                         True, _collate_fn, True, _init_fn, 0, 1,
                          loader._use_shared_memory)
             self.assertTrue(True)
         except AssertionError: