Commit 5c84eac8 (unverified)
Authored on Jul 07, 2020 by Kaipeng Deng; committed via GitHub on Jul 07, 2020.
[cherry-pick] make default_collate_fn visible (#25324)
* make default_collate_fn visible. test=develop. test=release/1.8
Parent: 914fd819
Showing 4 changed files with 228 additions and 7 deletions (+228 −7)
python/paddle/fluid/dataloader/dataloader_iter.py                               +22  −2
python/paddle/fluid/reader.py                                                    +2  −2
python/paddle/fluid/tests/unittests/CMakeLists.txt                               +5  −3
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py  +199  −0
python/paddle/fluid/dataloader/dataloader_iter.py

@@ -38,7 +38,27 @@ from ..multiprocess_utils import CleanupFuncRegistrar, _cleanup_mmap, _set_SIGCH
 MP_INDICES_CHECK_INTERVAL = 5
 
-def _default_collate_fn(batch):
+def default_collate_fn(batch):
+    """
+    Default batch collating function for :code:`fluid.io.DataLoader`,
+    batch should be a list of samples, and each sample should be a list
+    of fields as follows:
+
+    [[field1, field2, ...], [field1, field2, ...], ...]
+
+    This default collate function zipped each field together and stack
+    each field as the batch field as follows:
+
+    [batch_field1, batch_field2, ...]
+
+    Args:
+        batch(list of list of numpy array): the batch data, each fields
+            should be a numpy array, each sample should be a list of
+            fields, and batch should be a list of sample.
+
+    Returns:
+        a list of numpy array: collated batch
+    """
     sample = batch[0]
     # dataset has only 1 field
     if isinstance(sample, np.ndarray):

@@ -82,7 +102,7 @@ class _DataLoaderIterBase(object):
         self._return_list = loader.return_list
         self._batch_sampler = loader.batch_sampler
         self._sampler_iter = iter(loader.batch_sampler)
-        self._collate_fn = loader.collate_fn or _default_collate_fn
+        self._collate_fn = loader.collate_fn or default_collate_fn
         self._num_workers = loader.num_workers
         self._use_buffer_reader = loader.use_buffer_reader
         self._use_shared_memory = loader.use_shared_memory
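To make the collate behavior concrete, here is a minimal standalone sketch of what the function does for the multi-field case described in the docstring. This is an illustrative re-implementation (the real body, including the Tensor handling, lives in dataloader_iter.py), and the helper name collate_like_default is invented for this example:

    import numpy as np

    def collate_like_default(batch):
        # Illustrative stand-in for default_collate_fn, not the exact body.
        sample = batch[0]
        if isinstance(sample, np.ndarray):
            # dataset has only 1 field: stack the samples directly
            return np.stack(batch, axis=0)
        # zip each field together across samples, then stack per field
        return [np.stack(field, axis=0) for field in zip(*batch)]

    samples = [(np.ones([784], dtype='float32'), np.array([3], dtype='int64')),
               (np.zeros([784], dtype='float32'), np.array([7], dtype='int64'))]
    image_batch, label_batch = collate_like_default(samples)
    print(image_batch.shape, label_batch.shape)  # (2, 784) (2, 1)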
python/paddle/fluid/reader.py

@@ -23,7 +23,7 @@ from .executor import global_scope
 from .data_feeder import DataFeeder, BatchedTensorProvider
 from .multiprocess_utils import multiprocess_queue_set, CleanupFuncRegistrar, _cleanup_mmap, _cleanup, _set_SIGCHLD_handler
 from .dataloader import BatchSampler, Dataset
-from .dataloader.dataloader_iter import _DataLoaderIterSingleProcess, _DataLoaderIterMultiProcess
+from .dataloader.dataloader_iter import _DataLoaderIterSingleProcess, _DataLoaderIterMultiProcess, default_collate_fn
 from .layers.io import monkey_patch_reader_methods, _copy_reader_var_, double_buffer
 from .unique_name import UniqueNameGenerator
 import logging

@@ -43,7 +43,7 @@ else:
 # NOTE: [ avoid hanging & failed quickly ] These value is used in getting data from another process
 QUEUE_GET_TIMEOUT = 60
 
-__all__ = ['PyReader', 'DataLoader']
+__all__ = ['PyReader', 'DataLoader', 'default_collate_fn']
 
 data_loader_unique_name_generator = UniqueNameGenerator()
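The practical effect of this re-export is that user code can import the function from the public namespace and compose it in a custom collate_fn. A sketch, assuming a Paddle 1.8 build where fluid.io re-exports the names in reader.__all__; the wrapper name collate_with_logging is invented for illustration:

    import paddle.fluid as fluid
    from paddle.fluid.io import DataLoader, default_collate_fn  # public after this change

    def collate_with_logging(sample_list):
        # Delegate collation to the default, then inspect the result.
        batch = default_collate_fn(sample_list)
        print("collated", len(batch), "fields")
        return batch

    # loader = DataLoader(dataset, places=fluid.cpu_places(),
    #                     collate_fn=collate_with_logging)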
python/paddle/fluid/tests/unittests/CMakeLists.txt

@@ -210,6 +210,7 @@ if (APPLE OR WIN32)
     list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_fds_clear)
     list(REMOVE_ITEM TEST_OPS test_imperative_data_loader_exit_func)
     list(REMOVE_ITEM TEST_OPS test_imperative_signal_handler)
+    list(REMOVE_ITEM TEST_OPS test_multiprocess_dataloader_exception)
 endif()
 if(NOT WITH_GPU OR WIN32 OR APPLE)

@@ -378,7 +379,8 @@ set_tests_properties(test_parallel_executor_crf test_sync_batch_norm_op test_inp
                      PROPERTIES LABELS "RUN_TYPE=DIST" RUN_SERIAL TRUE)
 if(NOT WIN32 AND NOT APPLE)
-    set_tests_properties(test_imperative_data_loader_base PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" RUN_SERIAL TRUE)
-    set_tests_properties(test_imperative_data_loader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" RUN_SERIAL TRUE)
-    set_tests_properties(test_imperative_data_loader_fds_clear PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" RUN_SERIAL TRUE)
+    set_tests_properties(test_imperative_data_loader_base PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+    set_tests_properties(test_imperative_data_loader_fds_clear PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+    set_tests_properties(test_imperative_data_loader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
+    set_tests_properties(test_multiprocess_dataloader_exception PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE")
 endif()
python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_exception.py (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division

import os
import sys
import six
import time
import unittest
import multiprocessing
import numpy as np

import paddle.fluid as fluid
from paddle.fluid.io import Dataset, BatchSampler, DataLoader
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid.dygraph.base import to_variable


class RandomDataset(Dataset):
    def __init__(self, sample_num):
        self.sample_num = sample_num

    def __getitem__(self, idx):
        np.random.seed(idx)
        image = np.random.random([784]).astype('float32')
        label = np.random.randint(0, 9, (1, )).astype('int64')
        return image, label

    def __len__(self):
        return self.sample_num


class TestDataLoaderAssert(unittest.TestCase):
    def test_main(self):
        place = fluid.cpu_places()[0]
        with fluid.dygraph.guard(place):
            dataset = RandomDataset(100)
            batch_sampler = BatchSampler(dataset=dataset, batch_size=4)

            # dataset is not instance of Dataset
            try:
                loader = DataLoader(dataset=batch_sampler, places=place)
                self.assertTrue(False)
            except AssertionError:
                pass

            # places is None
            try:
                loader = DataLoader(dataset=dataset, places=None)
                self.assertTrue(False)
            except AssertionError:
                pass

            # num_workers < 0
            try:
                loader = DataLoader(
                    dataset=dataset, places=place, num_workers=-1)
                self.assertTrue(False)
            except AssertionError:
                pass

            # timeout < 0
            try:
                loader = DataLoader(dataset=dataset, places=place, timeout=-1)
                self.assertTrue(False)
            except AssertionError:
                pass

            # batch_sampler is not instance of BatchSampler
            try:
                loader = DataLoader(
                    dataset=dataset, places=place, batch_sampler=dataset)
                self.assertTrue(False)
            except AssertionError:
                pass

            # set batch_sampler and shuffle/batch_size/drop_last
            try:
                loader = DataLoader(
                    dataset=dataset,
                    places=place,
                    batch_sampler=batch_sampler,
                    shuffle=True,
                    drop_last=True)
                self.assertTrue(False)
            except AssertionError:
                pass

            # set batch_sampler correctly
            try:
                loader = DataLoader(
                    dataset=dataset, places=place, batch_sampler=batch_sampler)
                self.assertTrue(True)
            except AssertionError:
                self.assertTrue(False)


# CI Coverage cannot record stub in subprocess,
# HACK a _worker_loop in main process call here
class TestDataLoaderWorkerLoop(unittest.TestCase):
    def run_without_worker_done(self, use_shared_memory=True):
        try:
            place = fluid.cpu_places()[0]
            with fluid.dygraph.guard(place):
                dataset = RandomDataset(800)

                # test init_fn
                def _init_fn(worker_id):
                    pass

                # test collate_fn
                def _collate_fn(sample_list):
                    return [
                        np.stack(
                            s, axis=0) for s in list(zip(*sample_list))
                    ]

                loader = DataLoader(
                    dataset,
                    num_workers=1,
                    places=place,
                    use_shared_memory=use_shared_memory)
                assert loader.num_workers > 0, \
                    "go to AssertionError and pass in Mac and Windows"
                loader = iter(loader)
                print("loader length", len(loader))
                indices_queue = multiprocessing.Queue()
                for i in range(10):
                    indices_queue.put([i, i + 10])
                indices_queue.put(None)
                loader._worker_loop(loader._dataset, indices_queue,
                                    loader._data_queue,
                                    loader._workers_done_event, _collate_fn,
                                    _init_fn, 0)
                self.assertTrue(False)
        except AssertionError:
            pass
        except Exception:
            self.assertTrue(False)

    def run_with_worker_done(self, use_shared_memory=True):
        try:
            place = fluid.cpu_places()[0]
            with fluid.dygraph.guard(place):
                dataset = RandomDataset(800)

                # test init_fn
                def _init_fn(worker_id):
                    pass

                # test collate_fn
                def _collate_fn(sample_list):
                    return [
                        np.stack(
                            s, axis=0) for s in list(zip(*sample_list))
                    ]

                loader = DataLoader(
                    dataset,
                    num_workers=1,
                    places=place,
                    use_shared_memory=use_shared_memory)
                assert loader.num_workers > 0, \
                    "go to AssertionError and pass in Mac and Windows"
                loader = iter(loader)
                print("loader length", len(loader))
                indices_queue = multiprocessing.Queue()
                for i in range(10):
                    indices_queue.put([i, i + 10])
                indices_queue.put(None)
                loader._workers_done_event.set()
                loader._worker_loop(loader._dataset, indices_queue,
                                    loader._data_queue,
                                    loader._workers_done_event, _collate_fn,
                                    _init_fn, 0)
                self.assertTrue(True)
        except AssertionError:
            pass
        except Exception:
            self.assertTrue(False)

    def test_main(self):
        for use_shared_memory in [True, False]:
            self.run_without_worker_done(use_shared_memory)
            self.run_with_worker_done(use_shared_memory)


if __name__ == '__main__':
    unittest.main()
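A side note on the test style: the try/except plus assertTrue(False) pattern above can be written more tightly with assertRaises. A hypothetical rewrite of one of the checks, not part of the commit, reusing the RandomDataset, DataLoader, and fluid names from the file above:

    class TestDataLoaderAssertIdiomatic(unittest.TestCase):
        def test_negative_num_workers(self):
            place = fluid.cpu_places()[0]
            with fluid.dygraph.guard(place):
                dataset = RandomDataset(100)
                # assertRaises states the expected failure explicitly
                with self.assertRaises(AssertionError):
                    DataLoader(dataset=dataset, places=place, num_workers=-1)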