Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
s920243400
PaddleDetection
提交
3787cffa
P
PaddleDetection
项目概览
s920243400
/
PaddleDetection
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleDetection
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3787cffa
编写于
3月 26, 2021
作者:
K
Kaipeng Deng
提交者:
GitHub
3月 26, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update DataLoader & speed up Mask RCNN (#2435)
* update DataLoader & speed up Mask RCNN
上级
0e6468c7
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
27 additions
and
58 deletions
+27
-58
configs/mask_rcnn/_base_/mask_fpn_reader.yml
configs/mask_rcnn/_base_/mask_fpn_reader.yml
+1
-0
configs/mask_rcnn/_base_/mask_reader.yml
configs/mask_rcnn/_base_/mask_reader.yml
+1
-0
ppdet/data/reader.py
ppdet/data/reader.py
+25
-58
未找到文件。
configs/mask_rcnn/_base_/mask_fpn_reader.yml
浏览文件 @
3787cffa
...
...
@@ -12,6 +12,7 @@ TrainReader:
shuffle
:
true
drop_last
:
true
collate_batch
:
false
use_shared_memory
:
true
EvalReader
:
sample_transforms
:
...
...
configs/mask_rcnn/_base_/mask_reader.yml
浏览文件 @
3787cffa
...
...
@@ -12,6 +12,7 @@ TrainReader:
shuffle
:
true
drop_last
:
true
collate_batch
:
false
use_shared_memory
:
true
EvalReader
:
...
...
ppdet/data/reader.py
浏览文件 @
3787cffa
...
...
@@ -24,8 +24,8 @@ else:
import
Queue
import
numpy
as
np
from
paddle.io
import
DataLoader
from
paddle.
io
import
DistributedBatchSampler
from
paddle.io
import
DataLoader
,
DistributedBatchSampler
from
paddle.
fluid.dataloader.collate
import
default_collate_fn
from
ppdet.core.workspace
import
register
,
serializable
,
create
from
.
import
transform
...
...
@@ -44,11 +44,9 @@ class Compose(object):
for
t
in
self
.
transforms
:
for
k
,
v
in
t
.
items
():
op_cls
=
getattr
(
transform
,
k
)
f
=
op_cls
(
**
v
)
if
hasattr
(
f
,
'num_classes'
):
f
.
num_classes
=
num_classes
self
.
transforms_cls
.
append
(
f
)
self
.
transforms_cls
.
append
(
op_cls
(
**
v
))
if
hasattr
(
op_cls
,
'num_classes'
):
op_cls
.
num_classes
=
num_classes
def
__call__
(
self
,
data
):
for
f
in
self
.
transforms_cls
:
...
...
@@ -56,8 +54,9 @@ class Compose(object):
data
=
f
(
data
)
except
Exception
as
e
:
stack_info
=
traceback
.
format_exc
()
logger
.
warn
(
"fail to map op [{}] with error: {} and stack:
\n
{}"
.
format
(
f
,
e
,
str
(
stack_info
)))
logger
.
warn
(
"fail to map sample transform [{}] "
"with error: {} and stack:
\n
{}"
.
format
(
f
,
e
,
str
(
stack_info
)))
raise
e
return
data
...
...
@@ -66,8 +65,6 @@ class Compose(object):
class
BatchCompose
(
Compose
):
def
__init__
(
self
,
transforms
,
num_classes
=
80
,
collate_batch
=
True
):
super
(
BatchCompose
,
self
).
__init__
(
transforms
,
num_classes
)
self
.
output_fields
=
mp
.
Manager
().
list
([])
self
.
lock
=
mp
.
Lock
()
self
.
collate_batch
=
collate_batch
def
__call__
(
self
,
data
):
...
...
@@ -76,54 +73,31 @@ class BatchCompose(Compose):
data
=
f
(
data
)
except
Exception
as
e
:
stack_info
=
traceback
.
format_exc
()
logger
.
warn
(
"fail to map op [{}] with error: {} and stack:
\n
{}"
.
format
(
f
,
e
,
str
(
stack_info
)))
logger
.
warn
(
"fail to map batch transform [{}] "
"with error: {} and stack:
\n
{}"
.
format
(
f
,
e
,
str
(
stack_info
)))
raise
e
# Accessing a ListProxy in the main process (no worker subprocess)
# may incur errors in some environments, so the ListProxy is turned back
# into a plain list if no worker process is started and this `__call__`
# is called in the main process.
global
MAIN_PID
if
os
.
getpid
()
==
MAIN_PID
and
\
isinstance
(
self
.
output_fields
,
mp
.
managers
.
ListProxy
):
self
.
output_fields
=
[]
# Parse the output fields from the first sample.
# **This should be fixed once paddle.io.DataLoader supports it.**
# Because paddle.io.DataLoader does not support dict currently,
# we need to parse the keys from the first sample.
# BatchCompose.__call__ will be called in each worker
# process, so a lock is needed here.
if
len
(
self
.
output_fields
)
==
0
:
self
.
lock
.
acquire
()
if
len
(
self
.
output_fields
)
==
0
:
for
k
,
v
in
data
[
0
].
items
():
# FIXME(dkp): for more elegant coding
if
k
not
in
[
'flipped'
,
'h'
,
'w'
]:
self
.
output_fields
.
append
(
k
)
self
.
lock
.
release
()
batch_data
=
[]
# If collate_batch=True is set, all data will be collated into a batch
# and transformed to paddle.tensor.
# If collate_batch=False is set, `image`, `im_shape` and
# `scale_factor` will be collated into a batch, but `gt` data (such as
# gt_bbox, gt_class, gt_poly, etc.) will not be collated into a batch
# and will be transformed to list[Tensor] or list[list].
# remove keys which are not needed by the model
extra_key
=
[
'h'
,
'w'
,
'flipped'
]
for
k
in
extra_key
:
for
sample
in
data
:
if
k
in
sample
:
sample
.
pop
(
k
)
# batch data; if a user-defined batch function is needed,
# use the user-defined one here
if
self
.
collate_batch
:
data
=
[[
data
[
i
][
k
]
for
k
in
self
.
output_fields
]
for
i
in
range
(
len
(
data
))]
data
=
list
(
zip
(
*
data
))
batch_data
=
[
np
.
stack
(
d
,
axis
=
0
)
for
d
in
data
]
batch_data
=
default_collate_fn
(
data
)
else
:
for
k
in
self
.
output_fields
:
batch_data
=
{}
for
k
in
data
[
0
].
keys
():
tmp_data
=
[]
for
i
in
range
(
len
(
data
)):
tmp_data
.
append
(
data
[
i
][
k
])
if
not
'gt_'
in
k
and
not
'is_crowd'
in
k
:
tmp_data
=
np
.
stack
(
tmp_data
,
axis
=
0
)
batch_data
.
append
(
tmp_data
)
batch_data
[
k
]
=
tmp_data
return
batch_data
...
...
@@ -227,15 +201,8 @@ class BaseDataLoader(object):
return
self
def
__next__
(
self
):
# pack {field_name: field_data} here
# looking forward to support for the dictionary
# data structure in paddle.io.DataLoader
try
:
data
=
next
(
self
.
loader
)
return
{
k
:
v
for
k
,
v
in
zip
(
self
.
_batch_transforms
.
output_fields
,
data
)
}
return
next
(
self
.
loader
)
except
StopIteration
:
self
.
loader
=
iter
(
self
.
dataloader
)
six
.
reraise
(
*
sys
.
exc_info
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录