Commit 897d86ac (unverified)
Authored on Jul 21, 2020 by sunxl1988; committed via GitHub on Jul 21, 2020

test=dygraph sync reader from static ppdet (#1084)

sync reader from static ppdet

Parent: ce71cdc2
Changes: 6 changed files, +701 additions, -143 deletions

    ppdet/data/reader.py                      +29   -5
    ppdet/data/source/widerface.py            +75   -50
    ppdet/data/transform/batch_operators.py   +113  -55
    ppdet/data/transform/gridmask_utils.py    +83   -0
    ppdet/data/transform/op_helper.py         +24   -4
    ppdet/data/transform/operators.py         +377  -29
ppdet/data/reader.py

@@ -16,6 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

 import os
 import copy
 import functools
 import collections
@@ -167,6 +168,8 @@ class Reader(object):
             Default True.
         mixup_epoch (int): mixup epoc number. Default is -1, meaning
             not use mixup.
+        cutmix_epoch (int): cutmix epoc number. Default is -1, meaning
+            not use cutmix.
         class_aware_sampling (bool): whether use class-aware sampling or not.
             Default False.
         worker_num (int): number of working threads/processes.
@@ -191,6 +194,7 @@ class Reader(object):
                  drop_last=False,
                  drop_empty=True,
                  mixup_epoch=-1,
+                 cutmix_epoch=-1,
                  class_aware_sampling=False,
                  worker_num=-1,
                  use_process=False,
@@ -241,6 +245,7 @@ class Reader(object):
         # sampling
         self._mixup_epoch = mixup_epoch
+        self._cutmix_epoch = cutmix_epoch
         self._class_aware_sampling = class_aware_sampling

         self._load_img = False
@@ -253,6 +258,8 @@ class Reader(object):
         self._pos = -1
         self._epoch = -1
+
+        self._curr_iter = 0

         # multi-process
         self._worker_num = worker_num
         self._parallel = None
@@ -274,6 +281,11 @@ class Reader(object):
     def reset(self):
         """implementation of Dataset.reset
         """
+        if self._epoch < 0:
+            self._epoch = 0
+        else:
+            self._epoch += 1
+
         self.indexes = [i for i in range(self.size())]
         if self._class_aware_sampling:
             self.indexes = np.random.choice(
@@ -283,17 +295,18 @@ class Reader(object):
                 p=self.img_weights)

         if self._shuffle:
+            trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
+            np.random.seed(self._epoch + trainer_id)
             np.random.shuffle(self.indexes)

         if self._mixup_epoch > 0 and len(self.indexes) < 2:
             logger.debug("Disable mixup for dataset samples "
                          "less than 2 samples")
             self._mixup_epoch = -1
-        if self._epoch < 0:
-            self._epoch = 0
-        else:
-            self._epoch += 1
+        if self._cutmix_epoch > 0 and len(self.indexes) < 2:
+            logger.info("Disable cutmix for dataset samples "
+                        "less than 2 samples")
+            self._cutmix_epoch = -1

         self._pos = 0
@@ -306,6 +319,7 @@ class Reader(object):
         if self.drained():
             raise StopIteration
         batch = self._load_batch()
+        self._curr_iter += 1
         if self._drop_last and len(batch) < self._batch_size:
             raise StopIteration
         if self._worker_num > -1:
@@ -321,6 +335,7 @@ class Reader(object):
                 break
             pos = self.indexes[self._pos]
             sample = copy.deepcopy(self._roidbs[pos])
+            sample["curr_iter"] = self._curr_iter
             self._pos += 1

             if self._drop_empty and self._fields and 'gt_mask' in self._fields:
@@ -343,9 +358,18 @@ class Reader(object):
                 mix_idx = np.random.randint(1, num)
                 mix_idx = self.indexes[(mix_idx + self._pos - 1) % num]
                 sample['mixup'] = copy.deepcopy(self._roidbs[mix_idx])
+                sample['mixup']["curr_iter"] = self._curr_iter
                 if self._load_img:
                     sample['mixup']['image'] = \
                         self._load_image(sample['mixup']['im_file'])
+            if self._epoch < self._cutmix_epoch:
+                num = len(self.indexes)
+                mix_idx = np.random.randint(1, num)
+                sample['cutmix'] = copy.deepcopy(self._roidbs[mix_idx])
+                sample['cutmix']["curr_iter"] = self._curr_iter
+                if self._load_img:
+                    sample['cutmix']['image'] = \
+                        self._load_image(sample['cutmix']['im_file'])

             batch.append(sample)
             bs += 1
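Note (not part of the commit): the hunks above wire cutmix into the batch loader the same way mixup is handled. While the current epoch is below cutmix_epoch, each sample carries a deep-copied companion record under 'cutmix', and every sample also gets a 'curr_iter' counter that downstream operators (e.g. GridMask) can read. A minimal standalone sketch of that pairing logic, using a hypothetical roidbs list rather than the Reader class itself:

    import copy
    import numpy as np

    def attach_companions(roidbs, indexes, pos, curr_iter, epoch,
                          mixup_epoch=-1, cutmix_epoch=-1):
        """Return one sample dict, optionally carrying 'mixup'/'cutmix' partners."""
        sample = copy.deepcopy(roidbs[indexes[pos]])
        sample["curr_iter"] = curr_iter
        num = len(indexes)
        if epoch < mixup_epoch and num >= 2:
            mix_idx = np.random.randint(1, num)
            sample['mixup'] = copy.deepcopy(roidbs[indexes[(mix_idx + pos - 1) % num]])
            sample['mixup']["curr_iter"] = curr_iter
        if epoch < cutmix_epoch and num >= 2:
            mix_idx = np.random.randint(1, num)
            sample['cutmix'] = copy.deepcopy(roidbs[mix_idx])
            sample['cutmix']["curr_iter"] = curr_iter
        return sample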
ppdet/data/source/widerface.py

@@ -41,7 +41,8 @@ class WIDERFaceDataSet(DataSet):
                  image_dir=None,
                  anno_path=None,
                  sample_num=-1,
-                 with_background=True):
+                 with_background=True,
+                 with_lmk=False):
         super(WIDERFaceDataSet, self).__init__(
             image_dir=image_dir,
             anno_path=anno_path,
@@ -53,6 +54,7 @@ class WIDERFaceDataSet(DataSet):
         self.with_background = with_background
         self.roidbs = None
         self.cname2cid = None
+        self.with_lmk = with_lmk

     def load_roidb_and_cname2cid(self):
         anno_path = os.path.join(self.dataset_dir, self.anno_path)
@@ -62,33 +64,23 @@ class WIDERFaceDataSet(DataSet):
         records = []
         ct = 0
-        file_lists = _load_file_list(txt_file)
+        file_lists = self._load_file_list(txt_file)
         cname2cid = widerface_label(self.with_background)

         for item in file_lists:
             im_fname = item[0]
             im_id = np.array([ct])
-            gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32)
-            gt_class = np.ones((len(item) - 2, 1), dtype=np.int32)
+            gt_bbox = np.zeros((len(item) - 1, 4), dtype=np.float32)
+            gt_class = np.ones((len(item) - 1, 1), dtype=np.int32)
+            gt_lmk_labels = np.zeros((len(item) - 1, 10), dtype=np.float32)
+            lmk_ignore_flag = np.zeros((len(item) - 1, 1), dtype=np.int32)
             for index_box in range(len(item)):
-                if index_box >= 2:
-                    temp_info_box = item[index_box].split(' ')
-                    xmin = float(temp_info_box[0])
-                    ymin = float(temp_info_box[1])
-                    w = float(temp_info_box[2])
-                    h = float(temp_info_box[3])
-                    # Filter out wrong labels
-                    if w < 0 or h < 0:
-                        logger.warn('Illegal box with w: {}, h: {} in '
-                                    'img: {}, and it will be ignored'.format(
-                                        w, h, im_fname))
-                        continue
-                    xmin = max(0, xmin)
-                    ymin = max(0, ymin)
-                    xmax = xmin + w
-                    ymax = ymin + h
-                    gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax]
+                if index_box < 1:
+                    continue
+                gt_bbox[index_box - 1] = item[index_box][0]
+                if self.with_lmk:
+                    gt_lmk_labels[index_box - 1] = item[index_box][1]
+                    lmk_ignore_flag[index_box - 1] = item[index_box][2]
             im_fname = os.path.join(
                 image_dir, im_fname) if image_dir else im_fname
             widerface_rec = {
@@ -97,7 +89,10 @@ class WIDERFaceDataSet(DataSet):
                 'gt_bbox': gt_bbox,
                 'gt_class': gt_class,
             }
             # logger.debug
+            if self.with_lmk:
+                widerface_rec['gt_keypoint'] = gt_lmk_labels
+                widerface_rec['keypoint_ignore'] = lmk_ignore_flag
+
             if len(item) != 0:
                 records.append(widerface_rec)
@@ -108,34 +103,64 @@ class WIDERFaceDataSet(DataSet):
         logger.debug('{} samples in file {}'.format(ct, anno_path))
         self.roidbs, self.cname2cid = records, cname2cid

-def _load_file_list(input_txt):
-    with open(input_txt, 'r') as f_dir:
-        lines_input_txt = f_dir.readlines()
-
-    file_dict = {}
-    num_class = 0
-    for i in range(len(lines_input_txt)):
-        line_txt = lines_input_txt[i].strip('\n\t\r')
-        if '.jpg' in line_txt:
-            if i != 0:
-                num_class += 1
-            file_dict[num_class] = []
-            file_dict[num_class].append(line_txt)
-        if '.jpg' not in line_txt:
-            if len(line_txt) > 6:
-                split_str = line_txt.split(' ')
-                x1_min = float(split_str[0])
-                y1_min = float(split_str[1])
-                x2_max = float(split_str[2])
-                y2_max = float(split_str[3])
-                line_txt = str(x1_min) + ' ' + str(y1_min) + ' ' + str(
-                    x2_max) + ' ' + str(y2_max)
-                file_dict[num_class].append(line_txt)
-            else:
-                file_dict[num_class].append(line_txt)
-
-    return list(file_dict.values())
+    def _load_file_list(self, input_txt):
+        with open(input_txt, 'r') as f_dir:
+            lines_input_txt = f_dir.readlines()
+
+        file_dict = {}
+        num_class = 0
+        for i in range(len(lines_input_txt)):
+            line_txt = lines_input_txt[i].strip('\n\t\r')
+            if '.jpg' in line_txt:
+                if i != 0:
+                    num_class += 1
+                file_dict[num_class] = []
+                file_dict[num_class].append(line_txt)
+            if '.jpg' not in line_txt:
+                if len(line_txt) <= 6:
+                    continue
+                result_boxs = []
+                split_str = line_txt.split(' ')
+                xmin = float(split_str[0])
+                ymin = float(split_str[1])
+                w = float(split_str[2])
+                h = float(split_str[3])
+                # Filter out wrong labels
+                if w < 0 or h < 0:
+                    logger.warn('Illegal box with w: {}, h: {} in '
+                                'img: {}, and it will be ignored'.format(
+                                    w, h, file_dict[num_class][0]))
+                    continue
+                xmin = max(0, xmin)
+                ymin = max(0, ymin)
+                xmax = xmin + w
+                ymax = ymin + h
+                gt_bbox = [xmin, ymin, xmax, ymax]
+                result_boxs.append(gt_bbox)
+                if self.with_lmk:
+                    assert len(split_str) > 18, 'When `with_lmk=True`, the number' \
+                        'of characters per line in the annotation file should' \
+                        'exceed 18.'
+                    lmk0_x = float(split_str[5])
+                    lmk0_y = float(split_str[6])
+                    lmk1_x = float(split_str[8])
+                    lmk1_y = float(split_str[9])
+                    lmk2_x = float(split_str[11])
+                    lmk2_y = float(split_str[12])
+                    lmk3_x = float(split_str[14])
+                    lmk3_y = float(split_str[15])
+                    lmk4_x = float(split_str[17])
+                    lmk4_y = float(split_str[18])
+                    lmk_ignore_flag = 0 if lmk0_x == -1 else 1
+                    gt_lmk_label = [
+                        lmk0_x, lmk0_y, lmk1_x, lmk1_y, lmk2_x, lmk2_y,
+                        lmk3_x, lmk3_y, lmk4_x, lmk4_y
+                    ]
+                    result_boxs.append(gt_lmk_label)
+                    result_boxs.append(lmk_ignore_flag)
+                file_dict[num_class].append(result_boxs)
+
+        return list(file_dict.values())

 def widerface_label(with_background=True):
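Note (not part of the commit): in the rewritten _load_file_list, each non-image line of the WIDER FACE annotation file becomes either [bbox] or [bbox, landmarks, ignore_flag] depending on with_lmk, and the five landmark (x, y) pairs sit at fixed columns 5/6, 8/9, 11/12, 14/15, 17/18 of the split line. A hedged sketch of that per-line parsing, assuming the same column layout:

    def parse_wider_line(line, with_lmk=False):
        """Parse one 'x y w h ...' annotation line into bbox (+ optional landmarks)."""
        s = line.split(' ')
        xmin, ymin, w, h = (float(v) for v in s[:4])
        if w < 0 or h < 0:          # illegal box, caller should skip it
            return None
        xmin, ymin = max(0, xmin), max(0, ymin)
        entry = [[xmin, ymin, xmin + w, ymin + h]]
        if with_lmk:
            cols = [5, 6, 8, 9, 11, 12, 14, 15, 17, 18]
            lmk = [float(s[i]) for i in cols]
            entry.append(lmk)
            entry.append(0 if lmk[0] == -1 else 1)   # landmark ignore flag
        return entry

    print(parse_wider_line('10 20 30 40'))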
ppdet/data/transform/batch_operators.py

@@ -26,13 +26,17 @@ import cv2
 import numpy as np

 from .operators import register_op, BaseOperator
-from .op_helper import jaccard_overlap
+from .op_helper import jaccard_overlap, gaussian2D

 logger = logging.getLogger(__name__)

 __all__ = [
-    'PadBatch', 'RandomShape', 'PadMultiScaleTest', 'Gt2YoloTarget', 'Gt2FCOSTarget'
+    'PadBatch',
+    'RandomShape',
+    'PadMultiScaleTest',
+    'Gt2YoloTarget',
+    'Gt2FCOSTarget',
+    'Gt2TTFTarget',
 ]
@@ -41,17 +45,15 @@ class PadBatch(BaseOperator):
     """
     Pad a batch of samples so they can be divisible by a stride.
     The layout of each image should be 'CHW'.
     Args:
         pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
             height and width is divisible by `pad_to_stride`.
     """

-    def __init__(self, pad_to_stride=0, use_padded_im_info=True, pad_gt=False):
+    def __init__(self, pad_to_stride=0, use_padded_im_info=True):
         super(PadBatch, self).__init__()
         self.pad_to_stride = pad_to_stride
         self.use_padded_im_info = use_padded_im_info
-        self.pad_gt = pad_gt

     def __call__(self, samples, context=None):
         """
@@ -61,9 +63,9 @@ class PadBatch(BaseOperator):
         coarsest_stride = self.pad_to_stride
         if coarsest_stride == 0:
             return samples
         max_shape = np.array(
             [data['image'].shape for data in samples]).max(axis=0)
         if coarsest_stride > 0:
             max_shape[1] = int(
                 np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
@@ -80,52 +82,6 @@ class PadBatch(BaseOperator):
             data['image'] = padding_im
             if self.use_padded_im_info:
                 data['im_info'][:2] = max_shape[1:3]
-        if self.pad_gt:
-            gt_num = []
-            if data['gt_poly'] is not None and len(data['gt_poly']) > 0:
-                pad_mask = True
-            else:
-                pad_mask = False
-
-            if pad_mask:
-                poly_num = []
-                poly_part_num = []
-                point_num = []
-            for data in samples:
-                gt_num.append(data['gt_bbox'].shape[0])
-                if pad_mask:
-                    poly_num.append(len(data['gt_poly']))
-                    for poly in data['gt_poly']:
-                        poly_part_num.append(int(len(poly)))
-                        for p_p in poly:
-                            point_num.append(int(len(p_p) / 2))
-            gt_num_max = max(gt_num)
-            gt_box_data = np.zeros([gt_num_max, 4])
-            gt_class_data = np.zeros([gt_num_max])
-            is_crowd_data = np.ones([gt_num_max])
-
-            if pad_mask:
-                poly_num_max = max(poly_num)
-                poly_part_num_max = max(poly_part_num)
-                point_num_max = max(point_num)
-                gt_masks_data = -np.ones(
-                    [poly_num_max, poly_part_num_max, point_num_max, 2])
-
-            for i, data in enumerate(samples):
-                gt_num = data['gt_bbox'].shape[0]
-                gt_box_data[0:gt_num, :] = data['gt_bbox']
-                gt_class_data[0:gt_num] = np.squeeze(data['gt_class'])
-                is_crowd_data[0:gt_num] = np.squeeze(data['is_crowd'])
-                if pad_mask:
-                    for j, poly in enumerate(data['gt_poly']):
-                        for k, p_p in enumerate(poly):
-                            pp_np = np.array(p_p).reshape(-1, 2)
-                            gt_masks_data[j, k, :pp_np.shape[0], :] = pp_np
-                    data['gt_poly'] = gt_masks_data
-                data['gt_bbox'] = gt_box_data
-                data['gt_class'] = gt_class_data
-                data['is_crowd_data'] = is_crowd_data
         return samples
@@ -136,13 +92,12 @@ class RandomShape(BaseOperator):
     select one an interpolation algorithm [cv2.INTER_NEAREST, cv2.INTER_LINEAR,
     cv2.INTER_AREA, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4]. If random_inter is
     False, use cv2.INTER_NEAREST.
     Args:
         sizes (list): list of int, random choose a size from these
         random_inter (bool): whether to randomly interpolation, defalut true.
     """

-    def __init__(self, sizes=[], random_inter=False):
+    def __init__(self, sizes=[], random_inter=False, resize_box=False):
         super(RandomShape, self).__init__()
         self.sizes = sizes
         self.random_inter = random_inter
@@ -153,6 +108,7 @@ class RandomShape(BaseOperator):
             cv2.INTER_CUBIC,
             cv2.INTER_LANCZOS4,
         ] if random_inter else []
+        self.resize_box = resize_box

     def __call__(self, samples, context=None):
         shape = np.random.choice(self.sizes)
@@ -166,6 +122,12 @@ class RandomShape(BaseOperator):
             im = cv2.resize(
                 im, None, None, fx=scale_x, fy=scale_y, interpolation=method)
             samples[i]['image'] = im
+            if self.resize_box and 'gt_bbox' in samples[i] and len(samples[0][
+                    'gt_bbox']) > 0:
+                scale_array = np.array([scale_x, scale_y] * 2, dtype=np.float32)
+                samples[i]['gt_bbox'] = np.clip(samples[i]['gt_bbox'] *
+                                                scale_array, 0,
+                                                float(shape) - 1)
         return samples
@@ -525,3 +487,99 @@ class Gt2FCOSTarget(BaseOperator):
                 sample['centerness{}'.format(lvl)] = np.reshape(
                     ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
         return samples
+
+
+@register_op
+class Gt2TTFTarget(BaseOperator):
+    """
+    Gt2TTFTarget
+    Generate TTFNet targets by ground truth data
+
+    Args:
+        num_classes(int): the number of classes.
+        down_ratio(int): the down ratio from images to heatmap, 4 by default.
+        alpha(float): the alpha parameter to generate gaussian target.
+            0.54 by default.
+    """
+
+    def __init__(self, num_classes, down_ratio=4, alpha=0.54):
+        super(Gt2TTFTarget, self).__init__()
+        self.down_ratio = down_ratio
+        self.num_classes = num_classes
+        self.alpha = alpha
+
+    def __call__(self, samples, context=None):
+        output_size = samples[0]['image'].shape[1]
+        feat_size = output_size // self.down_ratio
+        for sample in samples:
+            heatmap = np.zeros(
+                (self.num_classes, feat_size, feat_size), dtype='float32')
+            box_target = np.ones(
+                (4, feat_size, feat_size), dtype='float32') * -1
+            reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')
+
+            gt_bbox = sample['gt_bbox']
+            gt_class = sample['gt_class']
+
+            bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
+            bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
+            area = bbox_w * bbox_h
+            boxes_areas_log = np.log(area)
+            boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
+            boxes_area_topk_log = boxes_areas_log[boxes_ind]
+            gt_bbox = gt_bbox[boxes_ind]
+            gt_class = gt_class[boxes_ind]
+
+            feat_gt_bbox = gt_bbox / self.down_ratio
+            feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
+            feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
+                                feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])
+
+            ct_inds = np.stack(
+                [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
+                 (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
+                axis=1) / self.down_ratio
+
+            h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
+            w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')
+
+            for k in range(len(gt_bbox)):
+                cls_id = gt_class[k]
+                fake_heatmap = np.zeros(
+                    (feat_size, feat_size), dtype='float32')
+                self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
+                                            h_radiuses_alpha[k],
+                                            w_radiuses_alpha[k])
+
+                heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
+                box_target_inds = fake_heatmap > 0
+                box_target[:, box_target_inds] = gt_bbox[k][:, None]
+
+                local_heatmap = fake_heatmap[box_target_inds]
+                ct_div = np.sum(local_heatmap)
+                local_heatmap *= boxes_area_topk_log[k]
+                reg_weight[0, box_target_inds] = local_heatmap / ct_div
+            sample['ttf_heatmap'] = heatmap
+            sample['ttf_box_target'] = box_target
+            sample['ttf_reg_weight'] = reg_weight
+        return samples
+
+    def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
+        h, w = 2 * h_radius + 1, 2 * w_radius + 1
+        sigma_x = w / 6
+        sigma_y = h / 6
+        gaussian = gaussian2D((h, w), sigma_x, sigma_y)
+
+        x, y = int(center[0]), int(center[1])
+
+        height, width = heatmap.shape[0:2]
+
+        left, right = min(x, w_radius), min(width - x, w_radius + 1)
+        top, bottom = min(y, h_radius), min(height - y, h_radius + 1)
+
+        masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
+        masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
+                                   left:w_radius + right]
+        if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
+            heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
+                masked_heatmap, masked_gaussian)
+        return heatmap
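Note (not part of the commit): Gt2TTFTarget paints one truncated 2-D Gaussian per ground-truth box onto a per-class heatmap of size feat_size = image_size // down_ratio, with radii alpha * h / 2 and alpha * w / 2 in feature coordinates. A minimal standalone sketch of the same stamping step, with a local copy of the anisotropic gaussian2D helper for illustration:

    import numpy as np

    def gaussian2d(shape, sigma_x=1, sigma_y=1):
        m, n = [(ss - 1.) / 2. for ss in shape]
        y, x = np.ogrid[-m:m + 1, -n:n + 1]
        g = np.exp(-(x * x / (2 * sigma_x ** 2) + y * y / (2 * sigma_y ** 2)))
        g[g < np.finfo(g.dtype).eps * g.max()] = 0
        return g

    def paint_box(heatmap, cx, cy, h_radius, w_radius):
        """Stamp a truncated Gaussian centred at (cx, cy) onto a 2-D heatmap."""
        gh, gw = 2 * h_radius + 1, 2 * w_radius + 1
        g = gaussian2d((gh, gw), sigma_x=gw / 6, sigma_y=gh / 6)
        H, W = heatmap.shape
        x, y = int(cx), int(cy)
        left, right = min(x, w_radius), min(W - x, w_radius + 1)
        top, bottom = min(y, h_radius), min(H - y, h_radius + 1)
        heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
            heatmap[y - top:y + bottom, x - left:x + right],
            g[h_radius - top:h_radius + bottom, w_radius - left:w_radius + right])
        return heatmap

    heat = paint_box(np.zeros((128, 128), 'float32'), cx=40, cy=60, h_radius=8, w_radius=12)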
ppdet/data/transform/gridmask_utils.py (new file, 0 → 100644)

+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import numpy as np
+from PIL import Image
+
+
+class GridMask(object):
+    def __init__(self,
+                 use_h=True,
+                 use_w=True,
+                 rotate=1,
+                 offset=False,
+                 ratio=0.5,
+                 mode=1,
+                 prob=0.7,
+                 upper_iter=360000):
+        super(GridMask, self).__init__()
+        self.use_h = use_h
+        self.use_w = use_w
+        self.rotate = rotate
+        self.offset = offset
+        self.ratio = ratio
+        self.mode = mode
+        self.prob = prob
+        self.st_prob = prob
+        self.upper_iter = upper_iter
+
+    def __call__(self, x, curr_iter):
+        self.prob = self.st_prob * min(1, 1.0 * curr_iter / self.upper_iter)
+        if np.random.rand() > self.prob:
+            return x
+        _, h, w = x.shape
+        hh = int(1.5 * h)
+        ww = int(1.5 * w)
+        d = np.random.randint(2, h)
+        self.l = min(max(int(d * self.ratio + 0.5), 1), d - 1)
+        mask = np.ones((hh, ww), np.float32)
+        st_h = np.random.randint(d)
+        st_w = np.random.randint(d)
+        if self.use_h:
+            for i in range(hh // d):
+                s = d * i + st_h
+                t = min(s + self.l, hh)
+                mask[s:t, :] *= 0
+        if self.use_w:
+            for i in range(ww // d):
+                s = d * i + st_w
+                t = min(s + self.l, ww)
+                mask[:, s:t] *= 0
+
+        r = np.random.randint(self.rotate)
+        mask = Image.fromarray(np.uint8(mask))
+        mask = mask.rotate(r)
+        mask = np.asarray(mask)
+        mask = mask[(hh - h) // 2:(hh - h) // 2 + h, (ww - w) // 2:(ww - w) //
+                    2 + w].astype(np.float32)
+
+        if self.mode == 1:
+            mask = 1 - mask
+        mask = np.expand_dims(mask, axis=0)
+        if self.offset:
+            offset = (2 * (np.random.rand(h, w) - 0.5)).astype(np.float32)
+            x = (x * mask + offset * (1 - mask)).astype(x.dtype)
+        else:
+            x = (x * mask).astype(x.dtype)
+        return x
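Note (not part of the commit): GridMask zeroes out a regular grid of stripes whose width l = d * ratio is derived from a random period d, rotates the mask, and ramps the apply probability linearly from 0 to prob over upper_iter iterations. A hedged usage sketch on a CHW float array, assuming this new module is importable; the GridMaskOp operator in operators.py feeds it sample['image'] and sample['curr_iter'] in exactly this way:

    import numpy as np
    from ppdet.data.transform.gridmask_utils import GridMask  # assumes ppdet at this commit

    gm = GridMask(use_h=True, use_w=True, rotate=1, ratio=0.5,
                  mode=1, prob=0.7, upper_iter=360000)

    img = np.random.randint(0, 255, size=(3, 320, 320)).astype('float32')  # CHW
    masked = gm(img, curr_iter=180000)   # halfway through training -> apply prob ~ 0.35
    print(masked.shape)                  # (3, 320, 320), with grid stripes zeroed when applied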
ppdet/data/transform/op_helper.py

@@ -61,10 +61,13 @@ def is_overlap(object_bbox, sample_bbox):
         return True


-def filter_and_process(sample_bbox, bboxes, labels, scores=None):
+def filter_and_process(sample_bbox, bboxes, labels, scores=None,
+                       keypoints=None):
     new_bboxes = []
     new_labels = []
     new_scores = []
+    new_keypoints = []
+    new_kp_ignore = []
     for i in range(len(bboxes)):
         new_bbox = [0, 0, 0, 0]
         obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
@@ -84,9 +87,24 @@ def filter_and_process(sample_bbox, bboxes, labels, scores=None):
             new_labels.append([labels[i][0]])
             if scores is not None:
                 new_scores.append([scores[i][0]])
+            if keypoints is not None:
+                sample_keypoint = keypoints[0][i]
+                for j in range(len(sample_keypoint)):
+                    kp_len = sample_height if j % 2 else sample_width
+                    sample_coord = sample_bbox[1] if j % 2 else sample_bbox[0]
+                    sample_keypoint[j] = (
+                        sample_keypoint[j] - sample_coord) / kp_len
+                    sample_keypoint[j] = max(min(sample_keypoint[j], 1.0), 0.0)
+                new_keypoints.append(sample_keypoint)
+                new_kp_ignore.append(keypoints[1][i])
+
     bboxes = np.array(new_bboxes)
     labels = np.array(new_labels)
     scores = np.array(new_scores)
+    if keypoints is not None:
+        keypoints = np.array(new_keypoints)
+        new_kp_ignore = np.array(new_kp_ignore)
+        return bboxes, labels, scores, (keypoints, new_kp_ignore)
     return bboxes, labels, scores
@@ -420,7 +438,8 @@ def gaussian_radius(bbox_size, min_overlap):
 def draw_gaussian(heatmap, center, radius, k=1, delte=6):
     diameter = 2 * radius + 1
-    gaussian = gaussian2D((diameter, diameter), sigma=diameter / delte)
+    sigma = diameter / delte
+    gaussian = gaussian2D((diameter, diameter), sigma_x=sigma, sigma_y=sigma)

     x, y = center
@@ -435,10 +454,11 @@ def draw_gaussian(heatmap, center, radius, k=1, delte=6):
         np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap)


-def gaussian2D(shape, sigma=1):
+def gaussian2D(shape, sigma_x=1, sigma_y=1):
     m, n = [(ss - 1.) / 2. for ss in shape]
     y, x = np.ogrid[-m:m + 1, -n:n + 1]

-    h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))
+    h = np.exp(-(x * x / (2 * sigma_x * sigma_x) + y * y / (2 * sigma_y *
+                                                            sigma_y)))
     h[h < np.finfo(h.dtype).eps * h.max()] = 0
     return h
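Note (not part of the commit): filter_and_process now optionally takes keypoints=(gt_keypoint, keypoint_ignore) and re-expresses each surviving keypoint relative to the sampled crop, clipping the result to [0, 1] (the kp_len / sample_width / sample_height names come from the enclosing module scope). A small sketch of that coordinate transform, with the crop geometry passed explicitly instead:

    def rescale_keypoint(kp, crop_xmin, crop_ymin, crop_w, crop_h):
        """Map absolute (x, y, x, y, ...) keypoints into [0, 1] crop coordinates."""
        out = list(kp)
        for j in range(len(out)):
            length = crop_h if j % 2 else crop_w
            origin = crop_ymin if j % 2 else crop_xmin
            out[j] = min(max((out[j] - origin) / length, 0.0), 1.0)
        return out

    # a keypoint at (120, 80) inside a crop starting at (100, 50) of size 200 x 100
    print(rescale_keypoint([120, 80], 100, 50, 200, 100))   # [0.1, 0.3]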
ppdet/data/transform/operators.py

@@ -32,9 +32,10 @@ import logging
 import random
 import math
 import numpy as np
+import os

 import cv2
-from PIL import Image, ImageEnhance
+from PIL import Image, ImageEnhance, ImageDraw

 from ppdet.core.workspace import serializable
 from ppdet.modeling.ops import AnchorGrid
@@ -89,21 +90,24 @@ class BaseOperator(object):
 @register_op
 class DecodeImage(BaseOperator):
-    def __init__(self, to_rgb=True, with_mixup=False):
+    def __init__(self, to_rgb=True, with_mixup=False, with_cutmix=False):
         """ Transform the image data to numpy format.
         Args:
             to_rgb (bool): whether to convert BGR to RGB
             with_mixup (bool): whether or not to mixup image and gt_bbbox/gt_score
+            with_cutmix (bool): whether or not to cutmix image and gt_bbbox/gt_score
         """
         super(DecodeImage, self).__init__()
         self.to_rgb = to_rgb
         self.with_mixup = with_mixup
+        self.with_cutmix = with_cutmix
         if not isinstance(self.to_rgb, bool):
             raise TypeError("{}: input type is invalid.".format(self))
         if not isinstance(self.with_mixup, bool):
             raise TypeError("{}: input type is invalid.".format(self))
+        if not isinstance(self.with_cutmix, bool):
+            raise TypeError("{}: input type is invalid.".format(self))

     def __call__(self, sample, context=None):
         """ load image if 'im_file' field is not empty but 'image' is"""
@@ -142,6 +146,10 @@ class DecodeImage(BaseOperator):
         # decode mixup image
         if self.with_mixup and 'mixup' in sample:
             self.__call__(sample['mixup'], context)
+
+        # decode cutmix image
+        if self.with_cutmix and 'cutmix' in sample:
+            self.__call__(sample['cutmix'], context)
         return sample
@@ -156,7 +164,6 @@ class MultiscaleTestResize(BaseOperator):
                  use_flip=True):
         """
         Rescale image to the each size in target size, and capped at max_size.
-
         Args:
             origin_target_size(int): original target size of image's short side.
             origin_max_size(int): original max size of image.
@@ -265,7 +272,6 @@ class ResizeImage(BaseOperator):
             if max_size != 0.
             If target_size is list, selected a scale randomly as the specified
             target size.
-
         Args:
             target_size (int|list): the target size of image's short side,
                 multi-scale training is adopted when type is list.
@@ -392,6 +398,16 @@ class RandomFlipImage(BaseOperator):
                 flipped_segms.append(_flip_rle(segm, height, width))
         return flipped_segms

+    def flip_keypoint(self, gt_keypoint, width):
+        for i in range(gt_keypoint.shape[1]):
+            if i % 2 == 0:
+                old_x = gt_keypoint[:, i].copy()
+                if self.is_normalized:
+                    gt_keypoint[:, i] = 1 - old_x
+                else:
+                    gt_keypoint[:, i] = width - old_x - 1
+        return gt_keypoint
+
     def __call__(self, sample, context=None):
         """Filp the image and bounding box.
         Operators:
@@ -439,12 +455,130 @@ class RandomFlipImage(BaseOperator):
                 if self.is_mask_flip and len(sample['gt_poly']) != 0:
                     sample['gt_poly'] = self.flip_segms(sample['gt_poly'],
                                                         height, width)
+                if 'gt_keypoint' in sample.keys():
+                    sample['gt_keypoint'] = self.flip_keypoint(
+                        sample['gt_keypoint'], width)
                 sample['flipped'] = True
                 sample['image'] = im
         sample = samples if batch_input else samples[0]
         return sample


+@register_op
+class RandomErasingImage(BaseOperator):
+    def __init__(self, prob=0.5, sl=0.02, sh=0.4, r1=0.3):
+        """
+        Random Erasing Data Augmentation, see https://arxiv.org/abs/1708.04896
+        Args:
+            prob (float): probability to carry out random erasing
+            sl (float): lower limit of the erasing area ratio
+            sh (float): upper limit of the erasing area ratio
+            r1 (float): aspect ratio of the erasing region
+        """
+        super(RandomErasingImage, self).__init__()
+        self.prob = prob
+        self.sl = sl
+        self.sh = sh
+        self.r1 = r1
+
+    def __call__(self, sample, context=None):
+        samples = sample
+        batch_input = True
+        if not isinstance(samples, Sequence):
+            batch_input = False
+            samples = [samples]
+        for sample in samples:
+            gt_bbox = sample['gt_bbox']
+            im = sample['image']
+            if not isinstance(im, np.ndarray):
+                raise TypeError("{}: image is not a numpy array.".format(self))
+            if len(im.shape) != 3:
+                raise ImageError("{}: image is not 3-dimensional.".format(self))
+
+            for idx in range(gt_bbox.shape[0]):
+                if self.prob <= np.random.rand():
+                    continue
+
+                x1, y1, x2, y2 = gt_bbox[idx, :]
+                w_bbox = x2 - x1 + 1
+                h_bbox = y2 - y1 + 1
+                area = w_bbox * h_bbox
+
+                target_area = random.uniform(self.sl, self.sh) * area
+                aspect_ratio = random.uniform(self.r1, 1 / self.r1)
+
+                h = int(round(math.sqrt(target_area * aspect_ratio)))
+                w = int(round(math.sqrt(target_area / aspect_ratio)))
+
+                if w < w_bbox and h < h_bbox:
+                    off_y1 = random.randint(0, int(h_bbox - h))
+                    off_x1 = random.randint(0, int(w_bbox - w))
+                    im[int(y1 + off_y1):int(y1 + off_y1 + h), int(x1 + off_x1):
+                       int(x1 + off_x1 + w), :] = 0
+            sample['image'] = im
+        sample = samples if batch_input else samples[0]
+        return sample
+
+
+@register_op
+class GridMaskOp(BaseOperator):
+    def __init__(self,
+                 use_h=True,
+                 use_w=True,
+                 rotate=1,
+                 offset=False,
+                 ratio=0.5,
+                 mode=1,
+                 prob=0.7,
+                 upper_iter=360000):
+        """
+        GridMask Data Augmentation, see https://arxiv.org/abs/2001.04086
+        Args:
+            use_h (bool): whether to mask vertically
+            use_w (boo;): whether to mask horizontally
+            rotate (float): angle for the mask to rotate
+            offset (float): mask offset
+            ratio (float): mask ratio
+            mode (int): gridmask mode
+            prob (float): max probability to carry out gridmask
+            upper_iter (int): suggested to be equal to global max_iter
+        """
+        super(GridMaskOp, self).__init__()
+        self.use_h = use_h
+        self.use_w = use_w
+        self.rotate = rotate
+        self.offset = offset
+        self.ratio = ratio
+        self.mode = mode
+        self.prob = prob
+        self.upper_iter = upper_iter
+
+        from .gridmask_utils import GridMask
+        self.gridmask_op = GridMask(
+            use_h,
+            use_w,
+            rotate=rotate,
+            offset=offset,
+            ratio=ratio,
+            mode=mode,
+            prob=prob,
+            upper_iter=upper_iter)
+
+    def __call__(self, sample, context=None):
+        samples = sample
+        batch_input = True
+        if not isinstance(samples, Sequence):
+            batch_input = False
+            samples = [samples]
+        for sample in samples:
+            sample['image'] = self.gridmask_op(sample['image'],
+                                               sample['curr_iter'])
+        if not batch_input:
+            samples = samples[0]
+        return sample
+
+
 @register_op
 class AutoAugmentImage(BaseOperator):
     def __init__(self, is_normalized=False, autoaug_type="v1"):
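Note (not part of the commit): RandomErasingImage erases, per ground-truth box and with probability prob, a rectangle whose area is a uniform fraction in [sl, sh] of the box and whose aspect ratio is drawn from [r1, 1/r1]. A minimal standalone sketch of that sampling on one HWC image and one box, outside the operator class:

    import math
    import random
    import numpy as np

    def erase_in_box(im, box, prob=0.5, sl=0.02, sh=0.4, r1=0.3):
        """Zero out a random patch inside `box` (x1, y1, x2, y2) with probability `prob`."""
        if random.random() >= prob:
            return im
        x1, y1, x2, y2 = box
        w_bbox, h_bbox = x2 - x1 + 1, y2 - y1 + 1
        target_area = random.uniform(sl, sh) * w_bbox * h_bbox
        aspect = random.uniform(r1, 1 / r1)
        h = int(round(math.sqrt(target_area * aspect)))
        w = int(round(math.sqrt(target_area / aspect)))
        if w < w_bbox and h < h_bbox:
            oy = random.randint(0, int(h_bbox - h))
            ox = random.randint(0, int(w_bbox - w))
            im[int(y1 + oy):int(y1 + oy + h), int(x1 + ox):int(x1 + ox + w), :] = 0
        return im

    im = erase_in_box(np.ones((200, 200, 3), 'float32'), (20, 30, 120, 150), prob=1.0)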
@@ -733,8 +867,17 @@ class ExpandImage(BaseOperator):
             im = Image.fromarray(im)
             expand_im.paste(im, (int(w_off), int(h_off)))
             expand_im = np.asarray(expand_im)
-            gt_bbox, gt_class, _ = filter_and_process(expand_bbox, gt_bbox,
-                                                      gt_class)
+            if 'gt_keypoint' in sample.keys(
+            ) and 'keypoint_ignore' in sample.keys():
+                keypoints = (sample['gt_keypoint'],
+                             sample['keypoint_ignore'])
+                gt_bbox, gt_class, _, gt_keypoints = filter_and_process(
+                    expand_bbox, gt_bbox, gt_class, keypoints=keypoints)
+                sample['gt_keypoint'] = gt_keypoints[0]
+                sample['keypoint_ignore'] = gt_keypoints[1]
+            else:
+                gt_bbox, gt_class, _ = filter_and_process(expand_bbox, gt_bbox,
+                                                          gt_class)
             sample['image'] = expand_im
             sample['gt_bbox'] = gt_bbox
             sample['gt_class'] = gt_class
@@ -808,7 +951,7 @@ class CropImage(BaseOperator):
                 sample_bbox = sampled_bbox.pop(idx)
                 sample_bbox = clip_bbox(sample_bbox)
                 crop_bbox, crop_class, crop_score = \
-                    filter_and_process(sample_bbox, gt_bbox, gt_class, gt_score)
+                    filter_and_process(sample_bbox, gt_bbox, gt_class, scores=gt_score)
                 if self.avoid_no_bbox:
                     if len(crop_bbox) < 1:
                         continue
@@ -911,8 +1054,16 @@ class CropImageWithDataAchorSampling(BaseOperator):
                 idx = int(np.random.uniform(0, len(sampled_bbox)))
                 sample_bbox = sampled_bbox.pop(idx)

-                crop_bbox, crop_class, crop_score = filter_and_process(
-                    sample_bbox, gt_bbox, gt_class, gt_score)
+                if 'gt_keypoint' in sample.keys():
+                    keypoints = (sample['gt_keypoint'],
+                                 sample['keypoint_ignore'])
+                    crop_bbox, crop_class, crop_score, gt_keypoints = \
+                        filter_and_process(sample_bbox, gt_bbox, gt_class,
+                                           scores=gt_score, keypoints=keypoints)
+                else:
+                    crop_bbox, crop_class, crop_score = filter_and_process(
+                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                 crop_bbox, crop_class, crop_score = bbox_area_sampling(
                     crop_bbox, crop_class, crop_score, self.target_size,
                     self.min_size)
@@ -926,6 +1077,9 @@ class CropImageWithDataAchorSampling(BaseOperator):
                     sample['gt_bbox'] = crop_bbox
                     sample['gt_class'] = crop_class
                     sample['gt_score'] = crop_score
+                    if 'gt_keypoint' in sample.keys():
+                        sample['gt_keypoint'] = gt_keypoints[0]
+                        sample['keypoint_ignore'] = gt_keypoints[1]
                     return sample
             return sample
@@ -947,8 +1101,16 @@ class CropImageWithDataAchorSampling(BaseOperator):
                 sample_bbox = sampled_bbox.pop(idx)
                 sample_bbox = clip_bbox(sample_bbox)

-                crop_bbox, crop_class, crop_score = filter_and_process(
-                    sample_bbox, gt_bbox, gt_class, gt_score)
+                if 'gt_keypoint' in sample.keys():
+                    keypoints = (sample['gt_keypoint'],
+                                 sample['keypoint_ignore'])
+                    crop_bbox, crop_class, crop_score, gt_keypoints = \
+                        filter_and_process(sample_bbox, gt_bbox, gt_class,
+                                           scores=gt_score, keypoints=keypoints)
+                else:
+                    crop_bbox, crop_class, crop_score = filter_and_process(
+                        sample_bbox, gt_bbox, gt_class, scores=gt_score)
                 # sampling bbox according the bbox area
                 crop_bbox, crop_class, crop_score = bbox_area_sampling(
                     crop_bbox, crop_class, crop_score, self.target_size,
@@ -966,6 +1128,9 @@ class CropImageWithDataAchorSampling(BaseOperator):
                     sample['gt_bbox'] = crop_bbox
                     sample['gt_class'] = crop_class
                     sample['gt_score'] = crop_score
+                    if 'gt_keypoint' in sample.keys():
+                        sample['gt_keypoint'] = gt_keypoints[0]
+                        sample['keypoint_ignore'] = gt_keypoints[1]
                     return sample
             return sample
@@ -987,6 +1152,17 @@ class NormalizeBox(BaseOperator):
             gt_bbox[i][2] = gt_bbox[i][2] / width
             gt_bbox[i][3] = gt_bbox[i][3] / height
         sample['gt_bbox'] = gt_bbox
+
+        if 'gt_keypoint' in sample.keys():
+            gt_keypoint = sample['gt_keypoint']
+
+            for i in range(gt_keypoint.shape[1]):
+                if i % 2:
+                    gt_keypoint[:, i] = gt_keypoint[:, i] / height
+                else:
+                    gt_keypoint[:, i] = gt_keypoint[:, i] / width
+            sample['gt_keypoint'] = gt_keypoint
+
         return sample
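Note (not part of the commit): the NormalizeBox hunk divides even keypoint coordinates by the image width and odd ones by the height, mirroring what was already done for gt_bbox. A short sketch of that normalisation on an (N, 10) landmark array, assuming the same x0, y0, ..., x4, y4 layout produced by the WIDER FACE loader:

    import numpy as np

    def normalize_keypoints(gt_keypoint, width, height):
        """Scale pixel landmarks (x0, y0, ..., x4, y4) into [0, 1]."""
        kp = gt_keypoint.astype('float32').copy()
        for i in range(kp.shape[1]):
            kp[:, i] /= height if i % 2 else width
        return kp

    lmk = np.array([[100., 50., 140., 52., 120., 80., 105., 110., 135., 112.]])
    print(normalize_keypoints(lmk, width=200, height=200))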
@@ -998,7 +1174,6 @@ class Permute(BaseOperator):
         Args:
             to_bgr (bool): confirm whether to convert RGB to BGR
             channel_first (bool): confirm whether to change channel
-
         """
         super(Permute, self).__init__()
         self.to_bgr = to_bgr
@@ -1094,6 +1269,84 @@ class MixupImage(BaseOperator):
         return sample


+@register_op
+class CutmixImage(BaseOperator):
+    def __init__(self, alpha=1.5, beta=1.5):
+        """
+        CutMix: Regularization Strategy to Train Strong Classifiers with Localizable Features, see https://https://arxiv.org/abs/1905.04899
+        Cutmix image and gt_bbbox/gt_score
+        Args:
+             alpha (float): alpha parameter of beta distribute
+             beta (float): beta parameter of beta distribute
+        """
+        super(CutmixImage, self).__init__()
+        self.alpha = alpha
+        self.beta = beta
+        if self.alpha <= 0.0:
+            raise ValueError("alpha shold be positive in {}".format(self))
+        if self.beta <= 0.0:
+            raise ValueError("beta shold be positive in {}".format(self))
+
+    def _rand_bbox(self, img1, img2, factor):
+        """ _rand_bbox """
+        h = max(img1.shape[0], img2.shape[0])
+        w = max(img1.shape[1], img2.shape[1])
+        cut_rat = np.sqrt(1. - factor)
+
+        cut_w = np.int(w * cut_rat)
+        cut_h = np.int(h * cut_rat)
+
+        # uniform
+        cx = np.random.randint(w)
+        cy = np.random.randint(h)
+
+        bbx1 = np.clip(cx - cut_w // 2, 0, w)
+        bby1 = np.clip(cy - cut_h // 2, 0, h)
+        bbx2 = np.clip(cx + cut_w // 2, 0, w)
+        bby2 = np.clip(cy + cut_h // 2, 0, h)
+
+        img_1 = np.zeros((h, w, img1.shape[2]), 'float32')
+        img_1[:img1.shape[0], :img1.shape[1], :] = \
+            img1.astype('float32')
+        img_2 = np.zeros((h, w, img2.shape[2]), 'float32')
+        img_2[:img2.shape[0], :img2.shape[1], :] = \
+            img2.astype('float32')
+        img_1[bby1:bby2, bbx1:bbx2, :] = img2[bby1:bby2, bbx1:bbx2, :]
+        return img_1
+
+    def __call__(self, sample, context=None):
+        if 'cutmix' not in sample:
+            return sample
+        factor = np.random.beta(self.alpha, self.beta)
+        factor = max(0.0, min(1.0, factor))
+        if factor >= 1.0:
+            sample.pop('cutmix')
+            return sample
+        if factor <= 0.0:
+            return sample['cutmix']
+        img1 = sample['image']
+        img2 = sample['cutmix']['image']
+        img = self._rand_bbox(img1, img2, factor)
+        gt_bbox1 = sample['gt_bbox']
+        gt_bbox2 = sample['cutmix']['gt_bbox']
+        gt_bbox = np.concatenate((gt_bbox1, gt_bbox2), axis=0)
+        gt_class1 = sample['gt_class']
+        gt_class2 = sample['cutmix']['gt_class']
+        gt_class = np.concatenate((gt_class1, gt_class2), axis=0)
+        gt_score1 = sample['gt_score']
+        gt_score2 = sample['cutmix']['gt_score']
+        gt_score = np.concatenate(
+            (gt_score1 * factor, gt_score2 * (1. - factor)), axis=0)
+        sample['image'] = img
+        sample['gt_bbox'] = gt_bbox
+        sample['gt_score'] = gt_score
+        sample['gt_class'] = gt_class
+        sample['h'] = img.shape[0]
+        sample['w'] = img.shape[1]
+        sample.pop('cutmix')
+        return sample
+
+
 @register_op
 class RandomInterpImage(BaseOperator):
     def __init__(self, target_size=0, max_size=0):
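Note (not part of the commit): CutmixImage draws a mixing factor from Beta(alpha, beta), cuts a rectangle whose side lengths scale with sqrt(1 - factor) out of the padded first image, pastes the second image into it, and concatenates the two label sets with gt_score weighted by factor and 1 - factor. A hedged sketch of the factor and paste-box computation alone:

    import numpy as np

    def cutmix_box(h, w, alpha=1.5, beta=1.5):
        """Sample a CutMix factor and the paste rectangle for an h x w canvas."""
        factor = float(np.clip(np.random.beta(alpha, beta), 0.0, 1.0))
        cut_rat = np.sqrt(1. - factor)
        cut_w, cut_h = int(w * cut_rat), int(h * cut_rat)
        cx, cy = np.random.randint(w), np.random.randint(h)
        x1, y1 = np.clip(cx - cut_w // 2, 0, w), np.clip(cy - cut_h // 2, 0, h)
        x2, y2 = np.clip(cx + cut_w // 2, 0, w), np.clip(cy + cut_h // 2, 0, h)
        return factor, (x1, y1, x2, y2)

    factor, box = cutmix_box(416, 416)
    # the two samples' gt_score arrays are then weighted by factor and (1 - factor)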
@@ -1129,7 +1382,6 @@ class RandomInterpImage(BaseOperator):
 @register_op
 class Resize(BaseOperator):
     """Resize image and bbox.
-
     Args:
         target_dim (int or list): target size, can be a single number or a list
             (for random shape).
@@ -1162,6 +1414,7 @@ class Resize(BaseOperator):
             scale_array = np.array([scale_x, scale_y] * 2, dtype=np.float32)
             sample['gt_bbox'] = np.clip(sample['gt_bbox'] * scale_array, 0,
                                         dim - 1)
+        sample['scale_factor'] = [scale_x, scale_y] * 2
         sample['h'] = resize_h
         sample['w'] = resize_w
@@ -1173,7 +1426,6 @@ class Resize(BaseOperator):
 @register_op
 class ColorDistort(BaseOperator):
     """Random color distortion.
-
     Args:
         hue (list): hue settings.
             in [lower, upper, probability] format.
@@ -1185,6 +1437,8 @@ class ColorDistort(BaseOperator):
             in [lower, upper, probability] format.
         random_apply (bool): whether to apply in random (yolo) or fixed (SSD)
             order.
+        hsv_format (bool): whether to convert color from BGR to HSV
+        random_channel (bool): whether to swap channels randomly
     """

     def __init__(self,
@@ -1192,13 +1446,17 @@ class ColorDistort(BaseOperator):
                  saturation=[0.5, 1.5, 0.5],
                  contrast=[0.5, 1.5, 0.5],
                  brightness=[0.5, 1.5, 0.5],
-                 random_apply=True):
+                 random_apply=True,
+                 hsv_format=False,
+                 random_channel=False):
         super(ColorDistort, self).__init__()
         self.hue = hue
         self.saturation = saturation
         self.contrast = contrast
         self.brightness = brightness
         self.random_apply = random_apply
+        self.hsv_format = hsv_format
+        self.random_channel = random_channel

     def apply_hue(self, img):
         low, high, prob = self.hue
@@ -1206,6 +1464,11 @@ class ColorDistort(BaseOperator):
             return img

         img = img.astype(np.float32)
+        if self.hsv_format:
+            img[..., 0] += random.uniform(low, high)
+            img[..., 0][img[..., 0] > 360] -= 360
+            img[..., 0][img[..., 0] < 0] += 360
+            return img

         # XXX works, but result differ from HSV version
         delta = np.random.uniform(low, high)
@@ -1225,8 +1488,10 @@ class ColorDistort(BaseOperator):
         if np.random.uniform(0., 1.) < prob:
             return img
         delta = np.random.uniform(low, high)

         img = img.astype(np.float32)
+        if self.hsv_format:
+            img[..., 1] *= delta
+            return img
         gray = img * np.array([[[0.299, 0.587, 0.114]]], dtype=np.float32)
         gray = gray.sum(axis=2, keepdims=True)
         gray *= (1.0 - delta)
@@ -1273,12 +1538,24 @@ class ColorDistort(BaseOperator):
             if np.random.randint(0, 2):
                 img = self.apply_contrast(img)
+                if self.hsv_format:
+                    img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
                 img = self.apply_saturation(img)
                 img = self.apply_hue(img)
+                if self.hsv_format:
+                    img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
             else:
+                if self.hsv_format:
+                    img = cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
                 img = self.apply_saturation(img)
                 img = self.apply_hue(img)
+                if self.hsv_format:
+                    img = cv2.cvtColor(img, cv2.COLOR_HSV2RGB)
                 img = self.apply_contrast(img)
+
+        if self.random_channel:
+            if np.random.randint(0, 2):
+                img = img[..., np.random.permutation(3)]
         sample['image'] = img
         return sample
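Note (not part of the commit): with hsv_format=True, ColorDistort converts the image to HSV once, applies saturation as a multiplicative factor on channel 1 and hue as an additive shift on channel 0 wrapped into [0, 360), then converts back; random_channel optionally permutes the colour channels afterwards. A sketch of the HSV-space hue/saturation step, assuming a float32 RGB input:

    import random
    import cv2
    import numpy as np

    def hsv_jitter(img_rgb, hue_delta=(-18, 18), sat_range=(0.5, 1.5)):
        """Jitter hue (additive, degrees) and saturation (multiplicative) in HSV space."""
        img = cv2.cvtColor(img_rgb.astype(np.float32), cv2.COLOR_RGB2HSV)
        img[..., 0] += random.uniform(*hue_delta)
        img[..., 0][img[..., 0] > 360] -= 360
        img[..., 0][img[..., 0] < 0] += 360
        img[..., 1] *= random.uniform(*sat_range)
        return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)

    out = hsv_jitter(np.random.rand(64, 64, 3).astype(np.float32) * 255)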
@@ -1346,7 +1623,6 @@ class CornerRandColor(ColorDistort):
 @register_op
 class NormalizePermute(BaseOperator):
     """Normalize and permute channel order.
-
     Args:
         mean (list): mean values in RGB order.
         std (list): std values in RGB order.
@@ -1376,7 +1652,6 @@ class NormalizePermute(BaseOperator):
 @register_op
 class RandomExpand(BaseOperator):
     """Random expand the canvas.
-
     Args:
         ratio (float): maximum expansion ratio.
         prob (float): probability to expand.
@@ -1468,7 +1743,6 @@ class RandomExpand(BaseOperator):
 @register_op
 class RandomCrop(BaseOperator):
     """Random crop image and bboxes.
-
     Args:
         aspect_ratio (list): aspect ratio of cropped region.
             in [min, max] format.
@@ -1595,11 +1869,23 @@ class RandomCrop(BaseOperator):
             found = False
             for i in range(self.num_attempts):
                 scale = np.random.uniform(*self.scaling)
-                min_ar, max_ar = self.aspect_ratio
-                aspect_ratio = np.random.uniform(
-                    max(min_ar, scale**2), min(max_ar, scale**-2))
-                crop_h = int(h * scale / np.sqrt(aspect_ratio))
-                crop_w = int(w * scale * np.sqrt(aspect_ratio))
+                if self.aspect_ratio is not None:
+                    min_ar, max_ar = self.aspect_ratio
+                    aspect_ratio = np.random.uniform(
+                        max(min_ar, scale**2), min(max_ar, scale**-2))
+                    h_scale = scale / np.sqrt(aspect_ratio)
+                    w_scale = scale * np.sqrt(aspect_ratio)
+                else:
+                    h_scale = np.random.uniform(*self.scaling)
+                    w_scale = np.random.uniform(*self.scaling)
+                crop_h = h * h_scale
+                crop_w = w * w_scale
+                if self.aspect_ratio is None:
+                    if crop_h / crop_w < 0.5 or crop_h / crop_w > 2.0:
+                        continue
+                crop_h = int(crop_h)
+                crop_w = int(crop_w)
                 crop_y = np.random.randint(0, h - crop_h)
                 crop_x = np.random.randint(0, w - crop_w)
                 crop_box = [crop_x, crop_y, crop_x + crop_w, crop_y + crop_h]
@@ -1751,7 +2037,6 @@ class BboxXYXY2XYWH(BaseOperator):
         return sample


 @register_op
 class Lighting(BaseOperator):
     """
     Lighting the imagen by eigenvalues and eigenvectors
@@ -1991,7 +2276,6 @@ class CornerRatio(BaseOperator):
 class RandomScaledCrop(BaseOperator):
     """Resize image and bbox based on long side (with optional random scaling),
        then crop or pad image to target size.
-
     Args:
         target_dim (int): target size.
         scale_range (list): random scale range.
@@ -2046,7 +2330,6 @@ class RandomScaledCrop(BaseOperator):
 @register_op
 class ResizeAndPad(BaseOperator):
     """Resize image and bbox, then pad image to target size.
-
     Args:
         target_dim (int): target size
         interp (int): interpolation method, default to `cv2.INTER_LINEAR`.
@@ -2085,7 +2368,6 @@ class ResizeAndPad(BaseOperator):
 @register_op
 class TargetAssign(BaseOperator):
     """Assign regression target and labels.
-
     Args:
         image_size (int or list): input image size, a single integer or list of
             [h, w]. Default: 512
@@ -2184,3 +2466,69 @@ class TargetAssign(BaseOperator):
         targets[matched_indices] = matched_targets
         sample['fg_num'] = np.array(len(matched_targets), dtype=np.int32)
         return sample
+
+
+@register_op
+class DebugVisibleImage(BaseOperator):
+    """
+    In debug mode, visualize images according to `gt_box`.
+    (Currently only supported when not cropping and flipping image.)
+    """
+
+    def __init__(self, output_dir='output/debug', is_normalized=False):
+        super(DebugVisibleImage, self).__init__()
+        self.is_normalized = is_normalized
+        self.output_dir = output_dir
+        if not os.path.isdir(output_dir):
+            os.makedirs(output_dir)
+        if not isinstance(self.is_normalized, bool):
+            raise TypeError("{}: input type is invalid.".format(self))
+
+    def __call__(self, sample, context=None):
+        image = Image.open(sample['im_file']).convert('RGB')
+        out_file_name = sample['im_file'].split('/')[-1]
+        width = sample['w']
+        height = sample['h']
+        gt_bbox = sample['gt_bbox']
+        gt_class = sample['gt_class']
+        draw = ImageDraw.Draw(image)
+        for i in range(gt_bbox.shape[0]):
+            if self.is_normalized:
+                gt_bbox[i][0] = gt_bbox[i][0] * width
+                gt_bbox[i][1] = gt_bbox[i][1] * height
+                gt_bbox[i][2] = gt_bbox[i][2] * width
+                gt_bbox[i][3] = gt_bbox[i][3] * height
+
+            xmin, ymin, xmax, ymax = gt_bbox[i]
+            draw.line(
+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+                 (xmin, ymin)],
+                width=2,
+                fill='green')
+            # draw label
+            text = str(gt_class[i][0])
+            tw, th = draw.textsize(text)
+            draw.rectangle(
+                [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
+            draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+
+        if 'gt_keypoint' in sample.keys():
+            gt_keypoint = sample['gt_keypoint']
+            if self.is_normalized:
+                for i in range(gt_keypoint.shape[1]):
+                    if i % 2:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * height
+                    else:
+                        gt_keypoint[:, i] = gt_keypoint[:, i] * width
+            for i in range(gt_keypoint.shape[0]):
+                keypoint = gt_keypoint[i]
+                for j in range(int(keypoint.shape[0] / 2)):
+                    x1 = round(keypoint[2 * j]).astype(np.int32)
+                    y1 = round(keypoint[2 * j + 1]).astype(np.int32)
+                    draw.ellipse(
+                        (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
+        save_path = os.path.join(self.output_dir, out_file_name)
+        image.save(save_path, quality=95)
+        return sample
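Note (not part of the commit): DebugVisibleImage re-opens the original im_file, draws the (possibly normalized) gt_bbox and gt_keypoint on it with PIL, and writes the result under output_dir; it is intended as the last op of a training sample_transforms list. A hedged usage sketch, assuming PaddleDetection at this commit is importable and using a dummy image created on the fly:

    import numpy as np
    from PIL import Image
    from ppdet.data.transform.operators import DebugVisibleImage  # assumes ppdet at this commit

    Image.fromarray(np.zeros((427, 640, 3), dtype=np.uint8)).save('demo_img.jpg')

    vis = DebugVisibleImage(output_dir='output/debug', is_normalized=False)
    sample = {
        'im_file': 'demo_img.jpg',
        'w': 640, 'h': 427,
        'gt_bbox': np.array([[100., 80., 300., 260.]], dtype=np.float32),
        'gt_class': np.array([[1]], dtype=np.int32),
    }
    vis(sample)   # writes output/debug/demo_img.jpg with the box and class id drawn on it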