Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
5148424a
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 2 年 前同步成功
通知
285
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
You need to sign in or sign up before continuing.
提交
5148424a
编写于
9月 21, 2020
作者:
H
haoyuying
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add yolov3_darknet_pascalvoc
上级
9a1eac7b
变更
8
显示空白变更内容
内联
并排
Showing
8 changed file
with
1171 addition
and
50 deletion
+1171
-50
demo/detection/yolov3_darknet53_pascalvoc/4026.jpeg
demo/detection/yolov3_darknet53_pascalvoc/4026.jpeg
+0
-0
demo/detection/yolov3_darknet53_pascalvoc/predict.py
demo/detection/yolov3_darknet53_pascalvoc/predict.py
+9
-0
demo/detection/yolov3_darknet53_pascalvoc/train.py
demo/detection/yolov3_darknet53_pascalvoc/train.py
+22
-0
hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/darknet.py
...ge/object_detection/yolov3_darknet53_pascalvoc/darknet.py
+144
-0
hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py
...age/object_detection/yolov3_darknet53_pascalvoc/module.py
+247
-0
paddlehub/datasets/pascalvoc.py
paddlehub/datasets/pascalvoc.py
+74
-0
paddlehub/module/cv_module.py
paddlehub/module/cv_module.py
+112
-13
paddlehub/process/transforms.py
paddlehub/process/transforms.py
+563
-37
未找到文件。
demo/detection/yolov3_darknet53_pascalvoc/4026.jpeg
0 → 100644
浏览文件 @
5148424a
83.0 KB
demo/detection/yolov3_darknet53_pascalvoc/predict.py
0 → 100644
浏览文件 @
5148424a
import
paddle
import
paddlehub
as
hub
if
__name__
==
'__main__'
:
place
=
paddle
.
CUDAPlace
(
0
)
paddle
.
disable_static
()
model
=
model
=
hub
.
Module
(
name
=
'yolov3_darknet53_pascalvoc'
,
is_train
=
False
)
model
.
eval
()
model
.
predict
(
imgpath
=
"/PATH/TO/IMAGE"
,
filelist
=
"/PATH/TO/JSON/FILE"
)
demo/detection/yolov3_darknet53_pascalvoc/train.py
0 → 100644
浏览文件 @
5148424a
import
paddle
import
paddlehub
as
hub
import
paddle.nn
as
nn
from
paddlehub.finetune.trainer
import
Trainer
from
paddlehub.datasets.pascalvoc
import
DetectionData
from
paddlehub.process.transforms
import
DetectTrainReader
,
DetectTestReader
if
__name__
==
"__main__"
:
place
=
paddle
.
CUDAPlace
(
0
)
paddle
.
disable_static
()
is_train
=
True
if
is_train
:
transform
=
DetectTrainReader
()
train_reader
=
DetectionData
(
transform
)
else
:
transform
=
DetectTestReader
()
test_reader
=
DetectionData
(
transform
)
model
=
hub
.
Module
(
name
=
'yolov3_darknet53_pascalvoc'
)
model
.
train
()
optimizer
=
paddle
.
optimizer
.
Adam
(
learning_rate
=
0.0001
,
parameters
=
model
.
parameters
())
trainer
=
Trainer
(
model
,
optimizer
,
checkpoint_dir
=
'test_ckpt_img_cls'
)
trainer
.
train
(
train_reader
,
epochs
=
5
,
batch_size
=
4
,
eval_dataset
=
train_reader
,
log_interval
=
1
,
save_interval
=
1
)
hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/darknet.py
0 → 100644
浏览文件 @
5148424a
import
paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.regularizer
import
L2Decay
from
paddle.nn.initializer
import
Normal
class
ConvBNLayer
(
nn
.
Layer
):
"""Basic block for Darknet"""
def
__init__
(
self
,
ch_in
:
int
,
ch_out
:
int
,
filter_size
:
int
=
3
,
stride
:
int
=
1
,
groups
:
int
=
1
,
padding
:
int
=
0
,
act
:
str
=
'leakly'
,
is_test
:
bool
=
False
):
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
conv
=
nn
.
Conv2d
(
ch_in
,
ch_out
,
filter_size
,
padding
=
padding
,
stride
=
stride
,
groups
=
groups
,
weight_attr
=
paddle
.
ParamAttr
(
initializer
=
Normal
(
0.
,
0.02
)),
bias_attr
=
False
)
self
.
batch_norm
=
nn
.
BatchNorm
(
num_channels
=
ch_out
,
is_test
=
is_test
,
param_attr
=
paddle
.
ParamAttr
(
initializer
=
Normal
(
0.
,
0.02
),
regularizer
=
L2Decay
(
0.
)))
self
.
act
=
act
def
forward
(
self
,
inputs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
out
=
self
.
conv
(
inputs
)
out
=
self
.
batch_norm
(
out
)
if
self
.
act
==
"leakly"
:
out
=
F
.
leaky_relu
(
x
=
out
,
negative_slope
=
0.1
)
return
out
class
DownSample
(
nn
.
Layer
):
"""Downsample block for Darknet"""
def
__init__
(
self
,
ch_in
:
int
,
ch_out
:
int
,
filter_size
:
int
=
3
,
stride
:
int
=
2
,
padding
:
int
=
1
,
is_test
:
bool
=
False
):
super
(
DownSample
,
self
).
__init__
()
self
.
conv_bn_layer
=
ConvBNLayer
(
ch_in
=
ch_in
,
ch_out
=
ch_out
,
filter_size
=
filter_size
,
stride
=
stride
,
padding
=
padding
,
is_test
=
is_test
)
self
.
ch_out
=
ch_out
def
forward
(
self
,
inputs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
out
=
self
.
conv_bn_layer
(
inputs
)
return
out
class
BasicBlock
(
nn
.
Layer
):
"""Basic residual block for Darknet"""
def
__init__
(
self
,
ch_in
:
int
,
ch_out
:
int
,
is_test
:
bool
=
False
):
super
(
BasicBlock
,
self
).
__init__
()
self
.
conv1
=
ConvBNLayer
(
ch_in
=
ch_in
,
ch_out
=
ch_out
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
is_test
=
is_test
)
self
.
conv2
=
ConvBNLayer
(
ch_in
=
ch_out
,
ch_out
=
ch_out
*
2
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
,
is_test
=
is_test
)
def
forward
(
self
,
inputs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
conv1
=
self
.
conv1
(
inputs
)
conv2
=
self
.
conv2
(
conv1
)
out
=
paddle
.
elementwise_add
(
x
=
inputs
,
y
=
conv2
,
act
=
None
)
return
out
class
LayerWarp
(
nn
.
Layer
):
"""Warp layer composed by basic residual blocks"""
def
__init__
(
self
,
ch_in
:
int
,
ch_out
:
int
,
count
:
int
,
is_test
:
bool
=
False
):
super
(
LayerWarp
,
self
).
__init__
()
self
.
basicblock0
=
BasicBlock
(
ch_in
,
ch_out
,
is_test
=
is_test
)
self
.
res_out_list
=
[]
for
i
in
range
(
1
,
count
):
res_out
=
self
.
add_sublayer
(
"basic_block_%d"
%
(
i
),
BasicBlock
(
ch_out
*
2
,
ch_out
,
is_test
=
is_test
))
self
.
res_out_list
.
append
(
res_out
)
self
.
ch_out
=
ch_out
def
forward
(
self
,
inputs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
y
=
self
.
basicblock0
(
inputs
)
for
basic_block_i
in
self
.
res_out_list
:
y
=
basic_block_i
(
y
)
return
y
DarkNet_cfg
=
{
53
:
([
1
,
2
,
8
,
8
,
4
])}
class
DarkNet53_conv_body
(
nn
.
Layer
):
"""Darknet53
Args:
ch_in(int): Input channels, default is 3.
is_test (bool): Set the test mode, default is True.
"""
def
__init__
(
self
,
ch_in
:
int
=
3
,
is_test
:
bool
=
False
):
super
(
DarkNet53_conv_body
,
self
).
__init__
()
self
.
stages
=
DarkNet_cfg
[
53
]
self
.
stages
=
self
.
stages
[
0
:
5
]
self
.
conv0
=
ConvBNLayer
(
ch_in
=
ch_in
,
ch_out
=
32
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
,
is_test
=
is_test
)
self
.
downsample0
=
DownSample
(
ch_in
=
32
,
ch_out
=
32
*
2
,
is_test
=
is_test
)
self
.
darknet53_conv_block_list
=
[]
self
.
downsample_list
=
[]
ch_in
=
[
64
,
128
,
256
,
512
,
1024
]
for
i
,
stage
in
enumerate
(
self
.
stages
):
conv_block
=
self
.
add_sublayer
(
"stage_%d"
%
(
i
),
LayerWarp
(
int
(
ch_in
[
i
]),
32
*
(
2
**
i
),
stage
,
is_test
=
is_test
))
self
.
darknet53_conv_block_list
.
append
(
conv_block
)
for
i
in
range
(
len
(
self
.
stages
)
-
1
):
downsample
=
self
.
add_sublayer
(
"stage_%d_downsample"
%
i
,
DownSample
(
ch_in
=
32
*
(
2
**
(
i
+
1
)),
ch_out
=
32
*
(
2
**
(
i
+
2
)),
is_test
=
is_test
))
self
.
downsample_list
.
append
(
downsample
)
def
forward
(
self
,
inputs
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
out
=
self
.
conv0
(
inputs
)
out
=
self
.
downsample0
(
out
)
blocks
=
[]
for
i
,
conv_block_i
in
enumerate
(
self
.
darknet53_conv_block_list
):
out
=
conv_block_i
(
out
)
blocks
.
append
(
out
)
if
i
<
len
(
self
.
stages
)
-
1
:
out
=
self
.
downsample_list
[
i
](
out
)
return
blocks
[
-
1
:
-
4
:
-
1
]
hub_module/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py
0 → 100644
浏览文件 @
5148424a
import
os
import
paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn.initializer
import
Normal
,
Constant
from
paddle.regularizer
import
L2Decay
from
pycocotools.coco
import
COCO
from
darknet
import
DarkNet53_conv_body
from
darknet
import
ConvBNLayer
from
paddlehub.module.cv_module
import
Yolov3Module
from
paddlehub.process.transforms
import
DetectTrainReader
,
DetectTestReader
from
paddlehub.module.module
import
moduleinfo
class
YoloDetectionBlock
(
nn
.
Layer
):
"""Basic block for Yolov3"""
def
__init__
(
self
,
ch_in
:
int
,
channel
:
int
,
is_test
:
bool
=
True
):
super
(
YoloDetectionBlock
,
self
).
__init__
()
assert
channel
%
2
==
0
,
\
"channel {} cannot be divided by 2"
.
format
(
channel
)
self
.
conv0
=
ConvBNLayer
(
ch_in
=
ch_in
,
ch_out
=
channel
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
is_test
=
is_test
)
self
.
conv1
=
ConvBNLayer
(
ch_in
=
channel
,
ch_out
=
channel
*
2
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
,
is_test
=
is_test
)
self
.
conv2
=
ConvBNLayer
(
ch_in
=
channel
*
2
,
ch_out
=
channel
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
is_test
=
is_test
)
self
.
conv3
=
ConvBNLayer
(
ch_in
=
channel
,
ch_out
=
channel
*
2
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
,
is_test
=
is_test
)
self
.
route
=
ConvBNLayer
(
ch_in
=
channel
*
2
,
ch_out
=
channel
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
is_test
=
is_test
)
self
.
tip
=
ConvBNLayer
(
ch_in
=
channel
,
ch_out
=
channel
*
2
,
filter_size
=
3
,
stride
=
1
,
padding
=
1
,
is_test
=
is_test
)
def
forward
(
self
,
inputs
):
out
=
self
.
conv0
(
inputs
)
out
=
self
.
conv1
(
out
)
out
=
self
.
conv2
(
out
)
out
=
self
.
conv3
(
out
)
route
=
self
.
route
(
out
)
tip
=
self
.
tip
(
route
)
return
route
,
tip
class
Upsample
(
nn
.
Layer
):
"""Upsample block for Yolov3"""
def
__init__
(
self
,
scale
:
int
=
2
):
super
(
Upsample
,
self
).
__init__
()
self
.
scale
=
scale
def
forward
(
self
,
inputs
:
paddle
.
Tensor
):
shape_nchw
=
paddle
.
to_tensor
(
inputs
.
shape
)
shape_hw
=
paddle
.
slice
(
shape_nchw
,
axes
=
[
0
],
starts
=
[
2
],
ends
=
[
4
])
shape_hw
.
stop_gradient
=
True
in_shape
=
paddle
.
cast
(
shape_hw
,
dtype
=
'int32'
)
out_shape
=
in_shape
*
self
.
scale
out_shape
.
stop_gradient
=
True
out
=
F
.
resize_nearest
(
input
=
inputs
,
scale
=
self
.
scale
,
actual_shape
=
out_shape
)
return
out
@
moduleinfo
(
name
=
"yolov3_darknet53_pascalvoc"
,
type
=
"CV/image_editing"
,
author
=
"paddlepaddle"
,
author_email
=
""
,
summary
=
"Yolov3 is a detection model, this module is trained with VOC dataset."
,
version
=
"1.0.0"
,
meta
=
Yolov3Module
)
class
YOLOv3
(
nn
.
Layer
):
"""YOLOV3 for detection
Args:
ch_in(int): Input channels, default is 3.
class_num(int): Categories for detection,if dataset is voc, class_num is 20.
ignore_thresh(float): The ignore threshold to ignore confidence loss.
valid_thresh(float): Threshold to filter out bounding boxes with low confidence score.
nms_topk(int): Maximum number of detections to be kept according to the confidences after the filtering
detections based on score_threshold.
nms_posk(int): Number of total bboxes to be kept per image after NMS step. -1 means keeping all bboxes after NMS
step.
nms_thresh (float): The threshold to be used in NMS. Default: 0.3.
is_train (bool): Set the train mode, default is True.
load_checkpoint(str): Whether to load checkpoint.
"""
def
__init__
(
self
,
ch_in
:
int
=
3
,
class_num
:
int
=
20
,
ignore_thresh
:
float
=
0.7
,
valid_thresh
:
float
=
0.005
,
nms_topk
:
int
=
400
,
nms_posk
:
int
=
100
,
nms_thresh
:
float
=
0.45
,
is_train
:
bool
=
True
,
load_checkpoint
:
str
=
None
):
super
(
YOLOv3
,
self
).
__init__
()
self
.
is_train
=
is_train
self
.
block
=
DarkNet53_conv_body
(
ch_in
=
ch_in
,
is_test
=
not
self
.
is_train
)
self
.
block_outputs
=
[]
self
.
yolo_blocks
=
[]
self
.
route_blocks_2
=
[]
self
.
anchor_masks
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]]
self
.
anchors
=
[
10
,
13
,
16
,
30
,
33
,
23
,
30
,
61
,
62
,
45
,
59
,
119
,
116
,
90
,
156
,
198
,
373
,
326
]
self
.
class_num
=
class_num
self
.
ignore_thresh
=
ignore_thresh
self
.
valid_thresh
=
valid_thresh
self
.
nms_topk
=
nms_topk
self
.
nms_posk
=
nms_posk
self
.
nms_thresh
=
nms_thresh
ch_in_list
=
[
1024
,
768
,
384
]
for
i
in
range
(
3
):
yolo_block
=
self
.
add_sublayer
(
"yolo_detecton_block_%d"
%
(
i
),
YoloDetectionBlock
(
ch_in_list
[
i
],
channel
=
512
//
(
2
**
i
),
is_test
=
not
self
.
is_train
))
self
.
yolo_blocks
.
append
(
yolo_block
)
num_filters
=
len
(
self
.
anchor_masks
[
i
])
*
(
self
.
class_num
+
5
)
block_out
=
self
.
add_sublayer
(
"block_out_%d"
%
(
i
),
nn
.
Conv2d
(
1024
//
(
2
**
i
),
num_filters
,
1
,
stride
=
1
,
padding
=
0
,
weight_attr
=
paddle
.
ParamAttr
(
initializer
=
Normal
(
0.
,
0.02
)),
bias_attr
=
paddle
.
ParamAttr
(
initializer
=
Constant
(
0.0
),
regularizer
=
L2Decay
(
0.
))))
self
.
block_outputs
.
append
(
block_out
)
if
i
<
2
:
route
=
self
.
add_sublayer
(
"route2_%d"
%
i
,
ConvBNLayer
(
ch_in
=
512
//
(
2
**
i
),
ch_out
=
256
//
(
2
**
i
),
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
is_test
=
(
not
self
.
is_train
)))
self
.
route_blocks_2
.
append
(
route
)
self
.
upsample
=
Upsample
()
if
load_checkpoint
is
not
None
:
model_dict
=
paddle
.
load
(
load_checkpoint
)[
0
]
self
.
set_dict
(
model_dict
)
print
(
"load custom checkpoint success"
)
else
:
checkpoint
=
os
.
path
.
join
(
self
.
directory
,
'yolov3_70000.pdparams'
)
if
not
os
.
path
.
exists
(
checkpoint
):
os
.
system
(
'wget https://bj.bcebos.com/paddlehub/model/image/object_detection/yolov3_70000.pdparams -O '
\
+
checkpoint
)
model_dict
=
paddle
.
load
(
checkpoint
)[
0
]
self
.
set_dict
(
model_dict
)
print
(
"load pretrained checkpoint success"
)
def
transform
(
self
,
img
:
paddle
.
Tensor
,
size
:
int
):
if
self
.
is_train
:
transforms
=
DetectTrainReader
()
else
:
transforms
=
DetectTestReader
()
return
transforms
(
img
,
size
)
def
get_label_infos
(
self
,
file_list
:
str
):
self
.
COCO
=
COCO
(
file_list
)
label_names
=
[]
categories
=
self
.
COCO
.
loadCats
(
self
.
COCO
.
getCatIds
())
for
category
in
categories
:
label_names
.
append
(
category
[
'name'
])
return
label_names
def
forward
(
self
,
inputs
:
paddle
.
Tensor
,
gtbox
:
paddle
.
Tensor
=
None
,
gtlabel
:
paddle
.
Tensor
=
None
,
gtscore
:
paddle
.
Tensor
=
None
,
im_shape
:
paddle
.
Tensor
=
None
):
self
.
gtbox
=
gtbox
self
.
gtlabel
=
gtlabel
self
.
gtscore
=
gtscore
self
.
im_shape
=
im_shape
self
.
outputs
=
[]
self
.
boxes
=
[]
self
.
scores
=
[]
self
.
losses
=
[]
self
.
pred
=
[]
self
.
downsample
=
32
blocks
=
self
.
block
(
inputs
)
route
=
None
for
i
,
block
in
enumerate
(
blocks
):
if
i
>
0
:
block
=
paddle
.
concat
([
route
,
block
],
axis
=
1
)
route
,
tip
=
self
.
yolo_blocks
[
i
](
block
)
block_out
=
self
.
block_outputs
[
i
](
tip
)
self
.
outputs
.
append
(
block_out
)
if
i
<
2
:
route
=
self
.
route_blocks_2
[
i
](
route
)
route
=
self
.
upsample
(
route
)
for
i
,
out
in
enumerate
(
self
.
outputs
):
anchor_mask
=
self
.
anchor_masks
[
i
]
if
self
.
is_train
:
loss
=
F
.
yolov3_loss
(
x
=
out
,
gt_box
=
self
.
gtbox
,
gt_label
=
self
.
gtlabel
,
gt_score
=
self
.
gtscore
,
anchors
=
self
.
anchors
,
anchor_mask
=
anchor_mask
,
class_num
=
self
.
class_num
,
ignore_thresh
=
self
.
ignore_thresh
,
downsample_ratio
=
self
.
downsample
,
use_label_smooth
=
False
)
else
:
loss
=
paddle
.
to_tensor
(
0.0
)
self
.
losses
.
append
(
paddle
.
reduce_mean
(
loss
))
mask_anchors
=
[]
for
m
in
anchor_mask
:
mask_anchors
.
append
((
self
.
anchors
[
2
*
m
]))
mask_anchors
.
append
(
self
.
anchors
[
2
*
m
+
1
])
boxes
,
scores
=
F
.
yolo_box
(
x
=
out
,
img_size
=
self
.
im_shape
,
anchors
=
mask_anchors
,
class_num
=
self
.
class_num
,
conf_thresh
=
self
.
valid_thresh
,
downsample_ratio
=
self
.
downsample
,
name
=
"yolo_box"
+
str
(
i
))
self
.
boxes
.
append
(
boxes
)
self
.
scores
.
append
(
paddle
.
transpose
(
scores
,
perm
=
[
0
,
2
,
1
]))
self
.
downsample
//=
2
for
i
in
range
(
self
.
boxes
[
0
].
shape
[
0
]):
yolo_boxes
=
paddle
.
unsqueeze
(
paddle
.
concat
([
self
.
boxes
[
0
][
i
],
self
.
boxes
[
1
][
i
],
self
.
boxes
[
2
][
i
]],
axis
=
0
),
0
)
yolo_scores
=
paddle
.
unsqueeze
(
paddle
.
concat
([
self
.
scores
[
0
][
i
],
self
.
scores
[
1
][
i
],
self
.
scores
[
2
][
i
]],
axis
=
1
),
0
)
pred
=
F
.
multiclass_nms
(
bboxes
=
yolo_boxes
,
scores
=
yolo_scores
,
score_threshold
=
self
.
valid_thresh
,
nms_top_k
=
self
.
nms_topk
,
keep_top_k
=
self
.
nms_posk
,
nms_threshold
=
self
.
nms_thresh
,
background_label
=-
1
)
self
.
pred
.
append
(
pred
)
return
sum
(
self
.
losses
),
self
.
pred
paddlehub/datasets/pascalvoc.py
0 → 100644
浏览文件 @
5148424a
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
from
typing
import
Callable
import
paddle
from
paddlehub.env
import
DATA_HOME
from
pycocotools.coco
import
COCO
from
paddlehub.process.transforms
import
DetectCatagory
,
ParseImages
class
DetectionData
(
paddle
.
io
.
Dataset
):
"""
Dataset for image detection.
Args:
transform(callmethod) : The method of preprocess images.
mode(str): The mode for preparing dataset.
Returns:
DataSet: An iterable object for data iterating
"""
def
__init__
(
self
,
transform
:
Callable
,
size
:
int
=
416
,
mode
:
str
=
'train'
):
self
.
mode
=
mode
self
.
transform
=
transform
self
.
size
=
size
if
self
.
mode
==
'train'
:
train_file_list
=
'annotations/instances_train2017.json'
train_data_dir
=
'train2017'
self
.
train_file_list
=
os
.
path
.
join
(
DATA_HOME
,
'voc'
,
train_file_list
)
self
.
train_data_dir
=
os
.
path
.
join
(
DATA_HOME
,
'voc'
,
train_data_dir
)
self
.
COCO
=
COCO
(
self
.
train_file_list
)
self
.
img_dir
=
self
.
train_data_dir
elif
self
.
mode
==
'test'
:
val_file_list
=
'annotations/instances_val2017.json'
val_data_dir
=
'val2017'
self
.
val_file_list
=
os
.
path
.
join
(
DATA_HOME
,
'voc'
,
val_file_list
)
self
.
val_data_dir
=
os
.
path
.
join
(
DATA_HOME
,
'voc'
,
val_data_dir
)
self
.
COCO
=
COCO
(
self
.
val_file_list
)
self
.
img_dir
=
self
.
val_data_dir
parse_dataset_catagory
=
DetectCatagory
(
self
.
COCO
,
self
.
img_dir
)
self
.
label_names
,
self
.
label_ids
,
self
.
category_to_id_map
=
parse_dataset_catagory
()
parse_images
=
ParseImages
(
self
.
COCO
,
self
.
mode
,
self
.
img_dir
,
self
.
category_to_id_map
)
self
.
data
=
parse_images
()
def
__getitem__
(
self
,
idx
:
int
):
if
self
.
mode
==
"train"
:
img
=
self
.
data
[
idx
]
out_img
,
gt_boxes
,
gt_labels
,
gt_scores
=
self
.
transform
(
img
,
416
)
return
out_img
,
gt_boxes
,
gt_labels
,
gt_scores
elif
self
.
mode
==
"test"
:
img
=
self
.
data
[
idx
]
out_img
,
id
,
(
h
,
w
)
=
self
.
transform
(
img
)
return
out_img
,
id
,
(
h
,
w
)
def
__len__
(
self
):
return
len
(
self
.
data
)
paddlehub/module/cv_module.py
浏览文件 @
5148424a
...
...
@@ -26,7 +26,7 @@ from PIL import Image
from
paddlehub.module.module
import
serving
,
RunModule
from
paddlehub.utils.utils
import
base64_to_cv2
from
paddlehub.process.transforms
import
ConvertColorSpace
,
ColorPostprocess
,
Resize
from
paddlehub.process.transforms
import
ConvertColorSpace
,
ColorPostprocess
,
Resize
,
BoxTool
class
ImageServing
(
object
):
...
...
@@ -141,7 +141,7 @@ class ImageColorizeModule(RunModule, ImageServing):
visual_ret
[
'real'
]
=
process
(
real
)
fake
=
lab2rgb
(
np
.
concatenate
((
batch
[
0
].
numpy
(),
out_reg
.
numpy
()),
axis
=
1
))[
i
]
visual_ret
[
'fake_reg'
]
=
process
(
fake
)
mse
=
np
.
mean
((
visual_ret
[
'real'
]
*
1.0
-
visual_ret
[
'fake_reg'
]
*
1.0
)
**
2
)
mse
=
np
.
mean
((
visual_ret
[
'real'
]
*
1.0
-
visual_ret
[
'fake_reg'
]
*
1.0
)
**
2
)
psnr_value
=
20
*
np
.
log10
(
255.
/
np
.
sqrt
(
mse
))
psnrs
.
append
(
psnr_value
)
psnr
=
paddle
.
to_variable
(
np
.
array
(
psnrs
))
...
...
@@ -186,7 +186,106 @@ class ImageColorizeModule(RunModule, ImageServing):
visual_gray
=
Image
.
fromarray
(
visual_ret
[
'fake_reg'
])
visual_gray
.
save
(
fake_path
)
mse
=
np
.
mean
((
visual_ret
[
'real'
]
*
1.0
-
visual_ret
[
'fake_reg'
]
*
1.0
)
**
2
)
mse
=
np
.
mean
((
visual_ret
[
'real'
]
*
1.0
-
visual_ret
[
'fake_reg'
]
*
1.0
)
**
2
)
psnr_value
=
20
*
np
.
log10
(
255.
/
np
.
sqrt
(
mse
))
result
.
append
(
visual_ret
)
return
result
class
Yolov3Module
(
RunModule
,
ImageServing
):
def
training_step
(
self
,
batch
:
int
,
batch_idx
:
int
)
->
dict
:
'''
One step for training, which should be called as forward computation.
Args:
batch(list[paddle.Tensor]): The one batch data, which contains images, ground truth boxes, labels and scores.
batch_idx(int): The index of batch.
Returns:
results(dict): The model outputs, such as loss.
'''
return
self
.
validation_step
(
batch
,
batch_idx
)
def
validation_step
(
self
,
batch
:
int
,
batch_idx
:
int
)
->
dict
:
'''
One step for validation, which should be called as forward computation.
Args:
batch(list[paddle.Tensor]): The one batch data, which contains images, ground truth boxes, labels and scores.
batch_idx(int): The index of batch.
Returns:
results(dict) : The model outputs, such as metrics.
'''
ious
=
[]
boxtool
=
BoxTool
()
img
=
batch
[
0
].
astype
(
'float32'
)
B
,
C
,
W
,
H
=
img
.
shape
im_shape
=
np
.
array
([(
W
,
H
)]
*
B
).
astype
(
'int32'
)
im_shape
=
paddle
.
to_tensor
(
im_shape
)
gt_box
=
batch
[
1
].
astype
(
'float32'
)
gt_label
=
batch
[
2
].
astype
(
'int32'
)
gt_score
=
batch
[
3
].
astype
(
"float32"
)
loss
,
pred
=
self
(
img
,
gt_box
,
gt_label
,
gt_score
,
im_shape
)
for
i
in
range
(
len
(
pred
)):
bboxes
=
pred
[
i
].
numpy
()
labels
=
bboxes
[:,
0
].
astype
(
'int32'
)
scores
=
bboxes
[:,
1
].
astype
(
'float32'
)
boxes
=
bboxes
[:,
2
:].
astype
(
'float32'
)
iou
=
[]
for
j
,
(
box
,
score
,
label
)
in
enumerate
(
zip
(
boxes
,
scores
,
labels
)):
x1
,
y1
,
x2
,
y2
=
box
w
=
x2
-
x1
+
1
h
=
y2
-
y1
+
1
bbox
=
[
x1
,
y1
,
w
,
h
]
bbox
=
np
.
expand_dims
(
boxtool
.
coco_anno_box_to_center_relative
(
bbox
,
H
,
W
),
0
)
gt
=
gt_box
[
i
].
numpy
()
iou
.
append
(
max
(
boxtool
.
box_iou_xywh
(
bbox
,
gt
)))
ious
.
append
(
max
(
iou
))
ious
=
paddle
.
to_tensor
(
np
.
array
(
ious
))
return
{
'loss'
:
loss
,
'metrics'
:
{
'iou'
:
ious
}}
def
predict
(
self
,
imgpath
:
str
,
filelist
:
str
,
visualization
:
bool
=
True
,
save_path
:
str
=
'result'
):
'''
Detect images
Args:
imgpath(str): Image path .
filelist(str): Path to get label name.
visualization(bool): Whether to save result image.
save_path(str) : Path to save detected images.
Returns:
boxes(np.ndarray): Predict box information.
scores(np.ndarray): Predict score.
labels(np.ndarray): Predict labels.
'''
boxtool
=
BoxTool
()
img
=
{}
img
[
'image'
]
=
imgpath
img
[
'id'
]
=
0
im
,
im_id
,
im_shape
=
self
.
transform
(
img
,
416
)
label_names
=
self
.
get_label_infos
(
filelist
)
img_data
=
np
.
array
([
im
]).
astype
(
'float32'
)
img_data
=
paddle
.
to_tensor
(
img_data
)
im_shape
=
np
.
array
([
im_shape
]).
astype
(
'int32'
)
im_shape
=
paddle
.
to_tensor
(
im_shape
)
output
,
pred
=
self
(
img_data
,
None
,
None
,
None
,
im_shape
)
for
i
in
range
(
len
(
pred
)):
bboxes
=
pred
[
i
].
numpy
()
labels
=
bboxes
[:,
0
].
astype
(
'int32'
)
scores
=
bboxes
[:,
1
].
astype
(
'float32'
)
boxes
=
bboxes
[:,
2
:].
astype
(
'float32'
)
if
visualization
:
boxtool
.
draw_boxes_on_image
(
imgpath
,
boxes
,
scores
,
labels
,
label_names
,
0.5
)
return
boxes
,
scores
,
labels
paddlehub/process/transforms.py
浏览文件 @
5148424a
...
...
@@ -12,16 +12,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
random
import
copy
from
typing
import
Callable
from
collections
import
OrderedDict
import
cv2
import
numpy
as
np
from
PIL
import
Image
import
matplotlib
from
PIL
import
Image
,
ImageEnhance
from
matplotlib
import
pyplot
as
plt
from
paddlehub.process.functional
import
*
matplotlib
.
use
(
'Agg'
)
class
Compose
:
def
__init__
(
self
,
transforms
,
to_rgb
=
True
,
stay_rgb
=
False
):
...
...
@@ -52,8 +58,6 @@ class Compose:
return
im
class
RandomHorizontalFlip
:
def
__init__
(
self
,
prob
=
0.5
):
self
.
prob
=
prob
...
...
@@ -239,8 +243,13 @@ class RandomPaddingCrop:
pad_height
=
max
(
crop_height
-
img_height
,
0
)
pad_width
=
max
(
crop_width
-
img_width
,
0
)
if
(
pad_height
>
0
or
pad_width
>
0
):
im
=
cv2
.
copyMakeBorder
(
im
,
0
,
pad_height
,
0
,
pad_width
,
cv2
.
BORDER_CONSTANT
,
value
=
self
.
im_padding_value
)
im
=
cv2
.
copyMakeBorder
(
im
,
0
,
pad_height
,
0
,
pad_width
,
cv2
.
BORDER_CONSTANT
,
value
=
self
.
im_padding_value
)
if
crop_height
>
0
and
crop_width
>
0
:
h_off
=
np
.
random
.
randint
(
img_height
-
crop_height
+
1
)
...
...
@@ -295,8 +304,7 @@ class RandomRotation:
r
[
0
,
2
]
+=
(
nw
/
2
)
-
cx
r
[
1
,
2
]
+=
(
nh
/
2
)
-
cy
dsize
=
(
nw
,
nh
)
im
=
cv2
.
warpAffine
(
im
,
im
=
cv2
.
warpAffine
(
im
,
r
,
dsize
=
dsize
,
flags
=
cv2
.
INTER_LINEAR
,
...
...
@@ -429,7 +437,7 @@ class ConvertColorSpace:
"""
mask
=
(
rgb
>
0.04045
)
np
.
seterr
(
invalid
=
'ignore'
)
rgb
=
(((
rgb
+
.
055
)
/
1.055
)
**
2.4
)
*
mask
+
rgb
/
12.92
*
(
1
-
mask
)
rgb
=
(((
rgb
+
.
055
)
/
1.055
)
**
2.4
)
*
mask
+
rgb
/
12.92
*
(
1
-
mask
)
rgb
=
np
.
nan_to_num
(
rgb
)
x
=
.
412453
*
rgb
[:,
0
,
:,
:]
+
.
357580
*
rgb
[:,
1
,
:,
:]
+
.
180423
*
rgb
[:,
2
,
:,
:]
y
=
.
212671
*
rgb
[:,
0
,
:,
:]
+
.
715160
*
rgb
[:,
1
,
:,
:]
+
.
072169
*
rgb
[:,
2
,
:,
:]
...
...
@@ -490,7 +498,7 @@ class ConvertColorSpace:
rgb
=
np
.
maximum
(
rgb
,
0
)
# sometimes reaches a small negative number, which causes NaNs
mask
=
(
rgb
>
.
0031308
).
astype
(
np
.
float32
)
np
.
seterr
(
invalid
=
'ignore'
)
out
=
(
1.055
*
(
rgb
**
(
1.
/
2.4
))
-
0.055
)
*
mask
+
12.92
*
rgb
*
(
1
-
mask
)
out
=
(
1.055
*
(
rgb
**
(
1.
/
2.4
))
-
0.055
)
*
mask
+
12.92
*
rgb
*
(
1
-
mask
)
out
=
np
.
nan_to_num
(
out
)
return
out
...
...
@@ -511,7 +519,7 @@ class ConvertColorSpace:
out
=
np
.
concatenate
((
x_int
[:,
None
,
:,
:],
y_int
[:,
None
,
:,
:],
z_int
[:,
None
,
:,
:]),
axis
=
1
)
mask
=
(
out
>
.
2068966
).
astype
(
np
.
float32
)
np
.
seterr
(
invalid
=
'ignore'
)
out
=
(
out
**
3.
)
*
mask
+
(
out
-
16.
/
116.
)
/
7.787
*
(
1
-
mask
)
out
=
(
out
**
3.
)
*
mask
+
(
out
-
16.
/
116.
)
/
7.787
*
(
1
-
mask
)
out
=
np
.
nan_to_num
(
out
)
sc
=
np
.
array
((
0.95047
,
1.
,
1.08883
))[
None
,
:,
None
,
None
]
out
=
out
*
sc
...
...
@@ -566,7 +574,17 @@ class ColorizeHint:
self
.
use_avg
=
use_avg
def
__call__
(
self
,
data
:
np
.
ndarray
,
hint
:
np
.
ndarray
,
mask
:
np
.
ndarray
):
sample_Ps
=
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
]
sample_Ps
=
[
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
]
self
.
data
=
data
self
.
hint
=
hint
self
.
mask
=
mask
...
...
@@ -593,9 +611,11 @@ class ColorizeHint:
# add color point
if
self
.
use_avg
:
# embed()
hint
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
]
=
np
.
mean
(
np
.
mean
(
data
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
],
axis
=
2
,
keepdims
=
True
),
axis
=
1
,
keepdims
=
True
).
reshape
(
1
,
C
,
1
,
1
)
hint
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
]
=
np
.
mean
(
np
.
mean
(
data
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
],
axis
=
2
,
keepdims
=
True
),
axis
=
1
,
keepdims
=
True
).
reshape
(
1
,
C
,
1
,
1
)
else
:
hint
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
]
=
data
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
]
mask
[
nn
,
:,
h
:
h
+
P
,
w
:
w
+
P
]
=
1
...
...
@@ -641,7 +661,8 @@ class ColorizePreprocess:
data(dict):The preprocessed data for colorization.
"""
def
__init__
(
self
,
ab_thresh
:
float
=
0.
,
def
__init__
(
self
,
ab_thresh
:
float
=
0.
,
p
:
float
=
.
125
,
num_points
:
int
=
None
,
samp
:
str
=
'normal'
,
...
...
@@ -668,11 +689,14 @@ class ColorizePreprocess:
"""
data
=
{}
A
=
2
*
110
/
10
+
1
data
[
'A'
]
=
data_lab
[:,
[
0
,
],
:,
:]
data
[
'A'
]
=
data_lab
[:,
[
0
,
],
:,
:]
data
[
'B'
]
=
data_lab
[:,
1
:,
:,
:]
if
self
.
ab_thresh
>
0
:
# mask out grayscale images
thresh
=
1.
*
self
.
ab_thresh
/
110
mask
=
np
.
sum
(
np
.
abs
(
np
.
max
(
np
.
max
(
data
[
'B'
],
axis
=
3
),
axis
=
2
)
-
np
.
min
(
np
.
min
(
data
[
'B'
],
axis
=
3
),
axis
=
2
)),
axis
=
1
)
mask
=
np
.
sum
(
np
.
abs
(
np
.
max
(
np
.
max
(
data
[
'B'
],
axis
=
3
),
axis
=
2
)
-
np
.
min
(
np
.
min
(
data
[
'B'
],
axis
=
3
),
axis
=
2
)),
axis
=
1
)
mask
=
(
mask
>=
thresh
)
data
[
'A'
]
=
data
[
'A'
][
mask
,
:,
:,
:]
data
[
'B'
]
=
data
[
'B'
][
mask
,
:,
:,
:]
...
...
@@ -713,3 +737,505 @@ class ColorPostprocess:
img
=
np
.
clip
(
img
,
0
,
1
)
*
255
img
=
img
.
astype
(
self
.
type
)
return
img
class
DetectCatagory
:
"""Load label name, id and map from detection dataset.
Args:
COCO(Callable): Method for get detection attributes for images.
data_dir(str): Image dataset path.
Returns:
label_names(List(str)): The dataset label names.
label_ids(List(int)): The dataset label ids.
category_to_id_map(dict): Mapping relations of category and id for images.
"""
def
__init__
(
self
,
COCO
:
Callable
,
data_dir
:
str
):
self
.
COCO
=
COCO
self
.
img_dir
=
data_dir
def
__call__
(
self
):
self
.
categories
=
self
.
COCO
.
loadCats
(
self
.
COCO
.
getCatIds
())
self
.
num_category
=
len
(
self
.
categories
)
label_names
=
[]
label_ids
=
[]
for
category
in
self
.
categories
:
label_names
.
append
(
category
[
'name'
])
label_ids
.
append
(
int
(
category
[
'id'
]))
category_to_id_map
=
{
v
:
i
for
i
,
v
in
enumerate
(
label_ids
)}
return
label_names
,
label_ids
,
category_to_id_map
class
ParseImages
:
"""Prepare images for detection.
Args:
COCO(Callable): Method for get detection attributes for images.
is_train(bool): Select the mode for train or test.
data_dir(str): Image dataset path.
category_to_id_map(dict): Mapping relations of category and id for images.
Returns:
imgs(dict): The input for detection model, it is a dict.
"""
def
__init__
(
self
,
COCO
:
Callable
,
is_train
:
bool
,
data_dir
:
str
,
category_to_id_map
:
dict
):
self
.
COCO
=
COCO
self
.
is_train
=
is_train
self
.
img_dir
=
data_dir
self
.
category_to_id_map
=
category_to_id_map
self
.
parse_gt_annotations
=
GTAnotations
(
self
.
COCO
,
self
.
category_to_id_map
)
def
__call__
(
self
):
image_ids
=
self
.
COCO
.
getImgIds
()
image_ids
.
sort
()
imgs
=
copy
.
deepcopy
(
self
.
COCO
.
loadImgs
(
image_ids
))
for
img
in
imgs
:
img
[
'image'
]
=
os
.
path
.
join
(
self
.
img_dir
,
img
[
'file_name'
])
assert
os
.
path
.
exists
(
img
[
'image'
]),
\
"image {} not found."
.
format
(
img
[
'image'
])
box_num
=
50
img
[
'gt_boxes'
]
=
np
.
zeros
((
box_num
,
4
),
dtype
=
np
.
float32
)
img
[
'gt_labels'
]
=
np
.
zeros
((
box_num
),
dtype
=
np
.
int32
)
if
self
.
is_train
:
img
=
self
.
parse_gt_annotations
(
img
)
return
imgs
class
GTAnotations
:
"""Set gt boxes and gt labels for train.
Args:
COCO(Callable): Method for get detection attributes for images.
category_to_id_map(dict): Mapping relations of category and id for images.
img(dict): Input for detection model.
Returns:
img(dict): Set specific value on the attributes of 'gt boxes' and 'gt labels' for input.
"""
def
__init__
(
self
,
COCO
:
Callable
,
category_to_id_map
:
dict
):
self
.
COCO
=
COCO
self
.
category_to_id_map
=
category_to_id_map
self
.
boxtool
=
BoxTool
()
def
__call__
(
self
,
img
:
dict
):
img_height
=
img
[
'height'
]
img_width
=
img
[
'width'
]
anno
=
self
.
COCO
.
loadAnns
(
self
.
COCO
.
getAnnIds
(
imgIds
=
img
[
'id'
],
iscrowd
=
None
))
gt_index
=
0
for
target
in
anno
:
if
target
[
'area'
]
<
-
1
:
continue
if
'ignore'
in
target
and
target
[
'ignore'
]:
continue
box
=
self
.
boxtool
.
coco_anno_box_to_center_relative
(
target
[
'bbox'
],
img_height
,
img_width
)
if
box
[
2
]
<=
0
and
box
[
3
]
<=
0
:
continue
img
[
'gt_boxes'
][
gt_index
]
=
box
img
[
'gt_labels'
][
gt_index
]
=
\
self
.
category_to_id_map
[
target
[
'category_id'
]]
gt_index
+=
1
if
gt_index
>=
50
:
break
return
img
class
DetectTestReader
:
"""Preprocess for detection dataset on test mode.
Args:
mean(list): Mean values for normalization, default is [0.485, 0.456, 0.406].
std(list): Standard deviation for normalization, default is [0.229, 0.224, 0.225].
img(dict): Prepared input for detection model.
size(int): Image size for detection.
Returns:
out_img(np.ndarray): Normalized image, shape is [C, H, W].
id(int): Id number for corresponding out_img.
(h, w)(tuple): height and weight for corresponding out_img.
"""
def
__init__
(
self
,
mean
:
list
=
[
0.485
,
0.456
,
0.406
],
std
:
list
=
[
0.229
,
0.224
,
0.225
]):
self
.
mean
=
mean
self
.
std
=
std
def
__call__
(
self
,
img
,
size
):
im_path
=
img
[
'image'
]
im
=
cv2
.
imread
(
im_path
).
astype
(
'float32'
)
im
=
cv2
.
cvtColor
(
im
,
cv2
.
COLOR_BGR2RGB
)
h
,
w
,
_
=
im
.
shape
im_scale_x
=
size
/
float
(
w
)
im_scale_y
=
size
/
float
(
h
)
out_img
=
cv2
.
resize
(
im
,
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
cv2
.
INTER_CUBIC
)
mean
=
np
.
array
(
self
.
mean
).
reshape
((
1
,
1
,
-
1
))
std
=
np
.
array
(
self
.
std
).
reshape
((
1
,
1
,
-
1
))
out_img
=
(
out_img
/
255.0
-
mean
)
/
std
out_img
=
out_img
.
transpose
((
2
,
0
,
1
))
id
=
int
(
img
[
'id'
])
return
out_img
,
id
,
(
h
,
w
)
class
DetectTrainReader
:
"""Preprocess for detection dataset on train mode.
Args:
mean(list): Mean values for normalization, default is [0.485, 0.456, 0.406].
std(list): Standard deviation for normalization, default is [0.229, 0.224, 0.225].
img(dict): Prepared input for detection model.
size(int): Image size for detection.
Returns:
out_img(np.ndarray): Normalized image, shape is [C, H, W].
gt_boxes(np.ndarray): Ground truth boxes information.
gt_labels(np.ndarray): Ground truth labels.
gt_scores(np.ndarray): Ground truth scores.
"""
def
__init__
(
self
,
mean
=
[
0.485
,
0.456
,
0.406
],
std
=
[
0.229
,
0.224
,
0.225
]):
self
.
mean
=
mean
self
.
std
=
std
self
.
boxtool
=
BoxTool
()
def
__call__
(
self
,
img
,
size
):
im_path
=
img
[
'image'
]
im
=
cv2
.
imread
(
im_path
)
im
=
cv2
.
cvtColor
(
im
,
cv2
.
COLOR_BGR2RGB
)
gt_boxes
=
img
[
'gt_boxes'
].
copy
()
gt_labels
=
img
[
'gt_labels'
].
copy
()
gt_scores
=
np
.
ones_like
(
gt_labels
)
im
,
gt_boxes
,
gt_labels
,
gt_scores
=
self
.
boxtool
.
image_augment
(
im
,
gt_boxes
,
gt_labels
,
gt_scores
,
size
,
self
.
mean
)
mean
=
np
.
array
(
self
.
mean
).
reshape
((
1
,
1
,
-
1
))
std
=
np
.
array
(
self
.
std
).
reshape
((
1
,
1
,
-
1
))
out_img
=
(
im
/
255.0
-
mean
)
/
std
out_img
=
out_img
.
astype
(
'float32'
).
transpose
((
2
,
0
,
1
))
return
out_img
,
gt_boxes
,
gt_labels
,
gt_scores
class
BoxTool
:
"""This class provides common methods for box processing in detection tasks."""
def
__init__
(
self
):
super
(
BoxTool
,
self
).
__init__
()
def
coco_anno_box_to_center_relative
(
self
,
box
:
list
,
img_height
:
int
,
img_width
:
int
)
->
np
.
ndarray
:
"""
Convert COCO annotations box with format [x1, y1, w, h] to
center mode [center_x, center_y, w, h] and divide image width
and height to get relative value in range[0, 1]
"""
assert
len
(
box
)
==
4
,
"box should be a len(4) list or tuple"
x
,
y
,
w
,
h
=
box
x1
=
max
(
x
,
0
)
x2
=
min
(
x
+
w
-
1
,
img_width
-
1
)
y1
=
max
(
y
,
0
)
y2
=
min
(
y
+
h
-
1
,
img_height
-
1
)
x
=
(
x1
+
x2
)
/
2
/
img_width
y
=
(
y1
+
y2
)
/
2
/
img_height
w
=
(
x2
-
x1
)
/
img_width
h
=
(
y2
-
y1
)
/
img_height
return
np
.
array
([
x
,
y
,
w
,
h
])
def
clip_relative_box_in_image
(
self
,
x
:
int
,
y
:
int
,
w
:
int
,
h
:
int
)
->
int
:
"""Clip relative box coordinates x, y, w, h to [0, 1]"""
x1
=
max
(
x
-
w
/
2
,
0.
)
x2
=
min
(
x
+
w
/
2
,
1.
)
y1
=
min
(
y
-
h
/
2
,
0.
)
y2
=
max
(
y
+
h
/
2
,
1.
)
x
=
(
x1
+
x2
)
/
2
y
=
(
y1
+
y2
)
/
2
w
=
x2
-
x1
h
=
y2
-
y1
return
x
,
y
,
w
,
h
def
box_xywh_to_xyxy
(
self
,
box
:
np
.
ndarray
)
->
np
.
ndarray
:
"""Change box from xywh to xyxy"""
shape
=
box
.
shape
assert
shape
[
-
1
]
==
4
,
"Box shape[-1] should be 4."
box
=
box
.
reshape
((
-
1
,
4
))
box
[:,
0
],
box
[:,
2
]
=
box
[:,
0
]
-
box
[:,
2
]
/
2
,
box
[:,
0
]
+
box
[:,
2
]
/
2
box
[:,
1
],
box
[:,
3
]
=
box
[:,
1
]
-
box
[:,
3
]
/
2
,
box
[:,
1
]
+
box
[:,
3
]
/
2
box
=
box
.
reshape
(
shape
)
return
box
def
box_iou_xywh
(
self
,
box1
:
np
.
ndarray
,
box2
:
np
.
ndarray
)
->
float
:
"""Calculate iou by xywh"""
assert
box1
.
shape
[
-
1
]
==
4
,
"Box1 shape[-1] should be 4."
assert
box2
.
shape
[
-
1
]
==
4
,
"Box2 shape[-1] should be 4."
b1_x1
,
b1_x2
=
box1
[:,
0
]
-
box1
[:,
2
]
/
2
,
box1
[:,
0
]
+
box1
[:,
2
]
/
2
b1_y1
,
b1_y2
=
box1
[:,
1
]
-
box1
[:,
3
]
/
2
,
box1
[:,
1
]
+
box1
[:,
3
]
/
2
b2_x1
,
b2_x2
=
box2
[:,
0
]
-
box2
[:,
2
]
/
2
,
box2
[:,
0
]
+
box2
[:,
2
]
/
2
b2_y1
,
b2_y2
=
box2
[:,
1
]
-
box2
[:,
3
]
/
2
,
box2
[:,
1
]
+
box2
[:,
3
]
/
2
inter_x1
=
np
.
maximum
(
b1_x1
,
b2_x1
)
inter_x2
=
np
.
minimum
(
b1_x2
,
b2_x2
)
inter_y1
=
np
.
maximum
(
b1_y1
,
b2_y1
)
inter_y2
=
np
.
minimum
(
b1_y2
,
b2_y2
)
inter_w
=
inter_x2
-
inter_x1
inter_h
=
inter_y2
-
inter_y1
inter_w
[
inter_w
<
0
]
=
0
inter_h
[
inter_h
<
0
]
=
0
inter_area
=
inter_w
*
inter_h
b1_area
=
(
b1_x2
-
b1_x1
)
*
(
b1_y2
-
b1_y1
)
b2_area
=
(
b2_x2
-
b2_x1
)
*
(
b2_y2
-
b2_y1
)
return
inter_area
/
(
b1_area
+
b2_area
-
inter_area
)
def
box_iou_xyxy
(
self
,
box1
:
np
.
ndarray
,
box2
:
np
.
ndarray
)
->
float
:
"""Calculate iou by xyxy"""
assert
box1
.
shape
[
-
1
]
==
4
,
"Box1 shape[-1] should be 4."
assert
box2
.
shape
[
-
1
]
==
4
,
"Box2 shape[-1] should be 4."
b1_x1
,
b1_y1
,
b1_x2
,
b1_y2
=
box1
[:,
0
],
box1
[:,
1
],
box1
[:,
2
],
box1
[:,
3
]
b2_x1
,
b2_y1
,
b2_x2
,
b2_y2
=
box2
[:,
0
],
box2
[:,
1
],
box2
[:,
2
],
box2
[:,
3
]
inter_x1
=
np
.
maximum
(
b1_x1
,
b2_x1
)
inter_x2
=
np
.
minimum
(
b1_x2
,
b2_x2
)
inter_y1
=
np
.
maximum
(
b1_y1
,
b2_y1
)
inter_y2
=
np
.
minimum
(
b1_y2
,
b2_y2
)
inter_w
=
inter_x2
-
inter_x1
inter_h
=
inter_y2
-
inter_y1
inter_w
[
inter_w
<
0
]
=
0
inter_h
[
inter_h
<
0
]
=
0
inter_area
=
inter_w
*
inter_h
b1_area
=
(
b1_x2
-
b1_x1
)
*
(
b1_y2
-
b1_y1
)
b2_area
=
(
b2_x2
-
b2_x1
)
*
(
b2_y2
-
b2_y1
)
return
inter_area
/
(
b1_area
+
b2_area
-
inter_area
)
def
box_crop
(
self
,
boxes
:
np
.
ndarray
,
labels
:
np
.
ndarray
,
scores
:
np
.
ndarray
,
crop
:
list
,
img_shape
:
list
):
"""Crop the boxes ,labels, scores according to the given shape"""
x
,
y
,
w
,
h
=
map
(
float
,
crop
)
im_w
,
im_h
=
map
(
float
,
img_shape
)
boxes
=
boxes
.
copy
()
boxes
[:,
0
],
boxes
[:,
2
]
=
(
boxes
[:,
0
]
-
boxes
[:,
2
]
/
2
)
*
im_w
,
(
boxes
[:,
0
]
+
boxes
[:,
2
]
/
2
)
*
im_w
boxes
[:,
1
],
boxes
[:,
3
]
=
(
boxes
[:,
1
]
-
boxes
[:,
3
]
/
2
)
*
im_h
,
(
boxes
[:,
1
]
+
boxes
[:,
3
]
/
2
)
*
im_h
crop_box
=
np
.
array
([
x
,
y
,
x
+
w
,
y
+
h
])
centers
=
(
boxes
[:,
:
2
]
+
boxes
[:,
2
:])
/
2.0
mask
=
np
.
logical_and
(
crop_box
[:
2
]
<=
centers
,
centers
<=
crop_box
[
2
:]).
all
(
axis
=
1
)
boxes
[:,
:
2
]
=
np
.
maximum
(
boxes
[:,
:
2
],
crop_box
[:
2
])
boxes
[:,
2
:]
=
np
.
minimum
(
boxes
[:,
2
:],
crop_box
[
2
:])
boxes
[:,
:
2
]
-=
crop_box
[:
2
]
boxes
[:,
2
:]
-=
crop_box
[:
2
]
mask
=
np
.
logical_and
(
mask
,
(
boxes
[:,
:
2
]
<
boxes
[:,
2
:]).
all
(
axis
=
1
))
boxes
=
boxes
*
np
.
expand_dims
(
mask
.
astype
(
'float32'
),
axis
=
1
)
labels
=
labels
*
mask
.
astype
(
'float32'
)
scores
=
scores
*
mask
.
astype
(
'float32'
)
boxes
[:,
0
],
boxes
[:,
2
]
=
(
boxes
[:,
0
]
+
boxes
[:,
2
])
/
2
/
w
,
(
boxes
[:,
2
]
-
boxes
[:,
0
])
/
w
boxes
[:,
1
],
boxes
[:,
3
]
=
(
boxes
[:,
1
]
+
boxes
[:,
3
])
/
2
/
h
,
(
boxes
[:,
3
]
-
boxes
[:,
1
])
/
h
return
boxes
,
labels
,
scores
,
mask
.
sum
()
def
random_distort
(
self
,
img
):
""" Distort the input image randomly."""
def
random_brightness
(
img
,
lower
=
0.5
,
upper
=
1.5
):
e
=
np
.
random
.
uniform
(
lower
,
upper
)
return
ImageEnhance
.
Brightness
(
img
).
enhance
(
e
)
def
random_contrast
(
img
,
lower
=
0.5
,
upper
=
1.5
):
e
=
np
.
random
.
uniform
(
lower
,
upper
)
return
ImageEnhance
.
Contrast
(
img
).
enhance
(
e
)
def
random_color
(
img
,
lower
=
0.5
,
upper
=
1.5
):
e
=
np
.
random
.
uniform
(
lower
,
upper
)
return
ImageEnhance
.
Color
(
img
).
enhance
(
e
)
ops
=
[
random_brightness
,
random_contrast
,
random_color
]
np
.
random
.
shuffle
(
ops
)
img
=
Image
.
fromarray
(
img
)
img
=
ops
[
0
](
img
)
img
=
ops
[
1
](
img
)
img
=
ops
[
2
](
img
)
img
=
np
.
asarray
(
img
)
return
img
def
random_crop
(
self
,
img
,
boxes
,
labels
,
scores
,
scales
=
[
0.3
,
1.0
],
max_ratio
=
2.0
,
constraints
=
None
,
max_trial
=
50
):
"""Random crop the input image according to constraints."""
if
len
(
boxes
)
==
0
:
return
img
,
boxes
if
not
constraints
:
constraints
=
[(
0.1
,
1.0
),
(
0.3
,
1.0
),
(
0.5
,
1.0
),
(
0.7
,
1.0
),
(
0.9
,
1.0
),
(
0.0
,
1.0
)]
img
=
Image
.
fromarray
(
img
)
w
,
h
=
img
.
size
crops
=
[(
0
,
0
,
w
,
h
)]
for
min_iou
,
max_iou
in
constraints
:
for
_
in
range
(
max_trial
):
scale
=
random
.
uniform
(
scales
[
0
],
scales
[
1
])
aspect_ratio
=
random
.
uniform
(
max
(
1
/
max_ratio
,
scale
*
scale
),
\
min
(
max_ratio
,
1
/
scale
/
scale
))
crop_h
=
int
(
h
*
scale
/
np
.
sqrt
(
aspect_ratio
))
crop_w
=
int
(
w
*
scale
*
np
.
sqrt
(
aspect_ratio
))
crop_x
=
random
.
randrange
(
w
-
crop_w
)
crop_y
=
random
.
randrange
(
h
-
crop_h
)
crop_box
=
np
.
array
([[(
crop_x
+
crop_w
/
2.0
)
/
w
,
(
crop_y
+
crop_h
/
2.0
)
/
h
,
crop_w
/
float
(
w
),
crop_h
/
float
(
h
)]])
iou
=
self
.
box_iou_xywh
(
crop_box
,
boxes
)
if
min_iou
<=
iou
.
min
()
and
max_iou
>=
iou
.
max
():
crops
.
append
((
crop_x
,
crop_y
,
crop_w
,
crop_h
))
break
while
crops
:
crop
=
crops
.
pop
(
np
.
random
.
randint
(
0
,
len
(
crops
)))
crop_boxes
,
crop_labels
,
crop_scores
,
box_num
=
\
self
.
box_crop
(
boxes
,
labels
,
scores
,
crop
,
(
w
,
h
))
if
box_num
<
1
:
continue
img
=
img
.
crop
((
crop
[
0
],
crop
[
1
],
crop
[
0
]
+
crop
[
2
],
crop
[
1
]
+
crop
[
3
])).
resize
(
img
.
size
,
Image
.
LANCZOS
)
img
=
np
.
asarray
(
img
)
return
img
,
crop_boxes
,
crop_labels
,
crop_scores
img
=
np
.
asarray
(
img
)
return
img
,
boxes
,
labels
,
scores
def
random_flip
(
self
,
img
,
gtboxes
,
thresh
=
0.5
):
"""Flip the images randomly"""
if
random
.
random
()
>
thresh
:
img
=
img
[:,
::
-
1
,
:]
gtboxes
[:,
0
]
=
1.0
-
gtboxes
[:,
0
]
return
img
,
gtboxes
def
random_interp
(
self
,
img
,
size
,
interp
=
None
):
interp_method
=
[
cv2
.
INTER_NEAREST
,
cv2
.
INTER_LINEAR
,
cv2
.
INTER_AREA
,
cv2
.
INTER_CUBIC
,
cv2
.
INTER_LANCZOS4
,
]
if
not
interp
or
interp
not
in
interp_method
:
interp
=
interp_method
[
random
.
randint
(
0
,
len
(
interp_method
)
-
1
)]
h
,
w
,
_
=
img
.
shape
im_scale_x
=
size
/
float
(
w
)
im_scale_y
=
size
/
float
(
h
)
img
=
cv2
.
resize
(
img
,
None
,
None
,
fx
=
im_scale_x
,
fy
=
im_scale_y
,
interpolation
=
interp
)
return
img
def
random_expand
(
self
,
img
,
gtboxes
,
max_ratio
=
4.
,
fill
=
None
,
keep_ratio
=
True
,
thresh
=
0.5
):
"""Expand input image and ground truth box by random ratio."""
if
random
.
random
()
>
thresh
:
return
img
,
gtboxes
if
max_ratio
<
1.0
:
return
img
,
gtboxes
h
,
w
,
c
=
img
.
shape
ratio_x
=
random
.
uniform
(
1
,
max_ratio
)
if
keep_ratio
:
ratio_y
=
ratio_x
else
:
ratio_y
=
random
.
uniform
(
1
,
max_ratio
)
oh
=
int
(
h
*
ratio_y
)
ow
=
int
(
w
*
ratio_x
)
off_x
=
random
.
randint
(
0
,
ow
-
w
)
off_y
=
random
.
randint
(
0
,
oh
-
h
)
out_img
=
np
.
zeros
((
oh
,
ow
,
c
))
if
fill
and
len
(
fill
)
==
c
:
for
i
in
range
(
c
):
out_img
[:,
:,
i
]
=
fill
[
i
]
*
255.0
out_img
[
off_y
:
off_y
+
h
,
off_x
:
off_x
+
w
,
:]
=
img
gtboxes
[:,
0
]
=
((
gtboxes
[:,
0
]
*
w
)
+
off_x
)
/
float
(
ow
)
gtboxes
[:,
1
]
=
((
gtboxes
[:,
1
]
*
h
)
+
off_y
)
/
float
(
oh
)
gtboxes
[:,
2
]
=
gtboxes
[:,
2
]
/
ratio_x
gtboxes
[:,
3
]
=
gtboxes
[:,
3
]
/
ratio_y
return
out_img
.
astype
(
'uint8'
),
gtboxes
def
shuffle_gtbox
(
self
,
gtbox
,
gtlabel
,
gtscore
):
"""Shuffle gt box."""
gt
=
np
.
concatenate
([
gtbox
,
gtlabel
[:,
np
.
newaxis
],
gtscore
[:,
np
.
newaxis
]],
axis
=
1
)
idx
=
np
.
arange
(
gt
.
shape
[
0
])
np
.
random
.
shuffle
(
idx
)
gt
=
gt
[
idx
,
:]
return
gt
[:,
:
4
],
gt
[:,
4
],
gt
[:,
5
]
def
image_augment
(
self
,
img
,
gtboxes
,
gtlabels
,
gtscores
,
size
,
means
=
None
):
"""Random processes for input image."""
img
=
self
.
random_distort
(
img
)
img
,
gtboxes
=
self
.
random_expand
(
img
,
gtboxes
,
fill
=
means
)
img
,
gtboxes
,
gtlabels
,
gtscores
=
\
self
.
random_crop
(
img
,
gtboxes
,
gtlabels
,
gtscores
)
img
=
self
.
random_interp
(
img
,
size
)
img
,
gtboxes
=
self
.
random_flip
(
img
,
gtboxes
)
gtboxes
,
gtlabels
,
gtscores
=
self
.
shuffle_gtbox
(
gtboxes
,
gtlabels
,
gtscores
)
return
img
.
astype
(
'float32'
),
gtboxes
.
astype
(
'float32'
),
\
gtlabels
.
astype
(
'int32'
),
gtscores
.
astype
(
'float32'
)
def
draw_boxes_on_image
(
self
,
image_path
:
str
,
boxes
:
np
.
ndarray
,
scores
:
np
.
ndarray
,
labels
:
np
.
ndarray
,
label_names
:
list
,
score_thresh
:
float
=
0.5
):
"""Draw boxes on images"""
image
=
np
.
array
(
Image
.
open
(
image_path
))
plt
.
figure
()
_
,
ax
=
plt
.
subplots
(
1
)
ax
.
imshow
(
image
)
image_name
=
image_path
.
split
(
'/'
)[
-
1
]
print
(
"Image {} detect: "
.
format
(
image_name
))
colors
=
{}
for
box
,
score
,
label
in
zip
(
boxes
,
scores
,
labels
):
if
score
<
score_thresh
:
continue
if
box
[
2
]
<=
box
[
0
]
or
box
[
3
]
<=
box
[
1
]:
continue
label
=
int
(
label
)
if
label
not
in
colors
:
colors
[
label
]
=
plt
.
get_cmap
(
'hsv'
)(
label
/
len
(
label_names
))
x1
,
y1
,
x2
,
y2
=
box
[
0
],
box
[
1
],
box
[
2
],
box
[
3
]
rect
=
plt
.
Rectangle
((
x1
,
y1
),
x2
-
x1
,
y2
-
y1
,
fill
=
False
,
linewidth
=
2.0
,
edgecolor
=
colors
[
label
])
ax
.
add_patch
(
rect
)
ax
.
text
(
x1
,
y1
,
'{} {:.4f}'
.
format
(
label_names
[
label
],
score
),
verticalalignment
=
'bottom'
,
horizontalalignment
=
'left'
,
bbox
=
{
'facecolor'
:
colors
[
label
],
'alpha'
:
0.5
,
'pad'
:
0
},
fontsize
=
8
,
color
=
'white'
)
print
(
"
\t
{:15s} at {:25} score: {:.5f}"
.
format
(
label_names
[
int
(
label
)],
str
(
list
(
map
(
int
,
list
(
box
)))),
score
))
image_name
=
image_name
.
replace
(
'jpg'
,
'png'
)
plt
.
axis
(
'off'
)
plt
.
gca
().
xaxis
.
set_major_locator
(
plt
.
NullLocator
())
plt
.
gca
().
yaxis
.
set_major_locator
(
plt
.
NullLocator
())
plt
.
savefig
(
"./output/{}"
.
format
(
image_name
),
bbox_inches
=
'tight'
,
pad_inches
=
0.0
)
print
(
"Detect result save at ./output/{}
\n
"
.
format
(
image_name
))
plt
.
cla
()
plt
.
close
(
'all'
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录