Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
3acfe6bd
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
大约 2 年 前同步成功
通知
285
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3acfe6bd
编写于
4月 29, 2020
作者:
D
David Lin
提交者:
GitHub
4月 29, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update 11 object detection modules. (#555)
* update 11 object detection modules.
上级
6a477596
变更
77
显示空白变更内容
内联
并排
Showing
77 changed file
with
6973 addition
and
1660 deletion
+6973
-1660
hub_module/modules/image/object_detection/faster_rcnn/module.py
...dule/modules/image/object_detection/faster_rcnn/module.py
+0
-182
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md
.../object_detection/faster_rcnn_resnet50_coco2017/README.md
+144
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
..._detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
+19
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py
...ject_detection/faster_rcnn_resnet50_coco2017/bbox_head.py
+269
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/data_feed.py
...ject_detection/faster_rcnn_resnet50_coco2017/data_feed.py
+14
-9
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py
.../object_detection/faster_rcnn_resnet50_coco2017/module.py
+171
-61
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/processor.py
...ject_detection/faster_rcnn_resnet50_coco2017/processor.py
+35
-20
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py
..._detection/faster_rcnn_resnet50_coco2017/roi_extractor.py
+13
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py
...bject_detection/faster_rcnn_resnet50_coco2017/rpn_head.py
+302
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
...ect_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
+144
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
...ection/faster_rcnn_resnet50_fpn_coco2017/bbox_assigner.py
+0
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
..._detection/faster_rcnn_resnet50_fpn_coco2017/bbox_head.py
+0
-1
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
..._detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
+118
-0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
...ect_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
+176
-61
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
..._detection/faster_rcnn_resnet50_fpn_coco2017/processor.py
+36
-25
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
...ection/faster_rcnn_resnet50_fpn_coco2017/roi_extractor.py
+1
-13
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
...t_detection/faster_rcnn_resnet50_fpn_coco2017/rpn_head.py
+0
-0
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md
...bject_detection/retinanet_resnet50_fpn_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py
...ct_detection/retinanet_resnet50_fpn_coco2017/data_feed.py
+15
-9
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py
...bject_detection/retinanet_resnet50_fpn_coco2017/module.py
+50
-31
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py
...ct_detection/retinanet_resnet50_fpn_coco2017/processor.py
+35
-20
hub_module/modules/image/object_detection/ssd/module.py
hub_module/modules/image/object_detection/ssd/module.py
+0
-127
hub_module/modules/image/object_detection/ssd/multi_box_head.py
...dule/modules/image/object_detection/ssd/multi_box_head.py
+0
-30
hub_module/modules/image/object_detection/ssd/output_decoder.py
...dule/modules/image/object_detection/ssd/output_decoder.py
+0
-15
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md
.../image/object_detection/ssd_mobilenet_v1_pascal/README.md
+142
-0
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml
...image/object_detection/ssd_mobilenet_v1_pascal/config.yml
+8
-1
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py
...age/object_detection/ssd_mobilenet_v1_pascal/data_feed.py
+218
-0
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
.../image/object_detection/ssd_mobilenet_v1_pascal/module.py
+194
-159
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py
...age/object_detection/ssd_mobilenet_v1_pascal/processor.py
+176
-0
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md
...s/image/object_detection/ssd_vgg16_300_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/__init__.py
...image/object_detection/ssd_vgg16_300_coco2017/__init__.py
+0
-0
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py
...mage/object_detection/ssd_vgg16_300_coco2017/data_feed.py
+212
-0
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py
...s/image/object_detection/ssd_vgg16_300_coco2017/module.py
+190
-138
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py
...mage/object_detection/ssd_vgg16_300_coco2017/processor.py
+176
-0
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md
...s/image/object_detection/ssd_vgg16_512_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py
...image/object_detection/ssd_vgg16_512_coco2017/__init__.py
+0
-0
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/data_feed.py
...mage/object_detection/ssd_vgg16_512_coco2017/data_feed.py
+14
-14
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py
...s/image/object_detection/ssd_vgg16_512_coco2017/module.py
+190
-138
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py
...mage/object_detection/ssd_vgg16_512_coco2017/processor.py
+176
-0
hub_module/modules/image/object_detection/yolov3/module.py
hub_module/modules/image/object_detection/yolov3/module.py
+0
-95
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/README.md
...mage/object_detection/yolov3_darknet53_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py
...age/object_detection/yolov3_darknet53_coco2017/darknet.py
+3
-5
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/data_feed.py
...e/object_detection/yolov3_darknet53_coco2017/data_feed.py
+8
-5
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/module.py
...mage/object_detection/yolov3_darknet53_coco2017/module.py
+180
-101
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/processor.py
...e/object_detection/yolov3_darknet53_coco2017/processor.py
+10
-1
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/yolo_head.py
...e/object_detection/yolov3_darknet53_coco2017/yolo_head.py
+0
-0
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md
...ge/object_detection/yolov3_darknet53_pedestrian/README.md
+1
-3
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py
...object_detection/yolov3_darknet53_pedestrian/data_feed.py
+71
-0
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py
...ge/object_detection/yolov3_darknet53_pedestrian/module.py
+62
-29
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py
...object_detection/yolov3_darknet53_pedestrian/processor.py
+180
-0
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/serving.py
...e/object_detection/yolov3_darknet53_pedestrian/serving.py
+0
-10
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py
...object_detection/yolov3_darknet53_pedestrian/yolo_head.py
+273
-0
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/README.md
...mage/object_detection/yolov3_darknet53_vehicles/README.md
+1
-3
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py
...e/object_detection/yolov3_darknet53_vehicles/data_feed.py
+71
-0
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/module.py
...mage/object_detection/yolov3_darknet53_vehicles/module.py
+65
-31
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py
...e/object_detection/yolov3_darknet53_vehicles/processor.py
+180
-0
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/serving.py
...age/object_detection/yolov3_darknet53_vehicles/serving.py
+0
-10
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py
...e/object_detection/yolov3_darknet53_vehicles/yolo_head.py
+273
-0
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md
...e/object_detection/yolov3_mobilenet_v1_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py
...bject_detection/yolov3_mobilenet_v1_coco2017/data_feed.py
+71
-0
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py
...e/object_detection/yolov3_mobilenet_v1_coco2017/module.py
+176
-101
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py
...bject_detection/yolov3_mobilenet_v1_coco2017/processor.py
+180
-0
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py
...bject_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py
+273
-0
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/README.md
...image/object_detection/yolov3_resnet34_coco2017/README.md
+142
-0
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py
...ge/object_detection/yolov3_resnet34_coco2017/data_feed.py
+71
-0
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/module.py
...image/object_detection/yolov3_resnet34_coco2017/module.py
+195
-120
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py
...ge/object_detection/yolov3_resnet34_coco2017/processor.py
+180
-0
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py
...ge/object_detection/yolov3_resnet34_coco2017/yolo_head.py
+273
-0
hub_module/tests/unittests/test_faster_rcnn_resnet50.py
hub_module/tests/unittests/test_faster_rcnn_resnet50.py
+6
-10
hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py
hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py
+6
-9
hub_module/tests/unittests/test_retinanet_resnet50_fpn.py
hub_module/tests/unittests/test_retinanet_resnet50_fpn.py
+6
-10
hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py
hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py
+11
-10
hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py
hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py
+15
-14
hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py
hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py
+15
-14
hub_module/tests/unittests/test_yolov3_darknet53_coco2017.py
hub_module/tests/unittests/test_yolov3_darknet53_coco2017.py
+6
-6
hub_module/tests/unittests/test_yolov3_mobilenet_v1_coco2017.py
...dule/tests/unittests/test_yolov3_mobilenet_v1_coco2017.py
+11
-10
hub_module/tests/unittests/test_yolov3_resnet34_coco2017.py
hub_module/tests/unittests/test_yolov3_resnet34_coco2017.py
+10
-9
未找到文件。
hub_module/modules/image/object_detection/faster_rcnn/module.py
已删除
100644 → 0
浏览文件 @
6a477596
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
faster_rcnn.data_feed
import
test_reader
,
padding_minibatch
from
faster_rcnn.processor
import
load_label_info
,
postprocess
from
faster_rcnn.bbox_head
import
MultiClassNMS
,
BBoxHead
,
SmoothL1Loss
,
TwoFCHead
from
faster_rcnn.rpn_head
import
AnchorGenerator
,
RPNTargetAssign
,
GenerateProposals
,
RPNHead
,
FPNRPNHead
from
faster_rcnn.bbox_assigner
import
BBoxAssigner
from
faster_rcnn.roi_extractor
import
RoIAlign
,
FPNRoIAlign
@moduleinfo(
    name="faster_rcnn",
    version="1.0.0",
    type="cv/object_detection",
    summary="Baidu's Faster R-CNN model for object detection.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class FasterRCNNBase(hub.Module):
    """Base Faster R-CNN module.

    Re-exports the data-feed, post-processing and head helper classes as
    instance attributes and builds the detection graph through `context`.
    """

    def _initialize(self):
        """Bind the helper functions and classes onto the module instance."""
        # data_feed
        self.test_reader = test_reader
        self.padding_minibatch = padding_minibatch
        # processor
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        # bbox_head
        self.MultiClassNMS = MultiClassNMS
        self.TwoFCHead = TwoFCHead
        self.BBoxHead = BBoxHead
        self.SmoothL1Loss = SmoothL1Loss
        # rpn_head
        self.AnchorGenerator = AnchorGenerator
        self.RPNTargetAssign = RPNTargetAssign
        self.GenerateProposals = GenerateProposals
        self.RPNHead = RPNHead
        self.FPNRPNHead = FPNRPNHead
        # bbox_assigner
        self.BBoxAssigner = BBoxAssigner
        # roi_extractor
        self.RoIAlign = RoIAlign
        self.FPNRoIAlign = FPNRoIAlign

    def context(self, body_feats, fpn, rpn_head, roi_extractor, bbox_head,
                bbox_assigner, image, trainable, var_prefix, phase):
        """Distill the Head Features, so as to perform transfer learning.

        :param body_feats: backbone feature maps keyed by name. ``.keys()`` is
            called on it, and when ``fpn`` is None the value of the last key
            is the input of RoIAlign.
        :type body_feats: dict-like (e.g. OrderedDict)
        :param fpn: Feature Pyramid Network, or None. When given, its
            ``get_output(body_feats)`` must return ``(body_feats, spatial_scale)``.
        :type fpn: <class 'FPN' object>
        :param rpn_head: Head of Region Proposal Network.
        :type rpn_head: <class 'RPNHead' object> or <class 'FPNRPNHead' object>
        :param roi_extractor: RoI feature extractor. When ``fpn`` is None its
            ``pooled_height``, ``pooled_width``, ``spatial_scale`` and
            ``sampling_ratio`` attributes configure ``fluid.layers.roi_align``;
            otherwise it is called with ``head_inputs``, ``rois`` and
            ``spatial_scale``.
        :type roi_extractor: <class 'RoIAlign' object> or <class 'FPNRoIAlign' object>
        :param bbox_head: Head of Bounding Box.
        :type bbox_head: <class 'BBoxHead' object>
        :param bbox_assigner: Parameters of fluid.layers.generate_proposal_labels.
        :type bbox_assigner: <class 'BBoxAssigner' object>
        :param image: image tensor.
        :type image: <class 'paddle.fluid.framework.Variable'>
        :param trainable: whether to set parameters trainable.
        :type trainable: bool
        :param var_prefix: the prefix of variables in faster_rcnn
        :type var_prefix: str
        :param phase: Optional Choice: 'predict', 'train'
        :type phase: str
        :return: ``(inputs, outputs, context_prog)`` where ``inputs`` and
            ``outputs`` map names to the prefixed variables of ``context_prog``.
        :raises ValueError: if ``phase`` is neither 'train' nor 'predict'.
        """
        # Reject an unknown phase before any layer is appended to the program;
        # otherwise neither branch below would define `inputs` / `outputs`.
        if phase not in ('train', 'predict'):
            raise ValueError(
                "phase must be 'train' or 'predict', got {!r}".format(phase))

        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_info = fluid.layers.data(
                name='im_info', shape=[3], dtype='float32', lod_level=0)
            im_shape = fluid.layers.data(
                name='im_shape', shape=[3], dtype='float32', lod_level=0)
            # Remember the original feature names: `body_feats` may be
            # replaced by the FPN output below.
            body_feat_names = list(body_feats.keys())
            # fpn
            if fpn is not None:
                body_feats, spatial_scale = fpn.get_output(body_feats)
            # rpn_head: RPNHead
            rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
            # train
            if phase == 'train':
                gt_bbox = fluid.layers.data(
                    name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                is_crowd = fluid.layers.data(
                    name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                gt_class = fluid.layers.data(
                    name='gt_class', shape=[1], dtype='int32', lod_level=1)
                rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                # bbox_assigner: BBoxAssigner
                outs = fluid.layers.generate_proposal_labels(
                    rpn_rois=rois,
                    gt_classes=gt_class,
                    is_crowd=is_crowd,
                    gt_boxes=gt_bbox,
                    im_info=im_info,
                    batch_size_per_im=bbox_assigner.batch_size_per_im,
                    fg_fraction=bbox_assigner.fg_fraction,
                    fg_thresh=bbox_assigner.fg_thresh,
                    bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                    bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                    bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                    class_nums=bbox_assigner.class_nums,
                    use_random=bbox_assigner.use_random)
                # The first output replaces the RPN proposals as the RoIs.
                rois = outs[0]
            if fpn is None:
                body_feat = body_feats[body_feat_names[-1]]
                # roi_extractor: RoIAlign
                roi_feat = fluid.layers.roi_align(
                    input=body_feat,
                    rois=rois,
                    pooled_height=roi_extractor.pooled_height,
                    pooled_width=roi_extractor.pooled_width,
                    spatial_scale=roi_extractor.spatial_scale,
                    sampling_ratio=roi_extractor.sampling_ratio)
            else:
                # roi_extractor: FPNRoIAlign
                roi_feat = roi_extractor(
                    head_inputs=body_feats,
                    rois=rois,
                    spatial_scale=spatial_scale)
            # head_feat
            head_feat = bbox_head.head(roi_feat)
            if isinstance(head_feat, OrderedDict):
                head_feat = list(head_feat.values())[0]
            # Names are collected with the prefix already applied because the
            # variables are renamed by add_vars_prefix() further down.
            if phase == 'train':
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name,
                    'gt_class': var_prefix + gt_class.name,
                    'gt_bbox': var_prefix + gt_bbox.name,
                    'is_crowd': var_prefix + is_crowd.name
                }
                outputs = {
                    'head_feat':
                    var_prefix + head_feat.name,
                    'rpn_cls_loss':
                    var_prefix + rpn_loss['rpn_cls_loss'].name,
                    'rpn_reg_loss':
                    var_prefix + rpn_loss['rpn_reg_loss'].name,
                    'generate_proposal_labels':
                    [var_prefix + var.name for var in outs]
                }
            elif phase == 'predict':
                pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                im_shape)
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name
                }
                outputs = {
                    'head_feat': var_prefix + head_feat.name,
                    'rois': var_prefix + rois.name,
                    'bbox_out': var_prefix + pred.name
                }
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)

            # Resolve the prefixed names back to Variable objects.
            global_vars = context_prog.global_block().vars
            inputs = {key: global_vars[value] for key, value in inputs.items()}
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            return inputs, outputs, context_prog
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
faster_rcnn_resnet50_coco2017
==
1.1.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
*
num
\_
classes (int): 类别数;
*
trainable(bool): 将参数的trainable 属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program。
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"faster_rcnn_resnet50_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
faster_rcnn_resnet50_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/faster_rcnn_resnet50_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_assigner.py
0 → 100644
浏览文件 @
3acfe6bd
class BBoxAssigner(object):
    """Plain holder for the RoI sampling settings of the bbox head.

    Args:
        batch_size_per_im (int): stored as ``batch_size_per_im``.
        fg_fraction (float): stored as ``fg_fraction``.
        fg_thresh (float): stored as ``fg_thresh``.
        bg_thresh_hi (float): stored as ``bg_thresh_hi``.
        bg_thresh_lo (float): stored as ``bg_thresh_lo``.
        bbox_reg_weights (list[float]): stored as ``bbox_reg_weights``;
            defaults to a fresh ``[0.1, 0.1, 0.2, 0.2]`` per instance.
        class_nums (int): stored as ``class_nums``.
        shuffle_before_sample (bool): stored under the name ``use_random``.
    """

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=None,
                 class_nums=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        # A list literal in the signature would be shared by every instance,
        # so the default is materialised here instead.
        if bbox_reg_weights is None:
            bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = class_nums
        self.use_random = shuffle_before_sample
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/bbox_head.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.initializer
import
Normal
,
Xavier
from
paddle.fluid.regularizer
import
L2Decay
from
paddle.fluid.initializer
import
MSRA
class MultiClassNMS(object):
    """Settings container for multi-class NMS.

    Every constructor argument is stored unchanged under an attribute of the
    same name.
    """

    # __op__ = fluid.layers.multiclass_nms

    def __init__(self,
                 score_threshold=0.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=0.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        # Thresholds.
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        # Candidate / result limits.
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # Remaining options.
        self.normalized = normalized
        self.nms_eta = nms_eta
        self.background_label = background_label
class SmoothL1Loss(object):
    """Callable wrapper around ``fluid.layers.smooth_l1``.

    Args:
        sigma (float): hyper-parameter forwarded to the smooth L1 op.
    """

    def __init__(self, sigma=1.0):
        super(SmoothL1Loss, self).__init__()
        self.sigma = sigma

    def __call__(self, x, y, inside_weight=None, outside_weight=None):
        """Forward ``x``, ``y`` and both weights to the op with ``self.sigma``."""
        options = dict(
            inside_weight=inside_weight,
            outside_weight=outside_weight,
            sigma=self.sigma)
        return fluid.layers.smooth_l1(x, y, **options)
class BoxCoder(object):
    """Plain holder for the box coder settings used by ``BBoxHead``.

    Args:
        prior_box_var (list[float]): stored as ``prior_box_var``; defaults to
            a fresh ``[0.1, 0.1, 0.2, 0.2]`` per instance.
        code_type (str): stored as ``code_type``.
        box_normalized (bool): stored as ``box_normalized``.
        axis (int): stored as ``axis``.
    """

    def __init__(self,
                 prior_box_var=None,
                 code_type='decode_center_size',
                 box_normalized=False,
                 axis=1):
        super(BoxCoder, self).__init__()
        # A list literal in the signature would be shared by every instance,
        # so the default is materialised here instead.
        if prior_box_var is None:
            prior_box_var = [0.1, 0.1, 0.2, 0.2]
        self.prior_box_var = prior_box_var
        self.code_type = code_type
        self.box_normalized = box_normalized
        self.axis = axis
class TwoFCHead(object):
    """
    RCNN head made of two fully connected layers ('fc6' then 'fc7').

    Args:
        mlp_dim (int): output size of both fc layers
    """

    def __init__(self, mlp_dim=1024):
        super(TwoFCHead, self).__init__()
        self.mlp_dim = mlp_dim

    def __call__(self, roi_feat):
        """Apply fc6 and fc7 (both ReLU) to ``roi_feat`` and return the fc7 output."""
        # Product of dims 1..3 of the RoI feature, used as fan_out for the
        # Xavier initializer of fc6.
        dims = roi_feat.shape
        fan = dims[1] * dims[2] * dims[3]

        fc6_weight = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
        fc6_bias = ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
        fc6 = fluid.layers.fc(
            input=roi_feat,
            size=self.mlp_dim,
            act='relu',
            name='fc6',
            param_attr=fc6_weight,
            bias_attr=fc6_bias)

        fc7_weight = ParamAttr(name='fc7_w', initializer=Xavier())
        fc7_bias = ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
        return fluid.layers.fc(
            input=fc6,
            size=self.mlp_dim,
            act='relu',
            name='fc7',
            param_attr=fc7_weight,
            bias_attr=fc7_bias)
class BBoxHead(object):
    """
    RCNN bbox head

    Args:
        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
        box_coder (object): `BoxCoder` instance; a new `BoxCoder()` is created
            per head when omitted
        nms (object): `MultiClassNMS` instance; a new `MultiClassNMS()` is
            created per head when omitted
        bbox_loss (object): callable loss, e.g. `SmoothL1Loss`; a new
            `SmoothL1Loss()` is created per head when omitted
        num_classes: number of output classes
    """
    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 head,
                 box_coder=None,
                 nms=None,
                 bbox_loss=None,
                 num_classes=81):
        super(BBoxHead, self).__init__()
        # Helper objects used as signature defaults would be built once at
        # definition time and shared by every BBoxHead, so build them here.
        if box_coder is None:
            box_coder = BoxCoder()
        if nms is None:
            nms = MultiClassNMS()
        if bbox_loss is None:
            bbox_loss = SmoothL1Loss()
        self.head = head
        self.num_classes = num_classes
        self.box_coder = box_coder
        self.nms = nms
        self.bbox_loss = bbox_loss
        # Cache of the last head feature computed by get_head_feat().
        self.head_feat = None

    def get_head_feat(self, input=None):
        """
        Get the bbox head feature map.

        When `input` is given, `self.head` is applied to it (taking the first
        value if the result is an OrderedDict) and the result is cached.
        Without `input`, the cached feature (or None) is returned.
        """
        if input is not None:
            feat = self.head(input)
            if isinstance(feat, OrderedDict):
                feat = list(feat.values())[0]
            self.head_feat = feat
        return self.head_feat

    def _get_output(self, roi_feat):
        """
        Get bbox head output.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.

        Returns:
            cls_score(Variable): Output of the 'cls_score' fc layer, whose
                size is `num_classes`.
            bbox_pred(Variable): Output of the 'bbox_pred' fc layer, whose
                size is `4 * num_classes`.
        """
        head_feat = self.get_head_feat(roi_feat)
        # when ResNetC5 output a single feature map
        if not isinstance(self.head, TwoFCHead):
            head_feat = fluid.layers.pool2d(
                head_feat, pool_type='avg', global_pooling=True)
        cls_score = fluid.layers.fc(
            input=head_feat,
            size=self.num_classes,
            act=None,
            name='cls_score',
            param_attr=ParamAttr(
                name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
            bias_attr=ParamAttr(
                name='cls_score_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        bbox_pred = fluid.layers.fc(
            input=head_feat,
            size=4 * self.num_classes,
            act=None,
            name='bbox_pred',
            param_attr=ParamAttr(
                name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
            bias_attr=ParamAttr(
                name='bbox_pred_b',
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return cls_score, bbox_pred

    def get_loss(self, roi_feat, labels_int32, bbox_targets,
                 bbox_inside_weights, bbox_outside_weights):
        """
        Get bbox_head loss.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            labels_int32(Variable): Class label of a RoI with shape [P, 1].
                P is the number of RoI.
            bbox_targets(Variable): Box label of a RoI with shape
                [P, 4 * class_nums].
            bbox_inside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.
            bbox_outside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.

        Return:
            Type: Dict
                loss_cls(Variable): mean softmax cross-entropy loss.
                loss_bbox(Variable): mean of `self.bbox_loss` output.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)

        # The cross-entropy op is fed int64 labels; no gradient flows to them.
        labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64')
        labels_int64.stop_gradient = True
        loss_cls = fluid.layers.softmax_with_cross_entropy(
            logits=cls_score, label=labels_int64, numeric_stable_mode=True)
        loss_cls = fluid.layers.reduce_mean(loss_cls)
        loss_bbox = self.bbox_loss(
            x=bbox_pred,
            y=bbox_targets,
            inside_weight=bbox_inside_weights,
            outside_weight=bbox_outside_weights)
        loss_bbox = fluid.layers.reduce_mean(loss_bbox)
        return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}

    def get_prediction(self,
                       roi_feat,
                       rois,
                       im_info,
                       im_shape,
                       return_box_score=False):
        """
        Get prediction bounding box in test stage.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            rois (Variable): Output of generate_proposals in rpn head.
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists of im_height,
                im_width, im_scale.
            im_shape (Variable): Actual shape of original image with shape
                [B, 3]. B is the number of images, each element consists of
                original_height, original_width, 1
            return_box_score (bool): when True, skip NMS and return a dict
                with the clipped boxes ('bbox') and class probabilities
                ('score').

        Returns:
            pred_result(Variable): Prediction result with shape [N, 6]. Each
                row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
                N is the total number of prediction.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)

        # Divide the RoIs by the im_scale column of im_info (index 2).
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rois)
        boxes = rois / im_scale
        cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
        bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
        # self.box_coder
        decoded_box = fluid.layers.box_coder(
            prior_box=boxes,
            target_box=bbox_pred,
            prior_box_var=self.box_coder.prior_box_var,
            code_type=self.box_coder.code_type,
            box_normalized=self.box_coder.box_normalized,
            axis=self.box_coder.axis)
        cliped_box = fluid.layers.box_clip(
            input=decoded_box, im_info=im_shape)
        if return_box_score:
            return {'bbox': cliped_box, 'score': cls_prob}
        # self.nms
        pred_result = fluid.layers.multiclass_nms(
            bboxes=cliped_box,
            scores=cls_prob,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            normalized=self.nms.normalized,
            nms_eta=self.nms.nms_eta,
            background_label=self.nms.background_label)
        return pred_result
hub_module/modules/image/object_detection/faster_rcnn/data_feed.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_coco2017
/data_feed.py
浏览文件 @
3acfe6bd
...
@@ -15,14 +15,19 @@ __all__ = ['test_reader']
...
@@ -15,14 +15,19 @@ __all__ = ['test_reader']
def
test_reader
(
paths
=
None
,
images
=
None
):
def
test_reader
(
paths
=
None
,
images
=
None
):
"""data generator
"""
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images.
Yield:
:type paths: list, each element is a str
res (dict): key contains 'image' and 'im_info', the corresponding values is:
:param images: data of images, [N, H, W, C]
image (numpy.ndarray): the image to be fed into network
:type images: numpy.ndarray
im_info (numpy.ndarray): the info about the preprocessed.
"""
"""
img_list
=
[]
img_list
=
list
()
if
paths
:
if
paths
:
for
img_path
in
paths
:
for
img_path
in
paths
:
assert
os
.
path
.
isfile
(
assert
os
.
path
.
isfile
(
...
@@ -87,9 +92,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
...
@@ -87,9 +92,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else
:
else
:
max_shape
=
max_shape_org
.
astype
(
'int32'
)
max_shape
=
max_shape_org
.
astype
(
'int32'
)
padding_image
=
[]
padding_image
=
list
()
padding_info
=
[]
padding_info
=
list
()
padding_shape
=
[]
padding_shape
=
list
()
for
data
in
batch_data
:
for
data
in
batch_data
:
im_c
,
im_h
,
im_w
=
data
[
'image'
].
shape
im_c
,
im_h
,
im_w
=
data
[
'image'
].
shape
...
...
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/module.py
浏览文件 @
3acfe6bd
...
@@ -13,11 +13,18 @@ from math import ceil
...
@@ -13,11 +13,18 @@ from math import ceil
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
faster_rcnn_resnet50_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
faster_rcnn_resnet50_coco2017.data_feed
import
test_reader
,
padding_minibatch
from
faster_rcnn_resnet50_coco2017.resnet
import
ResNet
,
ResNetC5
from
faster_rcnn_resnet50_coco2017.resnet
import
ResNet
,
ResNetC5
from
faster_rcnn_resnet50_coco2017.rpn_head
import
AnchorGenerator
,
RPNTargetAssign
,
GenerateProposals
,
RPNHead
from
faster_rcnn_resnet50_coco2017.bbox_head
import
MultiClassNMS
,
BBoxHead
,
SmoothL1Loss
from
faster_rcnn_resnet50_coco2017.bbox_assigner
import
BBoxAssigner
from
faster_rcnn_resnet50_coco2017.roi_extractor
import
RoIAlign
@
moduleinfo
(
@
moduleinfo
(
...
@@ -27,17 +34,14 @@ from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
...
@@ -27,17 +34,14 @@ from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
summary
=
summary
=
"Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017"
,
"Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017"
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
FasterRCNNResNet50
(
hub
.
Module
):
class
FasterRCNNResNet50
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
faster_rcnn
=
hub
.
Module
(
name
=
"faster_rcnn"
)
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"faster_rcnn_resnet50_model"
)
self
.
directory
,
"faster_rcnn_resnet50_model"
)
self
.
label_names
=
self
.
faster_rcnn
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
def
_set_config
(
self
):
def
_set_config
(
self
):
...
@@ -66,20 +70,23 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -66,20 +70,23 @@ class FasterRCNNResNet50(hub.Module):
trainable
=
True
,
trainable
=
True
,
pretrained
=
True
,
pretrained
=
True
,
phase
=
'train'
):
phase
=
'train'
):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param param_prefix: the prefix of parameters in neural network.
:type param_prefix: str
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
"""
"""
wrapped_prog
=
fluid
.
Program
()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
...
@@ -91,18 +98,106 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -91,18 +98,106 @@ class FasterRCNNResNet50(hub.Module):
freeze_at
=
2
)
freeze_at
=
2
)
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
# Base Class
# var_prefix
inputs
,
outputs
,
context_prog
=
self
.
faster_rcnn
.
context
(
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
body_feats
=
body_feats
,
im_info
=
fluid
.
layers
.
data
(
fpn
=
None
,
name
=
'im_info'
,
shape
=
[
3
],
dtype
=
'float32'
,
lod_level
=
0
)
rpn_head
=
self
.
rpn_head
(),
im_shape
=
fluid
.
layers
.
data
(
roi_extractor
=
self
.
roi_extractor
(),
name
=
'im_shape'
,
shape
=
[
3
],
dtype
=
'float32'
,
lod_level
=
0
)
bbox_head
=
self
.
bbox_head
(
num_classes
),
body_feat_names
=
list
(
body_feats
.
keys
())
bbox_assigner
=
self
.
bbox_assigner
(
num_classes
),
# rpn_head: RPNHead
image
=
image
,
rpn_head
=
self
.
rpn_head
()
trainable
=
trainable
,
rois
=
rpn_head
.
get_proposals
(
body_feats
,
im_info
,
mode
=
phase
)
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
),
# train
phase
=
phase
)
if
phase
==
'train'
:
gt_bbox
=
fluid
.
layers
.
data
(
name
=
'gt_bbox'
,
shape
=
[
4
],
dtype
=
'float32'
,
lod_level
=
1
)
is_crowd
=
fluid
.
layers
.
data
(
name
=
'is_crowd'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
gt_class
=
fluid
.
layers
.
data
(
name
=
'gt_class'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
rpn_loss
=
rpn_head
.
get_loss
(
im_info
,
gt_bbox
,
is_crowd
)
# bbox_assigner: BBoxAssigner
bbox_assigner
=
self
.
bbox_assigner
(
num_classes
)
outs
=
fluid
.
layers
.
generate_proposal_labels
(
rpn_rois
=
rois
,
gt_classes
=
gt_class
,
is_crowd
=
is_crowd
,
gt_boxes
=
gt_bbox
,
im_info
=
im_info
,
batch_size_per_im
=
bbox_assigner
.
batch_size_per_im
,
fg_fraction
=
bbox_assigner
.
fg_fraction
,
fg_thresh
=
bbox_assigner
.
fg_thresh
,
bg_thresh_hi
=
bbox_assigner
.
bg_thresh_hi
,
bg_thresh_lo
=
bbox_assigner
.
bg_thresh_lo
,
bbox_reg_weights
=
bbox_assigner
.
bbox_reg_weights
,
class_nums
=
bbox_assigner
.
class_nums
,
use_random
=
bbox_assigner
.
use_random
)
rois
=
outs
[
0
]
body_feat
=
body_feats
[
body_feat_names
[
-
1
]]
# roi_extractor: RoIAlign
roi_extractor
=
self
.
roi_extractor
()
roi_feat
=
fluid
.
layers
.
roi_align
(
input
=
body_feat
,
rois
=
rois
,
pooled_height
=
roi_extractor
.
pooled_height
,
pooled_width
=
roi_extractor
.
pooled_width
,
spatial_scale
=
roi_extractor
.
spatial_scale
,
sampling_ratio
=
roi_extractor
.
sampling_ratio
)
# head_feat
bbox_head
=
self
.
bbox_head
(
num_classes
)
head_feat
=
bbox_head
.
head
(
roi_feat
)
if
isinstance
(
head_feat
,
OrderedDict
):
head_feat
=
list
(
head_feat
.
values
())[
0
]
if
phase
==
'train'
:
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_info'
:
var_prefix
+
im_info
.
name
,
'im_shape'
:
var_prefix
+
im_shape
.
name
,
'gt_class'
:
var_prefix
+
gt_class
.
name
,
'gt_bbox'
:
var_prefix
+
gt_bbox
.
name
,
'is_crowd'
:
var_prefix
+
is_crowd
.
name
}
outputs
=
{
'head_feat'
:
var_prefix
+
head_feat
.
name
,
'rpn_cls_loss'
:
var_prefix
+
rpn_loss
[
'rpn_cls_loss'
].
name
,
'rpn_reg_loss'
:
var_prefix
+
rpn_loss
[
'rpn_reg_loss'
].
name
,
'generate_proposal_labels'
:
[
var_prefix
+
var
.
name
for
var
in
outs
]
}
elif
phase
==
'predict'
:
pred
=
bbox_head
.
get_prediction
(
roi_feat
,
rois
,
im_info
,
im_shape
)
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_info'
:
var_prefix
+
im_info
.
name
,
'im_shape'
:
var_prefix
+
im_shape
.
name
}
outputs
=
{
'head_feat'
:
var_prefix
+
head_feat
.
name
,
'rois'
:
var_prefix
+
rois
.
name
,
'bbox_out'
:
var_prefix
+
pred
.
name
}
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
startup_program
,
var_prefix
)
global_vars
=
context_prog
.
global_block
().
vars
inputs
=
{
key
:
global_vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
outputs
=
{
key
:
global_vars
[
value
]
if
not
isinstance
(
value
,
list
)
else
[
global_vars
[
var
]
for
var
in
value
]
for
key
,
value
in
outputs
.
items
()
}
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
@@ -122,43 +217,42 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -122,43 +217,42 @@ class FasterRCNNResNet50(hub.Module):
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
rpn_head
(
self
):
def
rpn_head
(
self
):
return
self
.
faster_rcnn
.
RPNHead
(
return
RPNHead
(
anchor_generator
=
self
.
faster_rcnn
.
AnchorGenerator
(
anchor_generator
=
AnchorGenerator
(
anchor_sizes
=
[
32
,
64
,
128
,
256
,
512
],
anchor_sizes
=
[
32
,
64
,
128
,
256
,
512
],
aspect_ratios
=
[
0.5
,
1.0
,
2.0
],
aspect_ratios
=
[
0.5
,
1.0
,
2.0
],
stride
=
[
16.0
,
16.0
],
stride
=
[
16.0
,
16.0
],
variance
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
variance
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
rpn_target_assign
=
self
.
faster_rcnn
.
RPNTargetAssign
(
rpn_target_assign
=
RPNTargetAssign
(
rpn_batch_size_per_im
=
256
,
rpn_batch_size_per_im
=
256
,
rpn_fg_fraction
=
0.5
,
rpn_fg_fraction
=
0.5
,
rpn_negative_overlap
=
0.3
,
rpn_negative_overlap
=
0.3
,
rpn_positive_overlap
=
0.7
,
rpn_positive_overlap
=
0.7
,
rpn_straddle_thresh
=
0.0
),
rpn_straddle_thresh
=
0.0
),
train_proposal
=
self
.
faster_rcnn
.
GenerateProposals
(
train_proposal
=
GenerateProposals
(
min_size
=
0.0
,
min_size
=
0.0
,
nms_thresh
=
0.7
,
nms_thresh
=
0.7
,
post_nms_top_n
=
12000
,
post_nms_top_n
=
12000
,
pre_nms_top_n
=
2000
),
pre_nms_top_n
=
2000
),
test_proposal
=
self
.
faster_rcnn
.
GenerateProposals
(
test_proposal
=
GenerateProposals
(
min_size
=
0.0
,
min_size
=
0.0
,
nms_thresh
=
0.7
,
nms_thresh
=
0.7
,
post_nms_top_n
=
6000
,
post_nms_top_n
=
6000
,
pre_nms_top_n
=
1000
))
pre_nms_top_n
=
1000
))
def
roi_extractor
(
self
):
def
roi_extractor
(
self
):
return
self
.
faster_rcnn
.
RoIAlign
(
return
RoIAlign
(
resolution
=
14
,
sampling_ratio
=
0
,
spatial_scale
=
0.0625
)
resolution
=
14
,
sampling_ratio
=
0
,
spatial_scale
=
0.0625
)
def
bbox_head
(
self
,
num_classes
):
def
bbox_head
(
self
,
num_classes
):
return
self
.
faster_rcnn
.
BBoxHead
(
return
BBoxHead
(
head
=
ResNetC5
(
depth
=
50
,
norm_type
=
'affine_channel'
),
head
=
ResNetC5
(
depth
=
50
,
norm_type
=
'affine_channel'
),
nms
=
self
.
faster_rcnn
.
MultiClassNMS
(
nms
=
MultiClassNMS
(
keep_top_k
=
100
,
nms_threshold
=
0.5
,
score_threshold
=
0.05
),
keep_top_k
=
100
,
nms_threshold
=
0.5
,
score_threshold
=
0.05
),
bbox_loss
=
self
.
faster_rcnn
.
SmoothL1Loss
(),
bbox_loss
=
SmoothL1Loss
(),
num_classes
=
num_classes
)
num_classes
=
num_classes
)
def
bbox_assigner
(
self
,
num_classes
):
def
bbox_assigner
(
self
,
num_classes
):
return
self
.
faster_rcnn
.
BBoxAssigner
(
return
BBoxAssigner
(
batch_size_per_im
=
512
,
batch_size_per_im
=
512
,
bbox_reg_weights
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bbox_reg_weights
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bg_thresh_hi
=
0.5
,
bg_thresh_hi
=
0.5
,
...
@@ -178,26 +272,32 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -178,26 +272,32 @@ class FasterRCNNResNet50(hub.Module):
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detection.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detection. Keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
paths
=
paths
if
paths
else
list
()
if
data
and
'image'
in
data
:
if
data
and
'image'
in
data
:
paths
=
data
[
'image'
]
if
not
paths
else
paths
+
data
[
'image'
]
paths
+=
data
[
'image'
]
all_images
=
[]
paths
=
paths
if
paths
else
[]
all_images
=
list
()
for
yield_return
in
self
.
faster_rcnn
.
test_reader
(
paths
,
images
):
for
yield_return
in
test_reader
(
paths
,
images
):
all_images
.
append
(
yield_return
)
all_images
.
append
(
yield_return
)
images_num
=
len
(
all_images
)
images_num
=
len
(
all_images
)
...
@@ -211,7 +311,8 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -211,7 +311,8 @@ class FasterRCNNResNet50(hub.Module):
batch_data
.
append
(
all_images
[
handle_id
+
image_id
])
batch_data
.
append
(
all_images
[
handle_id
+
image_id
])
except
:
except
:
pass
pass
padding_image
,
padding_info
,
padding_shape
=
self
.
faster_rcnn
.
padding_minibatch
(
padding_image
,
padding_info
,
padding_shape
=
padding_minibatch
(
batch_data
)
batch_data
)
padding_image_tensor
=
PaddleTensor
(
padding_image
.
copy
())
padding_image_tensor
=
PaddleTensor
(
padding_image
.
copy
())
padding_info_tensor
=
PaddleTensor
(
padding_info
.
copy
())
padding_info_tensor
=
PaddleTensor
(
padding_info
.
copy
())
...
@@ -223,7 +324,7 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -223,7 +324,7 @@ class FasterRCNNResNet50(hub.Module):
data_out
=
self
.
gpu_predictor
.
run
(
feed_list
)
data_out
=
self
.
gpu_predictor
.
run
(
feed_list
)
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
(
feed_list
)
data_out
=
self
.
cpu_predictor
.
run
(
feed_list
)
output
=
self
.
faster_rcnn
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -275,6 +376,15 @@ class FasterRCNNResNet50(hub.Module):
...
@@ -275,6 +376,15 @@ class FasterRCNNResNet50(hub.Module):
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
return
input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images; each one is decoded
            with `base64_to_cv2` before detection.
        **kwargs: forwarded unchanged to `self.object_detection`.

    Returns:
        Whatever `self.object_detection` returns for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Pass the decoded arrays by keyword: the documented argument order of
    # object_detection is (paths, images, ...), so a positional argument
    # would be treated as file paths instead of image data.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
...
...
hub_module/modules/image/object_detection/faster_rcnn/processor.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_coco2017
/processor.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
import
base64
import
os
import
os
import
cv2
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
from
PIL
import
Image
,
ImageDraw
__all__
=
[
__all__
=
[
'get_save_image_name'
,
'draw_bounding_box_on_image'
,
'clip_bbox'
,
'base64_to_cv2'
,
'load_label_info'
'load_label_info'
,
'postprocess'
,
]
]
def base64_to_cv2(b64str):
    """
    Decode a base64 string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of `cv2.imdecode(..., cv2.IMREAD_COLOR)` on the decoded
        bytes.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer reads the
    # same bytes as uint8 without the deprecated code path.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
"""Get save image name from source image path.
"""Get save image name from source image path.
"""
"""
...
@@ -90,24 +100,29 @@ def postprocess(paths,
...
@@ -90,24 +100,29 @@ def postprocess(paths,
output_dir
,
output_dir
,
handle_id
,
handle_id
,
visualization
=
True
):
visualization
=
True
):
"""postprocess the lod_tensor produced by fluid.Executor.run
"""
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
Args:
:param images: data of images, [N, H, W, C]
paths (list[str]): the path of images.
:type images: numpy.ndarray
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run
data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor
score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box.
label_names (list[str]): label names.
:type score_thresh: float
output_dir (str): output directory.
:param label_names: label names
handle_id (int): The number of images that have been handled.
:type label_names: list
visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str
Returns:
:param handle_id: The number of images that have been handled.
res (list[dict]): The result of object detection. Keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
"""
lod_tensor
=
data_out
[
0
]
lod_tensor
=
data_out
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
...
...
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/roi_extractor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
__all__
=
[
'RoIAlign'
]
class RoIAlign(object):
    """
    Settings holder for RoIAlign pooling.

    Args:
        resolution (int|list|tuple): pooled output size. An int is used for
            both height and width; otherwise element 0 is the height and
            element 1 is the width.
        spatial_scale (float): stored as `spatial_scale`.
        sampling_ratio (int): stored as `sampling_ratio`.
    """

    def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        self.sampling_ratio = sampling_ratio
        self.spatial_scale = spatial_scale
        # A bare int describes a square pooling window.
        size = [resolution] * 2 if isinstance(resolution, int) else resolution
        self.pooled_height, self.pooled_width = size[0], size[1]
hub_module/modules/image/object_detection/faster_rcnn_resnet50_coco2017/rpn_head.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.initializer
import
Normal
from
paddle.fluid.regularizer
import
L2Decay
__all__
=
[
'AnchorGenerator'
,
'RPNTargetAssign'
,
'GenerateProposals'
,
'RPNHead'
]
class AnchorGenerator(object):
    """
    Settings holder for anchor generation.

    The stored values are passed to `fluid.layers.anchor_generator` by
    `RPNHead._get_output`.

    Args:
        stride (list): stored as `stride`.
        anchor_sizes (list): stored as `anchor_sizes`.
        aspect_ratios (list): stored as `aspect_ratios`.
        variance (list): stored as `variance`.
    """

    def __init__(self,
                 stride=[16.0, 16.0],
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1., 2.],
                 variance=[1., 1., 1., 1.]):
        super(AnchorGenerator, self).__init__()
        # Store copies: the defaults are list objects shared by every call,
        # so keeping a reference would let a change on one instance leak
        # into all later instances (and into the caller's own list).
        self.anchor_sizes = list(anchor_sizes)
        self.aspect_ratios = list(aspect_ratios)
        self.variance = list(variance)
        self.stride = list(stride)
class RPNTargetAssign(object):
    """
    Settings holder for RPN target assignment.

    Every argument is stored unchanged under an attribute of the same name;
    `RPNHead.get_loss` passes them to `fluid.layers.rpn_target_assign`.
    """

    def __init__(self,
                 rpn_batch_size_per_im=256,
                 rpn_straddle_thresh=0.,
                 rpn_fg_fraction=0.5,
                 rpn_positive_overlap=0.7,
                 rpn_negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        # Sampling settings.
        self.rpn_batch_size_per_im, self.rpn_fg_fraction, self.use_random = (
            rpn_batch_size_per_im, rpn_fg_fraction, use_random)
        # Threshold settings.
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_positive_overlap, self.rpn_negative_overlap = (
            rpn_positive_overlap, rpn_negative_overlap)
class GenerateProposals(object):
    """
    Settings holder for proposal generation.

    Every argument is stored unchanged under an attribute of the same name;
    `RPNHead.get_proposals` passes them to
    `fluid.layers.generate_proposals`.
    """

    def __init__(self,
                 pre_nms_top_n=6000,
                 post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(GenerateProposals, self).__init__()
        # Candidate counts kept before and after NMS.
        self.pre_nms_top_n, self.post_nms_top_n = pre_nms_top_n, post_nms_top_n
        # Remaining NMS / filtering settings.
        self.nms_thresh, self.min_size, self.eta = nms_thresh, min_size, eta
class RPNHead(object):
    """
    RPN Head

    Args:
        anchor_generator (object): `AnchorGenerator` instance
        rpn_target_assign (object): `RPNTargetAssign` instance
        train_proposal (object): `GenerateProposals` instance for training
        test_proposal (object): `GenerateProposals` instance for testing
        num_classes (int): number of classes in rpn output
    """
    __inject__ = [
        'anchor_generator', 'rpn_target_assign', 'train_proposal',
        'test_proposal'
    ]

    def __init__(self,
                 anchor_generator,
                 rpn_target_assign,
                 train_proposal,
                 test_proposal,
                 num_classes=1):
        super(RPNHead, self).__init__()
        self.anchor_generator = anchor_generator
        self.rpn_target_assign = rpn_target_assign
        self.train_proposal = train_proposal
        self.test_proposal = test_proposal
        self.num_classes = num_classes

    def _get_output(self, input):
        """
        Get anchor and RPN head output.

        Besides returning the two head outputs, this caches `self.anchor`,
        `self.anchor_var`, `self.rpn_cls_score` and `self.rpn_bbox_pred`,
        which `get_proposals` and `get_loss` read later.

        Args:
            input(Variable): feature map from backbone with shape of [N, C, H, W]

        Returns:
            rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W].
        """
        dim_out = input.shape[1]
        rpn_conv = fluid.layers.conv2d(
            input=input,
            num_filters=dim_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            name='conv_rpn',
            param_attr=ParamAttr(
                name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        # Generate anchors from the settings held by self.anchor_generator.
        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
            input=rpn_conv,
            anchor_sizes=self.anchor_generator.anchor_sizes,
            aspect_ratios=self.anchor_generator.aspect_ratios,
            variance=self.anchor_generator.variance,
            stride=self.anchor_generator.stride)

        num_anchor = self.anchor.shape[2]
        # Proposal classification scores
        self.rpn_cls_score = fluid.layers.conv2d(
            rpn_conv,
            num_filters=num_anchor * self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_cls_score',
            param_attr=ParamAttr(
                name="rpn_cls_logits_w", initializer=Normal(loc=0.,
                                                            scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_cls_logits_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        # Proposal bbox regression deltas
        self.rpn_bbox_pred = fluid.layers.conv2d(
            rpn_conv,
            num_filters=4 * num_anchor,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_bbox_pred',
            param_attr=ParamAttr(
                name="rpn_bbox_pred_w", initializer=Normal(loc=0.,
                                                           scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_bbox_pred_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def get_proposals(self, body_feats, im_info, mode='train'):
        """
        Get proposals according to the output of backbone.

        Args:
            body_feats (dict): The dictionary of feature maps from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            mode (str): 'train' selects `self.train_proposal`; any other
                value selects `self.test_proposal`.

        Returns:
            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
        """
        # In RPN Heads, only the last feature map of backbone is used.
        body_feat = list(body_feats.values())[-1]
        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)

        if self.num_classes == 1:
            rpn_cls_prob = fluid.layers.sigmoid(
                rpn_cls_score, name='rpn_cls_prob')
        else:
            rpn_cls_score = fluid.layers.transpose(
                rpn_cls_score, perm=[0, 2, 3, 1])
            rpn_cls_score = fluid.layers.reshape(
                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_tmp = fluid.layers.softmax(
                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
            # Drop index 0 of the class axis, then keep the best remaining
            # score per anchor.
            rpn_cls_prob_slice = fluid.layers.slice(
                rpn_cls_prob_tmp,
                axes=[4],
                starts=[1],
                ends=[self.num_classes])
            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
            rpn_cls_prob = fluid.layers.reshape(
                rpn_cls_prob, shape=(0, 0, 0, -1))
            rpn_cls_prob = fluid.layers.transpose(
                rpn_cls_prob, perm=[0, 3, 1, 2])

        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
        # Only the RoIs are returned; the second output (their
        # probabilities) is not used.
        rpn_rois, _ = fluid.layers.generate_proposals(
            scores=rpn_cls_prob,
            bbox_deltas=rpn_bbox_pred,
            im_info=im_info,
            anchors=self.anchor,
            variances=self.anchor_var,
            pre_nms_top_n=prop_op.pre_nms_top_n,
            post_nms_top_n=prop_op.post_nms_top_n,
            nms_thresh=prop_op.nms_thresh,
            min_size=prop_op.min_size,
            eta=prop_op.eta)
        return rpn_rois

    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
                         anchor_var):
        """
        Reshape the head outputs and anchors into the layout used by the
        target-assign step in `get_loss`.

        Scores become (0, -1, num_classes), box deltas (0, -1, 4), and both
        anchor tensors (-1, 4), using the shape arguments passed to
        `fluid.layers.reshape`.
        """
        rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
        rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
        rpn_cls_score = fluid.layers.reshape(
            x=rpn_cls_score, shape=(0, -1, self.num_classes))
        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var

    def _get_loss_input(self):
        """
        Return the cached head outputs and anchors, transformed for the loss.

        Raises:
            ValueError: if any cached value is missing, i.e. `get_proposals`
                has not been called on this instance yet.
        """
        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
            # Compare with None explicitly rather than relying on the
            # truthiness of the cached object.
            if getattr(self, attr, None) is None:
                # Build one message string; passing two arguments to
                # ValueError would render the error as a tuple.
                raise ValueError(
                    "self.{} should not be None, "
                    "call RPNHead.get_proposals first".format(attr))
        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
                                     self.anchor, self.anchor_var)

    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
        """
        Sample proposals and Calculate rpn loss.

        Args:
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
                M is the number of groundtruth.
            is_crowd(Variable): Indicates ground-truth is crowd or not with
                shape [M, 1]. M is the number of groundtruth.
            gt_label(Variable): ground-truth labels; only passed on when
                `num_classes` is not 1.

        Returns:
            Type: dict
                rpn_cls_loss(Variable): RPN classification loss.
                rpn_reg_loss(Variable): RPN bounding box regression loss.

        Raises:
            ValueError: if `get_proposals` has not been called first.
            TypeError: if `num_classes` is not 1 and `rpn_target_assign` is
                not callable.
        """
        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
        if self.num_classes == 1:
            # Single-class case: use the fluid op with the settings held by
            # self.rpn_target_assign.
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    is_crowd=is_crowd,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap,
                    use_random=self.rpn_target_assign.use_random)
            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
            score_tgt.stop_gradient = True
            rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                x=score_pred, label=score_tgt)
        else:
            # Multi-class case: the assigner itself is invoked, so it must be
            # a callable. The RPNTargetAssign class in this module is a plain
            # settings holder, so report that clearly instead of failing with
            # an opaque "object is not callable".
            if not callable(self.rpn_target_assign):
                raise TypeError(
                    "rpn_target_assign must be callable when num_classes != 1, "
                    "got {}".format(type(self.rpn_target_assign).__name__))
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                self.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    gt_labels=gt_label,
                    is_crowd=is_crowd,
                    num_classes=self.num_classes,
                    im_info=im_info)
            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
            labels_int64.stop_gradient = True
            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
                logits=score_pred, label=labels_int64, numeric_stable_mode=True)

        rpn_cls_loss = fluid.layers.reduce_mean(
            rpn_cls_loss, name='loss_rpn_cls')

        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
        loc_tgt.stop_gradient = True
        rpn_reg_loss = fluid.layers.smooth_l1(
            x=loc_pred,
            y=loc_tgt,
            sigma=3.0,
            inside_weight=bbox_weight,
            outside_weight=bbox_weight)
        rpn_reg_loss = fluid.layers.reduce_sum(
            rpn_reg_loss, name='loss_rpn_bbox')
        # Normalize the summed regression loss by the number of elements in
        # score_tgt (the product of its shape).
        score_shape = fluid.layers.shape(score_tgt)
        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
        norm = fluid.layers.reduce_prod(score_shape)
        norm.stop_gradient = True
        rpn_reg_loss = rpn_reg_loss / norm

        return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss}
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
faster_rcnn_resnet50_fpn_coco2017
==
1.0.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
*
num
\_
classes (int): 类别数;
*
trainable(bool): 将参数的trainable 属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program。
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存在模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"faster_rcnn_resnet50_fpn_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
faster_rcnn_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/faster_rcnn_resnet50_fpn_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/faster_rcnn/bbox_assigner.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_fpn_coco2017
/bbox_assigner.py
浏览文件 @
3acfe6bd
文件已移动
hub_module/modules/image/object_detection/faster_rcnn/bbox_head.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_fpn_coco2017
/bbox_head.py
浏览文件 @
3acfe6bd
...
@@ -53,7 +53,6 @@ class SmoothL1Loss(object):
...
@@ -53,7 +53,6 @@ class SmoothL1Loss(object):
class
BoxCoder
(
object
):
class
BoxCoder
(
object
):
# __op__ = fluid.layers.box_coder
def
__init__
(
self
,
def
__init__
(
self
,
prior_box_var
=
[
0.1
,
0.1
,
0.2
,
0.2
],
prior_box_var
=
[
0.1
,
0.1
,
0.2
,
0.2
],
code_type
=
'decode_center_size'
,
code_type
=
'decode_center_size'
,
...
...
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
from
collections
import
OrderedDict
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageEnhance
from
paddle
import
fluid
__all__
=
[
'test_reader'
]
def test_reader(paths=None, images=None):
    """
    Data generator that preprocesses images for the detector.

    Every image is converted from BGR to RGB, scaled to [0, 1], normalised
    with a fixed per-channel mean/std, resized so that its short side is 800
    pixels (the scale is reduced if the long side would exceed 1333 pixels)
    and finally transposed from HWC to CHW.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]

    Yield:
        res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is:
            image (numpy.ndarray): the preprocessed CHW image to be fed into network.
            im_info (numpy.ndarray): float32 [resized_h, resized_w, scale].
            im_shape (numpy.ndarray): float32 [original_h, original_w, 1.0].

    Raises:
        AssertionError: if an entry of ``paths`` is not an existing file.
        ValueError: if a file exists but cv2.imread cannot decode it.
    """
    # Loop-invariant preprocessing constants, built once instead of per image.
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
    target_size = 800
    max_size = 1333

    # Load every path up front so that a bad path fails before anything is yielded.
    img_list = list()
    if paths:
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            # cv2.imread signals a decode failure by returning None instead of raising.
            if img is None:
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        for img in images:
            img_list.append(img)

    for im in img_list:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = im.astype(np.float32, copy=False)
        # The division creates a new array, so the in-place ops below never
        # touch the caller's image.
        im = im / 255.0
        im -= mean
        im /= std

        shape = im.shape
        # im_shape holds the original shape of image.
        im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32')

        im_size_min = np.min(shape[0:2])
        im_size_max = np.max(shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # Shrink the scale when the long side would overflow max_size.
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        resize_w = np.round(im_scale * float(shape[1]))
        resize_h = np.round(im_scale * float(shape[0]))
        # im_info holds the resize info of image.
        im_info = np.array([resize_h, resize_w, im_scale]).astype('float32')

        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=cv2.INTER_LINEAR)

        # HWC --> CHW
        im = im.transpose((2, 0, 1))
        yield {'image': im, 'im_info': im_info, 'im_shape': im_shape}
def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
    """
    Zero-pad the images of a batch to a common size and stack the batch.

    Each image is copied into the top-left corner of a zero-filled canvas
    whose height/width are the per-batch maxima, optionally rounded up to a
    multiple of ``coarsest_stride``.

    Args:
        batch_data (list[dict]): samples with the keys 'image' (array indexed
            as [C, H, W]), 'im_info' and 'im_shape'.
        coarsest_stride (int): when > 0, the padded height and width are
            rounded up to a multiple of this value.
        use_padded_im_info (bool): if True, the first two entries of every
            im_info are set to the padded height/width; otherwise to the
            un-rounded per-batch maximum height/width.

    Returns:
        tuple(numpy.ndarray, numpy.ndarray, numpy.ndarray): float32 arrays
        (padding_image, padding_info, padding_shape), stacked along a new
        leading batch axis.
    """
    max_shape_org = np.array(
        [data['image'].shape for data in batch_data]).max(axis=0)
    if coarsest_stride > 0:
        # Only indices 1 (height) and 2 (width) are used below.
        max_shape = np.zeros((3)).astype('int32')
        max_shape[1] = int(
            np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride)
        max_shape[2] = int(
            np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride)
    else:
        max_shape = max_shape_org.astype('int32')

    # The height/width written into im_info is the same for every sample.
    info_h = max_shape[1] if use_padded_im_info else max_shape_org[1]
    info_w = max_shape[2] if use_padded_im_info else max_shape_org[2]

    padding_image = list()
    padding_info = list()
    padding_shape = list()
    for data in batch_data:
        im_c, im_h, im_w = data['image'].shape
        # image
        padding_im = np.zeros((im_c, max_shape[1], max_shape[2]),
                              dtype=np.float32)
        padding_im[:, 0:im_h, 0:im_w] = data['image']
        padding_image.append(padding_im)
        # im_info: work on a copy so the caller's array is not modified.
        im_info = np.array(data['im_info'])
        im_info[0] = info_h
        im_info[1] = info_w
        padding_info.append(im_info)
        padding_shape.append(data['im_shape'])

    padding_image = np.array(padding_image).astype('float32')
    padding_info = np.array(padding_info).astype('float32')
    padding_shape = np.array(padding_shape).astype('float32')
    return padding_image, padding_info, padding_shape
hub_module/modules/image/object_detection/faster_rcnn_resnet50_fpn_coco2017/module.py
浏览文件 @
3acfe6bd
...
@@ -13,12 +13,19 @@ from math import ceil
...
@@ -13,12 +13,19 @@ from math import ceil
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
faster_rcnn_resnet50_fpn_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
faster_rcnn_resnet50_fpn_coco2017.data_feed
import
test_reader
,
padding_minibatch
from
faster_rcnn_resnet50_fpn_coco2017.fpn
import
FPN
from
faster_rcnn_resnet50_fpn_coco2017.fpn
import
FPN
from
faster_rcnn_resnet50_fpn_coco2017.resnet
import
ResNet
,
ResNetC5
from
faster_rcnn_resnet50_fpn_coco2017.resnet
import
ResNet
from
faster_rcnn_resnet50_fpn_coco2017.rpn_head
import
AnchorGenerator
,
RPNTargetAssign
,
GenerateProposals
,
FPNRPNHead
from
faster_rcnn_resnet50_fpn_coco2017.bbox_head
import
MultiClassNMS
,
BBoxHead
,
TwoFCHead
from
faster_rcnn_resnet50_fpn_coco2017.bbox_assigner
import
BBoxAssigner
from
faster_rcnn_resnet50_fpn_coco2017.roi_extractor
import
FPNRoIAlign
@
moduleinfo
(
@
moduleinfo
(
...
@@ -28,18 +35,15 @@ from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5
...
@@ -28,18 +35,15 @@ from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5
summary
=
summary
=
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks"
,
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks"
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
FasterRCNNResNet50RPN
(
hub
.
Module
):
class
FasterRCNNResNet50RPN
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
faster_rcnn
=
hub
.
Module
(
name
=
"faster_rcnn"
)
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"faster_rcnn_resnet50_fpn_model"
)
self
.
directory
,
"faster_rcnn_resnet50_fpn_model"
)
self
.
label_names
=
self
.
faster_rcnn
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
# self._set_config()
self
.
bbox_out
=
None
self
.
_set_config
()
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
"""
...
@@ -67,18 +71,23 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -67,18 +71,23 @@ class FasterRCNNResNet50RPN(hub.Module):
trainable
=
True
,
trainable
=
True
,
pretrained
=
True
,
pretrained
=
True
,
phase
=
'train'
):
phase
=
'train'
):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
"""
"""
wrapped_prog
=
fluid
.
Program
()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
...
@@ -89,25 +98,107 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -89,25 +98,107 @@ class FasterRCNNResNet50RPN(hub.Module):
feature_maps
=
[
2
,
3
,
4
,
5
],
feature_maps
=
[
2
,
3
,
4
,
5
],
freeze_at
=
2
)
freeze_at
=
2
)
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
# fpn
: FPN
# fpn
fpn
=
FPN
(
fpn
=
FPN
(
max_level
=
6
,
max_level
=
6
,
min_level
=
2
,
min_level
=
2
,
num_chan
=
256
,
num_chan
=
256
,
spatial_scale
=
[
0.03125
,
0.0625
,
0.125
,
0.25
])
spatial_scale
=
[
0.03125
,
0.0625
,
0.125
,
0.25
])
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
im_info
=
fluid
.
layers
.
data
(
name
=
'im_info'
,
shape
=
[
3
],
dtype
=
'float32'
,
lod_level
=
0
)
im_shape
=
fluid
.
layers
.
data
(
name
=
'im_shape'
,
shape
=
[
3
],
dtype
=
'float32'
,
lod_level
=
0
)
body_feat_names
=
list
(
body_feats
.
keys
())
body_feats
,
spatial_scale
=
fpn
.
get_output
(
body_feats
)
# rpn_head: RPNHead
rpn_head
=
self
.
rpn_head
()
rois
=
rpn_head
.
get_proposals
(
body_feats
,
im_info
,
mode
=
phase
)
# train
if
phase
==
'train'
:
gt_bbox
=
fluid
.
layers
.
data
(
name
=
'gt_bbox'
,
shape
=
[
4
],
dtype
=
'float32'
,
lod_level
=
1
)
is_crowd
=
fluid
.
layers
.
data
(
name
=
'is_crowd'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
gt_class
=
fluid
.
layers
.
data
(
name
=
'gt_class'
,
shape
=
[
1
],
dtype
=
'int32'
,
lod_level
=
1
)
rpn_loss
=
rpn_head
.
get_loss
(
im_info
,
gt_bbox
,
is_crowd
)
# bbox_assigner: BBoxAssigner
bbox_assigner
=
self
.
bbox_assigner
(
num_classes
)
outs
=
fluid
.
layers
.
generate_proposal_labels
(
rpn_rois
=
rois
,
gt_classes
=
gt_class
,
is_crowd
=
is_crowd
,
gt_boxes
=
gt_bbox
,
im_info
=
im_info
,
batch_size_per_im
=
bbox_assigner
.
batch_size_per_im
,
fg_fraction
=
bbox_assigner
.
fg_fraction
,
fg_thresh
=
bbox_assigner
.
fg_thresh
,
bg_thresh_hi
=
bbox_assigner
.
bg_thresh_hi
,
bg_thresh_lo
=
bbox_assigner
.
bg_thresh_lo
,
bbox_reg_weights
=
bbox_assigner
.
bbox_reg_weights
,
class_nums
=
bbox_assigner
.
class_nums
,
use_random
=
bbox_assigner
.
use_random
)
rois
=
outs
[
0
]
roi_extractor
=
self
.
roi_extractor
()
roi_feat
=
roi_extractor
(
head_inputs
=
body_feats
,
rois
=
rois
,
spatial_scale
=
spatial_scale
)
# head_feat
bbox_head
=
self
.
bbox_head
(
num_classes
)
head_feat
=
bbox_head
.
head
(
roi_feat
)
if
isinstance
(
head_feat
,
OrderedDict
):
head_feat
=
list
(
head_feat
.
values
())[
0
]
if
phase
==
'train'
:
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_info'
:
var_prefix
+
im_info
.
name
,
'im_shape'
:
var_prefix
+
im_shape
.
name
,
'gt_class'
:
var_prefix
+
gt_class
.
name
,
'gt_bbox'
:
var_prefix
+
gt_bbox
.
name
,
'is_crowd'
:
var_prefix
+
is_crowd
.
name
}
outputs
=
{
'head_feat'
:
var_prefix
+
head_feat
.
name
,
'rpn_cls_loss'
:
var_prefix
+
rpn_loss
[
'rpn_cls_loss'
].
name
,
'rpn_reg_loss'
:
var_prefix
+
rpn_loss
[
'rpn_reg_loss'
].
name
,
'generate_proposal_labels'
:
[
var_prefix
+
var
.
name
for
var
in
outs
]
}
elif
phase
==
'predict'
:
pred
=
bbox_head
.
get_prediction
(
roi_feat
,
rois
,
im_info
,
im_shape
)
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_info'
:
var_prefix
+
im_info
.
name
,
'im_shape'
:
var_prefix
+
im_shape
.
name
}
outputs
=
{
'head_feat'
:
var_prefix
+
head_feat
.
name
,
'rois'
:
var_prefix
+
rois
.
name
,
'bbox_out'
:
var_prefix
+
pred
.
name
}
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
startup_program
,
var_prefix
)
# Base Class
global_vars
=
context_prog
.
global_block
().
vars
inputs
,
outputs
,
context_prog
=
self
.
faster_rcnn
.
context
(
inputs
=
{
body_feats
=
body_feats
,
key
:
global_vars
[
value
]
fpn
=
fpn
,
for
key
,
value
in
inputs
.
items
()
rpn_head
=
self
.
rpn_head
(),
}
roi_extractor
=
self
.
roi_extractor
(),
outputs
=
{
bbox_head
=
self
.
bbox_head
(
num_classes
),
key
:
global_vars
[
value
]
if
not
isinstance
(
value
,
list
)
else
bbox_assigner
=
self
.
bbox_assigner
(
num_classes
),
[
global_vars
[
var
]
for
var
in
value
]
image
=
image
,
for
key
,
value
in
outputs
.
items
()
trainable
=
trainable
,
}
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
),
phase
=
phase
)
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
@@ -127,24 +218,24 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -127,24 +218,24 @@ class FasterRCNNResNet50RPN(hub.Module):
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
rpn_head
(
self
):
def
rpn_head
(
self
):
return
self
.
faster_rcnn
.
FPNRPNHead
(
return
FPNRPNHead
(
anchor_generator
=
self
.
faster_rcnn
.
AnchorGenerator
(
anchor_generator
=
AnchorGenerator
(
anchor_sizes
=
[
32
,
64
,
128
,
256
,
512
],
anchor_sizes
=
[
32
,
64
,
128
,
256
,
512
],
aspect_ratios
=
[
0.5
,
1.0
,
2.0
],
aspect_ratios
=
[
0.5
,
1.0
,
2.0
],
stride
=
[
16.0
,
16.0
],
stride
=
[
16.0
,
16.0
],
variance
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
variance
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
rpn_target_assign
=
self
.
faster_rcnn
.
RPNTargetAssign
(
rpn_target_assign
=
RPNTargetAssign
(
rpn_batch_size_per_im
=
256
,
rpn_batch_size_per_im
=
256
,
rpn_fg_fraction
=
0.5
,
rpn_fg_fraction
=
0.5
,
rpn_negative_overlap
=
0.3
,
rpn_negative_overlap
=
0.3
,
rpn_positive_overlap
=
0.7
,
rpn_positive_overlap
=
0.7
,
rpn_straddle_thresh
=
0.0
),
rpn_straddle_thresh
=
0.0
),
train_proposal
=
self
.
faster_rcnn
.
GenerateProposals
(
train_proposal
=
GenerateProposals
(
min_size
=
0.0
,
min_size
=
0.0
,
nms_thresh
=
0.7
,
nms_thresh
=
0.7
,
post_nms_top_n
=
2000
,
post_nms_top_n
=
2000
,
pre_nms_top_n
=
2000
),
pre_nms_top_n
=
2000
),
test_proposal
=
self
.
faster_rcnn
.
GenerateProposals
(
test_proposal
=
GenerateProposals
(
min_size
=
0.0
,
min_size
=
0.0
,
nms_thresh
=
0.7
,
nms_thresh
=
0.7
,
post_nms_top_n
=
1000
,
post_nms_top_n
=
1000
,
...
@@ -155,7 +246,7 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -155,7 +246,7 @@ class FasterRCNNResNet50RPN(hub.Module):
max_level
=
6
)
max_level
=
6
)
def
roi_extractor
(
self
):
def
roi_extractor
(
self
):
return
self
.
faster_rcnn
.
FPNRoIAlign
(
return
FPNRoIAlign
(
canconical_level
=
4
,
canconical_level
=
4
,
canonical_size
=
224
,
canonical_size
=
224
,
max_level
=
5
,
max_level
=
5
,
...
@@ -164,14 +255,14 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -164,14 +255,14 @@ class FasterRCNNResNet50RPN(hub.Module):
sampling_ratio
=
2
)
sampling_ratio
=
2
)
def
bbox_head
(
self
,
num_classes
):
def
bbox_head
(
self
,
num_classes
):
return
self
.
faster_rcnn
.
BBoxHead
(
return
BBoxHead
(
head
=
self
.
faster_rcnn
.
TwoFCHead
(
mlp_dim
=
1024
),
head
=
TwoFCHead
(
mlp_dim
=
1024
),
nms
=
self
.
faster_rcnn
.
MultiClassNMS
(
nms
=
MultiClassNMS
(
keep_top_k
=
100
,
nms_threshold
=
0.5
,
score_threshold
=
0.05
),
keep_top_k
=
100
,
nms_threshold
=
0.5
,
score_threshold
=
0.05
),
num_classes
=
num_classes
)
num_classes
=
num_classes
)
def
bbox_assigner
(
self
,
num_classes
):
def
bbox_assigner
(
self
,
num_classes
):
return
self
.
faster_rcnn
.
BBoxAssigner
(
return
BBoxAssigner
(
batch_size_per_im
=
512
,
batch_size_per_im
=
512
,
bbox_reg_weights
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bbox_reg_weights
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bg_thresh_hi
=
0.5
,
bg_thresh_hi
=
0.5
,
...
@@ -183,6 +274,7 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -183,6 +274,7 @@ class FasterRCNNResNet50RPN(hub.Module):
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
None
,
paths
=
None
,
images
=
None
,
images
=
None
,
data
=
None
,
use_gpu
=
False
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
...
@@ -190,37 +282,49 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -190,37 +282,49 @@ class FasterRCNNResNet50RPN(hub.Module):
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
all_images
=
[]
paths
=
paths
if
paths
else
list
()
paths
=
paths
if
paths
else
[]
if
data
and
'image'
in
data
:
for
yield_data
in
self
.
faster_rcnn
.
test_reader
(
paths
,
images
):
paths
+=
data
[
'image'
]
all_images
=
list
()
for
yield_data
in
test_reader
(
paths
,
images
):
all_images
.
append
(
yield_data
)
all_images
.
append
(
yield_data
)
images_num
=
len
(
all_images
)
images_num
=
len
(
all_images
)
loop_num
=
ceil
(
images_num
/
batch_size
)
loop_num
=
ceil
(
images_num
/
batch_size
)
res
=
[]
res
=
[]
for
iter_id
in
range
(
loop_num
):
for
iter_id
in
range
(
loop_num
):
batch_data
=
[]
batch_data
=
[]
handle_id
=
iter_id
*
batch_size
handle_id
=
iter_id
*
batch_size
for
image_id
in
range
(
batch_size
):
for
image_id
in
range
(
batch_size
):
try
:
try
:
batch_data
.
append
(
all_images
[
handle_id
+
image_id
])
batch_data
.
append
(
all_images
[
handle_id
+
image_id
])
except
:
except
:
pass
pass
padding_image
,
padding_info
,
padding_shape
=
self
.
faster_rcnn
.
padding_minibatch
(
padding_image
,
padding_info
,
padding_shape
=
padding_minibatch
(
batch_data
,
coarsest_stride
=
32
,
use_padded_im_info
=
True
)
batch_data
,
coarsest_stride
=
32
,
use_padded_im_info
=
True
)
padding_image_tensor
=
PaddleTensor
(
padding_image
.
copy
())
padding_image_tensor
=
PaddleTensor
(
padding_image
.
copy
())
padding_info_tensor
=
PaddleTensor
(
padding_info
.
copy
())
padding_info_tensor
=
PaddleTensor
(
padding_info
.
copy
())
...
@@ -228,12 +332,13 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -228,12 +332,13 @@ class FasterRCNNResNet50RPN(hub.Module):
feed_list
=
[
feed_list
=
[
padding_image_tensor
,
padding_info_tensor
,
padding_shape_tensor
padding_image_tensor
,
padding_info_tensor
,
padding_shape_tensor
]
]
if
use_gpu
:
if
use_gpu
:
data_out
=
self
.
gpu_predictor
.
run
(
feed_list
)
data_out
=
self
.
gpu_predictor
.
run
(
feed_list
)
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
(
feed_list
)
data_out
=
self
.
cpu_predictor
.
run
(
feed_list
)
output
=
self
.
faster_rcnn
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -243,6 +348,7 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -243,6 +348,7 @@ class FasterRCNNResNet50RPN(hub.Module):
handle_id
=
handle_id
,
handle_id
=
handle_id
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
+=
output
return
res
return
res
def
add_module_config_arg
(
self
):
def
add_module_config_arg
(
self
):
...
@@ -269,7 +375,7 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -269,7 +375,7 @@ class FasterRCNNResNet50RPN(hub.Module):
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
self
.
arg_input_group
.
add_argument
(
self
.
arg_input_group
.
add_argument
(
'--input_
path
'
,
'--input_
file
'
,
type
=
str
,
type
=
str
,
default
=
None
,
default
=
None
,
help
=
"file contain input data"
)
help
=
"file contain input data"
)
...
@@ -285,6 +391,15 @@ class FasterRCNNResNet50RPN(hub.Module):
...
@@ -285,6 +391,15 @@ class FasterRCNNResNet50RPN(hub.Module):
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
return
input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images; each one is decoded with
            base64_to_cv2 before detection.
        **kwargs: forwarded unchanged to object_detection.

    Returns:
        The value returned by object_detection for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # The first positional parameter of object_detection is `paths`, so the
    # decoded arrays must be passed by keyword to be treated as image data.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
...
...
hub_module/modules/image/object_detection/
ssd
/processor.py
→
hub_module/modules/image/object_detection/
faster_rcnn_resnet50_fpn_coco2017
/processor.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
import
base64
import
os
import
os
import
cv2
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
from
PIL
import
Image
,
ImageDraw
__all__
=
[
__all__
=
[
'get_save_image_name'
,
'draw_bounding_box_on_image'
,
'clip_bbox'
,
'base64_to_cv2'
,
'load_label_info'
'load_label_info'
,
'postprocess'
,
]
]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file (e.g. JPEG bytes).

    Returns:
        The result of cv2.imdecode with cv2.IMREAD_COLOR on the decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer reads the
    # same bytes without the deprecation warning.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
"""Get save image name from source image path.
"""Get save image name from source image path.
"""
"""
...
@@ -62,7 +72,6 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir):
...
@@ -62,7 +72,6 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir):
os
.
remove
(
save_name
)
os
.
remove
(
save_name
)
image
.
save
(
save_name
)
image
.
save
(
save_name
)
return
save_name
return
save_name
...
@@ -91,28 +100,34 @@ def postprocess(paths,
...
@@ -91,28 +100,34 @@ def postprocess(paths,
output_dir
,
output_dir
,
handle_id
,
handle_id
,
visualization
=
True
):
visualization
=
True
):
"""postprocess the lod_tensor produced by fluid.Executor.run
"""
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
Args:
:param images: data of images, [N, H, W, C]
paths (list[str]): the path of images.
:type images: numpy.ndarray
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run
data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor
score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box.
label_names (list[str]): label names.
:type score_thresh: float
output_dir (str): output directory.
:param label_names: label names
handle_id (int): The number of images that have been handled.
:type label_names: list
visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str
Returns:
:param handle_id: The number of images that have been handled.
res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox.
left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
"""
lod_tensor
=
data_out
[
0
]
lod_tensor
=
data_out
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
results
=
lod_tensor
.
as_ndarray
()
results
=
lod_tensor
.
as_ndarray
()
if
handle_id
<
len
(
paths
):
if
handle_id
<
len
(
paths
):
unhandled_paths
=
paths
[
handle_id
:]
unhandled_paths
=
paths
[
handle_id
:]
unhandled_paths_num
=
len
(
unhandled_paths
)
unhandled_paths_num
=
len
(
unhandled_paths
)
...
@@ -146,10 +161,6 @@ def postprocess(paths,
...
@@ -146,10 +161,6 @@ def postprocess(paths,
category_id
=
int
(
row
[
0
])
category_id
=
int
(
row
[
0
])
confidence
=
row
[
1
]
confidence
=
row
[
1
]
bbox
=
row
[
2
:]
bbox
=
row
[
2
:]
bbox
[
0
]
=
bbox
[
0
]
*
org_img_width
bbox
[
1
]
=
bbox
[
1
]
*
org_img_height
bbox
[
2
]
=
bbox
[
2
]
*
org_img_width
bbox
[
3
]
=
bbox
[
3
]
*
org_img_height
dt
=
{}
dt
=
{}
dt
[
'label'
]
=
label_names
[
category_id
]
dt
[
'label'
]
=
label_names
[
category_id
]
dt
[
'confidence'
]
=
confidence
dt
[
'confidence'
]
=
confidence
...
...
hub_module/modules/image/object_detection/faster_rcnn/roi_extractor.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_fpn_coco2017
/roi_extractor.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
__all__
=
[
'RoIAlign'
,
'FPNRoIAlign'
]
__all__
=
[
'FPNRoIAlign'
]
class RoIAlign(object):
    """
    Plain holder for RoI-align settings.

    Args:
        resolution (int | sequence): pooled output size; a single int is used
            for both height and width, otherwise items 0 and 1 are taken as
            (height, width).
        spatial_scale (float): stored unchanged on the instance.
        sampling_ratio (int): stored unchanged on the instance.
    """
    # __op__ = fluid.layers.roi_align

    def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        # Expand a scalar resolution into a [height, width] pair.
        pooled_size = [resolution, resolution] if isinstance(
            resolution, int) else resolution
        self.pooled_height = pooled_size[0]
        self.pooled_width = pooled_size[1]
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
class
FPNRoIAlign
(
object
):
class
FPNRoIAlign
(
object
):
...
...
hub_module/modules/image/object_detection/faster_rcnn/rpn_head.py
→
hub_module/modules/image/object_detection/faster_rcnn
_resnet50_fpn_coco2017
/rpn_head.py
浏览文件 @
3acfe6bd
文件已移动
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
retinanet_resnet50_fpn_coco2017
==
1.0.0
```
## 命令行预测
```
hub run retinanet_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 将参数的trainable属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"retinanet_resnet50_fpn_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
retinanet_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tobytes
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/retinanet_resnet50_fpn_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py
浏览文件 @
3acfe6bd
...
@@ -15,13 +15,19 @@ __all__ = ['test_reader', 'padding_minibatch']
...
@@ -15,13 +15,19 @@ __all__ = ['test_reader', 'padding_minibatch']
def
test_reader
(
paths
=
None
,
images
=
None
):
def
test_reader
(
paths
=
None
,
images
=
None
):
"""data generator
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
"""
"""
img_list
=
[]
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
Yield:
res (dict): key contains 'image' and 'im_info', the corresponding values is:
image (numpy.ndarray): the image to be fed into network
im_info (numpy.ndarray): the info about the preprocessed.
"""
img_list
=
list
()
if
paths
:
if
paths
:
for
img_path
in
paths
:
for
img_path
in
paths
:
assert
os
.
path
.
isfile
(
assert
os
.
path
.
isfile
(
...
@@ -83,9 +89,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
...
@@ -83,9 +89,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else
:
else
:
max_shape
=
max_shape_org
.
astype
(
'int32'
)
max_shape
=
max_shape_org
.
astype
(
'int32'
)
padding_image
=
[]
padding_image
=
list
()
padding_info
=
[]
padding_info
=
list
()
padding_shape
=
[]
padding_shape
=
list
()
for
data
in
batch_data
:
for
data
in
batch_data
:
im_c
,
im_h
,
im_w
=
data
[
'image'
].
shape
im_c
,
im_h
,
im_w
=
data
[
'image'
].
shape
...
...
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py
浏览文件 @
3acfe6bd
...
@@ -11,13 +11,13 @@ from functools import partial
...
@@ -11,13 +11,13 @@ from functools import partial
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.io.parser
import
txt_parser
from
retinanet_resnet50_fpn_coco2017.fpn
import
FPN
from
retinanet_resnet50_fpn_coco2017.fpn
import
FPN
from
retinanet_resnet50_fpn_coco2017.retina_head
import
AnchorGenerator
,
RetinaTargetAssign
,
RetinaOutputDecoder
,
RetinaHead
from
retinanet_resnet50_fpn_coco2017.retina_head
import
AnchorGenerator
,
RetinaTargetAssign
,
RetinaOutputDecoder
,
RetinaHead
from
retinanet_resnet50_fpn_coco2017.processor
import
load_label_info
,
postprocess
from
retinanet_resnet50_fpn_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
retinanet_resnet50_fpn_coco2017.data_feed
import
test_reader
,
padding_minibatch
from
retinanet_resnet50_fpn_coco2017.data_feed
import
test_reader
,
padding_minibatch
from
retinanet_resnet50_fpn_coco2017.resnet
import
ResNet
from
retinanet_resnet50_fpn_coco2017.resnet
import
ResNet
...
@@ -29,7 +29,7 @@ from retinanet_resnet50_fpn_coco2017.resnet import ResNet
...
@@ -29,7 +29,7 @@ from retinanet_resnet50_fpn_coco2017.resnet import ResNet
summary
=
summary
=
"Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN."
,
"Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
RetinaNetResNet50FPN
(
hub
.
Module
):
class
RetinaNetResNet50FPN
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
# default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608)
# default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608)
...
@@ -69,16 +69,19 @@ class RetinaNetResNet50FPN(hub.Module):
...
@@ -69,16 +69,19 @@ class RetinaNetResNet50FPN(hub.Module):
trainable
=
True
,
trainable
=
True
,
pretrained
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
"""
Distill the Head Features, so as to perform transfer learning.
Args:
num_classes (int): number of classes.
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model.
outputs(dict): the output variables.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
"""
"""
context_prog
=
fluid
.
Program
()
context_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
...
@@ -166,31 +169,38 @@ class RetinaNetResNet50FPN(hub.Module):
...
@@ -166,31 +169,38 @@ class RetinaNetResNet50FPN(hub.Module):
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
Args:
:param images: data of images, [N, H, W, C]
paths (list[str]): The paths of images.
:type images: numpy.ndarray
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:param use_gpu: whether to use gpu or not.
batch_size (int): batch size.
:type use_gpu: bool
use_gpu (bool): Whether to use gpu.
:param batch_size: bathc size.
output_dir (str): The path to store output images.
:type batch_size: int
visualization (bool): Whether to save image or not.
:param output_dir: the directory to store the detection result.
score_thresh (float): threshold for object detection.
:type output_dir: str
visualization (bool): whether to save result as images.
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
Returns:
:param visualization: whether to draw bounding box and save images.
res (list[dict]): The result of coco2017 detection. keys include 'data', 'save_path', the corresponding value is:
:type visualization: bool
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
all_images
=
[]
all_images
=
list
()
paths
=
paths
if
paths
else
[]
paths
=
paths
if
paths
else
list
()
for
yield_data
in
test_reader
(
paths
,
images
):
for
yield_data
in
test_reader
(
paths
,
images
):
all_images
.
append
(
yield_data
)
all_images
.
append
(
yield_data
)
images_num
=
len
(
all_images
)
images_num
=
len
(
all_images
)
loop_num
=
int
(
np
.
ceil
(
images_num
/
batch_size
))
loop_num
=
int
(
np
.
ceil
(
images_num
/
batch_size
))
res
=
[]
res
=
list
()
for
iter_id
in
range
(
loop_num
):
for
iter_id
in
range
(
loop_num
):
batch_data
=
[]
batch_data
=
list
()
handle_id
=
iter_id
*
batch_size
handle_id
=
iter_id
*
batch_size
for
image_id
in
range
(
batch_size
):
for
image_id
in
range
(
batch_size
):
try
:
try
:
...
@@ -248,7 +258,7 @@ class RetinaNetResNet50FPN(hub.Module):
...
@@ -248,7 +258,7 @@ class RetinaNetResNet50FPN(hub.Module):
help
=
"file contain input data"
)
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
def
check_input_data
(
self
,
args
):
input_data
=
[]
input_data
=
list
()
if
args
.
input_path
:
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
elif
args
.
input_file
:
...
@@ -258,6 +268,15 @@ class RetinaNetResNet50FPN(hub.Module):
...
@@ -258,6 +268,15 @@ class RetinaNetResNet50FPN(hub.Module):
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
return
input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images; each one is decoded
            with base64_to_cv2.
        **kwargs: forwarded unchanged to object_detection.

    Returns:
        The value returned by object_detection.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Pass the decoded arrays by keyword. object_detection's docstring
    # lists `paths` before `images`, so a positional argument would
    # presumably be bound to `paths` and treated as file paths.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
...
...
hub_module/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
import
base64
import
os
import
os
import
cv2
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
from
PIL
import
Image
,
ImageDraw
__all__
=
[
__all__
=
[
'get_save_image_name'
,
'draw_bounding_box_on_image'
,
'clip_bbox'
,
'base64_to_cv2'
,
'load_label_info'
'load_label_info'
,
'postprocess'
,
]
]
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string into an OpenCV image.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode`` called with ``cv2.IMREAD_COLOR``.
    """
    raw = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported replacement for np.fromstring on
    # binary input; it views the decoded bytes without copying them.
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
def
get_save_image_name
(
img
,
output_dir
,
image_path
):
"""Get save image name from source image path.
"""Get save image name from source image path.
"""
"""
...
@@ -80,24 +90,29 @@ def load_label_info(file_path):
...
@@ -80,24 +90,29 @@ def load_label_info(file_path):
def
postprocess
(
paths
,
images
,
data_out
,
score_thresh
,
label_names
,
output_dir
,
def
postprocess
(
paths
,
images
,
data_out
,
score_thresh
,
label_names
,
output_dir
,
handle_id
,
visualization
):
handle_id
,
visualization
):
"""postprocess the lod_tensor produced by fluid.Executor.run
"""
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
Args:
:param images: data of images, [N, H, W, C]
paths (list[str]): the path of images.
:type images: numpy.ndarray
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run
data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor
score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box.
label_names (list[str]): label names.
:type score_thresh: float
output_dir (str): output directory.
:param label_names: label names
handle_id (int): The number of images that have been handled.
:type label_names: list
visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str
Returns:
:param handle_id: The number of images that have been handled.
res (list[dict]): The result of vehicles detection. keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox.
left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
"""
lod_tensor
=
data_out
[
0
]
lod_tensor
=
data_out
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
lod
=
lod_tensor
.
lod
[
0
]
...
...
hub_module/modules/image/object_detection/ssd/module.py
已删除
100644 → 0
浏览文件 @
6a477596
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
ssd.data_feed
import
reader
,
DecodeImage
,
ResizeImage
,
NormalizeImage
,
Permute
from
ssd.processor
import
load_label_info
,
postprocess
from
ssd.multi_box_head
import
MultiBoxHead
from
ssd.output_decoder
import
SSDOutputDecoder
@moduleinfo(
    name="ssd",
    version="1.0.0",
    type="cv/object_detection",
    summary=
    "SSD (Single Shot MultiBox Detector) is a object detection model, which trained with PASCAL VOC dataset.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class SSD(hub.Module):
    """PaddleHub module exposing the shared SSD helpers and head builder."""

    def _initialize(self):
        """Expose the imported SSD helpers as attributes of the module."""
        self.reader = reader
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        self.MultiBoxHead = MultiBoxHead
        self.SSDOutputDecoder = SSDOutputDecoder
        self.DecodeImage = DecodeImage
        self.ResizeImage = ResizeImage
        self.NormalizeImage = NormalizeImage
        self.Permute = Permute

    def context(self,
                body_feats,
                multi_box_head,
                ssd_output_decoder,
                image,
                trainable=True,
                var_prefix='',
                get_prediction=False):
        """Distill the Head Features, so as to perform transfer learning.

        :param body_feats: feature maps of backbone outputs
        :type body_feats: list
        :param multi_box_head: SSD head of MultiBoxHead.
        :type multi_box_head: <class 'MultiBoxHead' object>
        :param ssd_output_decoder: SSD output decoder
        :type ssd_output_decoder: <class 'SSDOutputDecoder' object>
        :param image: image tensor.
        :type image: <class 'paddle.fluid.framework.Variable'>
        :param trainable: whether to set parameters trainable.
        :type trainable: bool
        :param var_prefix: the prefix of variables in ssd
        :type var_prefix: str
        :param get_prediction: whether to get prediction,
            if True, outputs is bbox_out,
            if False, outputs is body_features.
        :type get_prediction: bool
        :return: (inputs, outputs, context_prog). inputs maps 'image' and
            'im_size' to variables of context_prog; outputs maps
            'body_features' or 'bbox_out' to a list of variables;
            context_prog is the program that owns ``image``.
        """
        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            # Names are recorded with the prefix they will carry after
            # add_vars_prefix() renames the program's variables below.
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            if not get_prediction:
                outputs = {
                    'body_features':
                    [var_prefix + var.name for var in body_feats]
                }
            else:
                locs, confs, box, box_var = fluid.layers.multi_box_head(
                    inputs=body_feats,
                    image=image,
                    base_size=multi_box_head.base_size,
                    num_classes=multi_box_head.num_classes,
                    aspect_ratios=multi_box_head.aspect_ratios,
                    min_ratio=multi_box_head.min_ratio,
                    max_ratio=multi_box_head.max_ratio,
                    min_sizes=multi_box_head.min_sizes,
                    max_sizes=multi_box_head.max_sizes,
                    steps=multi_box_head.steps,
                    offset=multi_box_head.offset,
                    flip=multi_box_head.flip,
                    kernel_size=multi_box_head.kernel_size,
                    pad=multi_box_head.pad,
                    min_max_aspect_ratios_order=multi_box_head.
                    min_max_aspect_ratios_order)
                pred = fluid.layers.detection_output(
                    loc=locs,
                    scores=confs,
                    prior_box=box,
                    prior_box_var=box_var,
                    nms_threshold=ssd_output_decoder.nms_threshold,
                    nms_top_k=ssd_output_decoder.nms_top_k,
                    keep_top_k=ssd_output_decoder.keep_top_k,
                    score_threshold=ssd_output_decoder.score_threshold,
                    nms_eta=ssd_output_decoder.nms_eta,
                    background_label=ssd_output_decoder.background_label)
                # Must be a list of names: the lookup below iterates over
                # each value, and a bare string would be walked
                # character by character.
                outputs = {'bbox_out': [var_prefix + pred.name]}

            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            # Swap the recorded names for the actual program variables.
            inputs = {
                key: context_prog.global_block().vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: [
                    context_prog.global_block().vars[varname]
                    for varname in value
                ]
                for key, value in outputs.items()
            }

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            return inputs, outputs, context_prog
hub_module/modules/image/object_detection/ssd/multi_box_head.py
已删除
100644 → 0
浏览文件 @
6a477596
# coding=utf-8
class MultiBoxHead(object):
    """Configuration holder for the SSD multi-box head.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """

    # __op__ = fluid.layers.multi_box_head
    def __init__(self,
                 base_size,
                 num_classes,
                 aspect_ratios,
                 min_ratio=None,
                 max_ratio=None,
                 min_sizes=None,
                 max_sizes=None,
                 steps=None,
                 offset=0.5,
                 flip=True,
                 kernel_size=1,
                 pad=0,
                 min_max_aspect_ratios_order=False):
        # Required settings.
        self.base_size, self.num_classes = base_size, num_classes
        self.aspect_ratios = aspect_ratios
        # Optional ratio / size bounds.
        self.min_ratio, self.max_ratio = min_ratio, max_ratio
        self.min_sizes, self.max_sizes = min_sizes, max_sizes
        # Remaining options.
        self.steps, self.offset = steps, offset
        self.flip = flip
        self.kernel_size, self.pad = kernel_size, pad
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
hub_module/modules/image/object_detection/ssd/output_decoder.py
已删除
100644 → 0
浏览文件 @
6a477596
class SSDOutputDecoder(object):
    """Configuration holder for the SSD detection-output step.

    Every constructor argument is stored unchanged on an attribute of the
    same name.
    """

    # __op__ = fluid.layers.detection_output
    def __init__(self,
                 nms_threshold=0.3,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0):
        self.nms_threshold, self.background_label = (nms_threshold,
                                                     background_label)
        self.nms_top_k, self.keep_top_k = nms_top_k, keep_top_k
        self.score_threshold, self.nms_eta = score_threshold, nms_eta
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
ssd_mobilenet_v1_pascal
==
1.1.0
```
## 命令行预测
```
hub run ssd_mobilenet_v1_pascal --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 设置参数的 trainable 属性;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存在模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"ssd_mobilenet_v1_pascal"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
ssd_mobilenet_v1_pascal
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def cv2_to_base64(image):
    """Encode an image as JPEG and return the bytes as base64 text.

    Args:
        image: image data accepted by ``cv2.imencode``, e.g. the result
            of ``cv2.imread``.

    Returns:
        str: base64 representation of the JPEG buffer, decoded as UTF-8.
    """
    # cv2.imencode returns a pair; the encoded buffer is the second item.
    data = cv2.imencode('.jpg', image)[1]
    # tobytes() is the supported spelling of the deprecated tostring().
    return base64.b64encode(data.tobytes()).decode('utf8')
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/ssd_mobilenet_v1_pascal"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/config.yml
浏览文件 @
3acfe6bd
MobileNet
:
norm_decay
:
0.
conv_group_scale
:
1
conv_learning_rate
:
0.1
extra_block_filters
:
[[
256
,
512
],
[
128
,
256
],
[
128
,
256
],
[
64
,
128
]]
with_extra_blocks
:
True
SSDOutputDecoder
:
SSDOutputDecoder
:
background_label
:
0
background_label
:
0
keep_top_k
:
200
keep_top_k
:
200
...
@@ -9,7 +16,7 @@ SSDOutputDecoder:
...
@@ -9,7 +16,7 @@ SSDOutputDecoder:
MultiBoxHead
:
MultiBoxHead
:
aspect_ratios
:
[[
2.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
]]
aspect_ratios
:
[[
2.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
],
[
2.
,
3.
]]
base_size
:
300
base_size
:
300
flip
:
t
rue
flip
:
T
rue
max_ratio
:
90
max_ratio
:
90
max_sizes
:
[[],
150.0
,
195.0
,
240.0
,
285.0
,
300.0
]
max_sizes
:
[[],
150.0
,
195.0
,
240.0
,
285.0
,
300.0
]
min_ratio
:
20
min_ratio
:
20
...
...
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
random
from
collections
import
OrderedDict
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
paddle
import
fluid
__all__
=
[
'reader'
]
class DecodeImage(object):
    """Optional BGR-to-RGB conversion step of the preprocessing chain."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """Store the conversion options.

        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): stored on the instance; not read by __call__
        """
        self.to_rgb, self.with_mixup = to_rgb, with_mixup

    def __call__(self, im):
        """Return ``im`` converted from BGR to RGB, or unchanged if disabled."""
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Resize an image to a target size with cv2 or PIL."""

    def __init__(self,
                 target_size=0,
                 max_size=0,
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        Rescale image to the specified target size, and capped at max_size
        if max_size != 0.
        If target_size is list, selected a scale randomly as the specified
        target size.

        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
            max_size (int): the max size of image
            interp (int): the interpolation method
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        """Resize ``im`` and return the resized array.

        Args:
            im (numpy.ndarray): 3-dimensional image; the first two axes are
                used as height and width.

        Returns:
            numpy.ndarray: the resized image.

        Raises:
            TypeError: ``im`` is not a numpy array, or max_size is set while
                use_cv2 is False.
            ValueError: ``im`` is not 3-dimensional.
            ZeroDivisionError: the smaller of height and width is 0.
        """
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if isinstance(self.target_size, list):
            # Case for multi-scale training
            selected_size = random.choice(self.target_size)
        else:
            selected_size = self.target_size
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        if self.max_size != 0:
            # Keep the aspect ratio: scale the short side to selected_size.
            im_scale = float(selected_size) / float(im_size_min)
            # Prevent the biggest axis from being more than max_size
            if np.round(im_scale * im_size_max) > self.max_size:
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            # No cap: stretch both sides to selected_size independently.
            im_scale_x = float(selected_size) / float(im_shape[1])
            im_scale_y = float(selected_size) / float(im_shape[0])

        if self.use_cv2:
            return cv2.resize(
                im,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)

        if self.max_size != 0:
            raise TypeError(
                'If you set max_size to cap the maximum size of image,'
                'please set use_cv2 to True to resize the image.')
        # The PIL path is only reached without max_size, where the output
        # is a selected_size x selected_size square.
        im = im.astype('uint8')
        im = Image.fromarray(im)
        im = im.resize((int(selected_size), int(selected_size)), self.interp)
        return np.array(im)
class NormalizeImage(object):
    """Subtract a per-channel mean and divide by a per-channel std."""

    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): the per-channel pixel mean
            std (list): the per-channel pixel standard deviation
            is_scale (bool): whether to divide pixel values by 255 first
            is_channel_first (bool): whether the channel axis is the first
                axis of the image (otherwise it is the last one)
        """
        # Copy the sequences so instances never share the default lists
        # (or a list the caller later mutates).
        self.mean = list(mean)
        self.std = list(std)
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        # copy=False: an input that is already float32 is reused, so the
        # in-place operations below may modify the caller's array.
        im = im.astype(np.float32, copy=False)
        if self.is_channel_first:
            # Shape (C, 1, 1) broadcasts over a channel-first image.
            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
            std = np.array(self.std)[:, np.newaxis, np.newaxis]
        else:
            # Shape (1, 1, C) broadcasts over a channel-last image.
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            im = im / 255.0
        im -= mean
        im /= std
        return im
class Permute(object):
    """Reorder image axes and, optionally, reverse the channel order."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.

        Args:
            to_bgr (bool): confirm whether to convert RGB to BGR
            channel_first (bool): confirm whether to change channel
        """
        self.to_bgr, self.channel_first = to_bgr, channel_first

    def __call__(self, im):
        """Return ``im`` with the configured permutations applied."""
        result = im
        if self.channel_first:
            # Moving axis 2 to the front equals the two successive swaps
            # (1 <-> 2, then 1 <-> 0): (H, W, C) becomes (C, H, W).
            result = np.moveaxis(result, 2, 0)
        if self.to_bgr:
            # Reverse the first three entries along the leading axis.
            result = result[[2, 1, 0], :, :]
        return result
def reader(paths=None,
           images=None,
           decode_image=None,
           resize_image=None,
           permute_image=None,
           normalize_image=None):
    """
    data generator

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (class object): instance of <class 'DecodeImage' object>;
            None selects DecodeImage(to_rgb=True, with_mixup=False).
        resize_image (class object): instance of <class 'ResizeImage' object>;
            None selects a 300x300 PIL resize.
        permute_image (class object): instance of <class 'Permute' object>;
            None selects Permute().
        normalize_image (class object): instance of <class 'NormalizeImage' object>;
            None selects mean 127.5 / std 127.502231 without 0-1 scaling.

    Yield:
        list: a single-element list holding the preprocessed image.
    """
    img_list = []
    if paths is not None:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path).astype('float32')
            img_list.append(img)
    if images is not None:
        img_list.extend(images)

    # The fallbacks are the values this function previously hard-coded in
    # its body, so calls that do not pass processors behave as before;
    # explicitly supplied processors are now honored instead of being
    # silently replaced.
    if decode_image is None:
        decode_image = DecodeImage(to_rgb=True, with_mixup=False)
    if resize_image is None:
        resize_image = ResizeImage(
            target_size=300, interp=1, max_size=0, use_cv2=False)
    if permute_image is None:
        permute_image = Permute()
    if normalize_image is None:
        normalize_image = NormalizeImage(
            mean=[127.5, 127.5, 127.5],
            std=[127.502231, 127.502231, 127.502231],
            is_scale=False)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
yaml
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.
io.parser
import
txt_parser
from
paddlehub.
module.module
import
moduleinfo
,
runnable
,
serving
import
yaml
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
ssd_mobilenet_v1_pascal.mobilenet_v1
import
MobileNet
from
ssd_mobilenet_v1_pascal.mobilenet_v1
import
MobileNet
from
ssd_mobilenet_v1_pascal.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
ssd_mobilenet_v1_pascal.data_feed
import
reader
@
moduleinfo
(
@
moduleinfo
(
...
@@ -25,25 +25,18 @@ from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
...
@@ -25,25 +25,18 @@ from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
type
=
"cv/object_detection"
,
type
=
"cv/object_detection"
,
summary
=
"SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC."
,
summary
=
"SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
SSDMobileNetv1
(
hub
.
Module
):
class
SSDMobileNetv1
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
ssd
=
hub
.
Module
(
name
=
"ssd"
)
# default pretrained model of SSD_MobileNet_V1_VOC, the shape of image tensor is (3, 300, 300)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"ssd_mobilenet_v1_model"
)
self
.
directory
,
"ssd_mobilenet_v1_model"
)
self
.
label_names
=
self
.
ssd
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
model_config
=
None
self
.
image
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
self
.
_config
=
None
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
# predictor config setting.
predictor config setting
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_gpu
()
cpu_config
.
disable_gpu
()
...
@@ -62,51 +55,92 @@ class SSDMobileNetv1(hub.Module):
...
@@ -62,51 +55,92 @@ class SSDMobileNetv1(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
# model config setting.
num_classes
=
21
,
if
not
self
.
model_config
:
trainable
=
True
,
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
fp
:
pretrained
=
True
,
self
.
model_config
=
yaml
.
load
(
fp
.
read
(),
Loader
=
yaml
.
FullLoader
)
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
self
.
multi_box_head_config
=
self
.
model_config
[
'MultiBoxHead'
]
self
.
output_decoder_config
=
self
.
model_config
[
'SSDOutputDecoder'
]
self
.
mobilenet_config
=
self
.
model_config
[
'MobileNet'
]
:param trainable: whether to set parameters trainable.
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'body_features': body_features}.
:type get_prediction: bool
"""
"""
wrapped_prog
=
fluid
.
Program
()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
Returns:
inputs(dict): the input variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
300
,
300
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
300
,
300
],
dtype
=
'float32'
)
backbone
=
MobileNet
(
# backbone
norm_decay
=
0.
,
backbone
=
MobileNet
(
**
self
.
mobilenet_config
)
conv_group_scale
=
1
,
# body_feats
conv_learning_rate
=
0.1
,
extra_block_filters
=
[[
256
,
512
],
[
128
,
256
],
[
128
,
256
],
[
64
,
128
]],
with_extra_blocks
=
True
)
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
# call ssd.context
# im_size
inputs
,
outputs
,
context_prog
=
self
.
ssd
.
context
(
im_size
=
fluid
.
layers
.
data
(
body_feats
=
body_feats
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
multi_box_head
=
self
.
ssd
.
MultiBoxHead
(
# var_prefix
num_classes
=
num_classes
,
**
self
.
multi_box_head_config
),
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
ssd_output_decoder
=
self
.
ssd
.
SSDOutputDecoder
(
# names of inputs
**
self
.
output_decoder_config
),
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# names of outputs
if
get_prediction
:
locs
,
confs
,
box
,
box_var
=
fluid
.
layers
.
multi_box_head
(
inputs
=
body_feats
,
image
=
image
,
image
=
image
,
trainable
=
trainable
,
num_classes
=
21
,
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
),
**
self
.
multi_box_head_config
)
get_prediction
=
get_prediction
)
pred
=
fluid
.
layers
.
detection_output
(
loc
=
locs
,
scores
=
confs
,
prior_box
=
box
,
prior_box_var
=
box_var
,
**
self
.
output_decoder_config
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
pred
.
name
]}
else
:
outputs
=
{
'body_features'
:
[
var_prefix
+
var
.
name
for
var
in
body_feats
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
outputs
=
{
out_key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
out_value
]
for
out_key
,
out_value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -120,82 +154,56 @@ class SSDMobileNetv1(hub.Module):
...
@@ -120,82 +154,56 @@ class SSDMobileNetv1(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
@
property
return
inputs
,
outputs
,
context_prog
def
config
(
self
):
if
not
self
.
_config
:
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
file
:
self
.
_config
=
yaml
.
load
(
file
.
read
(),
Loader
=
yaml
.
FullLoader
)
return
self
.
_config
@
property
def
multi_box_head_config
(
self
):
return
self
.
config
[
'MultiBoxHead'
]
@
property
def
output_decoder_config
(
self
):
return
self
.
config
[
'SSDOutputDecoder'
]
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
None
,
paths
=
None
,
images
=
None
,
images
=
None
,
data
=
None
,
data
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
paths
=
paths
if
paths
else
list
()
if
data
and
'image'
in
data
:
if
data
and
'image'
in
data
:
paths
=
data
[
'image'
]
if
not
paths
else
paths
+
data
[
'image'
]
paths
+=
data
[
'image'
]
decode_image
=
self
.
ssd
.
DecodeImage
(
to_rgb
=
True
,
with_mixup
=
False
)
resize_image
=
self
.
ssd
.
ResizeImage
(
data_reader
=
partial
(
reader
,
paths
,
images
)
target_size
=
300
,
interp
=
1
,
max_size
=
0
,
use_cv2
=
False
)
permute_image
=
self
.
ssd
.
Permute
()
normalize_image
=
self
.
ssd
.
NormalizeImage
(
mean
=
[
127.5
,
127.5
,
127.5
],
std
=
[
127.502231
,
127.502231
,
127.502231
],
is_scale
=
False
)
data_reader
=
partial
(
self
.
ssd
.
reader
,
paths
,
images
,
decode_image
=
decode_image
,
resize_image
=
resize_image
,
permute_image
=
permute_image
,
normalize_image
=
normalize_image
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
paths
=
paths
if
paths
else
[]
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
np_data
=
np
.
array
(
feed_data
).
astype
(
'float32'
)
feed_data
=
np
.
array
(
feed_data
)
if
np_data
.
shape
==
1
:
image_tensor
=
PaddleTensor
(
np
.
array
(
list
(
feed_data
[:,
0
])).
copy
())
np_data
=
np_data
[
0
]
else
:
np_data
=
np
.
squeeze
(
np_data
,
axis
=
1
)
data_tensor
=
PaddleTensor
(
np_data
.
copy
())
if
use_gpu
:
if
use_gpu
:
data_out
=
self
.
gpu_predictor
.
run
([
data
_tensor
])
data_out
=
self
.
gpu_predictor
.
run
([
image
_tensor
])
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
([
data_tensor
])
data_out
=
self
.
cpu_predictor
.
run
([
image_tensor
])
output
=
self
.
ssd
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -204,55 +212,49 @@ class SSDMobileNetv1(hub.Module):
...
@@ -204,55 +212,49 @@ class SSDMobileNetv1(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
.
extend
(
output
)
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
object_detection
(
images_decode
,
**
kwargs
)
return
results
self
.
arg_input_group
.
add_argument
(
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -262,17 +264,50 @@ class SSDMobileNetv1(hub.Module):
...
@@ -262,17 +264,50 @@ class SSDMobileNetv1(hub.Module):
description
=
description
=
"Run configuration for controlling module behavior, not required."
)
"Run configuration for controlling module behavior, not required."
)
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_data
=
self
.
check_input_data
(
args
)
results
=
self
.
face_detection
(
if
len
(
input_data
)
==
0
:
paths
=
[
args
.
input_path
],
self
.
parser
.
print_help
()
batch_size
=
args
.
batch_size
,
exit
(
1
)
use_gpu
=
args
.
use_gpu
,
else
:
output_dir
=
args
.
output_dir
,
for
image_path
in
input_data
:
visualization
=
args
.
visualization
,
if
not
os
.
path
.
exists
(
image_path
):
score_thresh
=
args
.
score_thresh
)
raise
RuntimeError
(
return
results
"File %s or %s is not exist."
%
image_path
)
return
self
.
object_detection
(
def
add_module_config_arg
(
self
):
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
"""
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def
add_module_input_arg
(
self
):
"""
Add the command input options.
"""
self
.
arg_input_group
.
add_argument
(
'--input_path'
,
type
=
str
,
help
=
"path to image."
)
self
.
arg_input_group
.
add_argument
(
'--batch_size'
,
type
=
ast
.
literal_eval
,
default
=
1
,
help
=
"batch size."
)
self
.
arg_input_group
.
add_argument
(
'--score_thresh'
,
type
=
ast
.
literal_eval
,
default
=
0.5
,
help
=
"threshold for object detecion."
)
hub_module/modules/image/object_detection/ssd_mobilenet_v1_pascal/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string with OpenCV.

    Args:
        b64str (str): base64 text; it is utf8-encoded and base64-decoded to
            obtain the raw bytes handed to cv2.imdecode.

    Returns:
        Whatever cv2.imdecode produces for those bytes when called with
        cv2.IMREAD_COLOR.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer exposes the
    # same bytes as a uint8 array.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for an image, creating output_dir if missing.

    The file stem is taken from image_path. The extension is chosen from
    img.format first, then from img.mode; when neither is recognised the
    extension already present in image_path is kept.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, suffix = os.path.splitext(os.path.basename(image_path))

    suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    suffix_by_mode = {'RGB': '.jpg', 'L': '.jpg', 'RGBA': '.png', 'P': '.png'}

    if img.format in suffix_by_format:
        suffix = suffix_by_format[img.format]
    else:
        # img.mode is only consulted when the format gave no answer.
        suffix = suffix_by_mode.get(img.mode, suffix)

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes (plus labels for RGB images) on an image and save it.

    Args:
        image_path (str): path of the image to open and annotate.
        data_list (list[dict]): detections; each dict is read for the keys
            'left', 'right', 'top', 'bottom' and, for RGB images, 'label'
            and 'confidence'.
        save_dir (str): directory passed to get_save_image_name.

    Returns:
        str: path the annotated image was saved to.
    """
    canvas = Image.open(image_path)
    painter = ImageDraw.Draw(canvas)

    for det in data_list:
        x0, x1 = det['left'], det['right']
        y0, y1 = det['top'], det['bottom']

        # Closed outline: the first corner is repeated at the end.
        outline = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
        painter.line(outline, width=2, fill='red')

        # Labels are only drawn on RGB images.
        if canvas.mode != 'RGB':
            continue
        caption = det['label'] + ": %.2f%%" % (100 * det['confidence'])
        text_w, text_h = painter.textsize(text=caption)
        painter.rectangle(
            xy=(x0, y0 - (text_h + 5), x0 + text_w + 10, y0),
            fill=(255, 255, 255))
        painter.text(xy=(x0, y0 - 15), text=caption, fill=(0, 0, 0))

    target = get_save_image_name(canvas, save_dir, image_path)
    # Remove any previous output of the same name before saving.
    if os.path.exists(target):
        os.remove(target)
    canvas.save(target)
    return target
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box to the image bounds.

    bbox[0] and bbox[2] are limited to [0, img_width]; bbox[1] and bbox[3]
    are limited to [0, img_height].

    Returns:
        tuple: (xmin, ymin, xmax, ymax) after clamping.
    """

    def _clamp(value, upper):
        return max(min(value, upper), 0.)

    return (
        _clamp(bbox[0], img_width),
        _clamp(bbox[1], img_height),
        _clamp(bbox[2], img_width),
        _clamp(bbox[3], img_height),
    )
def load_label_info(file_path):
    """
    Read label names from a text file.

    Returns:
        list[str]: one entry per line of the file, with leading and trailing
        whitespace stripped.
    """
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file.readlines()]
def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True):
    """
    Convert the predictor output of one batch into per-image detection results.

    Args:
        paths (list[str]): the paths of images.
        images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
        data_out (list): predictor output; data_out[0] must provide `.lod`
            and `.as_ndarray()`.
        score_thresh (float): rows whose score is below this value are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): output directory.
        handle_id (int): the number of images handled before this batch.
        visualization (bool): whether to save annotated images.

    Returns:
        res (list[dict]): one dict per image in the batch, with keys:
            data (list[dict]): detections, each with keys 'left', 'top',
                'right', 'bottom', 'label' and 'confidence'; the box values
                are scaled to the original image size and clipped to it.
            path (str): source path, only for images that came from `paths`.
            save_path (str): path of the annotated image, only when
                visualization is True.
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    num_paths = len(paths) if paths else 0

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image in the whole input stream: entries of
        # `paths` come first, followed by entries of `images`.
        global_index = handle_id + index
        if global_index < num_paths:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            # Offset by the number of paths so that batches after the first
            # pick the matching array instead of restarting at images[0].
            org_img = images[global_index - num_paths]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel axis before handing the array to PIL.
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format(global_index))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width

        # Rows lod[index]:lod[index + 1] belong to this image.
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            # Scale the box values by the original image width / height.
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
ssd_vgg16_300_coco2017
==
1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 设置参数的 trainable 属性;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"ssd_vgg16_300_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
ssd_vgg16_300_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/ssd/__init__.py
→
hub_module/modules/image/object_detection/ssd
_vgg16_300_coco2017
/__init__.py
浏览文件 @
3acfe6bd
文件已移动
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
random
from
collections
import
OrderedDict
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
paddle
import
fluid
__all__
=
[
'reader'
]
class DecodeImage(object):
    """Callable preprocessing step that optionally converts BGR images to RGB."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """
        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): stored on the instance; __call__ does not read it.
        """
        self.to_rgb, self.with_mixup = to_rgb, with_mixup

    def __call__(self, im):
        # Pass the image through untouched unless a colour conversion was requested.
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Callable preprocessing step that resizes an [H, W, C] numpy image."""

    def __init__(self, target_size=0, max_size=0, interp=cv2.INTER_LINEAR, use_cv2=True):
        """
        Rescale image to the specified target size, and capped at max_size
        if max_size != 0.
        If target_size is list, selected a scale randomly as the specified
        target size.

        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
            max_size (int): the max size of image
            interp (int): the interpolation method
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        """
        Resize `im` and return the result as a numpy array.

        Raises:
            TypeError: `im` is not a numpy array, or max_size is set while
                use_cv2 is False.
            ValueError: `im` is not 3-dimensional.
            ZeroDivisionError: the smaller of the first two dimensions is 0.
        """
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        height, width = im.shape[0], im.shape[1]
        short_side = np.min(im.shape[0:2])
        long_side = np.max(im.shape[0:2])

        # A list target means "pick one of these sizes at random".
        if isinstance(self.target_size, list):
            chosen = random.choice(self.target_size)
        else:
            chosen = self.target_size

        if float(short_side) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        capped = self.max_size != 0
        if capped:
            # Keep the aspect ratio: scale the short side to `chosen`, but
            # never let the long side exceed max_size.
            scale = float(chosen) / float(short_side)
            if np.round(scale * long_side) > self.max_size:
                scale = float(self.max_size) / float(long_side)
            scale_x = scale_y = scale
            out_w = scale_x * float(width)
            out_h = scale_y * float(height)
        else:
            # No cap: both sides are resized to `chosen`.
            scale_x = float(chosen) / float(width)
            scale_y = float(chosen) / float(height)
            out_w = out_h = chosen

        if self.use_cv2:
            return cv2.resize(im, None, None, fx=scale_x, fy=scale_y, interpolation=self.interp)

        if capped:
            raise TypeError('If you set max_size to cap the maximum size of image,'
                            'please set use_cv2 to True to resize the image.')
        pil_im = Image.fromarray(im.astype('uint8'))
        pil_im = pil_im.resize((int(out_w), int(out_h)), self.interp)
        return np.array(pil_im)
class NormalizeImage(object):
    """Callable preprocessing step: optional 1/255 scaling, then (im - mean) / std."""

    def __init__(self, mean=[0.485, 0.456, 0.406], std=[1, 1, 1], is_scale=True, is_channel_first=True):
        """
        Args:
            mean (list): per-channel value subtracted from the image
            std (list): per-channel divisor applied after the subtraction
            is_scale (bool): divide the image by 255.0 first when True
            is_channel_first (bool): broadcast mean/std along the first axis
                when True, along the last axis otherwise
        """
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        im = im.astype(np.float32, copy=False)

        # Index that lifts the 1-D statistics to a shape broadcastable
        # against the image layout.
        if self.is_channel_first:
            expand = (slice(None), np.newaxis, np.newaxis)
        else:
            expand = (np.newaxis, np.newaxis, slice(None))
        mean = np.array(self.mean)[expand]
        std = np.array(self.std)[expand]

        if self.is_scale:
            im = im / 255.0
        # In-place on purpose: the float32 array obtained above is updated directly.
        im -= mean
        im /= std
        return im
class Permute(object):
    """Callable preprocessing step that reorders image axes and/or channels."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.

        Args:
            to_bgr (bool): whether to reverse the order of the three channels
            channel_first (bool): whether to move the last axis to the front
        """
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im):
        if self.channel_first:
            # Two swaps move the last axis to the front: (0, 1, 2) -> (2, 0, 1).
            im = np.swapaxes(im, 1, 2)
            im = np.swapaxes(im, 1, 0)
        if self.to_bgr:
            # Reverse along the axis that holds the channels. Without the
            # axis move the input is presumably still channel-last, so the
            # last axis must be indexed rather than the first (rows).
            if self.channel_first:
                im = im[[2, 1, 0], :, :]
            else:
                im = im[:, :, [2, 1, 0]]
        return im
def reader(paths=[],
           images=None,
           decode_image=DecodeImage(to_rgb=True, with_mixup=False),
           resize_image=ResizeImage(target_size=300, interp=1, max_size=0, use_cv2=False),
           permute_image=Permute(to_bgr=False),
           normalize_image=NormalizeImage(mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
    """
    Data generator: yields one preprocessed image at a time.

    Images read from `paths` are yielded first, followed by `images`. Each
    one is passed through decode_image, resize_image, permute_image and
    normalize_image, in that order.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (class object): instance of <class 'DecodeImage' object>
        resize_image (class object): instance of <class 'ResizeImage' object>.
            The default resizes to 300, which is what this function previously
            forced regardless of the argument; a caller-supplied instance is
            now honoured.
        permute_image (class object): instance of <class 'Permute' object>
        normalize_image (class object): instance of <class 'NormalizeImage' object>

    Yields:
        list: a single-element list holding the preprocessed image.
    """
    img_list = []
    if paths is not None:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path).astype('float32')
            img_list.append(img)
    if images is not None:
        img_list.extend(images)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
yaml
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.
io.parser
import
txt_parser
from
paddlehub.
module.module
import
moduleinfo
,
runnable
,
serving
import
yaml
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
ssd_vgg16_300_coco2017.vgg
import
VGG
from
ssd_vgg16_300_coco2017.vgg
import
VGG
from
ssd_vgg16_300_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
ssd_vgg16_300_coco2017.data_feed
import
reader
@
moduleinfo
(
@
moduleinfo
(
...
@@ -25,28 +25,22 @@ from ssd_vgg16_300_coco2017.vgg import VGG
...
@@ -25,28 +25,22 @@ from ssd_vgg16_300_coco2017.vgg import VGG
type
=
"cv/object_detection"
,
type
=
"cv/object_detection"
,
summary
=
"SSD with backbone VGG16, trained with dataset COCO."
,
summary
=
"SSD with backbone VGG16, trained with dataset COCO."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
SSDVGG16
(
hub
.
Module
):
class
SSDVGG16
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
ssd
=
hub
.
Module
(
name
=
"ssd"
)
# default pretrained model of SSD_VGG16, the shape of image tensor is (3, 300, 300)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"ssd_vgg16_300_model"
)
self
.
directory
,
"ssd_vgg16_300_model"
)
self
.
label_names
=
self
.
ssd
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
model_config
=
None
self
.
image
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
self
.
_config
=
None
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
# predictor config setting.
predictor config setting
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_gpu
()
cpu_config
.
disable_gpu
()
cpu_config
.
switch_ir_optim
(
False
)
self
.
cpu_predictor
=
create_paddle_predictor
(
cpu_config
)
self
.
cpu_predictor
=
create_paddle_predictor
(
cpu_config
)
try
:
try
:
...
@@ -61,25 +55,31 @@ class SSDVGG16(hub.Module):
...
@@ -61,25 +55,31 @@ class SSDVGG16(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
# model config setting.
num_classes
=
81
,
if
not
self
.
model_config
:
trainable
=
True
,
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
fp
:
pretrained
=
True
,
self
.
model_config
=
yaml
.
load
(
fp
.
read
(),
Loader
=
yaml
.
FullLoader
)
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
self
.
multi_box_head_config
=
self
.
model_config
[
'MultiBoxHead'
]
self
.
output_decoder_config
=
self
.
model_config
[
'SSDOutputDecoder'
]
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model.
outputs(dict): the output variables.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
...
@@ -89,21 +89,60 @@ class SSDVGG16(hub.Module):
...
@@ -89,21 +89,60 @@ class SSDVGG16(hub.Module):
depth
=
16
,
depth
=
16
,
with_extra_blocks
=
True
,
with_extra_blocks
=
True
,
normalizations
=
[
20.
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
])
normalizations
=
[
20.
,
-
1
,
-
1
,
-
1
,
-
1
,
-
1
])
# body_feats
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
# call ssd.context
# im_size
inputs
,
outputs
,
context_prog
=
self
.
ssd
.
context
(
im_size
=
fluid
.
layers
.
data
(
body_feats
=
body_feats
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
multi_box_head
=
self
.
ssd
.
MultiBoxHead
(
# var_prefix
num_classes
=
num_classes
,
**
self
.
multi_box_head_config
),
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
ssd_output_decoder
=
self
.
ssd
.
SSDOutputDecoder
(
# names of inputs
**
self
.
output_decoder_config
),
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# names of outputs
if
get_prediction
:
locs
,
confs
,
box
,
box_var
=
fluid
.
layers
.
multi_box_head
(
inputs
=
body_feats
,
image
=
image
,
image
=
image
,
trainable
=
trainable
,
num_classes
=
81
,
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
),
**
self
.
multi_box_head_config
)
get_prediction
=
get_prediction
)
pred
=
fluid
.
layers
.
detection_output
(
loc
=
locs
,
scores
=
confs
,
prior_box
=
box
,
prior_box_var
=
box_var
,
**
self
.
output_decoder_config
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
pred
.
name
]}
else
:
outputs
=
{
'body_features'
:
[
var_prefix
+
var
.
name
for
var
in
body_feats
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
outputs
=
{
out_key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
out_value
]
for
out_key
,
out_value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -117,67 +156,52 @@ class SSDVGG16(hub.Module):
...
@@ -117,67 +156,52 @@ class SSDVGG16(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
@
property
def
config
(
self
):
if
not
self
.
_config
:
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
file
:
self
.
_config
=
yaml
.
load
(
file
.
read
(),
Loader
=
yaml
.
FullLoader
)
return
self
.
_config
@
property
return
inputs
,
outputs
,
context_prog
def
multi_box_head_config
(
self
):
return
self
.
config
[
'MultiBoxHead'
]
@
property
def
output_decoder_config
(
self
):
return
self
.
config
[
'SSDOutputDecoder'
]
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
None
,
paths
=
None
,
images
=
None
,
images
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
resize_image
=
self
.
ssd
.
ResizeImage
(
paths
=
paths
if
paths
else
list
()
target_size
=
300
,
interp
=
1
,
max_size
=
0
,
use_cv2
=
False
)
data_reader
=
partial
(
reader
,
paths
,
images
)
data_reader
=
partial
(
self
.
ssd
.
reader
,
paths
,
images
,
resize_image
=
resize_image
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
paths
=
paths
if
paths
else
[]
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
np_data
=
np
.
array
(
feed_data
).
astype
(
'float32'
)
feed_data
=
np
.
array
(
feed_data
)
if
np_data
.
shape
==
1
:
image_tensor
=
PaddleTensor
(
np
.
array
(
list
(
feed_data
[:,
0
])).
copy
())
np_data
=
np_data
[
0
]
else
:
np_data
=
np
.
squeeze
(
np_data
,
axis
=
1
)
data_tensor
=
PaddleTensor
(
np_data
.
copy
())
if
use_gpu
:
if
use_gpu
:
data_out
=
self
.
gpu_predictor
.
run
([
data
_tensor
])
data_out
=
self
.
gpu_predictor
.
run
([
image
_tensor
])
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
([
data_tensor
])
data_out
=
self
.
cpu_predictor
.
run
([
image_tensor
])
output
=
self
.
ssd
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -186,53 +210,49 @@ class SSDVGG16(hub.Module):
...
@@ -186,53 +210,49 @@ class SSDVGG16(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
.
extend
(
output
)
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
object_detection
(
images_decode
,
**
kwargs
)
self
.
arg_input_group
.
add_argument
(
return
results
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -242,18 +262,50 @@ class SSDVGG16(hub.Module):
...
@@ -242,18 +262,50 @@ class SSDVGG16(hub.Module):
description
=
description
=
"Run configuration for controlling module behavior, not required."
)
"Run configuration for controlling module behavior, not required."
)
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_path
=
args
.
input_path
results
=
self
.
face_detection
(
input_data
=
self
.
check_input_data
(
args
)
paths
=
[
args
.
input_path
],
if
len
(
input_data
)
==
0
:
batch_size
=
args
.
batch_size
,
self
.
parser
.
print_help
()
use_gpu
=
args
.
use_gpu
,
exit
(
1
)
output_dir
=
args
.
output_dir
,
else
:
visualization
=
args
.
visualization
,
for
image_path
in
input_data
:
score_thresh
=
args
.
score_thresh
)
if
not
os
.
path
.
exists
(
image_path
):
return
results
raise
RuntimeError
(
"File %s or %s is not exist."
%
image_path
)
def
add_module_config_arg
(
self
):
return
self
.
object_detection
(
"""
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def
add_module_input_arg
(
self
):
"""
Add the command input options.
"""
self
.
arg_input_group
.
add_argument
(
'--input_path'
,
type
=
str
,
help
=
"path to image."
)
self
.
arg_input_group
.
add_argument
(
'--batch_size'
,
type
=
ast
.
literal_eval
,
default
=
1
,
help
=
"batch size."
)
self
.
arg_input_group
.
add_argument
(
'--score_thresh'
,
type
=
ast
.
literal_eval
,
default
=
0.5
,
help
=
"threshold for object detecion."
)
hub_module/modules/image/object_detection/ssd_vgg16_300_coco2017/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64 string into an image via OpenCV.

    Args:
        b64str (str): base64 text holding the bytes of an encoded image.

    Returns:
        The value returned by cv2.imdecode(..., cv2.IMREAD_COLOR).
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported replacement for the deprecated
    # binary mode of np.fromstring; it yields the same uint8 values.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the path under output_dir where an image should be saved.

    output_dir is created when it does not exist. The file stem is taken
    from image_path; the extension is picked from img.format first, then
    from img.mode, and otherwise the extension of image_path is kept.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, suffix = os.path.splitext(os.path.split(image_path)[-1])

    suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    suffix_by_mode = {"RGB": ".jpg", "L": ".jpg", "RGBA": '.png', "P": '.png'}

    image_format = img.format
    if image_format in suffix_by_format:
        suffix = suffix_by_format[image_format]
    else:
        # Unknown container format: fall back to the pixel mode, and keep
        # the original extension when the mode is not recognised either.
        suffix = suffix_by_mode.get(img.mode, suffix)

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes on an image file and save the annotated copy.

    Args:
        image_path (str): path of the image to open and annotate.
        data_list (list[dict]): detections; each provides 'left', 'right',
            'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory passed to get_save_image_name.

    Returns:
        str: path the annotated image was saved to.
    """
    canvas = Image.open(image_path)
    painter = ImageDraw.Draw(canvas)
    for det in data_list:
        x_min = det['left']
        x_max = det['right']
        y_min = det['top']
        y_max = det['bottom']

        # Closed polyline around the box.
        outline = [(x_min, y_min), (x_min, y_max), (x_max, y_max),
                   (x_max, y_min), (x_min, y_min)]
        painter.line(outline, width=2, fill='red')

        # The caption is only drawn on RGB images.
        if canvas.mode != 'RGB':
            continue
        caption = det['label'] + ": %.2f%%" % (100 * det['confidence'])
        caption_width, caption_height = painter.textsize(text=caption)
        painter.rectangle(
            xy=(x_min, y_min - (caption_height + 5),
                x_min + caption_width + 10, y_min),
            fill=(255, 255, 255))
        painter.text(xy=(x_min, y_min - 15), text=caption, fill=(0, 0, 0))

    target_path = get_save_image_name(canvas, save_dir, image_path)
    # Remove any previous result so the save writes a fresh file.
    if os.path.exists(target_path):
        os.remove(target_path)

    canvas.save(target_path)
    return target_path
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box to the image area.

    bbox[0] and bbox[2] are limited to [0, img_width], bbox[1] and bbox[3]
    to [0, img_height].

    Returns:
        tuple: (xmin, ymin, xmax, ymax) after clamping.
    """
    # Upper limit for each of the four coordinates, in bbox order.
    upper_limits = (img_width, img_height, img_width, img_height)
    return tuple(
        max(min(bbox[pos], limit), 0.)
        for pos, limit in enumerate(upper_limits))
def load_label_info(file_path):
    """
    Read label names from a text file, one per line.

    Args:
        file_path (str): path of the label file.

    Returns:
        list[str]: each line of the file with surrounding whitespace stripped.
    """
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Postprocess the lod_tensor produced by the predictor for one batch.

    Args:
        paths (list[str]): the paths of all input images.
        images (list(numpy.ndarray)): list of all in-memory images, shape of each is [H, W, C].
        data_out (lod_tensor): data produced by the predictor; only data_out[0] is read.
        score_thresh (float): detections scoring below this value are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): output directory.
        handle_id (int): The number of images that have been handled before this batch.
        visualization (bool): whether to save as images.

    Returns:
        res (list[dict]): one dict per image of the batch. Keys include 'data', 'path', 'save_path', the corresponding value is:
            data (list[dict]): the detections, each with keys 'left', 'top', 'right', 'bottom', 'label', 'confidence':
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            path (str): The source path, present only for images given through paths.
            save_path (str): The path of the saved output image, present only when visualization is True.
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image among all inputs: path inputs are numbered
        # first, in-memory images follow. Using the global position keeps the
        # lookup correct for every batch; indexing images with the in-batch
        # index alone picked the wrong image once handle_id passed len(paths).
        input_index = handle_id + index
        if input_index < paths_num:
            org_img_path = paths[input_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            org_img = images[input_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel axis before handing the array to PIL.
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no file yet; save one so that the
                # drawing step below can open it by path.
                org_img_path = get_save_image_name(
                    org_img, output_dir,
                    'image_numpy_{}'.format((handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width

        # Rows of `results` that belong to this image.
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # Expected row layout: [category_id, score, 4 box values].
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            # Scale the box values by the original image size.
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
ssd_vgg16_512_coco2017
==
1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_512_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 设置参数的 trainable 属性;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'body
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"ssd_vgg16_512_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
ssd_vgg16_512_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用 GPU 预测,则需要在启动服务之前设置 CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/ssd_vgg16_512_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/__init__.py
0 → 100644
浏览文件 @
3acfe6bd
hub_module/modules/image/object_detection/ssd/data_feed.py
→
hub_module/modules/image/object_detection/ssd
_vgg16_512_coco2017
/data_feed.py
浏览文件 @
3acfe6bd
...
@@ -9,10 +9,10 @@ from collections import OrderedDict
...
@@ -9,10 +9,10 @@ from collections import OrderedDict
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
,
ImageEnhance
from
PIL
import
Image
from
paddle
import
fluid
from
paddle
import
fluid
__all__
=
[
'reader'
,
'DecodeImage'
,
'ResizeImage'
,
'NormalizeImage'
,
'Permute'
]
__all__
=
[
'reader'
]
class
DecodeImage
(
object
):
class
DecodeImage
(
object
):
...
@@ -59,8 +59,6 @@ class ResizeImage(object):
...
@@ -59,8 +59,6 @@ class ResizeImage(object):
self
.
target_size
=
target_size
self
.
target_size
=
target_size
def
__call__
(
self
,
im
):
def
__call__
(
self
,
im
):
""" Resize the image numpy.
"""
if
not
isinstance
(
im
,
np
.
ndarray
):
if
not
isinstance
(
im
,
np
.
ndarray
):
raise
TypeError
(
"{}: image type is not numpy."
.
format
(
self
))
raise
TypeError
(
"{}: image type is not numpy."
.
format
(
self
))
if
len
(
im
.
shape
)
!=
3
:
if
len
(
im
.
shape
)
!=
3
:
...
@@ -132,6 +130,7 @@ class NormalizeImage(object):
...
@@ -132,6 +130,7 @@ class NormalizeImage(object):
def
__call__
(
self
,
im
):
def
__call__
(
self
,
im
):
"""Normalize the image.
"""Normalize the image.
Operators:
Operators:
1.(optional) Scale the image to [0,1]
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
2. Each pixel minus mean and is divided by std
...
@@ -154,6 +153,7 @@ class Permute(object):
...
@@ -154,6 +153,7 @@ class Permute(object):
def
__init__
(
self
,
to_bgr
=
True
,
channel_first
=
True
):
def
__init__
(
self
,
to_bgr
=
True
,
channel_first
=
True
):
"""
"""
Change the channel.
Change the channel.
Args:
Args:
to_bgr (bool): confirm whether to convert RGB to BGR
to_bgr (bool): confirm whether to convert RGB to BGR
channel_first (bool): confirm whether to change channel
channel_first (bool): confirm whether to change channel
...
@@ -178,16 +178,16 @@ def reader(paths=[],
...
@@ -178,16 +178,16 @@ def reader(paths=[],
permute_image
=
Permute
(
to_bgr
=
False
),
permute_image
=
Permute
(
to_bgr
=
False
),
normalize_image
=
NormalizeImage
(
normalize_image
=
NormalizeImage
(
mean
=
[
104
,
117
,
123
],
std
=
[
1
,
1
,
1
],
is_scale
=
False
)):
mean
=
[
104
,
117
,
123
],
std
=
[
1
,
1
,
1
],
is_scale
=
False
)):
"""
data generator
"""
data generator
:param paths: path to images.
:type paths: list, each element is a str
Args:
:param images: data of images, [N, H, W, C]
paths (list[str]): paths to images.
:type images: numpy.ndarray
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param decode_image
: instance of <class 'DecodeImage' object>
decode_image (class object)
: instance of <class 'DecodeImage' object>
:param resize_image
: instance of <class 'ResizeImage' object>
resize_image (class object)
: instance of <class 'ResizeImage' object>
:param permute_image
: instance of <class 'Permute' object>
permute_image (class object)
: instance of <class 'Permute' object>
:param normalize_image
: instance of <class 'NormalizeImage' object>
normalize_image (class object)
: instance of <class 'NormalizeImage' object>
"""
"""
img_list
=
[]
img_list
=
[]
if
paths
is
not
None
:
if
paths
is
not
None
:
...
...
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
yaml
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.
io.parser
import
txt_parser
from
paddlehub.
module.module
import
moduleinfo
,
runnable
,
serving
import
yaml
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
ssd_vgg16_512_coco2017.vgg
import
VGG
from
ssd_vgg16_512_coco2017.vgg
import
VGG
from
ssd_vgg16_512_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
ssd_vgg16_512_coco2017.data_feed
import
reader
@
moduleinfo
(
@
moduleinfo
(
...
@@ -25,25 +25,18 @@ from ssd_vgg16_512_coco2017.vgg import VGG
...
@@ -25,25 +25,18 @@ from ssd_vgg16_512_coco2017.vgg import VGG
type
=
"cv/object_detection"
,
type
=
"cv/object_detection"
,
summary
=
"SSD with backbone VGG16, trained with dataset COCO."
,
summary
=
"SSD with backbone VGG16, trained with dataset COCO."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
SSDVGG16
(
hub
.
Module
):
class
SSDVGG16
_512
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
ssd
=
hub
.
Module
(
name
=
"ssd"
)
# default pretrained model of SSD, the shape of input image tensor is (3, 512, 512)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"ssd_vgg16_512_model"
)
self
.
directory
,
"ssd_vgg16_512_model"
)
self
.
label_names
=
self
.
ssd
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
model_config
=
None
self
.
image
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
self
.
_config
=
None
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
# predictor config setting.
predictor config setting
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_gpu
()
cpu_config
.
disable_gpu
()
...
@@ -62,25 +55,31 @@ class SSDVGG16(hub.Module):
...
@@ -62,25 +55,31 @@ class SSDVGG16(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
# model config setting.
num_classes
=
81
,
if
not
self
.
model_config
:
trainable
=
True
,
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
fp
:
pretrained
=
True
,
self
.
model_config
=
yaml
.
load
(
fp
.
read
(),
Loader
=
yaml
.
FullLoader
)
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
self
.
multi_box_head_config
=
self
.
model_config
[
'MultiBoxHead'
]
self
.
output_decoder_config
=
self
.
model_config
[
'SSDOutputDecoder'
]
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model.
outputs(dict): the output variables.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
...
@@ -95,21 +94,60 @@ class SSDVGG16(hub.Module):
...
@@ -95,21 +94,60 @@ class SSDVGG16(hub.Module):
[
128
,
256
,
1
,
2
,
[
128
,
256
,
1
,
2
,
3
],
[
128
,
256
,
1
,
2
,
3
],
3
],
[
128
,
256
,
1
,
2
,
3
],
[
128
,
256
,
1
,
1
,
4
]])
[
128
,
256
,
1
,
1
,
4
]])
# body_feats
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
# call ssd.context
# im_size
inputs
,
outputs
,
context_prog
=
self
.
ssd
.
context
(
im_size
=
fluid
.
layers
.
data
(
body_feats
=
body_feats
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
multi_box_head
=
self
.
ssd
.
MultiBoxHead
(
# var_prefix
num_classes
=
num_classes
,
**
self
.
multi_box_head_config
),
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
ssd_output_decoder
=
self
.
ssd
.
SSDOutputDecoder
(
# names of inputs
**
self
.
output_decoder_config
),
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# names of outputs
if
get_prediction
:
locs
,
confs
,
box
,
box_var
=
fluid
.
layers
.
multi_box_head
(
inputs
=
body_feats
,
image
=
image
,
image
=
image
,
trainable
=
trainable
,
num_classes
=
81
,
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
),
**
self
.
multi_box_head_config
)
get_prediction
=
get_prediction
)
pred
=
fluid
.
layers
.
detection_output
(
loc
=
locs
,
scores
=
confs
,
prior_box
=
box
,
prior_box_var
=
box_var
,
**
self
.
output_decoder_config
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
pred
.
name
]}
else
:
outputs
=
{
'body_features'
:
[
var_prefix
+
var
.
name
for
var
in
body_feats
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
outputs
=
{
out_key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
out_value
]
for
out_key
,
out_value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -123,67 +161,52 @@ class SSDVGG16(hub.Module):
...
@@ -123,67 +161,52 @@ class SSDVGG16(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
@
property
def
config
(
self
):
if
not
self
.
_config
:
with
open
(
os
.
path
.
join
(
self
.
directory
,
'config.yml'
))
as
file
:
self
.
_config
=
yaml
.
load
(
file
.
read
(),
Loader
=
yaml
.
FullLoader
)
return
self
.
_config
@
property
return
inputs
,
outputs
,
context_prog
def
multi_box_head_config
(
self
):
return
self
.
config
[
'MultiBoxHead'
]
@
property
def
output_decoder_config
(
self
):
return
self
.
config
[
'SSDOutputDecoder'
]
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
None
,
paths
=
None
,
images
=
None
,
images
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
resize_image
=
self
.
ssd
.
ResizeImage
(
paths
=
paths
if
paths
else
list
()
target_size
=
300
,
interp
=
1
,
max_size
=
0
,
use_cv2
=
False
)
data_reader
=
partial
(
reader
,
paths
,
images
)
data_reader
=
partial
(
self
.
ssd
.
reader
,
paths
,
images
,
resize_image
=
resize_image
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
paths
=
paths
if
paths
else
[]
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
np_data
=
np
.
array
(
feed_data
).
astype
(
'float32'
)
feed_data
=
np
.
array
(
feed_data
)
if
np_data
.
shape
==
1
:
image_tensor
=
PaddleTensor
(
np
.
array
(
list
(
feed_data
[:,
0
])).
copy
())
np_data
=
np_data
[
0
]
else
:
np_data
=
np
.
squeeze
(
np_data
,
axis
=
1
)
data_tensor
=
PaddleTensor
(
np_data
.
copy
())
if
use_gpu
:
if
use_gpu
:
data_out
=
self
.
gpu_predictor
.
run
([
data
_tensor
])
data_out
=
self
.
gpu_predictor
.
run
([
image
_tensor
])
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
([
data_tensor
])
data_out
=
self
.
cpu_predictor
.
run
([
image_tensor
])
output
=
self
.
ssd
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -192,54 +215,49 @@ class SSDVGG16(hub.Module):
...
@@ -192,54 +215,49 @@ class SSDVGG16(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
.
extend
(
output
)
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
object_detection
(
images_decode
,
**
kwargs
)
return
results
self
.
arg_input_group
.
add_argument
(
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -251,14 +269,48 @@ class SSDVGG16(hub.Module):
...
@@ -251,14 +269,48 @@ class SSDVGG16(hub.Module):
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_data
=
self
.
check_input_data
(
args
)
results
=
self
.
face_detection
(
if
len
(
input_data
)
==
0
:
paths
=
[
args
.
input_path
],
self
.
parser
.
print_help
()
batch_size
=
args
.
batch_size
,
exit
(
1
)
use_gpu
=
args
.
use_gpu
,
else
:
output_dir
=
args
.
output_dir
,
for
image_path
in
input_data
:
visualization
=
args
.
visualization
,
if
not
os
.
path
.
exists
(
image_path
):
score_thresh
=
args
.
score_thresh
)
raise
RuntimeError
(
return
results
"File %s or %s is not exist."
%
image_path
)
return
self
.
object_detection
(
def
add_module_config_arg
(
self
):
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
"""
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def
add_module_input_arg
(
self
):
"""
Add the command input options.
"""
self
.
arg_input_group
.
add_argument
(
'--input_path'
,
type
=
str
,
help
=
"path to image."
)
self
.
arg_input_group
.
add_argument
(
'--batch_size'
,
type
=
ast
.
literal_eval
,
default
=
1
,
help
=
"batch size."
)
self
.
arg_input_group
.
add_argument
(
'--score_thresh'
,
type
=
ast
.
literal_eval
,
default
=
0.5
,
help
=
"threshold for object detecion."
)
hub_module/modules/image/object_detection/ssd_vgg16_512_coco2017/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an OpenCV image.

    Args:
        b64str (str): base64 text of an encoded image file (e.g. JPEG/PNG bytes).

    Returns:
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring on raw bytes is deprecated; np.frombuffer is the supported
    # way to view a bytes object as a uint8 array.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for an image, creating output_dir if needed.

    The base name is taken from image_path; the extension is chosen from
    img.format first, then from img.mode, and otherwise the extension of
    image_path is kept.

    Args:
        img: object exposing ``format`` and ``mode`` attributes.
        output_dir (str): directory the image will be saved to.
        image_path (str): source path (or name) of the image.

    Returns:
        str: path inside output_dir with the selected extension.
    """
    format_to_ext = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    mode_to_ext = {'RGB': ".jpg", 'L': ".jpg", 'RGBA': '.png', 'P': '.png'}

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, suffix = os.path.splitext(os.path.basename(image_path))
    if img.format in format_to_ext:
        suffix = format_to_ext[img.format]
    else:
        # Unknown container format: fall back to the pixel mode, and keep the
        # original suffix when the mode is not recognised either.
        suffix = mode_to_ext.get(img.mode, suffix)

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes on an image file and save the result.

    Args:
        image_path (str): path of the image to open.
        data_list (list[dict]): detections with keys 'left', 'right', 'top',
            'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image.
    """
    canvas = Image.open(image_path)
    painter = ImageDraw.Draw(canvas)
    for det in data_list:
        x0 = det['left']
        x1 = det['right']
        y0 = det['top']
        y1 = det['bottom']

        # Closed polyline around the box.
        outline = [(x0, y0), (x0, y1), (x1, y1), (x1, y0), (x0, y0)]
        painter.line(outline, width=2, fill='red')

        # The label is only rendered for RGB images.
        if canvas.mode != 'RGB':
            continue
        caption = det['label'] + ": %.2f%%" % (100 * det['confidence'])
        text_w, text_h = painter.textsize(text=caption)
        painter.rectangle(
            xy=(x0, y0 - (text_h + 5), x0 + text_w + 10, y0),
            fill=(255, 255, 255))
        painter.text(xy=(x0, y0 - 15), text=caption, fill=(0, 0, 0))

    target = get_save_image_name(canvas, save_dir, image_path)
    # Remove any previous output so the save below starts from a clean file.
    if os.path.exists(target):
        os.remove(target)
    canvas.save(target)
    return target
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box to the image area.

    Args:
        bbox: indexable holding xmin, ymin, xmax, ymax at positions 0..3.
        img_width: upper bound for the x coordinates.
        img_height: upper bound for the y coordinates.

    Returns:
        tuple: (xmin, ymin, xmax, ymax), each limited to [0, bound].
    """

    def _clamp(value, upper):
        return max(min(value, upper), 0.)

    return (_clamp(bbox[0], img_width), _clamp(bbox[1], img_height),
            _clamp(bbox[2], img_width), _clamp(bbox[3], img_height))
def load_label_info(file_path):
    """
    Read label names from a text file, one label per line.

    Args:
        file_path (str): path of the label file.

    Returns:
        list[str]: every line of the file with surrounding whitespace stripped.
    """
    with open(file_path, 'r') as label_file:
        lines = label_file.readlines()
    return [line.strip() for line in lines]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Convert the detection tensor of one batch into per-image results.

    Args:
        paths (list[str]): paths of all input images of the run.
        images (list(numpy.ndarray)): all in-memory input images, shape of
            each is [H, W, C]. The channel axis is reversed before the PIL
            image is built (presumably BGR input - confirm with the reader).
        data_out (list): predictor outputs; data_out[0] must expose ``lod``
            and ``as_ndarray()``.
        score_thresh (float): detections scoring below this are dropped.
        label_names (list[str]): label names indexed by category id.
        output_dir (str): output directory.
        handle_id (int): number of inputs handled by previous batches.
        visualization (bool): whether to save annotated images.

    Returns:
        res (list[dict]): one dict per image of the batch, with keys:
            data (list[dict]): detections, each with keys:
                left (float): X coordinate of the upper left corner of the box;
                top (float): Y coordinate of the upper left corner of the box;
                right (float): X coordinate of the lower right corner of the box;
                bottom (float): Y coordinate of the lower right corner of the box;
                label (str): label of the detection;
                confidence (float): confidence of the detection.
            path (str): source path, only for inputs that came from ``paths``.
            save_path (str): annotated image path, only when visualization is True.
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this sample among all inputs of the run. Inputs are
        # numbered with paths first and in-memory images afterwards, so the
        # image offset must account for the samples handled by earlier
        # batches; indexing images by the in-batch index alone picked the
        # wrong image from the second image batch onwards.
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no file yet: save one so the boxes
                # can be drawn on it below.
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # Each row is read as [category_id, score, x0, y0, x1, y1].
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            # Scale the box by the original image size (in place, keeping the
            # dtype of the result array) before clipping it to the image.
            bbox = row[2:]
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/yolov3/module.py
已删除
100644 → 0
浏览文件 @
6a477596
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3.data_feed
import
reader
from
yolov3.processor
import
load_label_info
,
postprocess
from
yolov3.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@moduleinfo(
    name="yolov3",
    version="1.0.0",
    type="cv/object_detection",
    summary="Baidu's YOLOv3 model for object detection.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class YOLOv3(hub.Module):
    """Hub module that bundles the YOLOv3 helpers and builds the head program."""

    def _initialize(self):
        """Expose the reader, label/postprocess helpers and head classes as attributes."""
        self.reader = reader
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        self.MultiClassNMS = MultiClassNMS
        self.YOLOv3Head = YOLOv3Head

    def context(self,
                body_feats,
                yolo_head,
                image,
                trainable=True,
                var_prefix='',
                get_prediction=False):
        """
        Distill the Head Features, so as to perform transfer learning.

        The head is appended to the program that owns ``image``.

        Args:
            body_feats (feature maps of backbone): feature maps of backbone.
            yolo_head (<class 'YOLOv3Head' object>): yolo_head of YOLOv3
            image (Variable): image tensor.
            trainable (bool): whether to set parameters trainable; also passed
                to the head as ``is_train``.
            var_prefix (str): the prefix of variables in yolo_head and backbone.
            get_prediction (bool): whether to get prediction or not.

        Returns:
            inputs (dict): variables for the keys 'image' and 'im_size'.
            outputs (dict): {'bbox_out': [Variable]} when get_prediction is
                True, otherwise {'head_features': [Variable, ...]}.
            context_prog (Program): the program to execute transfer learning.
        """
        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            head_features = yolo_head._get_outputs(
                body_feats, is_train=trainable)
            # Collect the *prefixed* names first; they are resolved to
            # variables only after add_vars_prefix has run below.
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            if get_prediction:
                bbox_out = yolo_head.get_prediction(head_features, im_size)
                outputs = {'bbox_out': [var_prefix + bbox_out.name]}
            else:
                outputs = {
                    'head_features':
                    [var_prefix + var.name for var in head_features]
                }

            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            # NOTE(review): add_vars_prefix presumably renames the variables
            # of a program by prepending var_prefix - confirm against
            # paddlehub.common.paddle_helper.
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            # Swap the collected names for the variables of the program.
            inputs = {
                key: context_prog.global_block().vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: [
                    context_prog.global_block().vars[varname]
                    for varname in value
                ]
                for key, value in outputs.items()
            }

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            return inputs, outputs, context_prog
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
yolov3_darknet53_coco2017
==
1.1.0
```
## 命令行预测
```
hub run yolov3_darknet53_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 将参数的trainable属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 保存模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"yolov3_darknet53_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
yolov3_darknet53_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def cv2_to_base64(image):
    # Encode the image as JPEG, then base64-encode the raw bytes as text.
    # tobytes() replaces the deprecated ndarray.tostring() alias.
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/yolov3_darknet53_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/darknet.py
浏览文件 @
3acfe6bd
...
@@ -15,6 +15,7 @@ __all__ = ['DarkNet']
...
@@ -15,6 +15,7 @@ __all__ = ['DarkNet']
class
DarkNet
(
object
):
class
DarkNet
(
object
):
"""DarkNet, see https://pjreddie.com/darknet/yolo/
"""DarkNet, see https://pjreddie.com/darknet/yolo/
Args:
Args:
depth (int): network depth, currently only darknet 53 is supported
depth (int): network depth, currently only darknet 53 is supported
norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
...
@@ -120,11 +121,8 @@ class DarkNet(object):
...
@@ -120,11 +121,8 @@ class DarkNet(object):
return
out
return
out
def
__call__
(
self
,
input
):
def
__call__
(
self
,
input
):
"""Get the backbone of DarkNet, that is output for the 5 stages.
"""
Get the backbone of DarkNet, that is output for the 5 stages.
:param input: Variable of input image
:type input: Variable
:Returns: The last variables of each stage.
"""
"""
stages
,
block_func
=
self
.
depth_cfg
[
self
.
depth
]
stages
,
block_func
=
self
.
depth_cfg
[
self
.
depth
]
stages
=
stages
[
0
:
5
]
stages
=
stages
[
0
:
5
]
...
...
hub_module/modules/image/object_detection/yolov3/data_feed.py
→
hub_module/modules/image/object_detection/yolov3
_darknet53_coco2017
/data_feed.py
浏览文件 @
3acfe6bd
...
@@ -12,12 +12,15 @@ __all__ = ['reader']
...
@@ -12,12 +12,15 @@ __all__ = ['reader']
def
reader
(
paths
=
[],
images
=
None
):
def
reader
(
paths
=
[],
images
=
None
):
"""data generator
"""
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images.
Yield:
:type paths: list, each element is a str
res (list): preprocessed image and the size of original image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
"""
"""
img_list
=
[]
img_list
=
[]
if
paths
:
if
paths
:
...
...
hub_module/modules/image/object_detection/yolov3_darknet53_coco2017/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3_darknet53_coco2017.darknet
import
DarkNet
from
yolov3_darknet53_coco2017.darknet
import
DarkNet
from
yolov3_darknet53_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
yolov3_darknet53_coco2017.data_feed
import
reader
from
yolov3_darknet53_coco2017.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@
moduleinfo
(
@
moduleinfo
(
name
=
"yolov3_darknet53_coco2017"
,
name
=
"yolov3_darknet53_coco2017"
,
version
=
"1.1.0"
,
version
=
"1.1.0"
,
type
=
"cv/object_detection"
,
type
=
"CV/object_detection"
,
summary
=
"Baidu's YOLOv3 model for object detection, with backbone DarkNet."
,
summary
=
"Baidu's YOLOv3 model for object detection, with backbone DarkNet53, trained with dataset coco2017."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
YOLOv3DarkNet53
(
hub
.
Module
):
class
YOLOv3DarkNet53
Coco2017
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
yolov3
=
hub
.
Module
(
name
=
"yolov3"
)
# default pretrained model of YOLOv3_DarkNet53, the shape of input image tensor is (3, 608, 608)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"yolov3_darknet53_model"
)
self
.
directory
,
"yolov3_darknet53_model"
)
self
.
label_names
=
self
.
yolov3
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
image
=
None
self
.
im_size
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
"""
predictor config setting
predictor config setting
.
"""
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
...
@@ -61,34 +57,80 @@ class YOLOv3DarkNet53(hub.Module):
...
@@ -61,34 +57,80 @@ class YOLOv3DarkNet53(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
num_classes
=
80
,
trainable
=
True
,
pretrained
=
True
):
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model
.
outputs(dict): the output variables
.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
# yolo_head
# backbone
yolo_head
=
self
.
yolov3
.
YOLOv3Head
(
num_classes
=
num_classes
)
backbone
=
DarkNet
(
norm_type
=
'bn'
,
norm_decay
=
0.
,
depth
=
53
)
backbone
=
DarkNet
(
norm_type
=
'bn'
,
norm_decay
=
0.
,
depth
=
53
)
# body_feats
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
inputs
,
outputs
,
context_prog
=
self
.
yolov3
.
context
(
# im_size
body_feats
=
body_feats
,
im_size
=
fluid
.
layers
.
data
(
yolo_head
=
yolo_head
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
image
=
image
,
# yolo_head
trainable
=
trainable
,
yolo_head
=
YOLOv3Head
(
num_classes
=
80
)
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
))
# head_features
head_features
=
yolo_head
.
_get_outputs
(
body_feats
,
is_train
=
trainable
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
# var_prefix
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
# name of inputs
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# name of outputs
if
get_prediction
:
bbox_out
=
yolo_head
.
get_prediction
(
head_features
,
im_size
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
bbox_out
.
name
]}
else
:
outputs
=
{
'head_features'
:
[
var_prefix
+
var
.
name
for
var
in
head_features
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
# outputs
outputs
=
{
key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
value
]
for
key
,
value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -102,38 +144,45 @@ class YOLOv3DarkNet53(hub.Module):
...
@@ -102,38 +144,45 @@ class YOLOv3DarkNet53(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
None
,
paths
=
None
,
images
=
None
,
images
=
None
,
data
=
None
,
data
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
paths
=
paths
if
paths
else
list
()
if
data
and
'image'
in
data
:
if
data
and
'image'
in
data
:
paths
=
data
[
'image'
]
if
not
paths
else
paths
+
data
[
'image'
]
paths
+=
data
[
'image'
]
paths
=
paths
if
paths
else
[]
data_reader
=
partial
(
self
.
yolov3
.
reader
,
paths
,
images
)
data_reader
=
partial
(
reader
,
paths
,
images
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
...
@@ -146,7 +195,8 @@ class YOLOv3DarkNet53(hub.Module):
...
@@ -146,7 +195,8 @@ class YOLOv3DarkNet53(hub.Module):
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
(
data_out
=
self
.
cpu_predictor
.
run
(
[
image_tensor
,
im_size_tensor
])
[
image_tensor
,
im_size_tensor
])
output
=
self
.
yolov3
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -155,54 +205,49 @@ class YOLOv3DarkNet53(hub.Module):
...
@@ -155,54 +205,49 @@ class YOLOv3DarkNet53(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
.
extend
(
output
)
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
object_detection
(
images_decode
,
**
kwargs
)
return
results
self
.
arg_input_group
.
add_argument
(
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -214,14 +259,48 @@ class YOLOv3DarkNet53(hub.Module):
...
@@ -214,14 +259,48 @@ class YOLOv3DarkNet53(hub.Module):
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_data
=
self
.
check_input_data
(
args
)
results
=
self
.
face_detection
(
if
len
(
input_data
)
==
0
:
paths
=
[
args
.
input_path
],
self
.
parser
.
print_help
()
batch_size
=
args
.
batch_size
,
exit
(
1
)
use_gpu
=
args
.
use_gpu
,
else
:
output_dir
=
args
.
output_dir
,
for
image_path
in
input_data
:
visualization
=
args
.
visualization
,
if
not
os
.
path
.
exists
(
image_path
):
score_thresh
=
args
.
score_thresh
)
raise
RuntimeError
(
return
results
"File %s or %s is not exist."
%
image_path
)
return
self
.
object_detection
(
def
add_module_config_arg
(
self
):
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
"""
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def add_module_input_arg(self):
    """
    Register the command-line input options on ``self.arg_input_group``.

    Three options are added, in this order: ``--input_path``,
    ``--batch_size`` and ``--score_thresh``.
    """
    group = self.arg_input_group
    # (flag, keyword arguments) pairs, registered in declaration order.
    option_specs = (
        ('--input_path', dict(type=str, help="path to image.")),
        ('--batch_size',
         dict(type=ast.literal_eval, default=1, help="batch size.")),
        ('--score_thresh',
         dict(
             type=ast.literal_eval,
             default=0.5,
             help="threshold for object detecion.")),
    )
    for flag, options in option_specs:
        group.add_argument(flag, **options)
hub_module/modules/image/object_detection/yolov3/processor.py
→
hub_module/modules/image/object_detection/yolov3
_darknet53_coco2017
/processor.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
import
base64
import
os
import
os
import
cv2
import
numpy
as
np
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'load_label_info'
,
'postprocess'
]
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for raw binary input; np.frombuffer is the
    # documented replacement and avoids copying the decoded bytes.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def
check_dir
(
dir_path
):
def
check_dir
(
dir_path
):
...
...
hub_module/modules/image/object_detection/yolov3/yolo_head.py
→
hub_module/modules/image/object_detection/yolov3
_darknet53_coco2017
/yolo_head.py
浏览文件 @
3acfe6bd
文件已移动
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/README.md
浏览文件 @
3acfe6bd
...
@@ -13,17 +13,15 @@ hub run yolov3_darknet53_pedestrian --input_path "/PATH/TO/IMAGE"
...
@@ -13,17 +13,15 @@ hub run yolov3_darknet53_pedestrian --input_path "/PATH/TO/IMAGE"
```
```
def context(trainable=True,
def context(trainable=True,
pretrained=True,
pretrained=True,
var_prefix='',
get_prediction=False)
get_prediction=False)
```
```
提取头部特征
,用于迁移学习。
特征提取
,用于迁移学习。
**参数**
**参数**
*
trainable(bool): 将参数的trainable属性设为trainable;
*
trainable(bool): 将参数的trainable属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
pretrained (bool): 是否加载预训练模型;
*
var
\_
prefix (str): 在变量的name 中加上前缀;
*
get
\_
prediction (bool): 是否执行预测。
*
get
\_
prediction (bool): 是否执行预测。
**返回**
**返回**
...
...
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
cv2
import
numpy
as
np
__all__
=
[
'reader'
]
def reader(paths=None, images=None):
    """
    Data generator that preprocesses images for the predictor.

    Images read from ``paths`` are yielded first, followed by the arrays in
    ``images``.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]

    Yield:
        res (list): preprocessed image and the size of original image.

    Raises:
        ValueError: if a file in ``paths`` cannot be decoded as an image.
        ZeroDivisionError: if an image has a zero-sized height or width.
    """
    # Loop-invariant preprocessing constants.
    target_size = 608
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]

    img_list = []
    if paths:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            # cv2.imread returns None for a file it cannot decode; report
            # that explicitly instead of failing on None.astype.
            if img is None:
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    for im in img_list:
        # im_size: original (height, width), fed to the network alongside
        # the resized image.
        im_shape = im.shape
        im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32)

        # Guard the scale computation below before touching the pixel data.
        if float(np.min(im_shape[0:2])) == 0:
            raise ZeroDivisionError('min size of image is 0')

        # decode image
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # resize image to target_size x target_size (aspect ratio not kept)
        im_scale_x = float(target_size) / float(im_shape[1])
        im_scale_y = float(target_size) / float(im_shape[0])
        # interpolation flag 2 is passed through unchanged
        # (presumably cv2.INTER_CUBIC -- confirm against the OpenCV enum).
        im = cv2.resize(
            im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2)

        # normalize image
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        # permute [H, W, C] -> [C, H, W]
        im = im.transpose((2, 0, 1))

        yield [im, im_size]
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
import
ast
import
ast
import
argparse
import
argparse
...
@@ -12,9 +11,12 @@ import paddle.fluid as fluid
...
@@ -12,9 +11,12 @@ import paddle.fluid as fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3_darknet53_pedestrian.darknet
import
DarkNet
from
yolov3_darknet53_pedestrian.darknet
import
DarkNet
from
yolov3_darknet53_pedestrian.serving
import
base64_to_cv2
from
yolov3_darknet53_pedestrian.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
yolov3_darknet53_pedestrian.data_feed
import
reader
from
yolov3_darknet53_pedestrian.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@
moduleinfo
(
@
moduleinfo
(
...
@@ -24,13 +26,12 @@ from yolov3_darknet53_pedestrian.serving import base64_to_cv2
...
@@ -24,13 +26,12 @@ from yolov3_darknet53_pedestrian.serving import base64_to_cv2
summary
=
summary
=
"Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53."
,
"Baidu's YOLOv3 model for pedestrian detection, with backbone DarkNet53."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
YOLOv3DarkNet53Pedestrian
(
hub
.
Module
):
class
YOLOv3DarkNet53Pedestrian
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
yolov3
=
hub
.
Module
(
name
=
"yolov3"
)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"yolov3_darknet53_pedestrian_model"
)
self
.
directory
,
"yolov3_darknet53_pedestrian_model"
)
self
.
label_names
=
self
.
yolov3
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
_set_config
()
self
.
_set_config
()
...
@@ -56,18 +57,13 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -56,18 +57,13 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
trainable
=
True
,
pretrained
=
True
,
var_prefix
=
''
,
get_prediction
=
False
):
"""
"""
Distill the Head Features, so as to perform transfer learning.
Distill the Head Features, so as to perform transfer learning.
Args:
Args:
trainable (bool): whether to set parameters trainable.
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
pretrained (bool): whether to load default pretrained model.
var_prefix (str): prefix to append to the varibles.
get_prediction (bool): whether to get prediction.
get_prediction (bool): whether to get prediction.
Returns:
Returns:
...
@@ -75,15 +71,22 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -75,15 +71,22 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
outputs(dict): the output variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
context_prog (Program): the program to execute transfer learning.
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
# backbone
backbone
=
DarkNet
(
norm_type
=
'sync_bn'
,
norm_decay
=
0.
,
depth
=
53
)
# body_feats
body_feats
=
backbone
(
image
)
# im_size
im_size
=
fluid
.
layers
.
data
(
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
# yolo_head
# yolo_head
yolo_head
=
self
.
yolov3
.
YOLOv3Head
(
yolo_head
=
YOLOv3Head
(
anchor_masks
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchor_masks
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
=
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
anchors
=
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
],
[
30
,
61
],
[
62
,
45
],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
[
59
,
119
],
[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
...
@@ -91,27 +94,57 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -91,27 +94,57 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
num_classes
=
1
,
num_classes
=
1
,
ignore_thresh
=
0.7
,
ignore_thresh
=
0.7
,
label_smooth
=
True
,
label_smooth
=
True
,
nms
=
self
.
yolov3
.
MultiClassNMS
(
nms
=
MultiClassNMS
(
background_label
=-
1
,
background_label
=-
1
,
keep_top_k
=
100
,
keep_top_k
=
100
,
nms_threshold
=
0.45
,
nms_threshold
=
0.45
,
nms_top_k
=
1000
,
nms_top_k
=
1000
,
normalized
=
False
,
normalized
=
False
,
score_threshold
=
0.01
))
score_threshold
=
0.01
))
backbone
=
DarkNet
(
norm_type
=
'sync_bn'
,
norm_decay
=
0.
,
depth
=
53
)
# head_features
body_feats
=
backbone
(
image
)
head_features
=
yolo_head
.
_get_outputs
(
var_prefix
=
var_prefix
if
var_prefix
else
'@HUB_{}@'
.
format
(
body_feats
,
is_train
=
trainable
)
self
.
name
)
inputs
,
outputs
,
context_prog
=
self
.
yolov3
.
context
(
body_feats
=
body_feats
,
yolo_head
=
yolo_head
,
image
=
image
,
trainable
=
trainable
,
var_prefix
=
var_prefix
,
get_prediction
=
get_prediction
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
# var_prefix
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
# name of inputs
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# name of outputs
if
get_prediction
:
bbox_out
=
yolo_head
.
get_prediction
(
head_features
,
im_size
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
bbox_out
.
name
]}
else
:
outputs
=
{
'head_features'
:
[
var_prefix
+
var
.
name
for
var
in
head_features
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
# outputs
outputs
=
{
key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
value
]
for
key
,
value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -159,7 +192,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -159,7 +192,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
save_path (str, optional): The path to save output images.
save_path (str, optional): The path to save output images.
"""
"""
paths
=
paths
if
paths
else
list
()
paths
=
paths
if
paths
else
list
()
data_reader
=
partial
(
self
.
yolov3
.
reader
,
paths
,
images
)
data_reader
=
partial
(
reader
,
paths
,
images
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
...
@@ -173,7 +206,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -173,7 +206,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
data_out
=
self
.
cpu_predictor
.
run
(
data_out
=
self
.
cpu_predictor
.
run
(
[
image_tensor
,
im_size_tensor
])
[
image_tensor
,
im_size_tensor
])
output
=
self
.
yolov3
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -214,7 +247,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
...
@@ -214,7 +247,7 @@ class YOLOv3DarkNet53Pedestrian(hub.Module):
Run as a service.
Run as a service.
"""
"""
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
results
=
self
.
face
_detection
(
images_decode
,
**
kwargs
)
results
=
self
.
object
_detection
(
images_decode
,
**
kwargs
)
return
results
return
results
@
runnable
@
runnable
...
...
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for raw binary input; np.frombuffer is the
    # documented replacement and avoids copying the decoded bytes.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def check_dir(dir_path):
    """
    Make sure ``dir_path`` is a directory.

    A missing path is created (including parents). A regular file sitting at
    that path is removed and replaced by a directory. An existing
    non-file path is left untouched.
    """
    if os.path.exists(dir_path):
        if not os.path.isfile(dir_path):
            # Already present and not a plain file: nothing to do.
            return
        os.remove(dir_path)
    os.makedirs(dir_path)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for an image from its source path.

    The file name (without directories) of ``image_path`` is reused. When it
    has no extension, one is chosen from ``img.format`` and, failing that,
    from ``img.mode``; if neither matches, no extension is appended.

    Args:
        img: object exposing ``format`` and ``mode`` attributes.
        output_dir (str): directory the image will be saved in.
        image_path (str): source path (or bare name) of the image.

    Returns:
        str: ``output_dir`` joined with the file name plus extension.
    """
    stem, suffix = os.path.splitext(os.path.basename(image_path))
    if suffix == '':
        # First preference: the format reported for the image.
        format_suffixes = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
        suffix = format_suffixes.get(img.format, '')
        if suffix == '':
            # Fallback: pick by colour mode.
            if img.mode in ("RGB", "L"):
                suffix = ".jpg"
            elif img.mode in ("RGBA", "P"):
                suffix = '.png'
    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes (and labels, for RGB images) and save the result.

    Args:
        image_path (str): path of the image to draw on.
        data_list (list[dict]): detections; each dict provides 'left',
            'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image. An existing file at that path is
        removed before saving.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox as a closed red polyline
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label (only done for RGB images)
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textsize'):
                textsize_width, textsize_height = draw.textsize(text=text)
            else:
                # Newer Pillow releases dropped ImageDraw.textsize; derive
                # the same extent from the text bounding box instead.
                box_left, box_top, box_right, box_bottom = draw.textbbox(
                    (0, 0), text)
                textsize_width = box_right - box_left
                textsize_height = box_bottom - box_top
            # white background strip behind the label, above the box
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    if os.path.exists(save_name):
        os.remove(save_name)

    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a bounding box to the image area.

    Args:
        bbox: indexable holding xmin, ymin, xmax, ymax at positions 0..3.
        img_width: upper bound for the x coordinates.
        img_height: upper bound for the y coordinates.

    Returns:
        tuple: (xmin, ymin, xmax, ymax), each limited to [0, bound].
    """

    def _clamp(value, upper):
        # Cap at the image bound first, then floor at zero.
        return max(min(value, upper), 0.)

    return (_clamp(bbox[0], img_width), _clamp(bbox[1], img_height),
            _clamp(bbox[2], img_width), _clamp(bbox[3], img_height))
def load_label_info(file_path):
    """
    Read label names from a text file, one label per line.

    Args:
        file_path (str): path of the label file.

    Returns:
        list[str]: every line of the file with surrounding whitespace
        stripped (blank lines are kept as empty strings).
    """
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file.readlines()]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Postprocess the lod_tensor produced by the predictor for one batch.

    The inputs of the whole run are ordered as all of ``paths`` followed by
    all of ``images``; ``handle_id`` is the position of this batch's first
    sample in that ordering.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        data_out (lod_tensor): data output of predictor.
        score_thresh (float): the low limit of bounding box.
        label_names (list[str]): label names.
        output_dir (str): The path to store output images.
        handle_id (int): The number of images that have been handled.
        visualization (bool): Whether to save image or not.

    Returns:
        res (list[dict]): The result of detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str): The path to save output images (only present when visualization is enabled).
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    check_dir(output_dir)

    assert type(paths) is list, "type(paths) is not list."
    paths_num = len(paths)

    output = list()
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this sample in the overall (paths + images) ordering.
        # Using the global position keeps later batches from re-reading
        # images[0..] once all paths have been consumed.
        sample_id = handle_id + index
        if sample_id < paths_num:
            org_img_path = paths[sample_id]
            org_img = Image.open(org_img_path)
        else:
            org_img = images[sample_id - paths_num]
            org_img = org_img.astype(np.uint8)
            # reverse the channel axis before handing the array to PIL
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no source path; save a copy so the
                # drawing step has a file to open.
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width
        # rows of `results` belonging to this sample
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # each valid row is [category_id, confidence, x0, y0, x1, y1]
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/serving.py
已删除
100644 → 0
浏览文件 @
6a477596
import
base64
import
cv2
import
numpy
as
np
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode`` with ``cv2.IMREAD_COLOR``.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for raw binary input; np.frombuffer is the
    # documented replacement and avoids copying the decoded bytes.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
hub_module/modules/image/object_detection/yolov3_darknet53_pedestrian/yolo_head.py
0 → 100644
浏览文件 @
3acfe6bd
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.regularizer
import
L2Decay
__all__
=
[
'MultiClassNMS'
,
'YOLOv3Head'
]
class MultiClassNMS(object):
    """
    Plain holder for multi-class NMS settings.

    The stored attributes carry the same names as the constructor
    arguments: background_label, keep_top_k, nms_threshold, nms_top_k,
    normalized and score_threshold.
    """

    # __op__ = fluid.layers.multiclass_nms
    def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k,
                 normalized, score_threshold):
        super(MultiClassNMS, self).__init__()
        # label handling and box-coordinate convention
        self.background_label, self.normalized = background_label, normalized
        # candidate-count limits
        self.keep_top_k, self.nms_top_k = keep_top_k, nms_top_k
        # thresholds
        self.nms_threshold, self.score_threshold = (nms_threshold,
                                                    score_threshold)
class YOLOv3Head(object):
    """Head block for YOLOv3 network

    Builds, on top of backbone feature maps, one detection branch per entry
    of `anchor_masks` and (optionally) the decoded, NMS-filtered prediction.

    Args:
        norm_decay (float): weight decay for normalization layer weights
        num_classes (int): number of output classes
        ignore_thresh (float): threshold to ignore confidence loss
        label_smooth (bool): whether to use label smoothing
        anchors (list): anchors
        anchor_masks (list): anchor masks
        nms (object): an instance of `MultiClassNMS`
        weight_prefix_name (str): prefix put in front of the parameter and
            layer names created by this head
    """

    # NOTE(review): the list defaults and the default MultiClassNMS instance
    # are created once at definition time and shared between calls; they are
    # only read in this class, never mutated.
    def __init__(self,
                 norm_decay=0.,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=True,
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 nms=MultiClassNMS(
                     background_label=-1,
                     keep_top_k=100,
                     nms_threshold=0.45,
                     nms_top_k=1000,
                     normalized=True,
                     score_threshold=0.01),
                 weight_prefix_name=''):
        self.norm_decay = norm_decay
        self.num_classes = num_classes
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        # anchor_masks must be set before _parse_anchors, which reads it.
        self.anchor_masks = anchor_masks
        self._parse_anchors(anchors)
        self.nms = nms
        self.prefix_name = weight_prefix_name

    def _conv_bn(self,
                 input,
                 ch_out,
                 filter_size,
                 stride,
                 padding,
                 act='leaky',
                 is_test=True,
                 name=None):
        """
        Bias-free conv2d followed by batch_norm and an optional leaky ReLU.

        Args:
            input (Variable): input feature map.
            ch_out (int): number of convolution filters.
            filter_size (int): convolution kernel size.
            stride (int): convolution stride.
            padding (int): convolution padding.
            act (str): 'leaky' applies leaky_relu(alpha=0.1); any other
                value leaves the batch_norm output as is.
            is_test (bool): forwarded to batch_norm.
            name (str): base name for the created parameters. It is
                concatenated with string suffixes, so the default None is
                not usable -- callers always pass a string.

        Returns:
            out (Variable): the resulting feature map.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weights"),
            bias_attr=False)

        # Batch-norm parameters are named <name>.bn.{scale,offset,mean,var}.
        bn_name = name + ".bn"
        bn_param_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale')
        bn_bias_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset')
        out = fluid.layers.batch_norm(
            input=conv,
            act=None,
            is_test=is_test,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')

        if act == 'leaky':
            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
        return out

    def _detection_block(self, input, channel, is_test=True, name=None):
        """
        Stack of conv_bn layers producing the `route` and `tip` features.

        Two rounds of (1x1 conv with `channel` filters, 3x3 conv with
        `channel * 2` filters) are followed by a 1x1 conv (`route`) and a
        3x3 conv on top of it (`tip`).

        Args:
            input (Variable): input feature map.
            channel (int): base channel count; must be even.
            is_test (bool): forwarded to every _conv_bn call.
            name (str): base name used to derive the layer names.

        Returns:
            route (Variable): output of the 1x1 conv, used by the caller for
                the next (upsampled) branch.
            tip (Variable): output of the final 3x3 conv, used by the caller
                as input to the output convolution.
        """
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2 in detection block {}" \
            .format(channel, name)

        conv = input
        for j in range(2):
            conv = self._conv_bn(
                conv,
                channel,
                filter_size=1,
                stride=1,
                padding=0,
                is_test=is_test,
                name='{}.{}.0'.format(name, j))
            conv = self._conv_bn(
                conv,
                channel * 2,
                filter_size=3,
                stride=1,
                padding=1,
                is_test=is_test,
                name='{}.{}.1'.format(name, j))
        route = self._conv_bn(
            conv,
            channel,
            filter_size=1,
            stride=1,
            padding=0,
            is_test=is_test,
            name='{}.2'.format(name))
        tip = self._conv_bn(
            route,
            channel * 2,
            filter_size=3,
            stride=1,
            padding=1,
            is_test=is_test,
            name='{}.tip'.format(name))
        return route, tip

    def _upsample(self, input, scale=2, name=None):
        """Resize `input` by `scale` using fluid.layers.resize_nearest."""
        out = fluid.layers.resize_nearest(
            input=input, scale=float(scale), name=name)
        return out

    def _parse_anchors(self, anchors):
        """
        Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors

        Sets `self.anchors` to the flattened [w0, h0, w1, h1, ...] list and
        `self.mask_anchors` to one flattened list per entry of
        `self.anchor_masks`, holding the anchors selected by that mask.
        """
        self.anchors = []
        self.mask_anchors = []

        assert len(anchors) > 0, "ANCHORS not set."
        assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set."

        for anchor in anchors:
            assert len(anchor) == 2, "anchor {} len should be 2".format(anchor)
            self.anchors.extend(anchor)

        anchor_num = len(anchors)
        for masks in self.anchor_masks:
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])

    def _get_outputs(self, input, is_train=True):
        """
        Get YOLOv3 head output

        Args:
            input (list): List of Variables, output of backbone stages
            is_train (bool): whether in train or test mode

        Returns:
            outputs (list): Variables of each output layer
        """

        outputs = []

        # get last out_layer_num blocks in reverse order
        out_layer_num = len(self.anchor_masks)
        if isinstance(input, OrderedDict):
            blocks = list(input.values())[-1:-out_layer_num - 1:-1]
        else:
            blocks = input[-1:-out_layer_num - 1:-1]
        route = None
        for i, block in enumerate(blocks):
            if i > 0:  # perform concat in first 2 detection_block
                block = fluid.layers.concat(input=[route, block], axis=1)
            # The block width halves with every branch: 512, 256, 128, ...
            route, tip = self._detection_block(
                block,
                channel=512 // (2**i),
                is_test=(not is_train),
                name=self.prefix_name + "yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    name=self.prefix_name +
                    "yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    regularizer=L2Decay(0.),
                    name=self.prefix_name +
                    "yolo_output.{}.conv.bias".format(i)))
            outputs.append(block_out)

            if i < len(blocks) - 1:
                # do not perform upsample in the last detection_block
                route = self._conv_bn(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not is_train),
                    name=self.prefix_name + "yolo_transition.{}".format(i))
                # upsample
                route = self._upsample(route)

        return outputs

    def get_prediction(self, outputs, im_size):
        """
        Get prediction result of YOLOv3 network

        Args:
            outputs (list): list of Variables, return from _get_outputs
            im_size (Variable): Variable of size([h, w]) of each image

        Returns:
            pred (Variable): The prediction result after non-max suppress.
        """
        boxes = []
        scores = []
        # The first output uses a downsample ratio of 32; it is halved for
        # each following output.
        downsample = 32
        for i, output in enumerate(outputs):
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
                anchors=self.mask_anchors[i],
                class_num=self.num_classes,
                conf_thresh=self.nms.score_threshold,
                downsample_ratio=downsample,
                name=self.prefix_name + "yolo_box" + str(i))
            boxes.append(box)
            # swap the last two axes of the scores before concatenation
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

            downsample //= 2

        # Merge the per-branch results: boxes along axis 1, the transposed
        # scores along axis 2.
        yolo_boxes = fluid.layers.concat(boxes, axis=1)
        yolo_scores = fluid.layers.concat(scores, axis=2)
        pred = fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            background_label=self.nms.background_label,
            normalized=self.nms.normalized,
            name="multiclass_nms")
        return pred
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/README.md
浏览文件 @
3acfe6bd
...
@@ -13,17 +13,15 @@ hub run yolov3_darknet53_vehicles --input_path "/PATH/TO/IMAGE"
...
@@ -13,17 +13,15 @@ hub run yolov3_darknet53_vehicles --input_path "/PATH/TO/IMAGE"
```
```
def context(trainable=True,
def context(trainable=True,
pretrained=True,
pretrained=True,
var_prefix='',
get_prediction=False)
get_prediction=False)
```
```
提取头部特征
,用于迁移学习。
特征提取
,用于迁移学习。
**参数**
**参数**
*
trainable(bool): 将参数的trainable属性设为trainable;
*
trainable(bool): 将参数的trainable属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
pretrained (bool): 是否加载预训练模型;
*
var
\_
prefix (str): 在变量的name 中加上前缀;
*
get
\_
prediction (bool): 是否执行预测。
*
get
\_
prediction (bool): 是否执行预测。
**返回**
**返回**
...
...
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
cv2
import
numpy
as
np
__all__
=
[
'reader'
]
def reader(paths=None, images=None):
    """
    data generator

    Images listed in ``paths`` are loaded first, followed by the in-memory
    ``images``. Every image is converted from BGR to RGB, resized to
    608 x 608, divided by 255, normalized with the mean/std constants below
    and transposed from [H, W, C] to [C, H, W].

    Args:
        paths (list[str]): paths to images. ``None`` or an empty list means
            that no file is read.
        images (list(numpy.ndarray)): data of images, shape of each is
            [H, W, C]. The channels are passed through COLOR_BGR2RGB, so BGR
            order is expected.

    Yield:
        res (list): preprocessed image and the size of original image
            (int32 array ``[height, width]``).

    Raises:
        AssertionError: if ``paths`` is not a list or contains a path that is
            not a file.
        ValueError: if a file in ``paths`` cannot be decoded as an image.
        ZeroDivisionError: if the height or width of an image is 0.
    """
    img_list = []
    if paths:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable/undecodable file by
                # returning None instead of raising; fail with a clear
                # message rather than an AttributeError on ``.astype``.
                raise ValueError(
                    "Failed to decode image file: {}".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    # Loop-invariant preprocessing constants.
    target_size = 608
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]

    for im in img_list:
        # im_size: size of the original image, reported next to the tensor
        im_shape = im.shape
        im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32)

        # decode image
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # resize image: both sides are scaled independently to target_size,
        # so the aspect ratio is not preserved
        if min(im_shape[0:2]) == 0:
            raise ZeroDivisionError('min size of image is 0')
        im_scale_x = float(target_size) / float(im_shape[1])
        im_scale_y = float(target_size) / float(im_shape[0])
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=cv2.INTER_CUBIC)  # same value as the former literal 2

        # normalize image
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        # permute [H, W, C] -> [C, H, W]
        im = im.transpose((2, 0, 1))

        yield [im, im_size]
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
import
ast
import
ast
import
argparse
import
argparse
...
@@ -12,9 +11,12 @@ import paddle.fluid as fluid
...
@@ -12,9 +11,12 @@ import paddle.fluid as fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3_darknet53_vehicles.darknet
import
DarkNet
from
yolov3_darknet53_vehicles.darknet
import
DarkNet
from
yolov3_darknet53_vehicles.serving
import
base64_to_cv2
from
yolov3_darknet53_vehicles.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
yolov3_darknet53_vehicles.data_feed
import
reader
from
yolov3_darknet53_vehicles.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@
moduleinfo
(
@
moduleinfo
(
...
@@ -24,13 +26,12 @@ from yolov3_darknet53_vehicles.serving import base64_to_cv2
...
@@ -24,13 +26,12 @@ from yolov3_darknet53_vehicles.serving import base64_to_cv2
summary
=
summary
=
"Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53."
,
"Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
YOLOv3DarkNet53Vehicles
(
hub
.
Module
):
class
YOLOv3DarkNet53Vehicles
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
yolov3
=
hub
.
Module
(
name
=
"yolov3"
)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"yolov3_darknet53_vehicles_model"
)
self
.
directory
,
"yolov3_darknet53_vehicles_model"
)
self
.
label_names
=
self
.
yolov3
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
_set_config
()
self
.
_set_config
()
...
@@ -56,18 +57,13 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -56,18 +57,13 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
trainable
=
True
,
pretrained
=
True
,
var_prefix
=
''
,
get_prediction
=
False
):
"""
"""
Distill the Head Features, so as to perform transfer learning.
Distill the Head Features, so as to perform transfer learning.
Args:
Args:
trainable (bool): whether to set parameters trainable.
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
pretrained (bool): whether to load default pretrained model.
var_prefix (str): prefix to append to the varibles.
get_prediction (bool): whether to get prediction.
get_prediction (bool): whether to get prediction.
Returns:
Returns:
...
@@ -75,15 +71,22 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -75,15 +71,22 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
outputs(dict): the output variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
context_prog (Program): the program to execute transfer learning.
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
# backbone
backbone
=
DarkNet
(
norm_type
=
'sync_bn'
,
norm_decay
=
0.
,
depth
=
53
)
# body_feats
body_feats
=
backbone
(
image
)
# im_size
im_size
=
fluid
.
layers
.
data
(
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
# yolo_head
# yolo_head
yolo_head
=
self
.
yolov3
.
YOLOv3Head
(
yolo_head
=
YOLOv3Head
(
anchor_masks
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchor_masks
=
[[
6
,
7
,
8
],
[
3
,
4
,
5
],
[
0
,
1
,
2
]],
anchors
=
[[
8
,
9
],
[
10
,
23
],
[
19
,
15
],
[
23
,
33
],
[
40
,
25
],
anchors
=
[[
8
,
9
],
[
10
,
23
],
[
19
,
15
],
[
23
,
33
],
[
40
,
25
],
[
54
,
50
],
[
101
,
80
],
[
139
,
145
],
[
253
,
224
]],
[
54
,
50
],
[
101
,
80
],
[
139
,
145
],
[
253
,
224
]],
...
@@ -91,27 +94,57 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -91,27 +94,57 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
num_classes
=
6
,
num_classes
=
6
,
ignore_thresh
=
0.7
,
ignore_thresh
=
0.7
,
label_smooth
=
False
,
label_smooth
=
False
,
nms
=
self
.
yolov3
.
MultiClassNMS
(
nms
=
MultiClassNMS
(
background_label
=-
1
,
background_label
=-
1
,
keep_top_k
=
100
,
keep_top_k
=
100
,
nms_threshold
=
0.45
,
nms_threshold
=
0.45
,
nms_top_k
=
400
,
nms_top_k
=
400
,
normalized
=
False
,
normalized
=
False
,
score_threshold
=
0.005
))
score_threshold
=
0.005
))
backbone
=
DarkNet
(
norm_type
=
'sync_bn'
,
norm_decay
=
0.
,
depth
=
53
)
# head_features
body_feats
=
backbone
(
image
)
head_features
=
yolo_head
.
_get_outputs
(
var_prefix
=
var_prefix
if
var_prefix
else
'@HUB_{}@'
.
format
(
body_feats
,
is_train
=
trainable
)
self
.
name
)
inputs
,
outputs
,
context_prog
=
self
.
yolov3
.
context
(
body_feats
=
body_feats
,
yolo_head
=
yolo_head
,
image
=
image
,
trainable
=
trainable
,
var_prefix
=
var_prefix
,
get_prediction
=
get_prediction
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
# var_prefix
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
# name of inputs
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# name of outputs
if
get_prediction
:
bbox_out
=
yolo_head
.
get_prediction
(
head_features
,
im_size
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
bbox_out
.
name
]}
else
:
outputs
=
{
'head_features'
:
[
var_prefix
+
var
.
name
for
var
in
head_features
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
# outputs
outputs
=
{
key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
value
]
for
key
,
value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -125,6 +158,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -125,6 +158,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
object_detection
(
self
,
def
object_detection
(
self
,
...
@@ -155,10 +189,10 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -155,10 +189,10 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
bottom (float): The Y coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
save_path (str
, optional
): The path to save output images.
"""
"""
paths
=
paths
if
paths
else
list
()
paths
=
paths
if
paths
else
list
()
data_reader
=
partial
(
self
.
yolov3
.
reader
,
paths
,
images
)
data_reader
=
partial
(
reader
,
paths
,
images
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
...
@@ -172,7 +206,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -172,7 +206,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
data_out
=
self
.
cpu_predictor
.
run
(
data_out
=
self
.
cpu_predictor
.
run
(
[
image_tensor
,
im_size_tensor
])
[
image_tensor
,
im_size_tensor
])
output
=
self
.
yolov3
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -213,7 +247,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -213,7 +247,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
Run as a service.
Run as a service.
"""
"""
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
results
=
self
.
face
_detection
(
images_decode
,
**
kwargs
)
results
=
self
.
object
_detection
(
images_decode
,
**
kwargs
)
return
results
return
results
@
runnable
@
runnable
...
@@ -235,7 +269,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
...
@@ -235,7 +269,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module):
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
results
=
self
.
object
_detection
(
results
=
self
.
face
_detection
(
paths
=
[
args
.
input_path
],
paths
=
[
args
.
input_path
],
batch_size
=
args
.
batch_size
,
batch_size
=
args
.
batch_size
,
use_gpu
=
args
.
use_gpu
,
use_gpu
=
args
.
use_gpu
,
...
...
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """
    Decode a base64 string into an image with OpenCV.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for the
        decoded bytes.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer is its
    # documented replacement and does not copy the decoded bytes.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def check_dir(dir_path):
    """
    Make sure ``dir_path`` is a directory.

    A missing path is created (including parents). A regular file with that
    name is deleted and replaced by a directory. Any other existing path is
    left untouched.
    """
    if os.path.exists(dir_path):
        if not os.path.isfile(dir_path):
            # already something other than a regular file: nothing to do
            return
        os.remove(dir_path)
    os.makedirs(dir_path)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for an image from its source path.

    The base name of ``image_path`` is reused inside ``output_dir``. When the
    source name has no extension, one is chosen from ``img.format`` and, if
    that is not PNG/JPEG/BMP, from ``img.mode``; if neither matches, the name
    is returned without an extension.
    """
    stem, suffix = os.path.splitext(os.path.split(image_path)[-1])

    if suffix == '':
        format_suffixes = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
        if img.format in format_suffixes:
            suffix = format_suffixes[img.format]
        elif img.mode in ("RGB", "L"):
            suffix = ".jpg"
        elif img.mode in ("RGBA", "P"):
            suffix = '.png'

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes on an image file and save the result.

    Args:
        image_path (str): path of the image to draw on.
        data_list (list[dict]): detections; every dict provides the keys
            'left', 'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        save_name (str): path of the saved image, as computed by
            ``get_save_image_name``. An existing file at that path is
            removed before saving.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label (only for RGB images, the fills below are RGB tuples)
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textbbox'):
                # ImageDraw.textsize was removed in Pillow 10; textbbox is
                # the replacement on versions that provide it.
                box_left, box_top, box_right, box_bottom = draw.textbbox(
                    (0, 0), text)
                textsize_width = box_right - box_left
                textsize_height = box_bottom - box_top
            else:
                textsize_width, textsize_height = draw.textsize(text=text)
            # white background strip just above the top-left box corner
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    if os.path.exists(save_name):
        os.remove(save_name)

    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box to the image area.

    ``bbox[0]`` and ``bbox[2]`` are limited to ``[0, img_width]``,
    ``bbox[1]`` and ``bbox[3]`` to ``[0, img_height]``.

    Returns:
        tuple: (xmin, ymin, xmax, ymax) after clamping.
    """

    def _clamp(value, upper):
        # upper bound first, then the lower bound of 0
        return max(min(value, upper), 0.)

    return (_clamp(bbox[0], img_width), _clamp(bbox[1], img_height),
            _clamp(bbox[2], img_width), _clamp(bbox[3], img_height))
def load_label_info(file_path):
    """
    Read label names from a text file.

    Every line of the file yields one entry with surrounding whitespace
    stripped; blank lines are kept as empty strings.

    Args:
        file_path (str): path of the label file.

    Returns:
        label_names (list[str]): one entry per line of the file.
    """
    with open(file_path, 'r') as fr:
        return [line.strip() for line in fr.readlines()]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    postprocess the lod_tensor produced by the predictor

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        data_out (lod_tensor): data output of predictor; ``data_out[0]`` must
            provide ``lod`` and ``as_ndarray()``.
        score_thresh (float): the low limit of bounding box.
        label_names (list[str]): label names.
        output_dir (str): The path to store output images.
        handle_id (int): The number of images that have been handled.
        visualization (bool): Whether to save image or not.

    Returns:
        res (list[dict]): The result of vehicles detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str): The path to save output images (only present
                when ``visualization`` is True).
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    check_dir(output_dir)

    assert type(paths) is list, "type(paths) is not list."
    paths_num = len(paths)

    output = list()
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image in the overall input sequence: all paths
        # come first, followed by the in-memory images.
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
        else:
            # Offset into ``images`` by the number of images handled in
            # earlier batches; indexing from 0 picked the wrong image as
            # soon as handle_id exceeded len(paths).
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # reverse the channel order (last axis) before handing to PIL
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # in-memory images are written to disk first so that the
                # drawing helper can reopen them by path
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width
        # rows lod[index]:lod[index + 1] belong to the current image
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # a detection row is [category_id, confidence, 4 box coordinates]
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            bbox = row[2:]
            left, top, right, bottom = clip_bbox(bbox, org_img_width,
                                                 org_img_height)
            # Convert numpy scalars to built-in floats: this matches the
            # documented result types and keeps the result JSON-serializable.
            dt = {
                'label': label_names[category_id],
                'confidence': float(row[1]),
                'left': float(left),
                'top': float(top),
                'right': float(right),
                'bottom': float(bottom),
            }
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/serving.py
已删除
100644 → 0
浏览文件 @
6a477596
import
base64
import
cv2
import
numpy
as
np
def base64_to_cv2(b64str):
    """
    Decode a base64 string into an image with OpenCV.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` for the
        decoded bytes.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer is its
    # documented replacement and does not copy the decoded bytes.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
hub_module/modules/image/object_detection/yolov3_darknet53_vehicles/yolo_head.py
0 → 100644
浏览文件 @
3acfe6bd
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.regularizer
import
L2Decay
__all__
=
[
'MultiClassNMS'
,
'YOLOv3Head'
]
class MultiClassNMS(object):
    """
    Container for the multi-class NMS settings.

    The attributes carry the same names as the constructor arguments and are
    read back by ``YOLOv3Head.get_prediction`` when it calls
    ``fluid.layers.multiclass_nms``.
    """

    def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k,
                 normalized, score_threshold):
        super(MultiClassNMS, self).__init__()
        # thresholds
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        # limits on the number of candidates
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # label / coordinate conventions
        self.background_label = background_label
        self.normalized = normalized
class YOLOv3Head(object):
    """Head block for YOLOv3 network

    Builds the detection blocks and output convolutions on top of the
    backbone features (``_get_outputs``) and turns the head outputs into
    final detections (``get_prediction``).

    Args:
        norm_decay (float): weight decay for normalization layer weights
        num_classes (int): number of output classes
        ignore_thresh (float): threshold to ignore confidence loss
        label_smooth (bool): whether to use label smoothing
        anchors (list): anchors
        anchor_masks (list): anchor masks
        nms (object): an instance of `MultiClassNMS`
        weight_prefix_name (str): prefix put in front of the parameter and
            op names created by this head
    """

    # NOTE(review): the list defaults and the MultiClassNMS default are
    # created once when the class is defined and shared by every instance
    # relying on them; the code in this class only reads them.
    def __init__(self,
                 norm_decay=0.,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=True,
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 nms=MultiClassNMS(
                     background_label=-1,
                     keep_top_k=100,
                     nms_threshold=0.45,
                     nms_top_k=1000,
                     normalized=True,
                     score_threshold=0.01),
                 weight_prefix_name=''):
        self.norm_decay = norm_decay
        self.num_classes = num_classes
        # ignore_thresh and label_smooth are stored but not read by any
        # method of this class.
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        # anchor_masks must be set before _parse_anchors, which reads it
        self.anchor_masks = anchor_masks
        self._parse_anchors(anchors)
        self.nms = nms
        self.prefix_name = weight_prefix_name

    def _conv_bn(self,
                 input,
                 ch_out,
                 filter_size,
                 stride,
                 padding,
                 act='leaky',
                 is_test=True,
                 name=None):
        """
        Convolution without bias followed by batch normalization.

        Args:
            input (Variable): input of the convolution.
            ch_out (int): number of convolution filters.
            filter_size (int): filter size of the convolution.
            stride (int): stride of the convolution.
            padding (int): padding of the convolution.
            act (str): 'leaky' appends a leaky ReLU with alpha 0.1; any other
                value leaves the batch-norm output without activation.
            is_test (bool): passed to batch_norm as ``is_test``.
            name (str): base name of the created parameters; it is
                concatenated with suffixes, so ``None`` is not usable.

        Returns:
            out (Variable): output of the block.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weights"),
            bias_attr=False)

        # batch-norm parameters are named "<name>.bn.scale/.offset/.mean/.var"
        bn_name = name + ".bn"
        bn_param_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale')
        bn_bias_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset')
        out = fluid.layers.batch_norm(
            input=conv,
            act=None,
            is_test=is_test,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')

        if act == 'leaky':
            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
        return out

    def _detection_block(self, input, channel, is_test=True, name=None):
        """
        Build one detection block from ``_conv_bn`` layers.

        The block applies a 1x1 convolution with ``channel`` filters followed
        by a 3x3 convolution with ``channel * 2`` filters, twice, then a 1x1
        convolution producing ``route`` and a 3x3 convolution on ``route``
        producing ``tip``.

        Args:
            input (Variable): input of the block.
            channel (int): base number of filters, must be even.
            is_test (bool): forwarded to every ``_conv_bn`` call.
            name (str): name prefix of the layers in this block.

        Returns:
            route (Variable): output of the last 1x1 convolution.
            tip (Variable): output of the final 3x3 convolution.
        """
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2 in detection block {}" \
            .format(channel, name)

        conv = input
        for j in range(2):
            conv = self._conv_bn(
                conv,
                channel,
                filter_size=1,
                stride=1,
                padding=0,
                is_test=is_test,
                name='{}.{}.0'.format(name, j))
            conv = self._conv_bn(
                conv,
                channel * 2,
                filter_size=3,
                stride=1,
                padding=1,
                is_test=is_test,
                name='{}.{}.1'.format(name, j))
        route = self._conv_bn(
            conv,
            channel,
            filter_size=1,
            stride=1,
            padding=0,
            is_test=is_test,
            name='{}.2'.format(name))
        tip = self._conv_bn(
            route,
            channel * 2,
            filter_size=3,
            stride=1,
            padding=1,
            is_test=is_test,
            name='{}.tip'.format(name))
        return route, tip

    def _upsample(self, input, scale=2, name=None):
        """Upsample ``input`` by ``scale`` with ``fluid.layers.resize_nearest``."""
        out = fluid.layers.resize_nearest(
            input=input, scale=float(scale), name=name)
        return out

    def _parse_anchors(self, anchors):
        """
        Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors

        Sets ``self.anchors`` to the flattened list of all anchor values and
        ``self.mask_anchors`` to one flattened anchor list per entry of
        ``self.anchor_masks``.
        """
        self.anchors = []
        self.mask_anchors = []

        assert len(anchors) > 0, "ANCHORS not set."
        assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set."

        for anchor in anchors:
            assert len(anchor) == 2, "anchor {} len should be 2".format(anchor)
            self.anchors.extend(anchor)

        anchor_num = len(anchors)
        for masks in self.anchor_masks:
            # one flattened anchor list per mask group
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])

    def _get_outputs(self, input, is_train=True):
        """
        Get YOLOv3 head output

        Args:
            input (list): List of Variables, output of backbone stages
            is_train (bool): whether in train or test mode

        Returns:
            outputs (list): Variables of each output layer
        """

        outputs = []

        # get last out_layer_num blocks in reverse order
        out_layer_num = len(self.anchor_masks)
        if isinstance(input, OrderedDict):
            blocks = list(input.values())[-1:-out_layer_num - 1:-1]
        else:
            blocks = input[-1:-out_layer_num - 1:-1]
        route = None
        for i, block in enumerate(blocks):
            if i > 0:
                # every block after the first is concatenated (axis 1) with
                # the upsampled route of the previous detection block
                block = fluid.layers.concat(input=[route, block], axis=1)
            # the base channel count is halved per block: 512, 256, 128, ...
            route, tip = self._detection_block(
                block,
                channel=512 // (2**i),
                is_test=(not is_train),
                name=self.prefix_name + "yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    name=self.prefix_name +
                    "yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    regularizer=L2Decay(0.),
                    name=self.prefix_name +
                    "yolo_output.{}.conv.bias".format(i)))
            outputs.append(block_out)

            if i < len(blocks) - 1:
                # do not perform upsample in the last detection_block
                route = self._conv_bn(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not is_train),
                    name=self.prefix_name + "yolo_transition.{}".format(i))
                # upsample
                route = self._upsample(route)

        return outputs

    def get_prediction(self, outputs, im_size):
        """
        Get prediction result of YOLOv3 network

        Each head output is decoded with ``fluid.layers.yolo_box``; the
        results are concatenated and filtered by
        ``fluid.layers.multiclass_nms`` using the settings in ``self.nms``.

        Args:
            outputs (list): list of Variables, return from _get_outputs
            im_size (Variable): Variable of size([h, w]) of each image

        Returns:
            pred (Variable): The prediction result after non-max suppress.

        """
        boxes = []
        scores = []
        # the first output uses a downsample ratio of 32, halved afterwards
        downsample = 32
        for i, output in enumerate(outputs):
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
                anchors=self.mask_anchors[i],
                class_num=self.num_classes,
                conf_thresh=self.nms.score_threshold,
                downsample_ratio=downsample,
                name=self.prefix_name + "yolo_box" + str(i))
            boxes.append(box)
            # swap the last two axes of the scores before concatenation
            # NOTE(review): presumably the layout multiclass_nms expects --
            # confirm against the Paddle API reference
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

            downsample //= 2

        yolo_boxes = fluid.layers.concat(boxes, axis=1)
        yolo_scores = fluid.layers.concat(scores, axis=2)
        # the NMS op name is a fixed literal and does not use prefix_name
        pred = fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            background_label=self.nms.background_label,
            normalized=self.nms.normalized,
            name="multiclass_nms")
        return pred
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
hub
install
yolov3_mobilenet_v1_coco2017
==
1.1.0
```
## 命令行预测
```
hub run yolov3_mobilenet_v1_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
*
trainable(bool): 将参数的 trainable 属性设为 trainable;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量;
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"yolov3_mobilenet_v1_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
yolov3_mobilenet_v1_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/yolov3_mobilenet_v1_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
cv2
import
numpy
as
np
__all__
=
[
'reader'
]
def reader(paths=None, images=None):
    """
    data generator

    Images listed in ``paths`` are loaded first, followed by the in-memory
    ``images``. Every image is converted from BGR to RGB, resized to
    608 x 608, divided by 255, normalized with the mean/std constants below
    and transposed from [H, W, C] to [C, H, W].

    Args:
        paths (list[str]): paths to images. ``None`` or an empty list means
            that no file is read.
        images (list(numpy.ndarray)): data of images, shape of each is
            [H, W, C]. The channels are passed through COLOR_BGR2RGB, so BGR
            order is expected.

    Yield:
        res (list): preprocessed image and the size of original image
            (int32 array ``[height, width]``).

    Raises:
        AssertionError: if ``paths`` is not a list or contains a path that is
            not a file.
        ValueError: if a file in ``paths`` cannot be decoded as an image.
        ZeroDivisionError: if the height or width of an image is 0.
    """
    img_list = []
    if paths:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread reports an unreadable/undecodable file by
                # returning None instead of raising; fail with a clear
                # message rather than an AttributeError on ``.astype``.
                raise ValueError(
                    "Failed to decode image file: {}".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    # Loop-invariant preprocessing constants.
    target_size = 608
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]

    for im in img_list:
        # im_size: size of the original image, reported next to the tensor
        im_shape = im.shape
        im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32)

        # decode image
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # resize image: both sides are scaled independently to target_size,
        # so the aspect ratio is not preserved
        if min(im_shape[0:2]) == 0:
            raise ZeroDivisionError('min size of image is 0')
        im_scale_x = float(target_size) / float(im_shape[1])
        im_scale_y = float(target_size) / float(im_shape[0])
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=cv2.INTER_CUBIC)  # same value as the former literal 2

        # normalize image
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        # permute [H, W, C] -> [C, H, W]
        im = im.transpose((2, 0, 1))

        yield [im, im_size]
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3_mobilenet_v1_coco2017.mobilenet_v1
import
MobileNet
from
yolov3_mobilenet_v1_coco2017.mobilenet_v1
import
MobileNet
from
yolov3_mobilenet_v1_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
yolov3_mobilenet_v1_coco2017.data_feed
import
reader
from
yolov3_mobilenet_v1_coco2017.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@
moduleinfo
(
@
moduleinfo
(
name
=
"yolov3_mobilenet_v1_coco2017"
,
name
=
"yolov3_mobilenet_v1_coco2017"
,
version
=
"1.
0
.0"
,
version
=
"1.
1
.0"
,
type
=
"
cv
/object_detection"
,
type
=
"
CV
/object_detection"
,
summary
=
summary
=
"Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017."
,
"Baidu's YOLOv3 model for object detection with backbone MobileNet_V1, trained with dataset COCO2017."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
YOLOv3MobileNet
v1
(
hub
.
Module
):
class
YOLOv3MobileNet
V1Coco2017
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
yolov3
=
hub
.
Module
(
name
=
"yolov3"
)
# default pretrained model of YOLOv3, the shape of input image tensor is (3, 608, 608)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"yolov3_mobilenet_v1_model"
)
self
.
directory
,
"yolov3_mobilenet_v1_model"
)
self
.
label_names
=
self
.
yolov3
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
image
=
None
self
.
im_size
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
"""
predictor config setting
predictor config setting
.
"""
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
...
@@ -62,39 +57,84 @@ class YOLOv3MobileNetv1(hub.Module):
...
@@ -62,39 +57,84 @@ class YOLOv3MobileNetv1(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
num_classes
=
80
,
trainable
=
True
,
pretrained
=
True
):
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model
.
outputs(dict): the output variables
.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
# yolo_head
yolo_head
=
self
.
yolov3
.
YOLOv3Head
(
num_classes
=
num_classes
)
# backbone
# backbone
backbone
=
MobileNet
(
backbone
=
MobileNet
(
norm_type
=
'sync_bn'
,
norm_type
=
'sync_bn'
,
norm_decay
=
0.
,
norm_decay
=
0.
,
conv_group_scale
=
1
,
conv_group_scale
=
1
,
with_extra_blocks
=
False
)
with_extra_blocks
=
False
)
# body_feats
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
inputs
,
outputs
,
context_prog
=
self
.
yolov3
.
context
(
# im_size
body_feats
=
body_feats
,
im_size
=
fluid
.
layers
.
data
(
yolo_head
=
yolo_head
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
image
=
image
,
# yolo_head
trainable
=
trainable
,
yolo_head
=
YOLOv3Head
(
num_classes
=
80
)
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
))
# head_features
head_features
=
yolo_head
.
_get_outputs
(
body_feats
,
is_train
=
trainable
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup_program
)
# var_prefix
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
# name of inputs
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# name of outputs
if
get_prediction
:
bbox_out
=
yolo_head
.
get_prediction
(
head_features
,
im_size
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
bbox_out
.
name
]}
else
:
outputs
=
{
'head_features'
:
[
var_prefix
+
var
.
name
for
var
in
head_features
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
startup_program
,
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
# outputs
outputs
=
{
key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
value
]
for
key
,
value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -108,34 +148,41 @@ class YOLOv3MobileNetv1(hub.Module):
...
@@ -108,34 +148,41 @@ class YOLOv3MobileNetv1(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
[]
,
paths
=
None
,
images
=
None
,
images
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
data_reader
=
partial
(
self
.
yolov3
.
reader
,
paths
,
images
)
paths
=
paths
if
paths
else
list
()
data_reader
=
partial
(
reader
,
paths
,
images
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
...
@@ -149,7 +196,7 @@ class YOLOv3MobileNetv1(hub.Module):
...
@@ -149,7 +196,7 @@ class YOLOv3MobileNetv1(hub.Module):
data_out
=
self
.
cpu_predictor
.
run
(
data_out
=
self
.
cpu_predictor
.
run
(
[
image_tensor
,
im_size_tensor
])
[
image_tensor
,
im_size_tensor
])
output
=
self
.
yolov3
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -158,54 +205,49 @@ class YOLOv3MobileNetv1(hub.Module):
...
@@ -158,54 +205,49 @@ class YOLOv3MobileNetv1(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
+=
output
res
.
extend
(
output
)
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
face_detection
(
images_decode
,
**
kwargs
)
return
results
self
.
arg_input_group
.
add_argument
(
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -215,17 +257,50 @@ class YOLOv3MobileNetv1(hub.Module):
...
@@ -215,17 +257,50 @@ class YOLOv3MobileNetv1(hub.Module):
description
=
description
=
"Run configuration for controlling module behavior, not required."
)
"Run configuration for controlling module behavior, not required."
)
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_data
=
self
.
check_input_data
(
args
)
results
=
self
.
face_detection
(
if
len
(
input_data
)
==
0
:
paths
=
[
args
.
input_path
],
self
.
parser
.
print_help
()
batch_size
=
args
.
batch_size
,
exit
(
1
)
use_gpu
=
args
.
use_gpu
,
else
:
output_dir
=
args
.
output_dir
,
for
image_path
in
input_data
:
visualization
=
args
.
visualization
,
if
not
os
.
path
.
exists
(
image_path
):
score_thresh
=
args
.
score_thresh
)
raise
RuntimeError
(
return
results
"File %s or %s is not exist."
%
image_path
)
return
self
.
object_detection
(
def
add_module_config_arg
(
self
):
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
"""
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def
add_module_input_arg
(
self
):
"""
Add the command input options.
"""
self
.
arg_input_group
.
add_argument
(
'--input_path'
,
type
=
str
,
help
=
"path to image."
)
self
.
arg_input_group
.
add_argument
(
'--batch_size'
,
type
=
ast
.
literal_eval
,
default
=
1
,
help
=
"batch size."
)
self
.
arg_input_group
.
add_argument
(
'--score_thresh'
,
type
=
ast
.
literal_eval
,
default
=
0.5
,
help
=
"threshold for object detecion."
)
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string into an OpenCV image.

    Args:
        b64str (str): base64 text of an encoded image file (e.g. JPEG/PNG bytes).

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported way to view raw bytes as a uint8 array (and avoids a copy).
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def check_dir(dir_path):
    """Make sure ``dir_path`` exists as a directory.

    A regular file occupying ``dir_path`` is deleted and replaced by a
    directory; an existing directory is left untouched.

    Args:
        dir_path (str): path that must end up being a directory.
    """
    # A plain file in the way is removed first so the directory can be created.
    if os.path.isfile(dir_path):
        os.remove(dir_path)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
def get_save_image_name(img, output_dir, image_path):
    """Build the output file path for an image.

    The file name is the base name of ``image_path``. When that name has no
    extension, one is chosen from ``img.format`` and, failing that, from
    ``img.mode``; if neither is recognised the name is left without extension.

    Args:
        img: object exposing ``format`` and ``mode`` attributes.
        output_dir (str): directory the file will be saved in.
        image_path (str): source path (or bare name) of the image.

    Returns:
        str: ``output_dir`` joined with the file name, including the extension.
    """
    stem, suffix = os.path.splitext(os.path.basename(image_path))
    if not suffix:
        suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
        suffix_by_mode = {'RGB': '.jpg', 'L': '.jpg', 'RGBA': '.png', 'P': '.png'}
        if img.format in suffix_by_format:
            suffix = suffix_by_format[img.format]
        else:
            # Unknown format: fall back to the colour mode, else keep no suffix.
            suffix = suffix_by_mode.get(img.mode, '')
    return os.path.join(output_dir, stem) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """Draw detection boxes on an image file and save the annotated copy.

    Args:
        image_path (str): path of the image to open and annotate.
        data_list (list[dict]): detections; each provides 'left', 'right',
            'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image.
    """
    image = Image.open(image_path)
    canvas = ImageDraw.Draw(image)
    for detection in data_list:
        left = detection['left']
        right = detection['right']
        top = detection['top']
        bottom = detection['bottom']

        # Closed red polyline around the box.
        outline = [(left, top), (left, bottom), (right, bottom), (right, top), (left, top)]
        canvas.line(outline, width=2, fill='red')

        # The label is only drawn on RGB images.
        if image.mode != 'RGB':
            continue
        caption = detection['label'] + ": %.2f%%" % (100 * detection['confidence'])
        # NOTE(review): ImageDraw.textsize is not available in Pillow >= 10 —
        # confirm the Pillow version this module is pinned to.
        caption_width, caption_height = canvas.textsize(text=caption)
        canvas.rectangle(
            xy=(left, top - (caption_height + 5), left + caption_width + 10, top),
            fill=(255, 255, 255))
        canvas.text(xy=(left, top - 15), text=caption, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    # Replace any previous result with the same name.
    if os.path.exists(save_name):
        os.remove(save_name)
    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """Clamp a box to the image area.

    Args:
        bbox: indexable with at least four entries, read as
            (xmin, ymin, xmax, ymax).
        img_width: upper bound applied to the x coordinates.
        img_height: upper bound applied to the y coordinates.

    Returns:
        tuple: (xmin, ymin, xmax, ymax), each limited to [0, bound].
    """
    upper_bounds = (img_width, img_height, img_width, img_height)
    return tuple(
        max(min(bbox[position], bound), 0.)
        for position, bound in enumerate(upper_bounds))
def load_label_info(file_path):
    """Read label names from a text file, one label per line.

    Args:
        file_path (str): path of the label file.

    Returns:
        list[str]: every line of the file with surrounding whitespace stripped
        (blank lines yield empty strings).
    """
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file]
def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True):
    """
    Convert the predictor output of one batch into per-image detection results.

    Inputs are consumed in the order "all paths first, then all in-memory
    images"; ``handle_id`` is the number of inputs handled by earlier batches,
    so the i-th image of this batch is input number ``handle_id + i``.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C].
        data_out (list): output of the predictor; the first element must expose
            ``lod`` and ``as_ndarray()``.
        score_thresh (float): detections scoring below this value are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): The directory to store output images.
        handle_id (int): The number of images that have been handled.
        visualization (bool): Whether to save annotated images or not.

    Returns:
        res (list[dict]): one dict per image of the batch, with keys:
            data (list[dict]): detections, each with keys 'left', 'top',
                'right', 'bottom', 'label', 'confidence':
                left: The X coordinate of the upper left corner of the bounding box;
                top: The Y coordinate of the upper left corner of the bounding box;
                right: The X coordinate of the lower right corner of the bounding box;
                bottom: The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence: The confidence of detection result.
            save_path (str): path of the annotated image; only present when
                ``visualization`` is true.
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    check_dir(output_dir)

    assert type(paths) is list, "type(paths) is not list."
    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image in the whole input stream. Using it for both
        # lookups keeps in-memory images aligned with their detections in every
        # batch, not only in the batch that still contains paths.
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
        else:
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel axis before handing the array to PIL.
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no source file; write one so the
                # drawing step can reopen it by path.
                org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format(global_index))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width

        # Rows lod[index]:lod[index + 1] belong to this image.
        for row in results[lod[index]:lod[index + 1]]:
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            dt = {'label': label_names[category_id], 'confidence': row[1]}
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(row[2:], org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/yolov3_mobilenet_v1_coco2017/yolo_head.py
0 → 100644
浏览文件 @
3acfe6bd
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.regularizer
import
L2Decay
__all__
=
[
'MultiClassNMS'
,
'YOLOv3Head'
]
class MultiClassNMS(object):
    """Container for multi-class non-max-suppression settings.

    The constructor only stores its arguments as attributes of the same name.

    Args:
        background_label: stored as ``background_label``.
        keep_top_k: stored as ``keep_top_k``.
        nms_threshold: stored as ``nms_threshold``.
        nms_top_k: stored as ``nms_top_k``.
        normalized: stored as ``normalized``.
        score_threshold: stored as ``score_threshold``.
    """

    # __op__ = fluid.layers.multiclass_nms

    def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k, normalized, score_threshold):
        super(MultiClassNMS, self).__init__()
        self.background_label = background_label
        self.keep_top_k = keep_top_k
        self.nms_threshold = nms_threshold
        self.nms_top_k = nms_top_k
        self.normalized = normalized
        self.score_threshold = score_threshold
class YOLOv3Head(object):
    """Head block for YOLOv3 network

    Builds the detection blocks and output convolutions on top of backbone
    feature maps (`_get_outputs`) and turns those outputs into NMS-filtered
    predictions (`get_prediction`).

    Args:
        norm_decay (float): weight decay for normalization layer weights
        num_classes (int): number of output classes
        ignore_thresh (float): threshold to ignore confidence loss
            (stored on the instance; not read by the methods of this class)
        label_smooth (bool): whether to use label smoothing
            (stored on the instance; not read by the methods of this class)
        anchors (list): anchors, a list of 2-element lists
        anchor_masks (list): anchor masks, one list of anchor indices per
            output layer
        nms (object): an instance of `MultiClassNMS`
        weight_prefix_name (str): prefix prepended to the names of the
            parameters and ops created by this head
    """

    # NOTE(review): the list defaults and the default MultiClassNMS instance
    # are created once, when the class body is executed, and are shared by
    # every instance that relies on them. Nothing in this class mutates them.
    def __init__(self,
                 norm_decay=0.,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=True,
                 anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                          [59, 119], [116, 90], [156, 198], [373, 326]],
                 anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                 nms=MultiClassNMS(
                     background_label=-1,
                     keep_top_k=100,
                     nms_threshold=0.45,
                     nms_top_k=1000,
                     normalized=True,
                     score_threshold=0.01),
                 weight_prefix_name=''):
        self.norm_decay = norm_decay
        self.num_classes = num_classes
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        # anchor_masks must be set before _parse_anchors, which reads it.
        self.anchor_masks = anchor_masks
        self._parse_anchors(anchors)
        self.nms = nms
        self.prefix_name = weight_prefix_name

    def _conv_bn(self,
                 input,
                 ch_out,
                 filter_size,
                 stride,
                 padding,
                 act='leaky',
                 is_test=True,
                 name=None):
        """Bias-free conv2d followed by batch_norm and an optional leaky ReLU.

        Args:
            input: input of the convolution.
            ch_out (int): number of convolution filters.
            filter_size, stride, padding: forwarded to `conv2d`.
            act (str): when equal to 'leaky', `leaky_relu` with alpha=0.1 is
                applied to the batch-norm output; any other value applies no
                activation.
            is_test (bool): forwarded to `batch_norm`.
            name (str): base name of the created parameters; it is
                concatenated with suffixes, so it must be a string even though
                the default is None.

        Returns:
            The output of the last applied layer.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weights"),
            bias_attr=False)

        bn_name = name + ".bn"
        # Scale and offset are both regularised with L2Decay(norm_decay).
        bn_param_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale')
        bn_bias_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset')
        out = fluid.layers.batch_norm(
            input=conv,
            act=None,
            is_test=is_test,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')

        if act == 'leaky':
            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
        return out

    def _detection_block(self, input, channel, is_test=True, name=None):
        """Stack the conv-bn layers of one detection block.

        Two rounds of (1x1 conv with `channel` filters, 3x3 conv with
        `channel * 2` filters) are followed by a 1x1 conv (`route`) and a
        3x3 conv on top of it (`tip`).

        Args:
            input: input of the block.
            channel (int): base filter count; must be even.
            is_test (bool): forwarded to every `_conv_bn` call.
            name (str): name prefix for the layers of the block.

        Returns:
            tuple: (route, tip).
        """
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2 in detection block {}" \
            .format(channel, name)

        conv = input
        for j in range(2):
            conv = self._conv_bn(
                conv,
                channel,
                filter_size=1,
                stride=1,
                padding=0,
                is_test=is_test,
                name='{}.{}.0'.format(name, j))
            conv = self._conv_bn(
                conv,
                channel * 2,
                filter_size=3,
                stride=1,
                padding=1,
                is_test=is_test,
                name='{}.{}.1'.format(name, j))
        route = self._conv_bn(
            conv,
            channel,
            filter_size=1,
            stride=1,
            padding=0,
            is_test=is_test,
            name='{}.2'.format(name))
        tip = self._conv_bn(
            route,
            channel * 2,
            filter_size=3,
            stride=1,
            padding=1,
            is_test=is_test,
            name='{}.tip'.format(name))
        return route, tip

    def _upsample(self, input, scale=2, name=None):
        """Upsample `input` with `resize_nearest`, passing `scale` as a float."""
        out = fluid.layers.resize_nearest(
            input=input, scale=float(scale), name=name)
        return out

    def _parse_anchors(self, anchors):
        """
        Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors

        Sets `self.anchors` to the flattened values of all anchors and
        `self.mask_anchors` to one flattened list per entry of
        `self.anchor_masks`, holding the anchors that entry selects.

        Args:
            anchors (list): non-empty list of 2-element anchors.
        """
        self.anchors = []
        self.mask_anchors = []

        assert len(anchors) > 0, "ANCHORS not set."
        assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set."

        for anchor in anchors:
            assert len(anchor) == 2, "anchor {} len should be 2".format(anchor)
            self.anchors.extend(anchor)

        anchor_num = len(anchors)
        for masks in self.anchor_masks:
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])

    def _get_outputs(self, input, is_train=True):
        """
        Get YOLOv3 head output

        Args:
            input (list): List of Variables, output of backbone stages
                (an OrderedDict is also accepted; its values are used)
            is_train (bool): whether in train or test mode

        Returns:
            outputs (list): Variables of each output layer
        """

        outputs = []

        # get last out_layer_num blocks in reverse order
        out_layer_num = len(self.anchor_masks)
        if isinstance(input, OrderedDict):
            blocks = list(input.values())[-1:-out_layer_num - 1:-1]
        else:
            blocks = input[-1:-out_layer_num - 1:-1]
        route = None
        for i, block in enumerate(blocks):
            if i > 0:
                # every block after the first is concatenated (axis=1) with
                # the upsampled route of the previous block
                block = fluid.layers.concat(input=[route, block], axis=1)
            # the channel count halves with each block: 512, 256, 128, ...
            route, tip = self._detection_block(
                block,
                channel=512 // (2**i),
                is_test=(not is_train),
                name=self.prefix_name + "yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    name=self.prefix_name +
                    "yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    regularizer=L2Decay(0.),
                    name=self.prefix_name +
                    "yolo_output.{}.conv.bias".format(i)))
            outputs.append(block_out)

            if i < len(blocks) - 1:
                # do not perform upsample in the last detection_block
                route = self._conv_bn(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not is_train),
                    name=self.prefix_name + "yolo_transition.{}".format(i))
                # upsample
                route = self._upsample(route)

        return outputs

    def get_prediction(self, outputs, im_size):
        """
        Get prediction result of YOLOv3 network

        Args:
            outputs (list): list of Variables, return from _get_outputs
            im_size (Variable): Variable of size([h, w]) of each image

        Returns:
            pred (Variable): The prediction result after non-max suppress.
        """

        boxes = []
        scores = []
        # downsample ratio passed to yolo_box: 32 for the first output,
        # halved for each following one
        downsample = 32
        for i, output in enumerate(outputs):
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
                anchors=self.mask_anchors[i],
                class_num=self.num_classes,
                conf_thresh=self.nms.score_threshold,
                downsample_ratio=downsample,
                name=self.prefix_name + "yolo_box" + str(i))
            boxes.append(box)
            # swap the last two axes of the scores before concatenation
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

            downsample //= 2

        yolo_boxes = fluid.layers.concat(boxes, axis=1)
        yolo_scores = fluid.layers.concat(scores, axis=2)
        # every NMS setting comes from the MultiClassNMS instance in self.nms
        pred = fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            background_label=self.nms.background_label,
            normalized=self.nms.normalized,
            name="multiclass_nms")
        return pred
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/README.md
0 → 100644
浏览文件 @
3acfe6bd
```
shell
$
hub
install
yolov3_resnet34_coco2017
==
1.1.0
```
## 命令行预测
```
hub run yolov3_resnet34_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
提取头部特征,用于迁移学习。
**参数**
*
trainable(bool): 将参数的trainable属性设为trainable;
*
pretrained (bool): 是否加载预训练模型;
*
get
\_
prediction (bool): 是否执行预测。
**返回**
*
inputs (dict): 模型的输入,keys 包括 'image', 'im
\_
size',相应的取值为:
*
image (Variable): 图像变量
*
im
\_
size (Variable): 图片的尺寸
*
outputs (dict): 模型的输出。如果 get
\_
prediction 为 False,输出 'head
\_
features',否则输出 'bbox
\_
out'。
*
context
\_
prog (Program): 用于迁移学习的 Program.
```
python
def
object_detection
(
paths
=
None
,
images
=
None
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
visualization
=
True
)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
*
paths (list
\[
str
\]
): 图片的路径;
*
images (list
\[
numpy.ndarray
\]
): 图片数据,ndarray.shape 为
\[
H, W, C
\]
,BGR格式;
*
batch
\_
size (int): batch 的大小;
*
use
\_
gpu (bool): 是否使用 GPU;
*
score
\_
thresh (float): 识别置信度的阈值;
*
visualization (bool): 是否将识别结果保存为图片文件;
*
output
\_
dir (str): 图片的保存路径,默认设为 detection
\_
result;
**返回**
*
res (list
\[
dict
\]
): 识别结果的列表,列表中每一个元素为 dict,各字段为:
*
data (list): 检测结果,list的每一个元素为 dict,各字段为:
*
confidence (float): 识别的置信度;
*
label (str): 标签;
*
left (int): 边界框的左上角x坐标;
*
top (int): 边界框的左上角y坐标;
*
right (int): 边界框的右下角x坐标;
*
bottom (int): 边界框的右下角y坐标;
*
save
\_
path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```
python
def
save_inference_model
(
dirname
,
model_filename
=
None
,
params_filename
=
None
,
combined
=
True
)
```
将模型保存到指定路径。
**参数**
*
dirname: 存放模型的目录名称
*
model
\_
filename: 模型文件名称,默认为
\_\_
model
\_\_
*
params
\_
filename: 参数文件名称,默认为
\_\_
params
\_\_
(仅当
`combined`
为True时生效)
*
combined: 是否将参数保存到统一的一个文件中。
## 代码示例
```
python
import
paddlehub
as
hub
import
cv2
object_detector
=
hub
.
Module
(
name
=
"yolov3_resnet34_coco2017"
)
result
=
object_detector
.
object_detection
(
images
=
[
cv2
.
imread
(
'/PATH/TO/IMAGE'
)])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```
shell
$
hub serving start
-m
yolov3_resnet34_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:**
如使用GPU预测,则需要在启动服务之前设置CUDA
\_
VISIBLE
\_
DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```
python
import
requests
import
json
import
cv2
import
base64
def
cv2_to_base64
(
image
):
data
=
cv2
.
imencode
(
'.jpg'
,
image
)[
1
]
return
base64
.
b64encode
(
data
.
tostring
()).
decode
(
'utf8'
)
# 发送HTTP请求
data
=
{
'images'
:[
cv2_to_base64
(
cv2
.
imread
(
"/PATH/TO/IMAGE"
))]}
headers
=
{
"Content-type"
:
"application/json"
}
url
=
"http://127.0.0.1:8866/predict/yolov3_resnet34_coco2017"
r
=
requests
.
post
(
url
=
url
,
headers
=
headers
,
data
=
json
.
dumps
(
data
))
# 打印预测结果
print
(
r
.
json
()[
"results"
])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/data_feed.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
import
os
import
cv2
import
numpy
as
np
__all__
=
[
'reader'
]
def reader(paths=[], images=None):
    """
    data generator

    Images read from ``paths`` come first, followed by ``images``. Each one is
    converted from BGR to RGB, resized to 608 x 608, scaled to [0, 1],
    normalised with a fixed per-channel mean/std and reordered from
    [H, W, C] to [C, H, W].

    Args:
        paths (list[str]): paths to images. The default list is never mutated.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]

    Yield:
        res (list): preprocessed image and the size of original image
            (``[height, width]`` as an int32 array).

    Raises:
        AssertionError: if ``paths`` is non-empty but not a list, or one of
            its entries is not an existing file.
        ZeroDivisionError: if the height or width of an image is 0.
    """
    img_list = []
    if paths:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img_list.append(cv2.imread(img_path).astype('float32'))
    if images is not None:
        img_list.extend(images)

    # Loop-invariant settings, built once instead of once per image.
    target_size = 608
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]

    for im in img_list:
        # im_size: original height and width, taken before resizing
        im_shape = im.shape
        im_size = np.array([im_shape[0], im_shape[1]], dtype=np.int32)

        # decode image
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        # resize image: independent x/y scales, so the aspect ratio is not kept
        if float(np.min(im_shape[0:2])) == 0:
            raise ZeroDivisionError('min size of image is 0')
        im_scale_x = float(target_size) / float(im_shape[1])
        im_scale_y = float(target_size) / float(im_shape[0])
        # NOTE(review): interpolation=2 is presumably cv2.INTER_CUBIC — confirm.
        im = cv2.resize(
            im, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2)

        # normalize image
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        # permute: [H, W, C] -> [H, C, W] -> [C, H, W]
        im = np.swapaxes(im, 1, 2)
        im = np.swapaxes(im, 1, 0)

        yield [im, im_size]
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/module.py
浏览文件 @
3acfe6bd
# coding=utf-8
# coding=utf-8
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
ast
import
ast
import
argparse
import
argparse
import
os
from
functools
import
partial
from
functools
import
partial
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
from
paddlehub.module.module
import
moduleinfo
,
runnable
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddle.fluid.core
import
PaddleTensor
,
AnalysisConfig
,
create_paddle_predictor
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.module.module
import
moduleinfo
,
runnable
,
serving
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
yolov3_resnet34_coco2017.resnet
import
ResNet
from
yolov3_resnet34_coco2017.resnet
import
ResNet
from
yolov3_resnet34_coco2017.processor
import
load_label_info
,
postprocess
,
base64_to_cv2
from
yolov3_resnet34_coco2017.data_feed
import
reader
from
yolov3_resnet34_coco2017.yolo_head
import
MultiClassNMS
,
YOLOv3Head
@
moduleinfo
(
@
moduleinfo
(
name
=
"yolov3_resnet34_coco2017"
,
name
=
"yolov3_resnet34_coco2017"
,
version
=
"1.
0
.0"
,
version
=
"1.
1
.0"
,
type
=
"
cv
/object_detection"
,
type
=
"
CV
/object_detection"
,
summary
=
summary
=
"Baidu's YOLOv3 model for object detection with backbone ResNet34, trained with dataset
COCO
2017."
,
"Baidu's YOLOv3 model for object detection with backbone ResNet34, trained with dataset
coco
2017."
,
author
=
"paddlepaddle"
,
author
=
"paddlepaddle"
,
author_email
=
"
paddle-dev@baidu.com
"
)
author_email
=
""
)
class
YOLOv3
ResNet34
(
hub
.
Module
):
class
YOLOv3
DarkNet53Coco2017
(
hub
.
Module
):
def
_initialize
(
self
):
def
_initialize
(
self
):
self
.
yolov3
=
hub
.
Module
(
name
=
"yolov3"
)
# default pretrained model of YOLOv3_ResNet34, the shape of input image tensor is (3, 608, 608)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"yolov3_resnet34_model"
)
self
.
directory
,
"yolov3_resnet34_model"
)
self
.
label_names
=
self
.
yolov3
.
load_label_info
(
self
.
label_names
=
load_label_info
(
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
os
.
path
.
join
(
self
.
directory
,
"label_file.txt"
))
self
.
infer_prog
=
None
self
.
image
=
None
self
.
im_size
=
None
self
.
bbox_out
=
None
self
.
_set_config
()
self
.
_set_config
()
def
_set_config
(
self
):
def
_set_config
(
self
):
"""
"""
predictor config setting
predictor config setting
.
"""
"""
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
=
AnalysisConfig
(
self
.
default_pretrained_model_path
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_glog_info
()
...
@@ -62,23 +57,27 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -62,23 +57,27 @@ class YOLOv3ResNet34(hub.Module):
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
self
.
gpu_predictor
=
create_paddle_predictor
(
gpu_config
)
def
context
(
self
,
num_classes
=
80
,
trainable
=
True
,
pretrained
=
True
):
def
context
(
self
,
trainable
=
True
,
pretrained
=
True
,
get_prediction
=
False
):
"""Distill the Head Features, so as to perform transfer learning.
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
Returns:
:type trainable: bool
inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model
.
outputs(dict): the output variables
.
:type pretrained: bool
context_prog (Program): the program to execute transfer learning.
"""
"""
wrapped
_prog
=
fluid
.
Program
()
context
_prog
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
startup_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
wrapped
_prog
,
startup_program
):
with
fluid
.
program_guard
(
context
_prog
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
unique_name
.
guard
():
# image
# image
image
=
fluid
.
layers
.
data
(
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
# yolo_head
yolo_head
=
self
.
yolov3
.
YOLOv3Head
(
num_classes
=
num_classes
)
# backbone
# backbone
backbone
=
ResNet
(
backbone
=
ResNet
(
norm_type
=
'bn'
,
norm_type
=
'bn'
,
...
@@ -87,16 +86,57 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -87,16 +86,57 @@ class YOLOv3ResNet34(hub.Module):
norm_decay
=
0.
,
norm_decay
=
0.
,
depth
=
34
,
depth
=
34
,
feature_maps
=
[
3
,
4
,
5
])
feature_maps
=
[
3
,
4
,
5
])
# body_feats
body_feats
=
backbone
(
image
)
body_feats
=
backbone
(
image
)
inputs
,
outputs
,
context_prog
=
self
.
yolov3
.
context
(
# im_size
body_feats
=
body_feats
,
im_size
=
fluid
.
layers
.
data
(
yolo_head
=
yolo_head
,
name
=
'im_size'
,
shape
=
[
2
],
dtype
=
'int32'
)
image
=
image
,
# yolo_head
trainable
=
trainable
,
yolo_head
=
YOLOv3Head
(
num_classes
=
80
)
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
))
# head_features
head_features
=
yolo_head
.
_get_outputs
(
body_feats
,
is_train
=
trainable
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
# var_prefix
var_prefix
=
'@HUB_{}@'
.
format
(
self
.
name
)
# name of inputs
inputs
=
{
'image'
:
var_prefix
+
image
.
name
,
'im_size'
:
var_prefix
+
im_size
.
name
}
# name of outputs
if
get_prediction
:
bbox_out
=
yolo_head
.
get_prediction
(
head_features
,
im_size
)
outputs
=
{
'bbox_out'
:
[
var_prefix
+
bbox_out
.
name
]}
else
:
outputs
=
{
'head_features'
:
[
var_prefix
+
var
.
name
for
var
in
head_features
]
}
# add_vars_prefix
add_vars_prefix
(
context_prog
,
var_prefix
)
add_vars_prefix
(
fluid
.
default_startup_program
(),
var_prefix
)
# inputs
inputs
=
{
key
:
context_prog
.
global_block
().
vars
[
value
]
for
key
,
value
in
inputs
.
items
()
}
# outputs
outputs
=
{
key
:
[
context_prog
.
global_block
().
vars
[
varname
]
for
varname
in
value
]
for
key
,
value
in
outputs
.
items
()
}
# trainable
for
param
in
context_prog
.
global_block
().
iter_parameters
():
param
.
trainable
=
trainable
# pretrained
if
pretrained
:
if
pretrained
:
def
_if_exist
(
var
):
def
_if_exist
(
var
):
...
@@ -110,34 +150,41 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -110,34 +150,41 @@ class YOLOv3ResNet34(hub.Module):
predicate
=
_if_exist
)
predicate
=
_if_exist
)
else
:
else
:
exe
.
run
(
startup_program
)
exe
.
run
(
startup_program
)
return
inputs
,
outputs
,
context_prog
return
inputs
,
outputs
,
context_prog
def
object_detection
(
self
,
def
object_detection
(
self
,
paths
=
[]
,
paths
=
None
,
images
=
None
,
images
=
None
,
use_gpu
=
False
,
batch_size
=
1
,
batch_size
=
1
,
use_gpu
=
False
,
output_dir
=
'detection_result'
,
output_dir
=
'detection_result'
,
score_thresh
=
0.5
,
score_thresh
=
0.5
,
visualization
=
True
):
visualization
=
True
):
"""API of Object Detection.
"""API of Object Detection.
:param paths: the path of images.
Args:
:type paths: list, each element is correspond to the path of an image.
paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C]
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray
batch_size (int): batch size.
:param use_gpu: whether to use gpu or not.
use_gpu (bool): Whether to use gpu.
:type use_gpu: bool
output_dir (str): The path to store output images.
:param batch_size: bathc size.
visualization (bool): Whether to save image or not.
:type batch_size: int
score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str
Returns:
:param score_thresh: the threshold of detection confidence.
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images.
left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
"""
data_reader
=
partial
(
self
.
yolov3
.
reader
,
paths
,
images
)
paths
=
paths
if
paths
else
list
()
data_reader
=
partial
(
reader
,
paths
,
images
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
batch_reader
=
fluid
.
io
.
batch
(
data_reader
,
batch_size
=
batch_size
)
res
=
[]
res
=
[]
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
for
iter_id
,
feed_data
in
enumerate
(
batch_reader
()):
...
@@ -150,7 +197,8 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -150,7 +197,8 @@ class YOLOv3ResNet34(hub.Module):
else
:
else
:
data_out
=
self
.
cpu_predictor
.
run
(
data_out
=
self
.
cpu_predictor
.
run
(
[
image_tensor
,
im_size_tensor
])
[
image_tensor
,
im_size_tensor
])
output
=
self
.
yolov3
.
postprocess
(
output
=
postprocess
(
paths
=
paths
,
paths
=
paths
,
images
=
images
,
images
=
images
,
data_out
=
data_out
,
data_out
=
data_out
,
...
@@ -159,55 +207,49 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -159,55 +207,49 @@ class YOLOv3ResNet34(hub.Module):
output_dir
=
output_dir
,
output_dir
=
output_dir
,
handle_id
=
iter_id
*
batch_size
,
handle_id
=
iter_id
*
batch_size
,
visualization
=
visualization
)
visualization
=
visualization
)
res
.
extend
(
output
)
res
+=
output
return
res
return
res
def
add_module_config_arg
(
self
):
def
save_inference_model
(
self
,
"""
dirname
,
Add the command config options
model_filename
=
None
,
"""
params_filename
=
None
,
self
.
arg_config_group
.
add_argument
(
combined
=
True
):
'--use_gpu'
,
if
combined
:
type
=
ast
.
literal_eval
,
model_filename
=
"__model__"
if
not
model_filename
else
model_filename
default
=
False
,
params_filename
=
"__params__"
if
not
params_filename
else
params_filename
help
=
"whether use GPU or not"
)
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
self
.
arg_config_group
.
add_argument
(
program
,
feeded_var_names
,
target_vars
=
fluid
.
io
.
load_inference_model
(
'--batch_size'
,
dirname
=
self
.
default_pretrained_model_path
,
executor
=
exe
)
type
=
int
,
default
=
1
,
help
=
"batch size for prediction"
)
def
add_module_input_arg
(
self
):
fluid
.
io
.
save_inference_model
(
dirname
=
dirname
,
main_program
=
program
,
executor
=
exe
,
feeded_var_names
=
feeded_var_names
,
target_vars
=
target_vars
,
model_filename
=
model_filename
,
params_filename
=
params_filename
)
@
serving
def
serving_method
(
self
,
images
,
**
kwargs
):
"""
"""
Add the command input options
Run as a service.
"""
"""
self
.
arg_input_group
.
add_argument
(
images_decode
=
[
base64_to_cv2
(
image
)
for
image
in
images
]
'--input_path'
,
type
=
str
,
default
=
None
,
help
=
"input data"
)
results
=
self
.
object_detection
(
images_decode
,
**
kwargs
)
return
results
self
.
arg_input_group
.
add_argument
(
'--input_file'
,
type
=
str
,
default
=
None
,
help
=
"file contain input data"
)
def
check_input_data
(
self
,
args
):
input_data
=
[]
if
args
.
input_path
:
input_data
=
[
args
.
input_path
]
elif
args
.
input_file
:
if
not
os
.
path
.
exists
(
args
.
input_file
):
raise
RuntimeError
(
"File %s is not exist."
%
args
.
input_file
)
else
:
input_data
=
txt_parser
.
parse
(
args
.
input_file
,
use_strip
=
True
)
return
input_data
@
runnable
@
runnable
def
run_cmd
(
self
,
argvs
):
def
run_cmd
(
self
,
argvs
):
"""
Run as a command.
"""
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
description
=
"Run the {}"
.
format
(
self
.
name
),
description
=
"Run the {}
module.
"
.
format
(
self
.
name
),
prog
=
"hub run {}"
.
format
(
self
.
name
),
prog
=
'hub run {}'
.
format
(
self
.
name
),
usage
=
'%(prog)s'
,
usage
=
'%(prog)s'
,
add_help
=
True
)
add_help
=
True
)
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
self
.
arg_input_group
=
self
.
parser
.
add_argument_group
(
...
@@ -217,17 +259,50 @@ class YOLOv3ResNet34(hub.Module):
...
@@ -217,17 +259,50 @@ class YOLOv3ResNet34(hub.Module):
description
=
description
=
"Run configuration for controlling module behavior, not required."
)
"Run configuration for controlling module behavior, not required."
)
self
.
add_module_config_arg
()
self
.
add_module_config_arg
()
self
.
add_module_input_arg
()
self
.
add_module_input_arg
()
args
=
self
.
parser
.
parse_args
(
argvs
)
args
=
self
.
parser
.
parse_args
(
argvs
)
input_data
=
self
.
check_input_data
(
args
)
results
=
self
.
face_detection
(
if
len
(
input_data
)
==
0
:
paths
=
[
args
.
input_path
],
self
.
parser
.
print_help
()
batch_size
=
args
.
batch_size
,
exit
(
1
)
use_gpu
=
args
.
use_gpu
,
else
:
output_dir
=
args
.
output_dir
,
for
image_path
in
input_data
:
visualization
=
args
.
visualization
,
if
not
os
.
path
.
exists
(
image_path
):
score_thresh
=
args
.
score_thresh
)
raise
RuntimeError
(
return
results
"File %s or %s is not exist."
%
image_path
)
return
self
.
object_detection
(
def
add_module_config_arg
(
self
):
paths
=
input_data
,
use_gpu
=
args
.
use_gpu
,
batch_size
=
args
.
batch_size
)
"""
Add the command config options.
"""
self
.
arg_config_group
.
add_argument
(
'--use_gpu'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether use GPU or not"
)
self
.
arg_config_group
.
add_argument
(
'--output_dir'
,
type
=
str
,
default
=
'detection_result'
,
help
=
"The directory to save output images."
)
self
.
arg_config_group
.
add_argument
(
'--visualization'
,
type
=
ast
.
literal_eval
,
default
=
False
,
help
=
"whether to save output as images."
)
def
add_module_input_arg
(
self
):
"""
Add the command input options.
"""
self
.
arg_input_group
.
add_argument
(
'--input_path'
,
type
=
str
,
help
=
"path to image."
)
self
.
arg_input_group
.
add_argument
(
'--batch_size'
,
type
=
ast
.
literal_eval
,
default
=
1
,
help
=
"batch size."
)
self
.
arg_input_group
.
add_argument
(
'--score_thresh'
,
type
=
ast
.
literal_eval
,
default
=
0.5
,
help
=
"threshold for object detecion."
)
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/processor.py
0 → 100644
浏览文件 @
3acfe6bd
# coding=utf-8
import
base64
import
os
import
cv2
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
__all__
=
[
'base64_to_cv2'
,
'load_label_info'
,
'postprocess'
]
def base64_to_cv2(b64str):
    """Decode a base64-encoded image file into an OpenCV image.

    Args:
        b64str (str): base64 text of an encoded image file (the bytes of a
            .jpg/.png/... file, not raw pixel data).

    Returns:
        The value returned by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.
        NOTE(review): per the OpenCV documentation this is a 3-channel BGR
        array, or None when the buffer cannot be decoded - callers should
        confirm they handle the None case.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring in binary mode is deprecated; np.frombuffer is the
    # documented replacement and avoids copying the decoded bytes.
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def check_dir(dir_path):
    """Make sure ``dir_path`` exists as a directory.

    A regular file occupying ``dir_path`` is deleted and replaced by a
    directory; an existing directory is left untouched.

    Args:
        dir_path (str): path of the directory to ensure.
    """
    # A plain file in the way is removed first, so the creation step below
    # handles both the "missing" and the "was a file" cases.
    if os.path.isfile(dir_path):
        os.remove(dir_path)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
def get_save_image_name(img, output_dir, image_path):
    """Build the output file path for an image from its source path.

    The file name (without directories) of ``image_path`` is reused inside
    ``output_dir``. When the source name has no extension, one is chosen from
    ``img.format`` and, failing that, from ``img.mode``; if neither is
    recognised the returned path has no extension.

    Args:
        img: object exposing ``format`` and ``mode`` attributes
            (presumably a PIL image - confirm against callers).
        output_dir (str): directory the image will be saved to.
        image_path (str): source path or bare name of the image.

    Returns:
        str: ``output_dir`` joined with the image name plus extension.
    """
    stem, suffix = os.path.splitext(os.path.basename(image_path))
    if suffix == '':
        suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
        if img.format in suffix_by_format:
            suffix = suffix_by_format[img.format]
        elif img.mode in ("RGB", "L"):
            suffix = ".jpg"
        elif img.mode in ("RGBA", "P"):
            suffix = '.png'
    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """Draw detection boxes (and labels) on an image and save the result.

    Args:
        image_path (str): path of the image to open and draw on.
        data_list (list[dict]): detections; each dict provides the keys
            'left', 'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved, annotated image.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label (only RGB images get a text label)
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textbbox'):
                # ImageDraw.textsize was removed in Pillow 10; textbbox is
                # its replacement and returns (left, top, right, bottom).
                box_left, box_top, box_right, box_bottom = draw.textbbox(
                    (0, 0), text)
                textsize_width = box_right - box_left
                textsize_height = box_bottom - box_top
            else:
                # Older Pillow releases without textbbox.
                textsize_width, textsize_height = draw.textsize(text=text)
            # White background strip above the box, then the label text.
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    # Replace any previous output with the same name.
    if os.path.exists(save_name):
        os.remove(save_name)

    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """Clamp a bounding box to the image area.

    Args:
        bbox: indexable holding (xmin, ymin, xmax, ymax) at positions 0-3.
        img_width: upper limit applied to the x coordinates.
        img_height: upper limit applied to the y coordinates.

    Returns:
        tuple: (xmin, ymin, xmax, ymax), each limited to [0, width/height].
    """
    # Upper bound for each coordinate, in the same order as bbox.
    upper_bounds = (img_width, img_height, img_width, img_height)
    return tuple(
        max(min(bbox[position], bound), 0.)
        for position, bound in enumerate(upper_bounds))
def load_label_info(file_path):
    """Read label names from a text file, one label per line.

    Every line is kept (blank lines become empty strings), so the position
    of each label in the returned list matches its line number in the file.

    Args:
        file_path (str): path of the label file.

    Returns:
        list[str]: the lines of the file with surrounding whitespace removed.
    """
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Postprocess the lod_tensor produced by the predictor for one batch.

    The batch is assumed to contain the inputs in the order "all paths
    first, then all images", which is the order the rest of this function
    (and ``handle_id``) relies on.

    Args:
        paths (list[str]): The paths of images.
        images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
        data_out (lod_tensor): data output of predictor.
        score_thresh (float): the low limit of bounding box.
        label_names (list[str]): label names.
        output_dir (str): The path to store output images.
        handle_id (int): The number of images that have been handled.
        visualization (bool): Whether to save image or not.

    Returns:
        res (list[dict]): The result of object detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str): The path to save output images (only when visualization is on).
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    check_dir(output_dir)

    assert isinstance(paths, list), "type(paths) is not list."
    paths_num = len(paths)

    output = list()
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this sample among all inputs (paths, then images).
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            # Only the size is needed here; close the file right away.
            with Image.open(org_img_path) as org_img:
                org_img_width, org_img_height = org_img.size
        else:
            # Index into `images` must account for every sample handled in
            # earlier batches, not just the paths of the current batch;
            # otherwise later all-image batches would reuse images[0:].
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel axis (presumably BGR -> RGB for PIL).
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                org_img_path = get_save_image_name(
                    org_img, output_dir,
                    'image_numpy_{}'.format((handle_id + index)))
                org_img.save(org_img_path)
            org_img_width, org_img_height = org_img.size

        # Rows of the flat result array that belong to this sample.
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # Expected row layout: [category_id, confidence, x1, y1, x2, y2].
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            bbox = row[2:]
            xmin, ymin, xmax, ymax = clip_bbox(bbox, org_img_width,
                                               org_img_height)
            # Convert numpy scalars to plain floats so the result matches
            # the documented types and can be JSON-serialized.
            dt = {
                'label': label_names[category_id],
                'confidence': float(row[1]),
                'left': float(xmin),
                'top': float(ymin),
                'right': float(xmax),
                'bottom': float(ymax),
            }
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
hub_module/modules/image/object_detection/yolov3_resnet34_coco2017/yolo_head.py
0 → 100644
浏览文件 @
3acfe6bd
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
OrderedDict
from
paddle
import
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.regularizer
import
L2Decay
__all__
=
[
'MultiClassNMS'
,
'YOLOv3Head'
]
class MultiClassNMS(object):
    """Plain holder for multi-class NMS settings.

    The values are stored unchanged as attributes of the same name.

    Args:
        background_label: value stored as ``background_label``.
        keep_top_k: value stored as ``keep_top_k``.
        nms_threshold: value stored as ``nms_threshold``.
        nms_top_k: value stored as ``nms_top_k``.
        normalized: value stored as ``normalized``.
        score_threshold: value stored as ``score_threshold``.
    """

    # __op__ = fluid.layers.multiclass_nms

    def __init__(self, background_label, keep_top_k, nms_threshold, nms_top_k,
                 normalized, score_threshold):
        super(MultiClassNMS, self).__init__()
        # Thresholds
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        # Candidate limits
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # Remaining options
        self.normalized = normalized
        self.background_label = background_label
class YOLOv3Head(object):
    """Head block for YOLOv3 network

    Args:
        norm_decay (float): weight decay for normalization layer weights
        num_classes (int): number of output classes
        ignore_thresh (float): threshold to ignore confidence loss
        label_smooth (bool): whether to use label smoothing
        anchors (list): anchors, a list of [width, height] pairs. ``None``
            selects the built-in 9-anchor default.
        anchor_masks (list): anchor masks, one list of anchor indices per
            output layer. ``None`` selects the built-in default.
        nms (object): an instance of `MultiClassNMS`. ``None`` creates a
            fresh instance with the default settings.
        weight_prefix_name (str): prefix prepended to parameter names.
    """

    def __init__(self,
                 norm_decay=0.,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=True,
                 anchors=None,
                 anchor_masks=None,
                 nms=None,
                 weight_prefix_name=''):
        # Defaults are built per call: list/object defaults in the signature
        # would be shared by every instance, so mutating one head's
        # anchor_masks or nms would silently change all the others.
        if anchors is None:
            anchors = [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                       [59, 119], [116, 90], [156, 198], [373, 326]]
        if anchor_masks is None:
            anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        if nms is None:
            nms = MultiClassNMS(
                background_label=-1,
                keep_top_k=100,
                nms_threshold=0.45,
                nms_top_k=1000,
                normalized=True,
                score_threshold=0.01)
        self.norm_decay = norm_decay
        self.num_classes = num_classes
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        self.anchor_masks = anchor_masks
        # Must run after anchor_masks is set; fills anchors / mask_anchors.
        self._parse_anchors(anchors)
        self.nms = nms
        self.prefix_name = weight_prefix_name

    def _conv_bn(self,
                 input,
                 ch_out,
                 filter_size,
                 stride,
                 padding,
                 act='leaky',
                 is_test=True,
                 name=None):
        """Apply conv2d (no bias) + batch_norm, optionally leaky_relu.

        Args:
            input: input variable of the convolution.
            ch_out (int): number of convolution filters.
            filter_size (int): convolution filter size.
            stride (int): convolution stride.
            padding (int): convolution padding.
            act (str): 'leaky' appends leaky_relu(alpha=0.1); any other
                value leaves the batch-norm output unactivated.
            is_test (bool): forwarded to batch_norm.
            name (str): base name for the parameters; a string is required
                because it is concatenated with fixed suffixes.

        Returns:
            The output variable.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=ch_out,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            act=None,
            param_attr=ParamAttr(name=name + ".conv.weights"),
            bias_attr=False)

        bn_name = name + ".bn"
        bn_param_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.scale')
        bn_bias_attr = ParamAttr(
            regularizer=L2Decay(self.norm_decay), name=bn_name + '.offset')
        out = fluid.layers.batch_norm(
            input=conv,
            act=None,
            is_test=is_test,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '.mean',
            moving_variance_name=bn_name + '.var')

        if act == 'leaky':
            out = fluid.layers.leaky_relu(x=out, alpha=0.1)
        return out

    def _detection_block(self, input, channel, is_test=True, name=None):
        """Build one detection block.

        Two rounds of (1x1 conv to ``channel``, 3x3 conv to ``channel * 2``)
        are followed by a 1x1 conv producing ``route`` and a 3x3 conv on
        ``route`` producing ``tip``.

        Args:
            input: input variable.
            channel (int): base channel count; must be even.
            is_test (bool): forwarded to every conv-bn layer.
            name (str): name prefix for the layers of this block.

        Returns:
            tuple: (route, tip) variables.
        """
        assert channel % 2 == 0, \
            "channel {} cannot be divided by 2 in detection block {}" \
            .format(channel, name)

        conv = input
        for j in range(2):
            conv = self._conv_bn(
                conv,
                channel,
                filter_size=1,
                stride=1,
                padding=0,
                is_test=is_test,
                name='{}.{}.0'.format(name, j))
            conv = self._conv_bn(
                conv,
                channel * 2,
                filter_size=3,
                stride=1,
                padding=1,
                is_test=is_test,
                name='{}.{}.1'.format(name, j))
        route = self._conv_bn(
            conv,
            channel,
            filter_size=1,
            stride=1,
            padding=0,
            is_test=is_test,
            name='{}.2'.format(name))
        tip = self._conv_bn(
            route,
            channel * 2,
            filter_size=3,
            stride=1,
            padding=1,
            is_test=is_test,
            name='{}.tip'.format(name))
        return route, tip

    def _upsample(self, input, scale=2, name=None):
        """Upsample ``input`` by ``scale`` with nearest-neighbour resize."""
        out = fluid.layers.resize_nearest(
            input=input, scale=float(scale), name=name)
        return out

    def _parse_anchors(self, anchors):
        """
        Check ANCHORS/ANCHOR_MASKS in config and parse mask_anchors

        Sets ``self.anchors`` to the flattened anchor values and
        ``self.mask_anchors`` to one flattened anchor list per mask group.
        """
        self.anchors = []
        self.mask_anchors = []

        assert len(anchors) > 0, "ANCHORS not set."
        assert len(self.anchor_masks) > 0, "ANCHOR_MASKS not set."

        for anchor in anchors:
            assert len(anchor) == 2, "anchor {} len should be 2".format(anchor)
            self.anchors.extend(anchor)

        anchor_num = len(anchors)
        for masks in self.anchor_masks:
            self.mask_anchors.append([])
            for mask in masks:
                assert mask < anchor_num, "anchor mask index overflow"
                self.mask_anchors[-1].extend(anchors[mask])

    def _get_outputs(self, input, is_train=True):
        """
        Get YOLOv3 head output

        Args:
            input (list): List of Variables, output of backbone stages
            is_train (bool): whether in train or test mode

        Returns:
            outputs (list): Variables of each output layer
        """
        outputs = []

        # get last out_layer_num blocks in reverse order
        out_layer_num = len(self.anchor_masks)
        if isinstance(input, OrderedDict):
            blocks = list(input.values())[-1:-out_layer_num - 1:-1]
        else:
            blocks = input[-1:-out_layer_num - 1:-1]

        route = None
        for i, block in enumerate(blocks):
            if i > 0:  # perform concat in first 2 detection_block
                block = fluid.layers.concat(input=[route, block], axis=1)
            route, tip = self._detection_block(
                block,
                channel=512 // (2**i),
                is_test=(not is_train),
                name=self.prefix_name + "yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    name=self.prefix_name +
                    "yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    regularizer=L2Decay(0.),
                    name=self.prefix_name +
                    "yolo_output.{}.conv.bias".format(i)))
            outputs.append(block_out)

            if i < len(blocks) - 1:
                # do not perform upsample in the last detection_block
                route = self._conv_bn(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=(not is_train),
                    name=self.prefix_name + "yolo_transition.{}".format(i))
                # upsample
                route = self._upsample(route)

        return outputs

    def get_prediction(self, outputs, im_size):
        """
        Get prediction result of YOLOv3 network

        Args:
            outputs (list): list of Variables, return from _get_outputs
            im_size (Variable): Variable of size([h, w]) of each image

        Returns:
            pred (Variable): The prediction result after non-max suppress.
        """
        boxes = []
        scores = []
        # Downsample ratio of the first output layer; halved for each
        # following layer.
        downsample = 32
        for i, output in enumerate(outputs):
            box, score = fluid.layers.yolo_box(
                x=output,
                img_size=im_size,
                anchors=self.mask_anchors[i],
                class_num=self.num_classes,
                conf_thresh=self.nms.score_threshold,
                downsample_ratio=downsample,
                name=self.prefix_name + "yolo_box" + str(i))
            boxes.append(box)
            scores.append(fluid.layers.transpose(score, perm=[0, 2, 1]))

            downsample //= 2

        yolo_boxes = fluid.layers.concat(boxes, axis=1)
        yolo_scores = fluid.layers.concat(scores, axis=2)
        pred = fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            background_label=self.nms.background_label,
            normalized=self.nms.normalized,
            name="multiclass_nms")
        return pred
hub_module/tests/unittests/test_faster_rcnn_resnet50.py
浏览文件 @
3acfe6bd
...
@@ -7,6 +7,8 @@ import numpy as np
...
@@ -7,6 +7,8 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestFasterRCNNResNet50
(
unittest
.
TestCase
):
class
TestFasterRCNNResNet50
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
...
@@ -29,13 +31,8 @@ class TestFasterRCNNResNet50(unittest.TestCase):
...
@@ -29,13 +31,8 @@ class TestFasterRCNNResNet50(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
input_image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
faster_rcnn_r50
.
context
(
inputs
,
outputs
,
program
=
self
.
faster_rcnn_r50
.
context
(
input_image
=
input_image
,
pretrained
=
False
,
trainable
=
True
,
phase
=
'train'
)
pretrained
=
False
,
trainable
=
True
,
phase
=
'train'
)
image
=
inputs
[
'image'
]
image
=
inputs
[
'image'
]
im_info
=
inputs
[
'im_info'
]
im_info
=
inputs
[
'im_info'
]
im_shape
=
inputs
[
'im_shape'
]
im_shape
=
inputs
[
'im_shape'
]
...
@@ -49,19 +46,18 @@ class TestFasterRCNNResNet50(unittest.TestCase):
...
@@ -49,19 +46,18 @@ class TestFasterRCNNResNet50(unittest.TestCase):
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
detection_results
=
self
.
faster_rcnn_r50
.
object_detection
(
detection_results
=
self
.
faster_rcnn_r50
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
2
,
batch_size
=
2
,
use_gpu
=
Fals
e
,
use_gpu
=
Tru
e
,
score_thresh
=
0.5
)
score_thresh
=
0.5
)
print
(
detection_results
)
print
(
detection_results
)
...
...
hub_module/tests/unittests/test_faster_rcnn_resnet50_fpn.py
浏览文件 @
3acfe6bd
...
@@ -7,6 +7,8 @@ import numpy as np
...
@@ -7,6 +7,8 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestFasterRCNNR50FPN
(
unittest
.
TestCase
):
class
TestFasterRCNNR50FPN
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
...
@@ -14,6 +16,7 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
...
@@ -14,6 +16,7 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
"""Prepare the environment once before execution of all tests."""
"""Prepare the environment once before execution of all tests."""
self
.
faster_rcnn_r50_fpn
=
hub
.
Module
(
self
.
faster_rcnn_r50_fpn
=
hub
.
Module
(
name
=
"faster_rcnn_resnet50_fpn_coco2017"
)
name
=
"faster_rcnn_resnet50_fpn_coco2017"
)
# self.faster_rcnn_r50_fpn = hub.Module(directory='')
@
classmethod
@
classmethod
def
tearDownClass
(
self
):
def
tearDownClass
(
self
):
...
@@ -30,13 +33,8 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
...
@@ -30,13 +33,8 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
input_image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
800
,
1333
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
faster_rcnn_r50_fpn
.
context
(
inputs
,
outputs
,
program
=
self
.
faster_rcnn_r50_fpn
.
context
(
input_image
=
input_image
,
pretrained
=
False
,
trainable
=
True
,
phase
=
'train'
)
pretrained
=
False
,
trainable
=
True
,
phase
=
'train'
)
image
=
inputs
[
'image'
]
image
=
inputs
[
'image'
]
im_info
=
inputs
[
'im_info'
]
im_info
=
inputs
[
'im_info'
]
im_shape
=
inputs
[
'im_shape'
]
im_shape
=
inputs
[
'im_shape'
]
...
@@ -50,17 +48,16 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
...
@@ -50,17 +48,16 @@ class TestFasterRCNNR50FPN(unittest.TestCase):
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
detection_results
=
self
.
faster_rcnn_r50_fpn
.
object_detection
(
detection_results
=
self
.
faster_rcnn_r50_fpn
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
2
,
batch_size
=
2
,
use_gpu
=
False
,
use_gpu
=
False
,
score_thresh
=
0.5
)
score_thresh
=
0.5
)
...
...
hub_module/tests/unittests/test_retinanet_resnet50_fpn.py
浏览文件 @
3acfe6bd
...
@@ -29,30 +29,26 @@ class TestRetinaNet(unittest.TestCase):
...
@@ -29,30 +29,26 @@ class TestRetinaNet(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
retinanet
.
context
(
inputs
,
outputs
,
program
=
self
.
retinanet
.
context
(
input_image
=
image
,
pretrained
=
False
,
trainable
=
True
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
im_info
=
inputs
[
"im_info"
]
im_info
=
inputs
[
"im_info"
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
image_dir
=
'../image_dataset/
object_detection
'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
detection_results
=
self
.
retinanet
.
object_detection
(
detection_results
=
self
.
retinanet
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebras
,
batch_size
=
2
)
batch_size
=
2
,
use_gpu
=
True
)
print
(
detection_results
)
print
(
detection_results
)
...
...
hub_module/tests/unittests/test_ssd_mobilenet_v1_pascal.py
浏览文件 @
3acfe6bd
...
@@ -7,6 +7,8 @@ import numpy as np
...
@@ -7,6 +7,8 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/pascal_voc/'
class
TestSSDMobileNet
(
unittest
.
TestCase
):
class
TestSSDMobileNet
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
...
@@ -29,22 +31,21 @@ class TestSSDMobileNet(unittest.TestCase):
...
@@ -29,22 +31,21 @@ class TestSSDMobileNet(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
300
,
300
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
head_features
=
outputs
[
"body_feats"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
body_features
=
outputs
[
'body_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/pascal_voc/'
airplane
=
cv2
.
imread
(
os
.
path
.
join
(
airplane
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'airplane.jpg'
)).
astype
(
'float32'
)
image_dir
,
'airplane.jpg'
)).
astype
(
'float32'
)
airplanes
=
np
.
array
([
airplane
,
airplane
])
airplanes
=
[
airplane
,
airplane
]
detection_results
=
self
.
ssd
.
object_detection
(
detection_results
=
self
.
ssd
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'bird.jpg'
),
os
.
path
.
join
(
image_dir
,
'bird.jpg'
),
...
@@ -54,7 +55,7 @@ class TestSSDMobileNet(unittest.TestCase):
...
@@ -54,7 +55,7 @@ class TestSSDMobileNet(unittest.TestCase):
os
.
path
.
join
(
image_dir
,
'train.jpg'
)
os
.
path
.
join
(
image_dir
,
'train.jpg'
)
],
],
images
=
airplanes
,
images
=
airplanes
,
batch_size
=
2
)
batch_size
=
1
)
print
(
detection_results
)
print
(
detection_results
)
...
...
hub_module/tests/unittests/test_ssd_vgg16_300_coco2017.py
浏览文件 @
3acfe6bd
...
@@ -7,8 +7,10 @@ import numpy as np
...
@@ -7,8 +7,10 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestSSDVGG
(
unittest
.
TestCase
):
class
TestSSDVGG300
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
self
):
def
setUpClass
(
self
):
"""Prepare the environment once before execution of all tests."""
"""Prepare the environment once before execution of all tests."""
...
@@ -29,28 +31,27 @@ class TestSSDVGG(unittest.TestCase):
...
@@ -29,28 +31,27 @@ class TestSSDVGG(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
300
,
300
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
head_features
=
outputs
[
"body_feats"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
body_features
=
outputs
[
'body_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
## only paths
## only paths
print
(
print
(
self
.
ssd
.
object_detection
(
self
.
ssd
.
object_detection
(
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
)]))
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
)]))
## only images
## only images
print
(
self
.
ssd
.
object_detection
(
images
=
zebra
))
print
(
self
.
ssd
.
object_detection
(
images
=
zebra
s
))
## paths and images
## paths and images
print
(
print
(
self
.
ssd
.
object_detection
(
self
.
ssd
.
object_detection
(
...
@@ -59,14 +60,14 @@ class TestSSDVGG(unittest.TestCase):
...
@@ -59,14 +60,14 @@ class TestSSDVGG(unittest.TestCase):
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
2
,
batch_size
=
2
,
score_thresh
=
0.5
))
score_thresh
=
0.5
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
suite
=
unittest
.
TestSuite
()
suite
=
unittest
.
TestSuite
()
suite
.
addTest
(
TestSSDVGG
(
'test_object_detection'
))
suite
.
addTest
(
TestSSDVGG
300
(
'test_object_detection'
))
suite
.
addTest
(
TestSSDVGG
(
'test_context'
))
suite
.
addTest
(
TestSSDVGG
300
(
'test_context'
))
runner
=
unittest
.
TextTestRunner
(
verbosity
=
2
)
runner
=
unittest
.
TextTestRunner
(
verbosity
=
2
)
runner
.
run
(
suite
)
runner
.
run
(
suite
)
hub_module/tests/unittests/test_ssd_vgg16_512_coco2017.py
浏览文件 @
3acfe6bd
...
@@ -7,8 +7,10 @@ import numpy as np
...
@@ -7,8 +7,10 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestSSDVGG
(
unittest
.
TestCase
):
class
TestSSDVGG512
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
self
):
def
setUpClass
(
self
):
"""Prepare the environment once before execution of all tests."""
"""Prepare the environment once before execution of all tests."""
...
@@ -29,28 +31,27 @@ class TestSSDVGG(unittest.TestCase):
...
@@ -29,28 +31,27 @@ class TestSSDVGG(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
512
,
512
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
inputs
,
outputs
,
program
=
self
.
ssd
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
head_features
=
outputs
[
"body_feats"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
body_features
=
outputs
[
'body_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
## only paths
## only paths
print
(
print
(
self
.
ssd
.
object_detection
(
self
.
ssd
.
object_detection
(
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
)]))
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
)]))
## only images
## only images
print
(
self
.
ssd
.
object_detection
(
images
=
zebra
))
print
(
self
.
ssd
.
object_detection
(
images
=
zebra
s
))
## paths and images
## paths and images
print
(
print
(
self
.
ssd
.
object_detection
(
self
.
ssd
.
object_detection
(
...
@@ -59,14 +60,14 @@ class TestSSDVGG(unittest.TestCase):
...
@@ -59,14 +60,14 @@ class TestSSDVGG(unittest.TestCase):
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
2
,
batch_size
=
2
,
score_thresh
=
0.5
))
score_thresh
=
0.5
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
suite
=
unittest
.
TestSuite
()
suite
=
unittest
.
TestSuite
()
suite
.
addTest
(
TestSSDVGG
(
'test_object_detection'
))
suite
.
addTest
(
TestSSDVGG
512
(
'test_object_detection'
))
suite
.
addTest
(
TestSSDVGG
(
'test_context'
))
suite
.
addTest
(
TestSSDVGG
512
(
'test_context'
))
runner
=
unittest
.
TextTestRunner
(
verbosity
=
2
)
runner
=
unittest
.
TextTestRunner
(
verbosity
=
2
)
runner
.
run
(
suite
)
runner
.
run
(
suite
)
hub_module/tests/unittests/test_yolov3_darknet53.py
→
hub_module/tests/unittests/test_yolov3_darknet53
_coco2017
.py
浏览文件 @
3acfe6bd
...
@@ -31,15 +31,15 @@ class TestYoloV3DarkNet53(unittest.TestCase):
...
@@ -31,15 +31,15 @@ class TestYoloV3DarkNet53(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
im_size
=
inputs
[
"im_size"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
head_features
=
outputs
[
'head_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
...
...
hub_module/tests/unittests/test_yolov3_mobilenet_v1.py
→
hub_module/tests/unittests/test_yolov3_mobilenet_v1
_coco2017
.py
浏览文件 @
3acfe6bd
...
@@ -7,6 +7,8 @@ import numpy as np
...
@@ -7,6 +7,8 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestYoloV3MoobileNetV1
(
unittest
.
TestCase
):
class
TestYoloV3MoobileNetV1
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
...
@@ -29,30 +31,29 @@ class TestYoloV3MoobileNetV1(unittest.TestCase):
...
@@ -29,30 +31,29 @@ class TestYoloV3MoobileNetV1(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
im_size
=
inputs
[
"im_size"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
head_features
=
outputs
[
'head_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
detection_results
=
self
.
yolov3
.
object_detection
(
detection_results
=
self
.
yolov3
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
1
)
batch_size
=
2
)
print
(
detection_results
)
print
(
detection_results
)
...
...
hub_module/tests/unittests/test_yolov3_resnet34.py
→
hub_module/tests/unittests/test_yolov3_resnet34
_coco2017
.py
浏览文件 @
3acfe6bd
...
@@ -7,6 +7,8 @@ import numpy as np
...
@@ -7,6 +7,8 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddlehub
as
hub
import
paddlehub
as
hub
image_dir
=
'../image_dataset/object_detection/'
class
TestYoloV3ResNet34
(
unittest
.
TestCase
):
class
TestYoloV3ResNet34
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
...
@@ -29,29 +31,28 @@ class TestYoloV3ResNet34(unittest.TestCase):
...
@@ -29,29 +31,28 @@ class TestYoloV3ResNet34(unittest.TestCase):
def
test_context
(
self
):
def
test_context
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image
=
fluid
.
layers
.
data
(
get_prediction
=
True
name
=
'image'
,
shape
=
[
3
,
608
,
608
],
dtype
=
'float32'
)
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
inputs
,
outputs
,
program
=
self
.
yolov3
.
context
(
input_image
=
image
,
pretrained
=
True
,
trainable
=
True
,
get_prediction
=
get_prediction
)
pretrained
=
False
,
trainable
=
True
,
param_prefix
=
'BaiDu'
)
image
=
inputs
[
"image"
]
image
=
inputs
[
"image"
]
im_size
=
inputs
[
"im_size"
]
im_size
=
inputs
[
"im_size"
]
if
get_prediction
:
bbox_out
=
outputs
[
'bbox_out'
]
else
:
head_features
=
outputs
[
'head_features'
]
def
test_object_detection
(
self
):
def
test_object_detection
(
self
):
with
fluid
.
program_guard
(
self
.
test_prog
):
with
fluid
.
program_guard
(
self
.
test_prog
):
image_dir
=
'../image_dataset/'
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
zebra
=
cv2
.
imread
(
os
.
path
.
join
(
image_dir
,
'zebra.jpg'
)).
astype
(
'float32'
)
'zebra.jpg'
)).
astype
(
'float32'
)
zebra
=
np
.
array
([
zebra
,
zebra
])
zebra
s
=
[
zebra
,
zebra
]
detection_results
=
self
.
yolov3
.
object_detection
(
detection_results
=
self
.
yolov3
.
object_detection
(
paths
=
[
paths
=
[
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'cat.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'dog.jpg'
),
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
os
.
path
.
join
(
image_dir
,
'giraffe.jpg'
)
],
],
images
=
zebra
,
images
=
zebra
s
,
batch_size
=
2
)
batch_size
=
2
)
print
(
detection_results
)
print
(
detection_results
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录