未验证 提交 3acfe6bd 编写于 作者: D David Lin 提交者: GitHub

update 11 object detection modules. (#555)

* update 11 object detection modules.
上级 6a477596
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn.data_feed import test_reader, padding_minibatch
from faster_rcnn.processor import load_label_info, postprocess
from faster_rcnn.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss, TwoFCHead
from faster_rcnn.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead, FPNRPNHead
from faster_rcnn.bbox_assigner import BBoxAssigner
from faster_rcnn.roi_extractor import RoIAlign, FPNRoIAlign
@moduleinfo(
    name="faster_rcnn",
    version="1.0.0",
    type="cv/object_detection",
    summary="Baidu's Faster R-CNN model for object detection.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class FasterRCNNBase(hub.Module):
    """PaddleHub module bundling the shared Faster R-CNN components.

    ``_initialize`` re-exports the imported data-feed helpers, post-processing
    helpers and head/assigner/extractor classes as instance attributes, and
    ``context`` builds the detection head on top of caller-supplied backbone
    features.
    """

    def _initialize(self):
        """Expose the imported helpers and component classes as attributes."""
        # data_feed
        self.test_reader = test_reader
        self.padding_minibatch = padding_minibatch
        # processor
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        # bbox_head
        self.MultiClassNMS = MultiClassNMS
        self.TwoFCHead = TwoFCHead
        self.BBoxHead = BBoxHead
        self.SmoothL1Loss = SmoothL1Loss
        # rpn_head
        self.AnchorGenerator = AnchorGenerator
        self.RPNTargetAssign = RPNTargetAssign
        self.GenerateProposals = GenerateProposals
        self.RPNHead = RPNHead
        self.FPNRPNHead = FPNRPNHead
        # bbox_assigner
        self.BBoxAssigner = BBoxAssigner
        # roi_extractor
        self.RoIAlign = RoIAlign
        self.FPNRoIAlign = FPNRoIAlign

    def context(self, body_feats, fpn, rpn_head, roi_extractor, bbox_head,
                bbox_assigner, image, trainable, var_prefix, phase):
        """Distill the Head Features, so as to perform transfer learning.

        :param body_feats: feature map of image classification to distill feature map.
        :type body_feats: mapping of feature name to Variable (``keys()`` is used)
        :param fpn: Feature Pyramid Network, or None to use the last body feature.
        :type fpn: <class 'FPN' object>
        :param rpn_head: Head of Region Proposal Network.
        :type rpn_head: <class 'RPNHead' object> or <class 'FPNRPNHead' object>
        :param roi_extractor: RoI feature extractor. Without FPN only its
            ``pooled_height``, ``pooled_width``, ``spatial_scale`` and
            ``sampling_ratio`` attributes are read; with FPN it is called with
            ``head_inputs``, ``rois`` and ``spatial_scale``.
        :type roi_extractor: <class 'RoIAlign' object> or <class 'FPNRoIAlign' object>
        :param bbox_head: Head of Bounding Box.
        :type bbox_head: <class 'BBoxHead' object>
        :param bbox_assigner: Parameters of fluid.layers.generate_proposal_labels.
        :type bbox_assigner: <class 'BBoxAssigner' object>
        :param image: image tensor.
        :type image: <class 'paddle.fluid.framework.Variable'>
        :param trainable: whether to set parameters trainable.
        :type trainable: bool
        :param var_prefix: the prefix of variables in faster_rcnn
        :type var_prefix: str
        :param phase: Optional Choice: 'predict', 'train'
        :type phase: str
        :return: ``(inputs, outputs, context_prog)``. ``inputs`` and ``outputs``
            map names to the prefixed variables of ``context_prog``; the
            'generate_proposal_labels' output (train phase) is a list.
        :raises ValueError: if ``phase`` is neither 'train' nor 'predict'.
        """
        # Reject an unsupported phase before any variable is added to the
        # caller's program; otherwise the failure would only surface later as
        # an unbound `inputs` after the program had already been modified.
        if phase not in ('train', 'predict'):
            raise ValueError(
                "phase must be 'train' or 'predict', but got {!r}".format(
                    phase))

        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_info = fluid.layers.data(
                name='im_info', shape=[3], dtype='float32', lod_level=0)
            im_shape = fluid.layers.data(
                name='im_shape', shape=[3], dtype='float32', lod_level=0)
            body_feat_names = list(body_feats.keys())
            # fpn
            if fpn is not None:
                body_feats, spatial_scale = fpn.get_output(body_feats)
            # rpn_head: RPNHead
            rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
            # train
            if phase == 'train':
                gt_bbox = fluid.layers.data(
                    name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                is_crowd = fluid.layers.data(
                    name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                gt_class = fluid.layers.data(
                    name='gt_class', shape=[1], dtype='int32', lod_level=1)
                rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                # bbox_assigner: BBoxAssigner
                outs = fluid.layers.generate_proposal_labels(
                    rpn_rois=rois,
                    gt_classes=gt_class,
                    is_crowd=is_crowd,
                    gt_boxes=gt_bbox,
                    im_info=im_info,
                    batch_size_per_im=bbox_assigner.batch_size_per_im,
                    fg_fraction=bbox_assigner.fg_fraction,
                    fg_thresh=bbox_assigner.fg_thresh,
                    bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                    bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                    bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                    class_nums=bbox_assigner.class_nums,
                    use_random=bbox_assigner.use_random)
                # The first output replaces the RPN proposals for the
                # RoI extraction below.
                rois = outs[0]
            if fpn is None:
                # Without FPN only the last body feature is pooled.
                body_feat = body_feats[body_feat_names[-1]]
                # roi_extractor: RoIAlign
                roi_feat = fluid.layers.roi_align(
                    input=body_feat,
                    rois=rois,
                    pooled_height=roi_extractor.pooled_height,
                    pooled_width=roi_extractor.pooled_width,
                    spatial_scale=roi_extractor.spatial_scale,
                    sampling_ratio=roi_extractor.sampling_ratio)
            else:
                # roi_extractor: FPNRoIAlign
                roi_feat = roi_extractor(
                    head_inputs=body_feats,
                    rois=rois,
                    spatial_scale=spatial_scale)
            # head_feat
            head_feat = bbox_head.head(roi_feat)
            if isinstance(head_feat, OrderedDict):
                head_feat = list(head_feat.values())[0]
            # Collect the *prefixed* names first: the variables are renamed by
            # add_vars_prefix below and looked up again afterwards.
            if phase == 'train':
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name,
                    'gt_class': var_prefix + gt_class.name,
                    'gt_bbox': var_prefix + gt_bbox.name,
                    'is_crowd': var_prefix + is_crowd.name
                }
                outputs = {
                    'head_feat':
                    var_prefix + head_feat.name,
                    'rpn_cls_loss':
                    var_prefix + rpn_loss['rpn_cls_loss'].name,
                    'rpn_reg_loss':
                    var_prefix + rpn_loss['rpn_reg_loss'].name,
                    'generate_proposal_labels':
                    [var_prefix + var.name for var in outs]
                }
            else:  # phase == 'predict'
                pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                im_shape)
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name
                }
                outputs = {
                    'head_feat': var_prefix + head_feat.name,
                    'rois': var_prefix + rois.name,
                    'bbox_out': var_prefix + pred.name
                }
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            global_vars = context_prog.global_block().vars
            inputs = {key: global_vars[value] for key, value in inputs.items()}
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            return inputs, outputs, context_prog
```shell
$ hub install faster_rcnn_resnet50_coco2017==1.1.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
* num\_classes (int): 类别数;
* trainable(bool): 将参数的trainable 属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。phase 为 'train' 时,keys 包括 'head\_feat'、'rpn\_cls\_loss'、'rpn\_reg\_loss'、'generate\_proposal\_labels';phase 为 'predict' 时,keys 包括 'head\_feat'、'rois'、'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program。
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="faster_rcnn_resnet50_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m faster_rcnn_resnet50_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
class BBoxAssigner(object):
    """Settings holder for proposal-label assignment.

    The attributes are read by callers that build
    ``fluid.layers.generate_proposal_labels``; this class performs no
    computation itself.

    Args:
        batch_size_per_im: stored as ``batch_size_per_im``.
        fg_fraction: stored as ``fg_fraction``.
        fg_thresh: stored as ``fg_thresh``.
        bg_thresh_hi: stored as ``bg_thresh_hi``.
        bg_thresh_lo: stored as ``bg_thresh_lo``.
        bbox_reg_weights: stored as ``bbox_reg_weights`` (lists are copied).
        class_nums: stored as ``class_nums``.
        shuffle_before_sample: stored under the name ``use_random``.
    """

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 class_nums=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        # The default is a single list object shared by every call; copy list
        # values so mutating one instance's weights cannot leak into others.
        # Non-list values are stored untouched.
        if isinstance(bbox_reg_weights, list):
            bbox_reg_weights = list(bbox_reg_weights)
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = class_nums
        self.use_random = shuffle_before_sample
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import MSRA
class MultiClassNMS(object):
    """Settings holder for ``fluid.layers.multiclass_nms``.

    Every constructor argument is stored unchanged under an attribute of the
    same name; no computation happens here.
    """

    def __init__(self, score_threshold=.05, nms_top_k=-1, keep_top_k=100,
                 nms_threshold=.5, normalized=False, nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        # thresholds
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.nms_eta = nms_eta
        # candidate limits
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # box / label conventions
        self.normalized = normalized
        self.background_label = background_label
class SmoothL1Loss(object):
    '''
    Callable wrapper around ``fluid.layers.smooth_l1`` with a fixed sigma.
    Args:
        sigma (float): hyper param in smooth l1 loss
    '''

    def __init__(self, sigma=1.0):
        super(SmoothL1Loss, self).__init__()
        self.sigma = sigma

    def __call__(self, x, y, inside_weight=None, outside_weight=None):
        # Forward the optional weights untouched and add the stored sigma.
        extra = dict(
            inside_weight=inside_weight,
            outside_weight=outside_weight,
            sigma=self.sigma)
        return fluid.layers.smooth_l1(x, y, **extra)
class BoxCoder(object):
    """Settings holder for ``fluid.layers.box_coder``.

    Every constructor argument is stored under an attribute of the same name.

    Args:
        prior_box_var: stored as ``prior_box_var`` (lists are copied).
        code_type: stored as ``code_type``.
        box_normalized: stored as ``box_normalized``.
        axis: stored as ``axis``.
    """

    def __init__(self,
                 prior_box_var=[0.1, 0.1, 0.2, 0.2],
                 code_type='decode_center_size',
                 box_normalized=False,
                 axis=1):
        super(BoxCoder, self).__init__()
        # The default is one list object shared by every call; copy list
        # values so instances never alias it. Anything that is not a list
        # (e.g. None) is stored exactly as given.
        if isinstance(prior_box_var, list):
            prior_box_var = list(prior_box_var)
        self.prior_box_var = prior_box_var
        self.code_type = code_type
        self.box_normalized = box_normalized
        self.axis = axis
class TwoFCHead(object):
    """
    RCNN head made of two stacked fully connected layers ('fc6' then 'fc7').
    Args:
        mlp_dim (int): output size of each fc layer
    """

    def __init__(self, mlp_dim=1024):
        super(TwoFCHead, self).__init__()
        self.mlp_dim = mlp_dim

    def __call__(self, roi_feat):
        """Apply fc6 and fc7 (both with relu) to `roi_feat` and return the fc7 output."""
        dims = roi_feat.shape
        # Product of dims 1..3 of the input, handed to Xavier as `fan_out`.
        fan = dims[1] * dims[2] * dims[3]

        fc6_weight = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
        fc6_bias = ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
        hidden = fluid.layers.fc(
            input=roi_feat,
            size=self.mlp_dim,
            act='relu',
            name='fc6',
            param_attr=fc6_weight,
            bias_attr=fc6_bias)

        fc7_weight = ParamAttr(name='fc7_w', initializer=Xavier())
        fc7_bias = ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
        return fluid.layers.fc(
            input=hidden,
            size=self.mlp_dim,
            act='relu',
            name='fc7',
            param_attr=fc7_weight,
            bias_attr=fc7_bias)
class BBoxHead(object):
    """
    Bounding-box head of the RCNN stage.
    Args:
        head (object): callable head module, e.g., `ResNetC5`, `TwoFCHead`
        box_coder (object): `BoxCoder` instance read when decoding boxes
        nms (object): `MultiClassNMS` instance read at prediction time
        bbox_loss (object): callable box loss, `SmoothL1Loss` by default
        num_classes: number of output classes
    """
    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 head,
                 box_coder=BoxCoder(),
                 nms=MultiClassNMS(),
                 bbox_loss=SmoothL1Loss(),
                 num_classes=81):
        super(BBoxHead, self).__init__()
        self.head = head
        self.num_classes = num_classes
        self.box_coder = box_coder
        self.nms = nms
        self.bbox_loss = bbox_loss
        # Filled in by get_head_feat() the first time an input is supplied.
        self.head_feat = None

    def get_head_feat(self, input=None):
        """
        Return the head feature map.

        With an `input`, the head is run on it, the result is cached on
        `self.head_feat` and returned (for an OrderedDict result, its first
        value is used). Without an `input`, the cached value is returned.
        """
        if input is None:
            return self.head_feat
        computed = self.head(input)
        if isinstance(computed, OrderedDict):
            computed = list(computed.values())[0]
        self.head_feat = computed
        return computed

    def _get_output(self, roi_feat):
        """
        Build the classification and box-regression outputs.
        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
        Returns:
            cls_score(Variable): output of the 'cls_score' fc layer, created
                with size `num_classes`.
            bbox_pred(Variable): output of the 'bbox_pred' fc layer, created
                with size `4 * num_classes`.
        """
        feat = self.get_head_feat(roi_feat)
        # Any head other than TwoFCHead gets a global average pool first.
        if not isinstance(self.head, TwoFCHead):
            feat = fluid.layers.pool2d(
                feat, pool_type='avg', global_pooling=True)

        cls_weight = ParamAttr(
            name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01))
        cls_bias = ParamAttr(
            name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.))
        cls_score = fluid.layers.fc(
            input=feat,
            size=self.num_classes,
            act=None,
            name='cls_score',
            param_attr=cls_weight,
            bias_attr=cls_bias)

        box_weight = ParamAttr(
            name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001))
        box_bias = ParamAttr(
            name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.))
        bbox_pred = fluid.layers.fc(
            input=feat,
            size=4 * self.num_classes,
            act=None,
            name='bbox_pred',
            param_attr=box_weight,
            bias_attr=box_bias)
        return cls_score, bbox_pred

    def get_loss(self, roi_feat, labels_int32, bbox_targets,
                 bbox_inside_weights, bbox_outside_weights):
        """
        Get bbox_head loss.
        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            labels_int32(Variable): Class label of a RoI with shape [P, 1].
                P is the number of RoI.
            bbox_targets(Variable): Box label of a RoI with shape
                [P, 4 * class_nums].
            bbox_inside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.
            bbox_outside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.
        Return:
            Type: Dict
                loss_cls(Variable): mean softmax cross-entropy loss.
                loss_bbox(Variable): mean of `self.bbox_loss`.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)

        # The labels are cast to int64 and excluded from gradients.
        labels = fluid.layers.cast(x=labels_int32, dtype='int64')
        labels.stop_gradient = True
        cls_term = fluid.layers.softmax_with_cross_entropy(
            logits=cls_score, label=labels, numeric_stable_mode=True)
        cls_term = fluid.layers.reduce_mean(cls_term)

        box_term = self.bbox_loss(
            x=bbox_pred,
            y=bbox_targets,
            inside_weight=bbox_inside_weights,
            outside_weight=bbox_outside_weights)
        box_term = fluid.layers.reduce_mean(box_term)
        return {'loss_cls': cls_term, 'loss_bbox': box_term}

    def get_prediction(self,
                       roi_feat,
                       rois,
                       im_info,
                       im_shape,
                       return_box_score=False):
        """
        Get prediction bounding box in test stage.
        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            rois (Variable): Output of generate_proposals in rpn head.
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists of im_height,
                im_width, im_scale.
            im_shape (Variable): Actual shape of original image with shape
                [B, 3]. B is the number of images, each element consists of
                original_height, original_width, 1
            return_box_score (bool): when True, skip NMS and return the
                clipped boxes and class probabilities instead.
        Returns:
            pred_result(Variable): Prediction result with shape [N, 6]. Each
                row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
                N is the total number of prediction.
                When `return_box_score` is True, a dict with keys 'bbox' and
                'score' is returned instead.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)

        # Take index 2 along axis 1 of im_info, expand it to match rois, and
        # divide the rois by it.
        scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        scale = fluid.layers.sequence_expand(scale, rois)
        boxes = rois / scale

        cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
        bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))

        # self.box_coder
        coder = self.box_coder
        decoded = fluid.layers.box_coder(
            prior_box=boxes,
            target_box=bbox_pred,
            prior_box_var=coder.prior_box_var,
            code_type=coder.code_type,
            box_normalized=coder.box_normalized,
            axis=coder.axis)
        clipped = fluid.layers.box_clip(input=decoded, im_info=im_shape)
        if return_box_score:
            return {'bbox': clipped, 'score': cls_prob}

        # self.nms
        nms_cfg = self.nms
        return fluid.layers.multiclass_nms(
            bboxes=clipped,
            scores=cls_prob,
            score_threshold=nms_cfg.score_threshold,
            nms_top_k=nms_cfg.nms_top_k,
            keep_top_k=nms_cfg.keep_top_k,
            nms_threshold=nms_cfg.nms_threshold,
            normalized=nms_cfg.normalized,
            nms_eta=nms_cfg.nms_eta,
            background_label=nms_cfg.background_label)
......@@ -15,14 +15,19 @@ __all__ = ['test_reader']
def test_reader(paths=None, images=None):
"""data generator
"""
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
Yield:
res (dict): key contains 'image' and 'im_info', the corresponding values is:
image (numpy.ndarray): the image to be fed into network
im_info (numpy.ndarray): the info about the preprocessed.
"""
img_list = []
img_list = list()
if paths:
for img_path in paths:
assert os.path.isfile(
......@@ -87,9 +92,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else:
max_shape = max_shape_org.astype('int32')
padding_image = []
padding_info = []
padding_shape = []
padding_image = list()
padding_info = list()
padding_shape = list()
for data in batch_data:
im_c, im_h, im_w = data['image'].shape
......
......@@ -13,11 +13,18 @@ from math import ceil
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead
from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss
from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign
@moduleinfo(
......@@ -27,17 +34,14 @@ from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
summary=
"Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
author_email="")
class FasterRCNNResNet50(hub.Module):
def _initialize(self):
self.faster_rcnn = hub.Module(name="faster_rcnn")
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join(
self.directory, "faster_rcnn_resnet50_model")
self.label_names = self.faster_rcnn.load_label_info(
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None
self.bbox_out = None
self._set_config()
def _set_config(self):
......@@ -66,20 +70,23 @@ class FasterRCNNResNet50(hub.Module):
trainable=True,
pretrained=True,
phase='train'):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param param_prefix: the prefix of parameters in neural network.
:type param_prefix: str
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
"""
wrapped_prog = fluid.Program()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program):
with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=[3, 800, 1333], dtype='float32')
......@@ -91,18 +98,106 @@ class FasterRCNNResNet50(hub.Module):
freeze_at=2)
body_feats = backbone(image)
# Base Class
inputs, outputs, context_prog = self.faster_rcnn.context(
body_feats=body_feats,
fpn=None,
rpn_head=self.rpn_head(),
roi_extractor=self.roi_extractor(),
bbox_head=self.bbox_head(num_classes),
bbox_assigner=self.bbox_assigner(num_classes),
image=image,
trainable=trainable,
var_prefix='@HUB_{}@'.format(self.name),
phase=phase)
# var_prefix
var_prefix = '@HUB_{}@'.format(self.name)
im_info = fluid.layers.data(
name='im_info', shape=[3], dtype='float32', lod_level=0)
im_shape = fluid.layers.data(
name='im_shape', shape=[3], dtype='float32', lod_level=0)
body_feat_names = list(body_feats.keys())
# rpn_head: RPNHead
rpn_head = self.rpn_head()
rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
# train
if phase == 'train':
gt_bbox = fluid.layers.data(
name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
is_crowd = fluid.layers.data(
name='is_crowd', shape=[1], dtype='int32', lod_level=1)
gt_class = fluid.layers.data(
name='gt_class', shape=[1], dtype='int32', lod_level=1)
rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
# bbox_assigner: BBoxAssigner
bbox_assigner = self.bbox_assigner(num_classes)
outs = fluid.layers.generate_proposal_labels(
rpn_rois=rois,
gt_classes=gt_class,
is_crowd=is_crowd,
gt_boxes=gt_bbox,
im_info=im_info,
batch_size_per_im=bbox_assigner.batch_size_per_im,
fg_fraction=bbox_assigner.fg_fraction,
fg_thresh=bbox_assigner.fg_thresh,
bg_thresh_hi=bbox_assigner.bg_thresh_hi,
bg_thresh_lo=bbox_assigner.bg_thresh_lo,
bbox_reg_weights=bbox_assigner.bbox_reg_weights,
class_nums=bbox_assigner.class_nums,
use_random=bbox_assigner.use_random)
rois = outs[0]
body_feat = body_feats[body_feat_names[-1]]
# roi_extractor: RoIAlign
roi_extractor = self.roi_extractor()
roi_feat = fluid.layers.roi_align(
input=body_feat,
rois=rois,
pooled_height=roi_extractor.pooled_height,
pooled_width=roi_extractor.pooled_width,
spatial_scale=roi_extractor.spatial_scale,
sampling_ratio=roi_extractor.sampling_ratio)
# head_feat
bbox_head = self.bbox_head(num_classes)
head_feat = bbox_head.head(roi_feat)
if isinstance(head_feat, OrderedDict):
head_feat = list(head_feat.values())[0]
if phase == 'train':
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name,
'gt_class': var_prefix + gt_class.name,
'gt_bbox': var_prefix + gt_bbox.name,
'is_crowd': var_prefix + is_crowd.name
}
outputs = {
'head_feat':
var_prefix + head_feat.name,
'rpn_cls_loss':
var_prefix + rpn_loss['rpn_cls_loss'].name,
'rpn_reg_loss':
var_prefix + rpn_loss['rpn_reg_loss'].name,
'generate_proposal_labels':
[var_prefix + var.name for var in outs]
}
elif phase == 'predict':
pred = bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape)
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name
}
outputs = {
'head_feat': var_prefix + head_feat.name,
'rois': var_prefix + rois.name,
'bbox_out': var_prefix + pred.name
}
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(startup_program, var_prefix)
global_vars = context_prog.global_block().vars
inputs = {
key: global_vars[value]
for key, value in inputs.items()
}
outputs = {
key: global_vars[value] if not isinstance(value, list) else
[global_vars[var] for var in value]
for key, value in outputs.items()
}
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -122,43 +217,42 @@ class FasterRCNNResNet50(hub.Module):
return inputs, outputs, context_prog
def rpn_head(self):
return self.faster_rcnn.RPNHead(
anchor_generator=self.faster_rcnn.AnchorGenerator(
return RPNHead(
anchor_generator=AnchorGenerator(
anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]),
rpn_target_assign=self.faster_rcnn.RPNTargetAssign(
rpn_target_assign=RPNTargetAssign(
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
rpn_negative_overlap=0.3,
rpn_positive_overlap=0.7,
rpn_straddle_thresh=0.0),
train_proposal=self.faster_rcnn.GenerateProposals(
train_proposal=GenerateProposals(
min_size=0.0,
nms_thresh=0.7,
post_nms_top_n=12000,
pre_nms_top_n=2000),
test_proposal=self.faster_rcnn.GenerateProposals(
test_proposal=GenerateProposals(
min_size=0.0,
nms_thresh=0.7,
post_nms_top_n=6000,
pre_nms_top_n=1000))
def roi_extractor(self):
return self.faster_rcnn.RoIAlign(
resolution=14, sampling_ratio=0, spatial_scale=0.0625)
return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625)
def bbox_head(self, num_classes):
return self.faster_rcnn.BBoxHead(
return BBoxHead(
head=ResNetC5(depth=50, norm_type='affine_channel'),
nms=self.faster_rcnn.MultiClassNMS(
nms=MultiClassNMS(
keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
bbox_loss=self.faster_rcnn.SmoothL1Loss(),
bbox_loss=SmoothL1Loss(),
num_classes=num_classes)
def bbox_assigner(self, num_classes):
return self.faster_rcnn.BBoxAssigner(
return BBoxAssigner(
batch_size_per_im=512,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
bg_thresh_hi=0.5,
......@@ -178,26 +272,32 @@ class FasterRCNNResNet50(hub.Module):
visualization=True):
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param use_gpu: whether to use gpu or not.
:type use_gpu: bool
:param batch_size: bathc size.
:type batch_size: int
:param output_dir: the directory to store the detection result.
:type output_dir: str
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
:param visualization: whether to draw box and save images.
:type visualization: bool
Args:
paths (list[str]): The paths of images.
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
output_dir (str): The path to store output images.
visualization (bool): Whether to save image or not.
score_thresh (float): threshold for object detecion.
Returns:
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
paths = paths if paths else list()
if data and 'image' in data:
paths = data['image'] if not paths else paths + data['image']
all_images = []
paths = paths if paths else []
for yield_return in self.faster_rcnn.test_reader(paths, images):
paths += data['image']
all_images = list()
for yield_return in test_reader(paths, images):
all_images.append(yield_return)
images_num = len(all_images)
......@@ -211,7 +311,8 @@ class FasterRCNNResNet50(hub.Module):
batch_data.append(all_images[handle_id + image_id])
except:
pass
padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data)
padding_image_tensor = PaddleTensor(padding_image.copy())
padding_info_tensor = PaddleTensor(padding_info.copy())
......@@ -223,7 +324,7 @@ class FasterRCNNResNet50(hub.Module):
data_out = self.gpu_predictor.run(feed_list)
else:
data_out = self.cpu_predictor.run(feed_list)
output = self.faster_rcnn.postprocess(
output = postprocess(
paths=paths,
images=images,
data_out=data_out,
......@@ -275,6 +376,15 @@ class FasterRCNNResNet50(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images; each one is decoded with
            `base64_to_cv2` before detection.
        **kwargs: forwarded unchanged to `object_detection`.

    Returns:
        Whatever `object_detection` returns for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Pass the decoded arrays by keyword: the documented signature of
    # object_detection lists `paths` first, so a positional argument would be
    # treated as file paths instead of image data.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable
def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser(
......
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = [
'get_save_image_name', 'draw_bounding_box_on_image', 'clip_bbox',
'load_label_info'
'base64_to_cv2',
'load_label_info',
'postprocess',
]
def base64_to_cv2(b64str):
    """Decode a base64 string holding an encoded image into an OpenCV image.

    Args:
        b64str (str): base64 text of the encoded image bytes.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the decoded
        bytes.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported way to view raw bytes as uint8; the
    # binary mode of np.fromstring is deprecated and gone in recent NumPy.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path.
"""
......@@ -90,24 +100,29 @@ def postprocess(paths,
output_dir,
handle_id,
visualization=True):
"""postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param data_out: data produced by executor.run
:type data_out: lod_tensor
:param score_thresh: the low limit of bounding box.
:type score_thresh: float
:param label_names: label names
:type label_names: list
:param output_dir: output directory.
:type output_dir: str
:param handle_id: The number of images that have been handled.
:type handle_id: int
:param visualization: whether to draw bbox and save images.
:param visualization: bool
"""
postprocess the lod_tensor produced by fluid.Executor.run
Args:
paths (list[str]): the path of images.
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
data_out (lod_tensor): data produced by executor.run.
score_thresh (float): the low limit of bounding box.
label_names (list[str]): label names.
output_dir (str): output directory.
handle_id (int): The number of images that have been handled.
visualization (bool): whether to save as images.
Returns:
res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
lod_tensor = data_out[0]
lod = lod_tensor.lod[0]
......
# coding=utf-8
__all__ = ['RoIAlign']
class RoIAlign(object):
    """Settings holder for RoI align pooling.

    Args:
        resolution: an int, used for both height and width, or an indexable
            whose items 0 and 1 are the pooled height and width.
        spatial_scale: stored as ``spatial_scale``.
        sampling_ratio: stored as ``sampling_ratio``.
    """

    def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        # A single int means a square output.
        size = [resolution] * 2 if isinstance(resolution, int) else resolution
        self.pooled_height, self.pooled_width = size[0], size[1]
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead']
class AnchorGenerator(object):
    """Plain holder for the anchor-generation settings.

    The values are only stored here; RPNHead reads them back and forwards
    them to ``fluid.layers.anchor_generator``.

    Args:
        stride (list): stored as ``self.stride``.
        anchor_sizes (list): stored as ``self.anchor_sizes``.
        aspect_ratios (list): stored as ``self.aspect_ratios``.
        variance (list): stored as ``self.variance``.

    Note:
        Arguments are kept by reference (no copy), so the default lists are
        shared between instances that rely on the defaults.
    """

    def __init__(self,
                 stride=[16.0, 16.0],
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1., 2.],
                 variance=[1., 1., 1., 1.]):
        super(AnchorGenerator, self).__init__()
        self.stride = stride
        self.variance = variance
        self.aspect_ratios = aspect_ratios
        self.anchor_sizes = anchor_sizes
class RPNTargetAssign(object):
    """Plain holder for the RPN target-assignment settings.

    The values are only stored here; RPNHead.get_loss reads them back and
    forwards them to ``fluid.layers.rpn_target_assign``.

    Args:
        rpn_batch_size_per_im (int): stored unchanged.
        rpn_straddle_thresh (float): stored unchanged.
        rpn_fg_fraction (float): stored unchanged.
        rpn_positive_overlap (float): stored unchanged.
        rpn_negative_overlap (float): stored unchanged.
        use_random (bool): stored unchanged.
    """

    def __init__(self,
                 rpn_batch_size_per_im=256,
                 rpn_straddle_thresh=0.,
                 rpn_fg_fraction=0.5,
                 rpn_positive_overlap=0.7,
                 rpn_negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        # Sampling settings.
        self.rpn_batch_size_per_im = rpn_batch_size_per_im
        self.rpn_fg_fraction = rpn_fg_fraction
        self.use_random = use_random
        # Threshold settings.
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_positive_overlap = rpn_positive_overlap
        self.rpn_negative_overlap = rpn_negative_overlap
class GenerateProposals(object):
    """Plain holder for the proposal-generation settings.

    The values are only stored here; RPNHead.get_proposals reads them back
    and forwards them to ``fluid.layers.generate_proposals``.

    Args:
        pre_nms_top_n (int): stored unchanged.
        post_nms_top_n (int): stored unchanged.
        nms_thresh (float): stored unchanged.
        min_size (float): stored unchanged.
        eta (float): stored unchanged.
    """

    def __init__(self,
                 pre_nms_top_n=6000,
                 post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(GenerateProposals, self).__init__()
        # Box-count limits.
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        # Filtering settings.
        self.min_size = min_size
        self.nms_thresh = nms_thresh
        self.eta = eta
class RPNHead(object):
    """
    RPN Head

    Args:
        anchor_generator (object): `AnchorGenerator` instance
        rpn_target_assign (object): `RPNTargetAssign` instance
        train_proposal (object): `GenerateProposals` instance for training
        test_proposal (object): `GenerateProposals` instance for testing
        num_classes (int): number of classes in rpn output
    """
    __inject__ = [
        'anchor_generator', 'rpn_target_assign', 'train_proposal',
        'test_proposal'
    ]

    def __init__(self,
                 anchor_generator,
                 rpn_target_assign,
                 train_proposal,
                 test_proposal,
                 num_classes=1):
        super(RPNHead, self).__init__()
        self.anchor_generator = anchor_generator
        self.rpn_target_assign = rpn_target_assign
        self.train_proposal = train_proposal
        self.test_proposal = test_proposal
        self.num_classes = num_classes

    def _get_output(self, input):
        """
        Get anchor and RPN head output.

        Besides returning the two head outputs, this stores them, together
        with the generated anchors and anchor variances, on the instance
        (``rpn_cls_score``, ``rpn_bbox_pred``, ``anchor``, ``anchor_var``)
        so that ``get_loss`` can reuse them later.

        Args:
            input(Variable): feature map from backbone with shape of [N, C, H, W]

        Returns:
            rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W].
        """
        # The 3x3 conv keeps the channel count of the incoming feature map.
        dim_out = input.shape[1]
        rpn_conv = fluid.layers.conv2d(
            input=input,
            num_filters=dim_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            name='conv_rpn',
            param_attr=ParamAttr(
                name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        # Generate anchors using the settings held by self.anchor_generator.
        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
            input=rpn_conv,
            anchor_sizes=self.anchor_generator.anchor_sizes,
            aspect_ratios=self.anchor_generator.aspect_ratios,
            variance=self.anchor_generator.variance,
            stride=self.anchor_generator.stride)
        num_anchor = self.anchor.shape[2]
        # Proposal classification scores
        self.rpn_cls_score = fluid.layers.conv2d(
            rpn_conv,
            num_filters=num_anchor * self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_cls_score',
            param_attr=ParamAttr(
                name="rpn_cls_logits_w", initializer=Normal(loc=0.,
                                                            scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_cls_logits_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        # Proposal bbox regression deltas
        self.rpn_bbox_pred = fluid.layers.conv2d(
            rpn_conv,
            num_filters=4 * num_anchor,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_bbox_pred',
            param_attr=ParamAttr(
                name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_bbox_pred_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def get_proposals(self, body_feats, im_info, mode='train'):
        """
        Get proposals according to the output of backbone.

        Args:
            body_feats (dict): The dictionary of feature maps from backbone.
                Only the last value (in iteration order) is used.
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            mode (str): 'train' selects ``self.train_proposal``; any other
                value selects ``self.test_proposal``.

        Returns:
            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
        """
        # In RPN Heads, only the last feature map of backbone is used.
        body_feat = list(body_feats.values())[-1]
        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)

        if self.num_classes == 1:
            rpn_cls_prob = fluid.layers.sigmoid(
                rpn_cls_score, name='rpn_cls_prob')
        else:
            # Multi-class case: softmax over the class axis, drop class 0 and
            # keep the best remaining class probability per anchor.
            rpn_cls_score = fluid.layers.transpose(
                rpn_cls_score, perm=[0, 2, 3, 1])
            rpn_cls_score = fluid.layers.reshape(
                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_tmp = fluid.layers.softmax(
                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
            rpn_cls_prob_slice = fluid.layers.slice(
                rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes])
            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
            rpn_cls_prob = fluid.layers.reshape(
                rpn_cls_prob, shape=(0, 0, 0, -1))
            rpn_cls_prob = fluid.layers.transpose(
                rpn_cls_prob, perm=[0, 3, 1, 2])

        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
            scores=rpn_cls_prob,
            bbox_deltas=rpn_bbox_pred,
            im_info=im_info,
            anchors=self.anchor,
            variances=self.anchor_var,
            pre_nms_top_n=prop_op.pre_nms_top_n,
            post_nms_top_n=prop_op.post_nms_top_n,
            nms_thresh=prop_op.nms_thresh,
            min_size=prop_op.min_size,
            eta=prop_op.eta)
        return rpn_rois

    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
                         anchor_var):
        """Reshape head outputs and anchors into the layout used by the loss.

        Scores and box deltas are moved to channel-last and flattened to
        (0, -1, num_classes) and (0, -1, 4); anchors and their variances are
        flattened to (-1, 4).
        """
        rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
        rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
        rpn_cls_score = fluid.layers.reshape(
            x=rpn_cls_score, shape=(0, -1, self.num_classes))
        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var

    def _get_loss_input(self):
        """Return the cached head outputs, reshaped for the loss.

        Raises:
            ValueError: if any of ``rpn_cls_score``, ``rpn_bbox_pred``,
                ``anchor`` or ``anchor_var`` has not been set yet, i.e.
                ``get_proposals`` was not called first.
        """
        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
            # Compare against None explicitly: the stored objects are graph
            # variables and must not be evaluated for truthiness.
            if getattr(self, attr, None) is None:
                raise ValueError(
                    "self.{} should not be None, "
                    "call RPNHead.get_proposals first".format(attr))
        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
                                     self.anchor, self.anchor_var)

    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
        """
        Sample proposals and Calculate rpn loss.

        Args:
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
                M is the number of groundtruth.
            is_crowd(Variable): Indicates groud-truth is crowd or not with
                shape [M, 1]. M is the number of groundtruth.
            gt_label(Variable): Ground-truth labels; only used when
                ``num_classes`` is not 1.

        Returns:
            Type: dict
                rpn_cls_loss(Variable): RPN classification loss.
                rpn_reg_loss(Variable): RPN bounding box regression loss.
        """
        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
        if self.num_classes == 1:
            # Settings come from self.rpn_target_assign.
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    is_crowd=is_crowd,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap,
                    use_random=self.rpn_target_assign.use_random)
            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
            score_tgt.stop_gradient = True
            rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                x=score_pred, label=score_tgt)
        else:
            # NOTE(review): this branch calls self.rpn_target_assign directly,
            # so the injected object must be callable. A plain settings holder
            # without __call__ would raise TypeError here - confirm which
            # target-assign op is intended for the multi-class case.
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                self.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    gt_labels=gt_label,
                    is_crowd=is_crowd,
                    num_classes=self.num_classes,
                    im_info=im_info)
            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
            labels_int64.stop_gradient = True
            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
                logits=score_pred, label=labels_int64, numeric_stable_mode=True)

        rpn_cls_loss = fluid.layers.reduce_mean(
            rpn_cls_loss, name='loss_rpn_cls')
        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
        loc_tgt.stop_gradient = True
        rpn_reg_loss = fluid.layers.smooth_l1(
            x=loc_pred,
            y=loc_tgt,
            sigma=3.0,
            inside_weight=bbox_weight,
            outside_weight=bbox_weight)
        rpn_reg_loss = fluid.layers.reduce_sum(
            rpn_reg_loss, name='loss_rpn_bbox')
        # Normalise the summed regression loss by the number of elements in
        # the score target tensor.
        score_shape = fluid.layers.shape(score_tgt)
        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
        norm = fluid.layers.reduce_prod(score_shape)
        norm.stop_gradient = True
        rpn_reg_loss = rpn_reg_loss / norm

        return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss}
```shell
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
* num\_classes (int): 类别数;
* trainable(bool): 将参数的trainable 属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。phase 为 'train' 时,keys 包括 'head\_feat'、'rpn\_cls\_loss'、'rpn\_reg\_loss'、'generate\_proposal\_labels';phase 为 'predict' 时,keys 包括 'head\_feat'、'rois'、'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program。
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m faster_rcnn_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_fpn_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
......@@ -53,7 +53,6 @@ class SmoothL1Loss(object):
class BoxCoder(object):
# __op__ = fluid.layers.box_coder
def __init__(self,
prior_box_var=[0.1, 0.1, 0.2, 0.2],
code_type='decode_center_size',
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from paddle import fluid
__all__ = ['test_reader']
def test_reader(paths=None, images=None):
    """
    Data generator that preprocesses images for the detector.

    Images loaded from ``paths`` come first, followed by the entries of
    ``images``. Each image is converted BGR -> RGB, scaled to [0, 1],
    normalised with a fixed per-channel mean/std, resized so that the short
    side targets 800 while the long side does not exceed 1333, and finally
    transposed from HWC to CHW.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]

    Yield:
        res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is:
            image (numpy.ndarray): the image to be fed into network, CHW.
            im_info (numpy.ndarray): [resized_height, resized_width, scale], float32.
            im_shape (numpy.ndarray): [height, width, 1.0] of the image before resizing, float32.

    Raises:
        AssertionError: if an entry of ``paths`` is not an existing file.
        ValueError: if a file in ``paths`` cannot be decoded as an image.
    """
    img_list = list()
    if paths:
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            # cv2.imread signals an unreadable/undecodable file by returning
            # None instead of raising, so fail here with a clear message.
            if img is None:
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    # Loop-invariant preprocessing constants.
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
    target_size = 800
    max_size = 1333

    for im in img_list:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        shape = im.shape
        # im_shape holds the original shape of image.
        im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32')
        im_size_min = np.min(shape[0:2])
        im_size_max = np.max(shape[0:2])
        # Scale the short side to target_size unless that would push the long
        # side past max_size; in that case scale the long side to max_size.
        im_scale = float(target_size) / float(im_size_min)
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)

        resize_w = np.round(im_scale * float(shape[1]))
        resize_h = np.round(im_scale * float(shape[0]))
        # im_info holds the resize info of image.
        im_info = np.array([resize_h, resize_w, im_scale]).astype('float32')

        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=cv2.INTER_LINEAR)

        # HWC --> CHW
        im = im.transpose(2, 0, 1)
        yield {'image': im, 'im_info': im_info, 'im_shape': im_shape}
def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
    """Zero-pad every image of a batch to a common height/width and stack.

    Args:
        batch_data (list[dict]): samples with keys 'image' (a [C, H, W]
            array), 'im_info' and 'im_shape'.
        coarsest_stride (int): when > 0, the common height and width are
            rounded up to a multiple of this value.
        use_padded_im_info (bool): when True, the first two entries of each
            sample's 'im_info' are overwritten with the padded height/width;
            otherwise with the un-rounded batch maximum height/width.

    Returns:
        tuple: (images, im_infos, im_shapes), each a float32 numpy array
        stacked along a new leading batch axis.

    Note:
        Each sample's 'im_info' array is modified in place.
    """
    all_shapes = np.array([sample['image'].shape for sample in batch_data])
    max_shape_org = all_shapes.max(axis=0)

    if coarsest_stride > 0:
        max_shape = np.zeros((3)).astype('int32')
        for axis in (1, 2):
            max_shape[axis] = int(
                np.ceil(max_shape_org[axis] / coarsest_stride) *
                coarsest_stride)
    else:
        max_shape = max_shape_org.astype('int32')

    padded_h, padded_w = max_shape[1], max_shape[2]
    if use_padded_im_info:
        info_h, info_w = padded_h, padded_w
    else:
        info_h, info_w = max_shape_org[1], max_shape_org[2]

    stacked_images = []
    stacked_infos = []
    stacked_shapes = []
    for sample in batch_data:
        source = sample['image']
        channels, height, width = source.shape
        # Copy the image into the top-left corner of a zero canvas.
        canvas = np.zeros((channels, padded_h, padded_w), dtype=np.float32)
        canvas[:, 0:height, 0:width] = source
        stacked_images.append(canvas)

        info = sample['im_info']
        info[0] = info_h
        info[1] = info_w
        stacked_infos.append(info)
        stacked_shapes.append(sample['im_shape'])

    return (np.array(stacked_images).astype('float32'),
            np.array(stacked_infos).astype('float32'),
            np.array(stacked_shapes).astype('float32'))
......@@ -13,12 +13,19 @@ from math import ceil
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN
from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5
from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet
from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign
@moduleinfo(
......@@ -28,18 +35,15 @@ from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5
summary=
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
author_email="")
class FasterRCNNResNet50RPN(hub.Module):
def _initialize(self):
self.faster_rcnn = hub.Module(name="faster_rcnn")
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join(
self.directory, "faster_rcnn_resnet50_fpn_model")
self.label_names = self.faster_rcnn.load_label_info(
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None
self.bbox_out = None
self._set_config()
# self._set_config()
def _set_config(self):
"""
......@@ -67,18 +71,23 @@ class FasterRCNNResNet50RPN(hub.Module):
trainable=True,
pretrained=True,
phase='train'):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
"""
wrapped_prog = fluid.Program()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program):
with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=[3, 800, 1333], dtype='float32')
......@@ -89,25 +98,107 @@ class FasterRCNNResNet50RPN(hub.Module):
feature_maps=[2, 3, 4, 5],
freeze_at=2)
body_feats = backbone(image)
# fpn: FPN
# fpn
fpn = FPN(
max_level=6,
min_level=2,
num_chan=256,
spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
var_prefix = '@HUB_{}@'.format(self.name)
im_info = fluid.layers.data(
name='im_info', shape=[3], dtype='float32', lod_level=0)
im_shape = fluid.layers.data(
name='im_shape', shape=[3], dtype='float32', lod_level=0)
body_feat_names = list(body_feats.keys())
body_feats, spatial_scale = fpn.get_output(body_feats)
# rpn_head: RPNHead
rpn_head = self.rpn_head()
rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
# train
if phase == 'train':
gt_bbox = fluid.layers.data(
name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
is_crowd = fluid.layers.data(
name='is_crowd', shape=[1], dtype='int32', lod_level=1)
gt_class = fluid.layers.data(
name='gt_class', shape=[1], dtype='int32', lod_level=1)
rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
# bbox_assigner: BBoxAssigner
bbox_assigner = self.bbox_assigner(num_classes)
outs = fluid.layers.generate_proposal_labels(
rpn_rois=rois,
gt_classes=gt_class,
is_crowd=is_crowd,
gt_boxes=gt_bbox,
im_info=im_info,
batch_size_per_im=bbox_assigner.batch_size_per_im,
fg_fraction=bbox_assigner.fg_fraction,
fg_thresh=bbox_assigner.fg_thresh,
bg_thresh_hi=bbox_assigner.bg_thresh_hi,
bg_thresh_lo=bbox_assigner.bg_thresh_lo,
bbox_reg_weights=bbox_assigner.bbox_reg_weights,
class_nums=bbox_assigner.class_nums,
use_random=bbox_assigner.use_random)
rois = outs[0]
roi_extractor = self.roi_extractor()
roi_feat = roi_extractor(
head_inputs=body_feats,
rois=rois,
spatial_scale=spatial_scale)
# head_feat
bbox_head = self.bbox_head(num_classes)
head_feat = bbox_head.head(roi_feat)
if isinstance(head_feat, OrderedDict):
head_feat = list(head_feat.values())[0]
if phase == 'train':
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name,
'gt_class': var_prefix + gt_class.name,
'gt_bbox': var_prefix + gt_bbox.name,
'is_crowd': var_prefix + is_crowd.name
}
outputs = {
'head_feat':
var_prefix + head_feat.name,
'rpn_cls_loss':
var_prefix + rpn_loss['rpn_cls_loss'].name,
'rpn_reg_loss':
var_prefix + rpn_loss['rpn_reg_loss'].name,
'generate_proposal_labels':
[var_prefix + var.name for var in outs]
}
elif phase == 'predict':
pred = bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape)
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name
}
outputs = {
'head_feat': var_prefix + head_feat.name,
'rois': var_prefix + rois.name,
'bbox_out': var_prefix + pred.name
}
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(startup_program, var_prefix)
# Base Class
inputs, outputs, context_prog = self.faster_rcnn.context(
body_feats=body_feats,
fpn=fpn,
rpn_head=self.rpn_head(),
roi_extractor=self.roi_extractor(),
bbox_head=self.bbox_head(num_classes),
bbox_assigner=self.bbox_assigner(num_classes),
image=image,
trainable=trainable,
var_prefix='@HUB_{}@'.format(self.name),
phase=phase)
global_vars = context_prog.global_block().vars
inputs = {
key: global_vars[value]
for key, value in inputs.items()
}
outputs = {
key: global_vars[value] if not isinstance(value, list) else
[global_vars[var] for var in value]
for key, value in outputs.items()
}
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -127,24 +218,24 @@ class FasterRCNNResNet50RPN(hub.Module):
return inputs, outputs, context_prog
def rpn_head(self):
return self.faster_rcnn.FPNRPNHead(
anchor_generator=self.faster_rcnn.AnchorGenerator(
return FPNRPNHead(
anchor_generator=AnchorGenerator(
anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]),
rpn_target_assign=self.faster_rcnn.RPNTargetAssign(
rpn_target_assign=RPNTargetAssign(
rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5,
rpn_negative_overlap=0.3,
rpn_positive_overlap=0.7,
rpn_straddle_thresh=0.0),
train_proposal=self.faster_rcnn.GenerateProposals(
train_proposal=GenerateProposals(
min_size=0.0,
nms_thresh=0.7,
post_nms_top_n=2000,
pre_nms_top_n=2000),
test_proposal=self.faster_rcnn.GenerateProposals(
test_proposal=GenerateProposals(
min_size=0.0,
nms_thresh=0.7,
post_nms_top_n=1000,
......@@ -155,7 +246,7 @@ class FasterRCNNResNet50RPN(hub.Module):
max_level=6)
def roi_extractor(self):
return self.faster_rcnn.FPNRoIAlign(
return FPNRoIAlign(
canconical_level=4,
canonical_size=224,
max_level=5,
......@@ -164,14 +255,14 @@ class FasterRCNNResNet50RPN(hub.Module):
sampling_ratio=2)
def bbox_head(self, num_classes):
return self.faster_rcnn.BBoxHead(
head=self.faster_rcnn.TwoFCHead(mlp_dim=1024),
nms=self.faster_rcnn.MultiClassNMS(
return BBoxHead(
head=TwoFCHead(mlp_dim=1024),
nms=MultiClassNMS(
keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
num_classes=num_classes)
def bbox_assigner(self, num_classes):
return self.faster_rcnn.BBoxAssigner(
return BBoxAssigner(
batch_size_per_im=512,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
bg_thresh_hi=0.5,
......@@ -183,6 +274,7 @@ class FasterRCNNResNet50RPN(hub.Module):
def object_detection(self,
paths=None,
images=None,
data=None,
use_gpu=False,
batch_size=1,
output_dir='detection_result',
......@@ -190,37 +282,49 @@ class FasterRCNNResNet50RPN(hub.Module):
visualization=True):
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param use_gpu: whether to use gpu or not.
:type use_gpu: bool
:param batch_size: bathc size.
:type batch_size: int
:param output_dir: the directory to store the detection result.
:type output_dir: str
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
:param visualization: whether to draw box and save images.
:type visualization: bool
Args:
paths (list[str]): The paths of images.
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
output_dir (str): The path to store output images.
visualization (bool): Whether to save image or not.
score_thresh (float): threshold for object detecion.
Returns:
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
all_images = []
paths = paths if paths else []
for yield_data in self.faster_rcnn.test_reader(paths, images):
paths = paths if paths else list()
if data and 'image' in data:
paths += data['image']
all_images = list()
for yield_data in test_reader(paths, images):
all_images.append(yield_data)
images_num = len(all_images)
loop_num = ceil(images_num / batch_size)
res = []
for iter_id in range(loop_num):
batch_data = []
handle_id = iter_id * batch_size
for image_id in range(batch_size):
try:
batch_data.append(all_images[handle_id + image_id])
except:
pass
padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data, coarsest_stride=32, use_padded_im_info=True)
padding_image_tensor = PaddleTensor(padding_image.copy())
padding_info_tensor = PaddleTensor(padding_info.copy())
......@@ -228,12 +332,13 @@ class FasterRCNNResNet50RPN(hub.Module):
feed_list = [
padding_image_tensor, padding_info_tensor, padding_shape_tensor
]
if use_gpu:
data_out = self.gpu_predictor.run(feed_list)
else:
data_out = self.cpu_predictor.run(feed_list)
output = self.faster_rcnn.postprocess(
output = postprocess(
paths=paths,
images=images,
data_out=data_out,
......@@ -243,6 +348,7 @@ class FasterRCNNResNet50RPN(hub.Module):
handle_id=handle_id,
visualization=visualization)
res += output
return res
def add_module_config_arg(self):
......@@ -269,7 +375,7 @@ class FasterRCNNResNet50RPN(hub.Module):
'--input_path', type=str, default=None, help="input data")
self.arg_input_group.add_argument(
'--input_path',
'--input_file',
type=str,
default=None,
help="file contain input data")
......@@ -285,6 +391,15 @@ class FasterRCNNResNet50RPN(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images, one string per image.
        **kwargs: forwarded unchanged to ``object_detection``.

    Returns:
        The value returned by ``object_detection`` for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Pass the decoded arrays by keyword: the first positional parameter of
    # object_detection is ``paths``, so a positional call would treat the
    # image arrays as file paths.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable
def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser(
......
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = [
'get_save_image_name', 'draw_bounding_box_on_image', 'clip_bbox',
'load_label_info'
'base64_to_cv2',
'load_label_info',
'postprocess',
]
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string into an OpenCV image.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode`` with ``cv2.IMREAD_COLOR`` applied to
        the decoded bytes.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer replaces the deprecated binary mode of np.fromstring and
    # views the bytes without copying them.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path.
"""
......@@ -62,7 +72,6 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir):
os.remove(save_name)
image.save(save_name)
return save_name
......@@ -91,28 +100,34 @@ def postprocess(paths,
output_dir,
handle_id,
visualization=True):
"""postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param data_out: data produced by executor.run
:type data_out: lod_tensor
:param score_thresh: the low limit of bounding box.
:type score_thresh: float
:param label_names: label names
:type label_names: list
:param output_dir: output directory.
:type output_dir: str
:param handle_id: The number of images that have been handled.
:type handle_id: int
:param visualization: whether to draw bbox.
:param visualization: bool
"""
postprocess the lod_tensor produced by fluid.Executor.run
Args:
paths (list[str]): the path of images.
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
data_out (lod_tensor): data produced by executor.run.
score_thresh (float): the low limit of bounding box.
label_names (list[str]): label names.
output_dir (str): output directory.
handle_id (int): The number of images that have been handled.
visualization (bool): whether to save as images.
Returns:
res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
lod_tensor = data_out[0]
lod = lod_tensor.lod[0]
results = lod_tensor.as_ndarray()
if handle_id < len(paths):
unhandled_paths = paths[handle_id:]
unhandled_paths_num = len(unhandled_paths)
......@@ -146,10 +161,6 @@ def postprocess(paths,
category_id = int(row[0])
confidence = row[1]
bbox = row[2:]
bbox[0] = bbox[0] * org_img_width
bbox[1] = bbox[1] * org_img_height
bbox[2] = bbox[2] * org_img_width
bbox[3] = bbox[3] * org_img_height
dt = {}
dt['label'] = label_names[category_id]
dt['confidence'] = confidence
......
# coding=utf-8
import paddle.fluid as fluid
__all__ = ['RoIAlign', 'FPNRoIAlign']
class RoIAlign(object):
    """Settings container for RoIAlign pooling.

    Args:
        resolution (int | sequence): pooled output size. A single int is
            applied to both height and width; otherwise element 0 gives the
            height and element 1 the width.
        spatial_scale (float): kept as-is on the instance.
        sampling_ratio (int): kept as-is on the instance.
    """

    def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        size = [resolution] * 2 if isinstance(resolution, int) else resolution
        self.pooled_height, self.pooled_width = size[0], size[1]
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
__all__ = ['FPNRoIAlign']
class FPNRoIAlign(object):
......
```shell
$ hub install retinanet_resnet50_fpn_coco2017==1.0.0
```
## 命令行预测
```
hub run retinanet_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 将参数的trainable属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 保存模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="retinanet_resnet50_fpn_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m retinanet_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/retinanet_resnet50_fpn_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
......@@ -15,13 +15,19 @@ __all__ = ['test_reader', 'padding_minibatch']
def test_reader(paths=None, images=None):
"""data generator
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
"""
img_list = []
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
Yield:
res (dict): key contains 'image' and 'im_info', the corresponding values is:
image (numpy.ndarray): the image to be fed into network
im_info (numpy.ndarray): the info about the preprocessed.
"""
img_list = list()
if paths:
for img_path in paths:
assert os.path.isfile(
......@@ -83,9 +89,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else:
max_shape = max_shape_org.astype('int32')
padding_image = []
padding_info = []
padding_shape = []
padding_image = list()
padding_info = list()
padding_shape = list()
for data in batch_data:
im_c, im_h, im_w = data['image'].shape
......
......@@ -11,13 +11,13 @@ from functools import partial
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
from retinanet_resnet50_fpn_coco2017.fpn import FPN
from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead
from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess
from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
from retinanet_resnet50_fpn_coco2017.resnet import ResNet
......@@ -29,7 +29,7 @@ from retinanet_resnet50_fpn_coco2017.resnet import ResNet
summary=
"Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
author_email="")
class RetinaNetResNet50FPN(hub.Module):
def _initialize(self):
# default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608)
......@@ -69,16 +69,19 @@ class RetinaNetResNet50FPN(hub.Module):
trainable=True,
pretrained=True,
get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning.
"""
Distill the Head Features, so as to perform transfer learning.
Args:
num_classes (int): number of classes.
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
Returns:
inputs(dict): the input variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program()
......@@ -166,31 +169,38 @@ class RetinaNetResNet50FPN(hub.Module):
score_thresh=0.5,
visualization=True):
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param use_gpu: whether to use gpu or not.
:type use_gpu: bool
:param batch_size: bathc size.
:type batch_size: int
:param output_dir: the directory to store the detection result.
:type output_dir: str
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
:param visualization: whether to draw bounding box and save images.
:type visualization: bool
Args:
paths (list[str]): The paths of images.
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
output_dir (str): The path to store output images.
visualization (bool): Whether to save image or not.
score_thresh (float): threshold for object detecion.
visualization (bool): whether to save result as images.
Returns:
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
all_images = []
paths = paths if paths else []
all_images = list()
paths = paths if paths else list()
for yield_data in test_reader(paths, images):
all_images.append(yield_data)
images_num = len(all_images)
loop_num = int(np.ceil(images_num / batch_size))
res = []
res = list()
for iter_id in range(loop_num):
batch_data = []
batch_data = list()
handle_id = iter_id * batch_size
for image_id in range(batch_size):
try:
......@@ -248,7 +258,7 @@ class RetinaNetResNet50FPN(hub.Module):
help="file contain input data")
def check_input_data(self, args):
input_data = []
input_data = list()
if args.input_path:
input_data = [args.input_path]
elif args.input_file:
......@@ -258,6 +268,15 @@ class RetinaNetResNet50FPN(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images, one string per image.
        **kwargs: forwarded unchanged to ``object_detection``.

    Returns:
        The value returned by ``object_detection`` for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Must be passed by keyword: the first positional parameter of
    # object_detection is `paths`, so a positional call would treat the
    # decoded arrays as file paths.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable
def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser(
......
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
# Public names of this module. One name per line with a trailing comma, so
# that adjacent string literals can never be silently concatenated into a
# single bogus entry.
__all__ = [
    'get_save_image_name',
    'draw_bounding_box_on_image',
    'clip_bbox',
    'base64_to_cv2',
    'load_label_info',
    'postprocess',
]
def base64_to_cv2(b64str):
    """
    Decode a base64 string into an image via ``cv2.imdecode``.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value produced by ``cv2.imdecode(..., cv2.IMREAD_COLOR)``.
        NOTE(review): presumably None when the bytes are not a decodable
        image -- confirm against the OpenCV documentation.
    """
    data = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring in binary mode is deprecated; np.frombuffer interprets
    # the same bytes as uint8 and is the documented replacement.
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path.
"""
......@@ -80,24 +90,29 @@ def load_label_info(file_path):
def postprocess(paths, images, data_out, score_thresh, label_names, output_dir,
handle_id, visualization):
"""postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param data_out: data produced by executor.run
:type data_out: lod_tensor
:param score_thresh: the low limit of bounding box.
:type score_thresh: float
:param label_names: label names
:type label_names: list
:param output_dir: output directory.
:type output_dir: str
:param handle_id: The number of images that have been handled.
:type handle_id: int
:param visualization: whether to draw bbox.
:param visualization: bool
"""
postprocess the lod_tensor produced by fluid.Executor.run
Args:
paths (list[str]): the path of images.
images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
data_out (lod_tensor): data produced by executor.run.
score_thresh (float): the low limit of bounding box.
label_names (list[str]): label names.
output_dir (str): output directory.
handle_id (int): The number of images that have been handled.
visualization (bool): whether to save as images.
Returns:
res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
"""
lod_tensor = data_out[0]
lod = lod_tensor.lod[0]
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from ssd.data_feed import reader, DecodeImage, ResizeImage, NormalizeImage, Permute
from ssd.processor import load_label_info, postprocess
from ssd.multi_box_head import MultiBoxHead
from ssd.output_decoder import SSDOutputDecoder
@moduleinfo(
    name="ssd",
    version="1.0.0",
    type="cv/object_detection",
    summary=
    "SSD (Single Shot MultiBox Detector) is a object detection model, which trained with PASCAL VOC dataset.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class SSD(hub.Module):
    """
    Shared SSD building blocks exposed as a hub module.

    The module re-exports the reader, processor helpers, head/decoder
    settings classes and image transforms as attributes, and provides
    ``context`` to assemble the transfer-learning program.
    """

    def _initialize(self):
        # Expose the helper callables/classes as attributes so that users of
        # this module can reach them through the module instance.
        self.reader = reader
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        self.MultiBoxHead = MultiBoxHead
        self.SSDOutputDecoder = SSDOutputDecoder
        self.DecodeImage = DecodeImage
        self.ResizeImage = ResizeImage
        self.NormalizeImage = NormalizeImage
        self.Permute = Permute

    def context(self,
                body_feats,
                multi_box_head,
                ssd_output_decoder,
                image,
                trainable=True,
                var_prefix='',
                get_prediction=False):
        """
        Distill the Head Features, so as to perform transfer learning.

        Args:
            body_feats (list): feature maps output by the backbone.
            multi_box_head (MultiBoxHead): SSD head settings.
            ssd_output_decoder (SSDOutputDecoder): SSD output decoder settings.
            image (paddle.fluid.framework.Variable): image tensor.
            trainable (bool): whether to set parameters trainable.
            var_prefix (str): the prefix added to the variables in ssd.
            get_prediction (bool): whether to get prediction. If True the
                outputs key is 'bbox_out', otherwise it is 'body_features'.

        Returns:
            inputs (dict): keys 'image' and 'im_size'; values are the
                prefixed variables looked up in the program's global block.
            outputs (dict): one key ('bbox_out' or 'body_features') mapped
                to a list of variables from the program's global block.
            context_prog (Program): the program that owns ``image``.
        """
        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            # Names only at this point; they are resolved to variables after
            # the prefix has been applied to the program.
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            if not get_prediction:
                outputs = {
                    'body_features':
                    [var_prefix + var.name for var in body_feats]
                }
            else:
                locs, confs, box, box_var = fluid.layers.multi_box_head(
                    inputs=body_feats,
                    image=image,
                    base_size=multi_box_head.base_size,
                    num_classes=multi_box_head.num_classes,
                    aspect_ratios=multi_box_head.aspect_ratios,
                    min_ratio=multi_box_head.min_ratio,
                    max_ratio=multi_box_head.max_ratio,
                    min_sizes=multi_box_head.min_sizes,
                    max_sizes=multi_box_head.max_sizes,
                    steps=multi_box_head.steps,
                    offset=multi_box_head.offset,
                    flip=multi_box_head.flip,
                    kernel_size=multi_box_head.kernel_size,
                    pad=multi_box_head.pad,
                    min_max_aspect_ratios_order=multi_box_head.
                    min_max_aspect_ratios_order)
                pred = fluid.layers.detection_output(
                    loc=locs,
                    scores=confs,
                    prior_box=box,
                    prior_box_var=box_var,
                    nms_threshold=ssd_output_decoder.nms_threshold,
                    nms_top_k=ssd_output_decoder.nms_top_k,
                    keep_top_k=ssd_output_decoder.keep_top_k,
                    score_threshold=ssd_output_decoder.score_threshold,
                    nms_eta=ssd_output_decoder.nms_eta,
                    background_label=ssd_output_decoder.background_label)
                # Must be a list: the lookup below iterates over each value,
                # and iterating a bare string would walk its characters.
                outputs = {'bbox_out': [var_prefix + pred.name]}

            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)

            # Resolve the prefixed names to the actual program variables.
            global_vars = context_prog.global_block().vars
            inputs = {key: global_vars[value] for key, value in inputs.items()}
            outputs = {
                key: [global_vars[varname] for varname in value]
                for key, value in outputs.items()
            }

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            return inputs, outputs, context_prog
# coding=utf-8
class MultiBoxHead(object):
    """
    Plain container for multi-box head settings.

    Every constructor argument is stored unchanged under an attribute of the
    same name; no validation or conversion is performed.
    """

    # __op__ = fluid.layers.multi_box_head
    def __init__(self,
                 base_size,
                 num_classes,
                 aspect_ratios,
                 min_ratio=None,
                 max_ratio=None,
                 min_sizes=None,
                 max_sizes=None,
                 steps=None,
                 offset=0.5,
                 flip=True,
                 kernel_size=1,
                 pad=0,
                 min_max_aspect_ratios_order=False):
        # Required settings.
        self.base_size, self.num_classes = base_size, num_classes
        self.aspect_ratios = aspect_ratios
        # Optional ratio / size bounds.
        self.min_ratio, self.max_ratio = min_ratio, max_ratio
        self.min_sizes, self.max_sizes = min_sizes, max_sizes
        # Remaining options.
        self.steps, self.offset, self.flip = steps, offset, flip
        self.kernel_size, self.pad = kernel_size, pad
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
class SSDOutputDecoder(object):
    """
    Plain container for SSD output-decoding settings.

    Every constructor argument is stored unchanged under an attribute of the
    same name; no validation or conversion is performed.
    """

    # __op__ = fluid.layers.detection_output
    def __init__(self,
                 nms_threshold=0.3,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0):
        self.nms_threshold, self.background_label = (nms_threshold,
                                                     background_label)
        self.nms_top_k, self.keep_top_k = nms_top_k, keep_top_k
        self.score_threshold, self.nms_eta = score_threshold, nms_eta
```shell
$ hub install ssd_mobilenet_v1_pascal==1.1.0
```
## 命令行预测
```
hub run ssd_mobilenet_v1_pascal --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'body\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 保存模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_mobilenet_v1_pascal")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_mobilenet_v1_pascal
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_mobilenet_v1_pascal"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
MobileNet:
norm_decay: 0.
conv_group_scale: 1
conv_learning_rate: 0.1
extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
with_extra_blocks: True
SSDOutputDecoder:
background_label: 0
keep_top_k: 200
......@@ -9,7 +16,7 @@ SSDOutputDecoder:
MultiBoxHead:
aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
base_size: 300
flip: true
flip: True
max_ratio: 90
max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
min_ratio: 20
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import random
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
from paddle import fluid
__all__ = ['reader']
class DecodeImage(object):
    """Optional BGR -> RGB colour conversion step of the preprocessing chain."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """
        Store the conversion options.

        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): stored on the instance; not read by __call__.
        """
        self.to_rgb = to_rgb
        self.with_mixup = with_mixup

    def __call__(self, im):
        """Return `im` converted with cv2.COLOR_BGR2RGB, or `im` itself when to_rgb is off."""
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Resize step of the preprocessing chain (cv2 or PIL backed)."""

    def __init__(self,
                 target_size=0,
                 max_size=0,
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        Rescale image to the specified target size, and capped at max_size
        if max_size != 0.
        If target_size is list, selected a scale randomly as the specified
        target size.

        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
            max_size (int): the max size of image
            interp (int): the interpolation method
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        """
        Resize a 3-dimensional numpy image.

        With max_size != 0 the aspect ratio is kept: the short side is scaled
        towards the target size, and the scale is reduced if the long side
        would exceed max_size. With max_size == 0 both sides are scaled to
        the target size independently.

        Raises:
            TypeError: `im` is not a numpy array, or max_size is set while
                use_cv2 is False.
            ValueError: `im` is not 3-dimensional.
            ZeroDivisionError: the shorter side of `im` has length 0.
        """
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        height, width = im.shape[0], im.shape[1]
        short_side = np.min(im.shape[0:2])
        long_side = np.max(im.shape[0:2])

        # A list of target sizes means multi-scale: pick one at random.
        if isinstance(self.target_size, list):
            chosen_size = random.choice(self.target_size)
        else:
            chosen_size = self.target_size

        if float(short_side) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        keep_ratio = self.max_size != 0
        if keep_ratio:
            scale = float(chosen_size) / float(short_side)
            # Prevent the biggest axis from being more than max_size
            if np.round(scale * long_side) > self.max_size:
                scale = float(self.max_size) / float(long_side)
            scale_x = scale_y = scale
            out_w = scale_x * float(width)
            out_h = scale_y * float(height)
        else:
            scale_x = float(chosen_size) / float(width)
            scale_y = float(chosen_size) / float(height)
            out_w = out_h = chosen_size

        if self.use_cv2:
            return cv2.resize(
                im,
                None,
                None,
                fx=scale_x,
                fy=scale_y,
                interpolation=self.interp)

        # PIL path: only supported without the max_size cap.
        if keep_ratio:
            raise TypeError(
                'If you set max_size to cap the maximum size of image,'
                'please set use_cv2 to True to resize the image.')
        pil_image = Image.fromarray(im.astype('uint8'))
        pil_image = pil_image.resize((int(out_w), int(out_h)), self.interp)
        return np.array(pil_image)
class NormalizeImage(object):
    """Normalization step of the preprocessing chain."""

    def __init__(self,
                 mean=(0.485, 0.456, 0.406),
                 std=(1, 1, 1),
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): per-channel value subtracted from the image.
            std (list): per-channel value the image is divided by.
            is_scale (bool): whether to divide the image by 255.0 first.
            is_channel_first (bool): True when the image passed to __call__
                has its channel axis first, False when it is last.
        """
        # Copy into fresh lists so instances never share (or expose) the
        # default sequences.
        self.mean = list(mean)
        self.std = list(std)
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """
        Normalize the image and return it as a new float32 array.

        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std

        The input array is never modified.
        """
        # The in-place arithmetic below must not touch the caller's data.
        # When scaling is on, the division already yields a fresh array, so a
        # copy is only forced when scaling is off.
        im = im.astype(np.float32, copy=not self.is_scale)
        if self.is_channel_first:
            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
            std = np.array(self.std)[:, np.newaxis, np.newaxis]
        else:
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            im = im / 255.0
        im -= mean
        im /= std
        return im
class Permute(object):
    """Axis / channel reordering step of the preprocessing chain."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.

        Args:
            to_bgr (bool): confirm whether to convert RGB to BGR
            channel_first (bool): confirm whether to change channel
        """
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im):
        """Move axis 2 to the front when channel_first is set, then reverse
        the first three entries of axis 0 when to_bgr is set."""
        result = im
        if self.channel_first:
            # Same axis order as swapaxes(1, 2) followed by swapaxes(1, 0).
            result = np.moveaxis(result, 2, 0)
        if self.to_bgr:
            result = result[[2, 1, 0], :, :]
        return result
def reader(paths=None,
           images=None,
           decode_image=None,
           resize_image=None,
           permute_image=None,
           normalize_image=None):
    """
    data generator

    Each transform argument is honoured when supplied. Any transform left as
    None falls back to the preprocessing this reader has always applied
    (300x300 PIL resize, default Permute, 127.5-centred normalization).

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (callable): first transform applied to each image;
            defaults to DecodeImage(to_rgb=True, with_mixup=False).
        resize_image (callable): second transform; defaults to
            ResizeImage(target_size=300, interp=1, max_size=0, use_cv2=False).
        permute_image (callable): third transform; defaults to Permute().
        normalize_image (callable): fourth transform; defaults to
            NormalizeImage with mean 127.5 and std 127.502231 per channel,
            is_scale=False.

    Yield:
        list: a one-element list holding the preprocessed image.
    """
    # Build the fallback transforms per call instead of at definition time,
    # so no transform instance is shared between calls.
    if decode_image is None:
        decode_image = DecodeImage(to_rgb=True, with_mixup=False)
    if resize_image is None:
        resize_image = ResizeImage(
            target_size=300, interp=1, max_size=0, use_cv2=False)
    if permute_image is None:
        permute_image = Permute()
    if normalize_image is None:
        normalize_image = NormalizeImage(
            mean=[127.5, 127.5, 127.5],
            std=[127.502231, 127.502231, 127.502231],
            is_scale=False)

    img_list = []
    if paths is not None:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path).astype('float32')
            img_list.append(img)
    if images is not None:
        img_list.extend(images)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import ast
import argparse
import os
from functools import partial
import yaml
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
import yaml
from paddlehub.module.module import moduleinfo, runnable, serving
from paddlehub.common.paddle_helper import add_vars_prefix
from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
from ssd_mobilenet_v1_pascal.processor import load_label_info, postprocess, base64_to_cv2
from ssd_mobilenet_v1_pascal.data_feed import reader
@moduleinfo(
......@@ -25,25 +25,18 @@ from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
type="cv/object_detection",
summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
author_email="")
class SSDMobileNetv1(hub.Module):
def _initialize(self):
self.ssd = hub.Module(name="ssd")
# default pretrained model of SSD_MobileNet_V1_VOC, the shape of image tensor is (3, 300, 300)
self.default_pretrained_model_path = os.path.join(
self.directory, "ssd_mobilenet_v1_model")
self.label_names = self.ssd.load_label_info(
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None
self.image = None
self.bbox_out = None
self.model_config = None
self._set_config()
self._config = None
def _set_config(self):
"""
predictor config setting
"""
# predictor config setting.
cpu_config = AnalysisConfig(self.default_pretrained_model_path)
cpu_config.disable_glog_info()
cpu_config.disable_gpu()
......@@ -62,51 +55,92 @@ class SSDMobileNetv1(hub.Module):
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self,
num_classes=21,
trainable=True,
pretrained=True,
get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning.
# model config setting.
if not self.model_config:
with open(os.path.join(self.directory, 'config.yml')) as fp:
self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)
self.multi_box_head_config = self.model_config['MultiBoxHead']
self.output_decoder_config = self.model_config['SSDOutputDecoder']
self.mobilenet_config = self.model_config['MobileNet']
def context(self, trainable=True, pretrained=True, get_prediction=False):
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'body_features': body_features}.
:type get_prediction: bool
Returns:
inputs(dict): the input variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
wrapped_prog = fluid.Program()
context_prog = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program):
with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard():
# image
image = fluid.layers.data(
name='image', shape=[3, 300, 300], dtype='float32')
backbone = MobileNet(
norm_decay=0.,
conv_group_scale=1,
conv_learning_rate=0.1,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]],
with_extra_blocks=True)
# backbone
backbone = MobileNet(**self.mobilenet_config)
# body_feats
body_feats = backbone(image)
# call ssd.context
inputs, outputs, context_prog = self.ssd.context(
body_feats=body_feats,
multi_box_head=self.ssd.MultiBoxHead(
num_classes=num_classes, **self.multi_box_head_config),
ssd_output_decoder=self.ssd.SSDOutputDecoder(
**self.output_decoder_config),
image=image,
trainable=trainable,
var_prefix='@HUB_{}@'.format(self.name),
get_prediction=get_prediction)
# im_size
im_size = fluid.layers.data(
name='im_size', shape=[2], dtype='int32')
# var_prefix
var_prefix = '@HUB_{}@'.format(self.name)
# names of inputs
inputs = {
'image': var_prefix + image.name,
'im_size': var_prefix + im_size.name
}
# names of outputs
if get_prediction:
locs, confs, box, box_var = fluid.layers.multi_box_head(
inputs=body_feats,
image=image,
num_classes=21,
**self.multi_box_head_config)
pred = fluid.layers.detection_output(
loc=locs,
scores=confs,
prior_box=box,
prior_box_var=box_var,
**self.output_decoder_config)
outputs = {'bbox_out': [var_prefix + pred.name]}
else:
outputs = {
'body_features':
[var_prefix + var.name for var in body_feats]
}
# add_vars_prefix
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(fluid.default_startup_program(), var_prefix)
# inputs
inputs = {
key: context_prog.global_block().vars[value]
for key, value in inputs.items()
}
outputs = {
out_key: [
context_prog.global_block().vars[varname]
for varname in out_value
]
for out_key, out_value in outputs.items()
}
# trainable
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# pretrained
if pretrained:
def _if_exist(var):
......@@ -120,82 +154,56 @@ class SSDMobileNetv1(hub.Module):
predicate=_if_exist)
else:
exe.run(startup_program)
return inputs, outputs, context_prog
@property
def config(self):
    """Contents of ``config.yml`` in the module directory.

    The file is read and parsed the first time it is needed and the result
    is kept on ``self._config``; a falsy cached value triggers a reload.
    """
    if self._config:
        return self._config
    config_path = os.path.join(self.directory, 'config.yml')
    with open(config_path) as fp:
        self._config = yaml.load(fp.read(), Loader=yaml.FullLoader)
    return self._config
@property
def multi_box_head_config(self):
    """The 'MultiBoxHead' section of the module configuration."""
    settings = self.config
    return settings['MultiBoxHead']
@property
def output_decoder_config(self):
    """The 'SSDOutputDecoder' section of the module configuration."""
    settings = self.config
    return settings['SSDOutputDecoder']
return inputs, outputs, context_prog
def object_detection(self,
paths=None,
images=None,
data=None,
use_gpu=False,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True):
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param use_gpu: whether to use gpu or not.
:type use_gpu: bool
:param batch_size: bathc size.
:type batch_size: int
:param output_dir: the directory to store the detection result.
:type output_dir: str
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
:param visualization: whether to draw bounding box and save images.
:type visualization: bool
Args:
paths (list[str]): The paths of images.
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
output_dir (str): The path to store output images.
visualization (bool): Whether to save image or not.
score_thresh (float): threshold for object detecion.
Returns:
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
paths = paths if paths else list()
if data and 'image' in data:
paths = data['image'] if not paths else paths + data['image']
decode_image = self.ssd.DecodeImage(to_rgb=True, with_mixup=False)
resize_image = self.ssd.ResizeImage(
target_size=300, interp=1, max_size=0, use_cv2=False)
permute_image = self.ssd.Permute()
normalize_image = self.ssd.NormalizeImage(
mean=[127.5, 127.5, 127.5],
std=[127.502231, 127.502231, 127.502231],
is_scale=False)
data_reader = partial(
self.ssd.reader,
paths,
images,
decode_image=decode_image,
resize_image=resize_image,
permute_image=permute_image,
normalize_image=normalize_image)
paths += data['image']
data_reader = partial(reader, paths, images)
batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
paths = paths if paths else []
res = []
for iter_id, feed_data in enumerate(batch_reader()):
np_data = np.array(feed_data).astype('float32')
if np_data.shape == 1:
np_data = np_data[0]
else:
np_data = np.squeeze(np_data, axis=1)
data_tensor = PaddleTensor(np_data.copy())
feed_data = np.array(feed_data)
image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
if use_gpu:
data_out = self.gpu_predictor.run([data_tensor])
data_out = self.gpu_predictor.run([image_tensor])
else:
data_out = self.cpu_predictor.run([data_tensor])
output = self.ssd.postprocess(
data_out = self.cpu_predictor.run([image_tensor])
output = postprocess(
paths=paths,
images=images,
data_out=data_out,
......@@ -204,55 +212,49 @@ class SSDMobileNetv1(hub.Module):
output_dir=output_dir,
handle_id=iter_id * batch_size,
visualization=visualization)
res += output
res.extend(output)
return res
def add_module_config_arg(self):
"""
Add the command config options
"""
self.arg_config_group.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=False,
help="whether use GPU or not")
def save_inference_model(self,
dirname,
model_filename=None,
params_filename=None,
combined=True):
if combined:
model_filename = "__model__" if not model_filename else model_filename
params_filename = "__params__" if not params_filename else params_filename
place = fluid.CPUPlace()
exe = fluid.Executor(place)
self.arg_config_group.add_argument(
'--batch_size',
type=int,
default=1,
help="batch size for prediction")
program, feeded_var_names, target_vars = fluid.io.load_inference_model(
dirname=self.default_pretrained_model_path, executor=exe)
def add_module_input_arg(self):
fluid.io.save_inference_model(
dirname=dirname,
main_program=program,
executor=exe,
feeded_var_names=feeded_var_names,
target_vars=target_vars,
model_filename=model_filename,
params_filename=params_filename)
@serving
def serving_method(self, images, **kwargs):
"""
Add the command input options
Run as a service.
"""
self.arg_input_group.add_argument(
'--input_path', type=str, default=None, help="input data")
self.arg_input_group.add_argument(
'--input_file',
type=str,
default=None,
help="file contain input data")
def check_input_data(self, args):
input_data = []
if args.input_path:
input_data = [args.input_path]
elif args.input_file:
if not os.path.exists(args.input_file):
raise RuntimeError("File %s is not exist." % args.input_file)
else:
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
images_decode = [base64_to_cv2(image) for image in images]
results = self.object_detection(images_decode, **kwargs)
return results
@runnable
def run_cmd(self, argvs):
"""
Run as a command.
"""
self.parser = argparse.ArgumentParser(
description="Run the {}".format(self.name),
prog="hub run {}".format(self.name),
description="Run the {} module.".format(self.name),
prog='hub run {}'.format(self.name),
usage='%(prog)s',
add_help=True)
self.arg_input_group = self.parser.add_argument_group(
......@@ -262,17 +264,50 @@ class SSDMobileNetv1(hub.Module):
description=
"Run configuration for controlling module behavior, not required.")
self.add_module_config_arg()
self.add_module_input_arg()
args = self.parser.parse_args(argvs)
input_data = self.check_input_data(args)
if len(input_data) == 0:
self.parser.print_help()
exit(1)
else:
for image_path in input_data:
if not os.path.exists(image_path):
raise RuntimeError(
"File %s or %s is not exist." % image_path)
return self.object_detection(
paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
results = self.face_detection(
paths=[args.input_path],
batch_size=args.batch_size,
use_gpu=args.use_gpu,
output_dir=args.output_dir,
visualization=args.visualization,
score_thresh=args.score_thresh)
return results
def add_module_config_arg(self):
    """
    Register the command-line config options on self.arg_config_group:
    --use_gpu, --output_dir and --visualization.
    """
    option_specs = (
        ('--use_gpu',
         dict(
             type=ast.literal_eval,
             default=False,
             help="whether use GPU or not")),
        ('--output_dir',
         dict(
             type=str,
             default='detection_result',
             help="The directory to save output images.")),
        ('--visualization',
         dict(
             type=ast.literal_eval,
             default=False,
             help="whether to save output as images.")),
    )
    group = self.arg_config_group
    for flag, spec in option_specs:
        group.add_argument(flag, **spec)
def add_module_input_arg(self):
    """
    Register the command-line input options on self.arg_input_group:
    --input_path, --batch_size and --score_thresh.
    """
    option_specs = (
        ('--input_path', dict(type=str, help="path to image.")),
        ('--batch_size',
         dict(type=ast.literal_eval, default=1, help="batch size.")),
        ('--score_thresh',
         dict(
             type=ast.literal_eval,
             default=0.5,
             help="threshold for object detecion.")),
    )
    group = self.arg_input_group
    for flag, spec in option_specs:
        group.add_argument(flag, **spec)
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """
    Decode a base64 string holding an encoded image file into an OpenCV image.

    Args:
        b64str (str): base64 text of the encoded image bytes.

    Returns:
        The value returned by cv2.imdecode(..., cv2.IMREAD_COLOR) for the
        decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported replacement for the deprecated binary
    # mode of np.fromstring.
    pixel_buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(pixel_buffer, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for an image, creating output_dir if needed.

    The base name comes from image_path. The extension is chosen from
    img.format (PNG/JPEG/BMP) when it matches, otherwise from img.mode
    (RGB/L -> .jpg, RGBA/P -> .png); if neither matches, the extension of
    image_path is kept.
    """
    format_to_ext = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    mode_to_ext = {'RGB': '.jpg', 'L': '.jpg', 'RGBA': '.png', 'P': '.png'}

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, ext = os.path.splitext(os.path.basename(image_path))
    if img.format in format_to_ext:
        ext = format_to_ext[img.format]
    elif img.mode in mode_to_ext:
        ext = mode_to_ext[img.mode]
    return os.path.join(output_dir, "{}".format(stem)) + ext
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw detection boxes (plus labels on RGB images) and save the result.

    Args:
        image_path (str): path of the image to open and annotate.
        data_list (list[dict]): detections; each dict provides 'left',
            'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image, as built by get_save_image_name.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox as a closed red polyline
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label (only for RGB images, since the fill colours are RGB tuples)
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textsize'):
                textsize_width, textsize_height = draw.textsize(text=text)
            else:
                # ImageDraw.textsize is not available in newer Pillow
                # releases; measure the text through textbbox instead.
                text_box = draw.textbbox((0, 0), text)
                textsize_width = text_box[2] - text_box[0]
                textsize_height = text_box[3] - text_box[1]
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))
    save_name = get_save_image_name(image, save_dir, image_path)
    # Remove a stale file with the same name before writing the new one.
    if os.path.exists(save_name):
        os.remove(save_name)
    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box (xmin, ymin, xmax, ymax) to the image bounds.

    x values are limited to [0, img_width] and y values to [0, img_height].

    Returns:
        tuple: (xmin, ymin, xmax, ymax) after clamping.
    """

    def _clamp(value, upper):
        return max(min(value, upper), 0.)

    return (_clamp(bbox[0], img_width), _clamp(bbox[1], img_height),
            _clamp(bbox[2], img_width), _clamp(bbox[3], img_height))
def load_label_info(file_path):
    """
    Read label names from a text file, one per line.

    Each line is stripped of surrounding whitespace; blank lines are kept
    as empty strings so that list positions match line numbers.
    """
    with open(file_path, 'r') as label_file:
        raw_lines = label_file.readlines()
    return [line.strip() for line in raw_lines]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Convert one batch of predictor output into per-image detection results.

    Inputs are consumed in the order "all paths, then all images", so the
    image at batch position ``index`` is input number ``handle_id + index``
    of that combined sequence.

    Args:
        paths (list[str]): the path of images.
        images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
        data_out (lod_tensor): predictor output; data_out[0] must expose
            ``lod`` and ``as_ndarray()``.
        score_thresh (float): detections scoring below this value are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): output directory.
        handle_id (int): The number of images that have been handled.
        visualization (bool): whether to save as images.

    Returns:
        res (list[dict]): one dict per image of the batch, with keys:
            data (list[dict]): detections, each with keys:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            path (str): source path, present only for images given by path.
            save_path (str): annotated image path, present only when
                visualization is True.
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Index into the combined (paths + images) input sequence. Using the
        # batch-local index alone would make every batch that starts past
        # the end of `paths` read images[0..] again.
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel axis before handing the array to PIL.
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no path; save a copy so the drawing
                # step has a file to open.
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        global_index))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width
        # lod holds the row offsets of each image's detections in `results`.
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            # Box values are scaled by the original image size.
            bbox = row[2:]
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
```shell
$ hub install ssd_vgg16_300_coco2017==1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'body\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (float): 边界框的左上角x坐标;
* top (float): 边界框的左上角y坐标;
* right (float): 边界框的右下角x坐标;
* bottom (float): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_vgg16_300_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_vgg16_300_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量,否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import random
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
from paddle import fluid
__all__ = ['reader']
class DecodeImage(object):
    """Optionally convert a BGR image array to RGB."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """
        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): stored on the instance; not read by __call__
        """
        self.to_rgb = to_rgb
        self.with_mixup = with_mixup

    def __call__(self, im):
        # Without conversion the input object is returned untouched.
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Resize an [H, W, C] image array with either cv2 or PIL."""

    def __init__(self,
                 target_size=0,
                 max_size=0,
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        When max_size != 0 the short side is scaled to the target size with
        the long side capped at max_size (aspect ratio kept). When
        max_size == 0 the image is resized to target_size x target_size.
        If target_size is a list, one entry is picked at random per call.

        Args:
            target_size (int|list): the target size, or a list of candidates.
            max_size (int): the max size of image
            interp (int): the interpolation method
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))

        height, width = im.shape[0], im.shape[1]
        short_side = np.min((height, width))
        long_side = np.max((height, width))

        # Case for multi-scale training: pick one of the candidate sizes.
        selected_size = (random.choice(self.target_size) if isinstance(
            self.target_size, list) else self.target_size)

        if float(short_side) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))

        if self.max_size != 0:
            scale = float(selected_size) / float(short_side)
            # Prevent the biggest axis from being more than max_size
            if np.round(scale * long_side) > self.max_size:
                scale = float(self.max_size) / float(long_side)
            scale_x = scale
            scale_y = scale
            resize_w = scale_x * float(width)
            resize_h = scale_y * float(height)
        else:
            scale_x = float(selected_size) / float(width)
            scale_y = float(selected_size) / float(height)
            resize_w = selected_size
            resize_h = selected_size

        if self.use_cv2:
            return cv2.resize(
                im,
                None,
                None,
                fx=scale_x,
                fy=scale_y,
                interpolation=self.interp)

        if self.max_size != 0:
            raise TypeError(
                'If you set max_size to cap the maximum size of image,'
                'please set use_cv2 to True to resize the image.')
        # PIL path: go through a uint8 PIL image and back to an array.
        pil_image = Image.fromarray(im.astype('uint8'))
        pil_image = pil_image.resize((int(resize_w), int(resize_h)),
                                     self.interp)
        return np.array(pil_image)
class NormalizeImage(object):
    """Scale (optionally) and normalize an image per channel."""

    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): per-channel value subtracted from the image
            std (list): per-channel value the image is divided by
            is_scale (bool): divide by 255.0 before normalizing
            is_channel_first (bool): True for [C, H, W] input, False for
                [H, W, C] input
        """
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """Normalize the image.

        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std
        """
        im = im.astype(np.float32, copy=False)
        # Index that places the channel values on the image's channel axis.
        if self.is_channel_first:
            channel_index = (slice(None), np.newaxis, np.newaxis)
        else:
            channel_index = (np.newaxis, np.newaxis, slice(None))
        mean = np.array(self.mean)[channel_index]
        std = np.array(self.std)[channel_index]
        if self.is_scale:
            im = im / 255.0
        # In-place arithmetic: a float32 input that was not rescaled is
        # modified directly.
        im -= mean
        im /= std
        return im
class Permute(object):
    """Move the channel axis first and optionally reverse channel order."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Args:
            to_bgr (bool): whether to reverse the order of the first three
                entries of axis 0 (RGB <-> BGR once channels are first)
            channel_first (bool): whether to turn [H, W, C] into [C, H, W]
        """
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im):
        permuted = im
        if self.channel_first:
            # [H, W, C] -> [H, C, W] -> [C, H, W]
            permuted = np.swapaxes(np.swapaxes(permuted, 1, 2), 1, 0)
        if self.to_bgr:
            permuted = permuted[[2, 1, 0], :, :]
        return permuted
def reader(paths=[],
           images=None,
           decode_image=DecodeImage(to_rgb=True, with_mixup=False),
           resize_image=ResizeImage(
               target_size=512, interp=1, max_size=0, use_cv2=False),
           permute_image=Permute(to_bgr=False),
           normalize_image=NormalizeImage(
               mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
    """
    Data generator: yields one preprocessed image per input, paths first
    and then in-memory images.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (class object): instance of <class 'DecodeImage' object>
        resize_image (class object): instance of <class 'ResizeImage' object>;
            NOTE: currently replaced inside the function by a fixed
            300-pixel resize, so the passed value is not used.
        permute_image (class object): instance of <class 'Permute' object>
        normalize_image (class object): instance of <class 'NormalizeImage' object>

    Yields:
        list: a one-element list holding the preprocessed image.

    Raises:
        AssertionError: if paths is not a list or a path is not a file.
        ValueError: if a file exists but cv2 cannot decode it as an image.
    """
    img_list = []
    if paths is not None:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            # cv2.imread signals an undecodable file by returning None
            # rather than raising; fail with a clear message instead of an
            # AttributeError on the next line.
            if img is None:
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    # NOTE(review): this overrides the resize_image argument with a fixed
    # 300x300 resize; kept as-is so existing callers see the same output.
    resize_image = ResizeImage(
        target_size=300, interp=1, max_size=0, use_cv2=False)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import ast
import argparse
import os
from functools import partial
import yaml
import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser
import yaml
from paddlehub.module.module import moduleinfo, runnable, serving
from paddlehub.common.paddle_helper import add_vars_prefix
from ssd_vgg16_300_coco2017.vgg import VGG
from ssd_vgg16_300_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from ssd_vgg16_300_coco2017.data_feed import reader
@moduleinfo(
......@@ -25,28 +25,22 @@ from ssd_vgg16_300_coco2017.vgg import VGG
type="cv/object_detection",
summary="SSD with backbone VGG16, trained with dataset COCO.",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
author_email="")
class SSDVGG16(hub.Module):
def _initialize(self):
self.ssd = hub.Module(name="ssd")
# default pretrained model of SSD_VGG16, the shape of image tensor is (3, 300, 300)
self.default_pretrained_model_path = os.path.join(
self.directory, "ssd_vgg16_300_model")
self.label_names = self.ssd.load_label_info(
self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None
self.image = None
self.bbox_out = None
self.model_config = None
self._set_config()
self._config = None
def _set_config(self):
"""
predictor config setting
"""
# predictor config setting.
cpu_config = AnalysisConfig(self.default_pretrained_model_path)
cpu_config.disable_glog_info()
cpu_config.disable_gpu()
cpu_config.switch_ir_optim(False)
self.cpu_predictor = create_paddle_predictor(cpu_config)
try:
......@@ -61,25 +55,31 @@ class SSDVGG16(hub.Module):
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self,
num_classes=81,
trainable=True,
pretrained=True,
get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning.
# model config setting.
if not self.model_config:
with open(os.path.join(self.directory, 'config.yml')) as fp:
self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)
self.multi_box_head_config = self.model_config['MultiBoxHead']
self.output_decoder_config = self.model_config['SSDOutputDecoder']
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
def context(self, trainable=True, pretrained=True, get_prediction=False):
"""
wrapped_prog = fluid.Program()
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
Returns:
inputs(dict): the input variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program):
with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard():
# image
image = fluid.layers.data(
......@@ -89,21 +89,60 @@ class SSDVGG16(hub.Module):
depth=16,
with_extra_blocks=True,
normalizations=[20., -1, -1, -1, -1, -1])
# body_feats
body_feats = backbone(image)
# call ssd.context
inputs, outputs, context_prog = self.ssd.context(
body_feats=body_feats,
multi_box_head=self.ssd.MultiBoxHead(
num_classes=num_classes, **self.multi_box_head_config),
ssd_output_decoder=self.ssd.SSDOutputDecoder(
**self.output_decoder_config),
image=image,
trainable=trainable,
var_prefix='@HUB_{}@'.format(self.name),
get_prediction=get_prediction)
# im_size
im_size = fluid.layers.data(
name='im_size', shape=[2], dtype='int32')
# var_prefix
var_prefix = '@HUB_{}@'.format(self.name)
# names of inputs
inputs = {
'image': var_prefix + image.name,
'im_size': var_prefix + im_size.name
}
# names of outputs
if get_prediction:
locs, confs, box, box_var = fluid.layers.multi_box_head(
inputs=body_feats,
image=image,
num_classes=81,
**self.multi_box_head_config)
pred = fluid.layers.detection_output(
loc=locs,
scores=confs,
prior_box=box,
prior_box_var=box_var,
**self.output_decoder_config)
outputs = {'bbox_out': [var_prefix + pred.name]}
else:
outputs = {
'body_features':
[var_prefix + var.name for var in body_feats]
}
# add_vars_prefix
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(fluid.default_startup_program(), var_prefix)
# inputs
inputs = {
key: context_prog.global_block().vars[value]
for key, value in inputs.items()
}
outputs = {
out_key: [
context_prog.global_block().vars[varname]
for varname in out_value
]
for out_key, out_value in outputs.items()
}
# trainable
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace()
exe = fluid.Executor(place)
# pretrained
if pretrained:
def _if_exist(var):
......@@ -117,67 +156,52 @@ class SSDVGG16(hub.Module):
predicate=_if_exist)
else:
exe.run(startup_program)
return inputs, outputs, context_prog
@property
def config(self):
    """Return the module configuration, reading config.yml on first access.

    The parsed content of ``<self.directory>/config.yml`` is stored in
    ``self._config`` and reused while that attribute is truthy.
    """
    if self._config:
        return self._config
    config_path = os.path.join(self.directory, 'config.yml')
    with open(config_path) as fp:
        self._config = yaml.load(fp.read(), Loader=yaml.FullLoader)
    return self._config
@property
def multi_box_head_config(self):
    """Return the 'MultiBoxHead' section of the module configuration."""
    head_section = self.config['MultiBoxHead']
    return head_section
@property
def output_decoder_config(self):
    """Return the 'SSDOutputDecoder' section of the module configuration."""
    decoder_section = self.config['SSDOutputDecoder']
    return decoder_section
return inputs, outputs, context_prog
def object_detection(self,
paths=None,
images=None,
use_gpu=False,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True):
"""API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param use_gpu: whether to use gpu or not.
:type use_gpu: bool
:param batch_size: bathc size.
:type batch_size: int
:param output_dir: the directory to store the detection result.
:type output_dir: str
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float
:param visualization: whether to draw bounding box and save images.
:type visualization: bool
Args:
paths (list[str]): The paths of images.
images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
batch_size (int): batch size.
use_gpu (bool): Whether to use gpu.
output_dir (str): The path to store output images.
visualization (bool): Whether to save image or not.
score_thresh (float): threshold for object detecion.
Returns:
res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
"""
resize_image = self.ssd.ResizeImage(
target_size=300, interp=1, max_size=0, use_cv2=False)
data_reader = partial(
self.ssd.reader, paths, images, resize_image=resize_image)
paths = paths if paths else list()
data_reader = partial(reader, paths, images)
batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
paths = paths if paths else []
res = []
for iter_id, feed_data in enumerate(batch_reader()):
np_data = np.array(feed_data).astype('float32')
if np_data.shape == 1:
np_data = np_data[0]
else:
np_data = np.squeeze(np_data, axis=1)
data_tensor = PaddleTensor(np_data.copy())
feed_data = np.array(feed_data)
image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
if use_gpu:
data_out = self.gpu_predictor.run([data_tensor])
data_out = self.gpu_predictor.run([image_tensor])
else:
data_out = self.cpu_predictor.run([data_tensor])
output = self.ssd.postprocess(
data_out = self.cpu_predictor.run([image_tensor])
output = postprocess(
paths=paths,
images=images,
data_out=data_out,
......@@ -186,53 +210,49 @@ class SSDVGG16(hub.Module):
output_dir=output_dir,
handle_id=iter_id * batch_size,
visualization=visualization)
res += output
res.extend(output)
return res
def add_module_config_arg(self):
"""
Add the command config options
"""
self.arg_config_group.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=False,
help="whether use GPU or not")
def save_inference_model(self,
dirname,
model_filename=None,
params_filename=None,
combined=True):
if combined:
model_filename = "__model__" if not model_filename else model_filename
params_filename = "__params__" if not params_filename else params_filename
place = fluid.CPUPlace()
exe = fluid.Executor(place)
self.arg_config_group.add_argument(
'--batch_size',
type=int,
default=1,
help="batch size for prediction")
program, feeded_var_names, target_vars = fluid.io.load_inference_model(
dirname=self.default_pretrained_model_path, executor=exe)
def add_module_input_arg(self):
fluid.io.save_inference_model(
dirname=dirname,
main_program=program,
executor=exe,
feeded_var_names=feeded_var_names,
target_vars=target_vars,
model_filename=model_filename,
params_filename=params_filename)
@serving
def serving_method(self, images, **kwargs):
"""
Add the command input options
Run as a service.
"""
self.arg_input_group.add_argument(
'--input_path', type=str, default=None, help="input data")
self.arg_input_group.add_argument(
'--input_file',
type=str,
default=None,
help="file contain input data")
def check_input_data(self, args):
input_data = []
if args.input_path:
input_data = [args.input_path]
elif args.input_file:
if not os.path.exists(args.input_file):
raise RuntimeError("File %s is not exist." % args.input_file)
else:
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
images_decode = [base64_to_cv2(image) for image in images]
results = self.object_detection(images_decode, **kwargs)
return results
@runnable
def run_cmd(self, argvs):
"""
Run as a command.
"""
self.parser = argparse.ArgumentParser(
description="Run the {}".format(self.name),
prog="hub run {}".format(self.name),
description="Run the {} module.".format(self.name),
prog='hub run {}'.format(self.name),
usage='%(prog)s',
add_help=True)
self.arg_input_group = self.parser.add_argument_group(
......@@ -242,18 +262,50 @@ class SSDVGG16(hub.Module):
description=
"Run configuration for controlling module behavior, not required.")
self.add_module_config_arg()
self.add_module_input_arg()
args = self.parser.parse_args(argvs)
input_path = args.input_path
input_data = self.check_input_data(args)
if len(input_data) == 0:
self.parser.print_help()
exit(1)
else:
for image_path in input_data:
if not os.path.exists(image_path):
raise RuntimeError(
"File %s or %s is not exist." % image_path)
return self.object_detection(
paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size)
results = self.face_detection(
paths=[args.input_path],
batch_size=args.batch_size,
use_gpu=args.use_gpu,
output_dir=args.output_dir,
visualization=args.visualization,
score_thresh=args.score_thresh)
return results
def add_module_config_arg(self):
    """
    Register the optional run-configuration flags on ``self.arg_config_group``.

    The flags are ``--use_gpu``, ``--output_dir`` and ``--visualization``;
    the two boolean flags are parsed from their literal text with
    ``ast.literal_eval``.
    """
    # (flag, keyword arguments) pairs, registered in this exact order.
    option_specs = (
        ('--use_gpu',
         dict(
             type=ast.literal_eval,
             default=False,
             help="whether use GPU or not")),
        ('--output_dir',
         dict(
             type=str,
             default='detection_result',
             help="The directory to save output images.")),
        ('--visualization',
         dict(
             type=ast.literal_eval,
             default=False,
             help="whether to save output as images.")),
    )
    for flag, spec in option_specs:
        self.arg_config_group.add_argument(flag, **spec)
def add_module_input_arg(self):
    """
    Register the input-related flags on ``self.arg_input_group``.

    Flags added:
        --input_path (str): path to the image; no default is set, so the
            parsed value is None when the flag is omitted.
        --batch_size: parsed with ``ast.literal_eval``; defaults to 1.
        --score_thresh: parsed with ``ast.literal_eval``; defaults to 0.5.
    """
    self.arg_input_group.add_argument(
        '--input_path', type=str, help="path to image.")
    self.arg_input_group.add_argument(
        '--batch_size',
        type=ast.literal_eval,
        default=1,
        help="batch size.")
    # The help text previously read "detecion"; the typo showed up in
    # the `hub run ... --help` output.
    self.arg_input_group.add_argument(
        '--score_thresh',
        type=ast.literal_eval,
        default=0.5,
        help="threshold for object detection.")
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the
        decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring on binary data is deprecated (and rejected by recent
    # NumPy releases); np.frombuffer yields the same uint8 view of the bytes.
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the path under which an image should be saved.

    ``output_dir`` is created when it does not exist yet. The file stem is
    taken from ``image_path``; the extension is chosen from ``img.format``
    first, then from ``img.mode``, and otherwise the extension already
    present on ``image_path`` is kept.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, suffix = os.path.splitext(os.path.split(image_path)[-1])

    suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    suffix_by_mode = {"RGB": ".jpg", "L": ".jpg", "RGBA": '.png', "P": '.png'}

    image_format = img.format
    if image_format in suffix_by_format:
        suffix = suffix_by_format[image_format]
    else:
        # Unknown or missing format: fall back to the pixel mode, keeping
        # the original extension when the mode is not recognised either.
        suffix = suffix_by_mode.get(img.mode, suffix)

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """
    Draw every detection of ``data_list`` onto the image at ``image_path``.

    Each entry supplies 'left', 'right', 'top' and 'bottom'; a red outline
    is drawn for it. For RGB images a text tag built from 'label' and
    'confidence' is drawn as well. The annotated image is written into
    ``save_dir`` (replacing a file of the same name) and its path returned.
    """
    canvas = Image.open(image_path)
    painter = ImageDraw.Draw(canvas)

    for detection in data_list:
        left = detection['left']
        right = detection['right']
        top = detection['top']
        bottom = detection['bottom']

        # Closed outline of the bounding box.
        outline = [(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)]
        painter.line(outline, width=2, fill='red')

        # The label is only drawn on RGB images.
        if canvas.mode != 'RGB':
            continue
        caption = detection['label'] + ": %.2f%%" % (
            100 * detection['confidence'])
        caption_width, caption_height = painter.textsize(text=caption)
        painter.rectangle(
            xy=(left, top - (caption_height + 5), left + caption_width + 10,
                top),
            fill=(255, 255, 255))
        painter.text(xy=(left, top - 15), text=caption, fill=(0, 0, 0))

    target = get_save_image_name(canvas, save_dir, image_path)
    if os.path.exists(target):
        os.remove(target)
    canvas.save(target)
    return target
def clip_bbox(bbox, img_width, img_height):
    """
    Clamp a box to the image area.

    ``bbox`` is indexed as (xmin, ymin, xmax, ymax). x values are limited
    to [0, img_width] and y values to [0, img_height]; the clamped
    coordinates are returned as a 4-tuple in the same order.
    """

    def _clamp(value, upper):
        # Cap at the upper limit first, then raise negatives to 0.
        return max(min(value, upper), 0.)

    limits = (img_width, img_height, img_width, img_height)
    return tuple(_clamp(bbox[axis], limit) for axis, limit in enumerate(limits))
def load_label_info(file_path):
    """
    Read label names from a text file, one per line.

    Returns a list holding every line of ``file_path`` with surrounding
    whitespace stripped (blank lines become empty strings).
    """
    with open(file_path, 'r') as label_file:
        raw_lines = label_file.readlines()
    return [line.strip() for line in raw_lines]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Turn the lod_tensor produced by fluid.Executor.run into per-image results.

    Args:
        paths (list[str]): the paths of the input images.
        images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
        data_out (lod_tensor): data produced by executor.run; only its first
            element is read.
        score_thresh (float): detections scoring below this value are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): output directory.
        handle_id (int): the number of images that have been handled.
        visualization (bool): whether to save the results as images.

    Returns:
        res (list[dict]): one dict per image of the batch, with keys:
            data (list[dict]): the detections, each with the keys:
                left (float): X coordinate of the upper left corner of the box;
                top (float): Y coordinate of the upper left corner of the box;
                right (float): X coordinate of the lower right corner of the box;
                bottom (float): Y coordinate of the lower right corner of the box;
                label (str): the label of the detection;
                confidence (float): the confidence of the detection.
            path (str): the source path; only set for images taken from `paths`.
            save_path (str): the path of the saved output image; only set
                when visualization is True.
    """
    lod_tensor = data_out[0]
    offsets = lod_tensor.lod[0]
    detections = lod_tensor.as_ndarray()

    # Paths not consumed by earlier batches are served first; once they
    # run out, the remaining batch entries come from `images`.
    pending_paths = paths[handle_id:] if handle_id < len(paths) else []
    pending_count = len(pending_paths)

    output = []
    for index in range(len(offsets) - 1):
        record = {'data': []}
        if index < pending_count:
            source_path = pending_paths[index]
            picture = Image.open(source_path)
            record['path'] = source_path
        else:
            pixels = images[index - pending_count].astype(np.uint8)
            # Reverse the channel axis before handing the array to PIL.
            picture = Image.fromarray(pixels[:, :, ::-1])
            if visualization:
                # An in-memory image has no file yet; save one so that the
                # drawing helper can open it by path.
                source_path = get_save_image_name(
                    picture, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                picture.save(source_path)

        height = picture.height
        width = picture.width
        scale = (width, height, width, height)

        # offsets[index]:offsets[index + 1] selects the rows of this image.
        for row in detections[offsets[index]:offsets[index + 1]]:
            if len(row) != 6 or row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            # Scale the stored box coordinates by the image size, in place.
            for axis, extent in enumerate(scale):
                bbox[axis] = bbox[axis] * extent

            label = label_names[category_id]
            left, top, right, bottom = clip_bbox(bbox, width, height)
            record['data'].append({
                'label': label,
                'confidence': confidence,
                'left': left,
                'top': top,
                'right': right,
                'bottom': bottom,
            })

        output.append(record)
        if visualization:
            record['save_path'] = draw_bounding_box_on_image(
                source_path, record['data'], output_dir)

    return output
```shell
$ hub install ssd_vgg16_512_coco2017==1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_512_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (float): 边界框的左上角x坐标;
* top (float): 边界框的左上角y坐标;
* right (float): 边界框的右下角x坐标;
* bottom (float): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_vgg16_512_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_vgg16_512_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用 GPU 预测,则需要在启动服务之前设置 CUDA\_VISIBLE\_DEVICES 环境变量,否则不用设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_vgg16_512_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
......@@ -9,10 +9,10 @@ from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from PIL import Image
from paddle import fluid
__all__ = ['reader', 'DecodeImage', 'ResizeImage', 'NormalizeImage', 'Permute']
__all__ = ['reader']
class DecodeImage(object):
......@@ -59,8 +59,6 @@ class ResizeImage(object):
self.target_size = target_size
def __call__(self, im):
""" Resize the image numpy.
"""
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3:
......@@ -132,6 +130,7 @@ class NormalizeImage(object):
def __call__(self, im):
"""Normalize the image.
Operators:
1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std
......@@ -154,6 +153,7 @@ class Permute(object):
def __init__(self, to_bgr=True, channel_first=True):
"""
Change the channel.
Args:
to_bgr (bool): confirm whether to convert RGB to BGR
channel_first (bool): confirm whether to change channel
......@@ -178,16 +178,16 @@ def reader(paths=[],
permute_image=Permute(to_bgr=False),
normalize_image=NormalizeImage(
mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
"""data generator
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
:param decode_image: instance of <class 'DecodeImage' object>
:param resize_image: instance of <class 'ResizeImage' object>
:param permute_image: instance of <class 'Permute' object>
:param normalize_image: instance of <class 'NormalizeImage' object>
"""
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
decode_image (class object): instance of <class 'DecodeImage' object>
resize_image (class object): instance of <class 'ResizeImage' object>
permute_image (class object): instance of <class 'Permute' object>
normalize_image (class object): instance of <class 'NormalizeImage' object>
"""
img_list = []
if paths is not None:
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from yolov3.data_feed import reader
from yolov3.processor import load_label_info, postprocess
from yolov3.yolo_head import MultiClassNMS, YOLOv3Head
@moduleinfo(
name="yolov3",
version="1.0.0",
type="cv/object_detection",
summary="Baidu's YOLOv3 model for object detection.",
author="paddlepaddle",
author_email="paddle-dev@baidu.com")
class YOLOv3(hub.Module):
# PaddleHub module that exposes the YOLOv3 helpers imported by this file
# and builds a transfer-learning context around a given backbone and head.
def _initialize(self):
# Bind the imported reader, processor helpers and head classes as
# attributes of the module instance.
self.reader = reader
self.load_label_info = load_label_info
self.postprocess = postprocess
self.MultiClassNMS = MultiClassNMS
self.YOLOv3Head = YOLOv3Head
def context(self,
body_feats,
yolo_head,
image,
trainable=True,
var_prefix='',
get_prediction=False):
"""
Distill the head features, so as to perform transfer learning.

The program that owns `image` is extended with an `im_size` input and
the outputs of `yolo_head`; every variable of that program and of the
default startup program is then renamed with `var_prefix`.

Args:
body_feats: feature maps of the backbone, passed to
yolo_head._get_outputs.
yolo_head (<class 'YOLOv3Head' object>): yolo_head of YOLOv3.
image (Variable): image tensor; its block's program becomes the
context program.
trainable (bool): whether to set parameters trainable; it is also
forwarded to yolo_head._get_outputs as `is_train`.
var_prefix (str): the prefix of variables in yolo_head and backbone.
get_prediction (bool): whether to get prediction or not.

Returns:
inputs (dict): the input variables, keyed by 'image' and 'im_size'.
outputs (dict): the output variables; a list of variables under the
key 'bbox_out' when get_prediction is True, otherwise under
'head_features'.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = image.block.program
with fluid.program_guard(context_prog):
# Second model input: a 2-element int32 tensor named 'im_size'.
im_size = fluid.layers.data(
name='im_size', shape=[2], dtype='int32')
head_features = yolo_head._get_outputs(
body_feats, is_train=trainable)
# Only the prefixed *names* are recorded here; they are resolved to
# variables after add_vars_prefix has renamed the program's variables.
inputs = {
'image': var_prefix + image.name,
'im_size': var_prefix + im_size.name
}
if get_prediction:
bbox_out = yolo_head.get_prediction(head_features, im_size)
outputs = {'bbox_out': [var_prefix + bbox_out.name]}
else:
outputs = {
'head_features':
[var_prefix + var.name for var in head_features]
}
# Run the default startup program on CPU before the renaming below.
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(fluid.default_startup_program(), var_prefix)
# Look up the renamed variables in the program's global block.
inputs = {
key: context_prog.global_block().vars[value]
for key, value in inputs.items()
}
outputs = {
key: [
context_prog.global_block().vars[varname]
for varname in value
]
for key, value in outputs.items()
}
# Apply the requested trainable flag to every parameter of the program.
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
return inputs, outputs, context_prog
......@@ -15,6 +15,7 @@ __all__ = ['DarkNet']
class DarkNet(object):
"""DarkNet, see https://pjreddie.com/darknet/yolo/
Args:
depth (int): network depth, currently only darknet 53 is supported
norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
......@@ -120,11 +121,8 @@ class DarkNet(object):
return out
def __call__(self, input):
"""Get the backbone of DarkNet, that is output for the 5 stages.
:param input: Variable of input image
:type input: Variable
:Returns: The last variables of each stage.
"""
Get the backbone of DarkNet, that is output for the 5 stages.
"""
stages, block_func = self.depth_cfg[self.depth]
stages = stages[0:5]
......
......@@ -12,12 +12,15 @@ __all__ = ['reader']
def reader(paths=[], images=None):
"""data generator
"""
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
Yield:
res (list): preprocessed image and the size of original image.
"""
img_list = []
if paths:
......
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = ['load_label_info', 'postprocess']
__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the
        decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring on binary data is deprecated (and rejected by recent
    # NumPy releases); np.frombuffer yields the same uint8 view of the bytes.
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def check_dir(dir_path):
......
import base64
import cv2
import numpy as np
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the
        decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring on binary data is deprecated (and rejected by recent
    # NumPy releases); np.frombuffer yields the same uint8 view of the bytes.
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册