未验证 提交 3acfe6bd 编写于 作者: D David Lin 提交者: GitHub

update 11 object detection modules. (#555)

* update 11 object detection modules.
上级 6a477596
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn.data_feed import test_reader, padding_minibatch
from faster_rcnn.processor import load_label_info, postprocess
from faster_rcnn.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss, TwoFCHead
from faster_rcnn.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead, FPNRPNHead
from faster_rcnn.bbox_assigner import BBoxAssigner
from faster_rcnn.roi_extractor import RoIAlign, FPNRoIAlign
@moduleinfo(
    name="faster_rcnn",
    version="1.0.0",
    type="cv/object_detection",
    summary="Baidu's Faster R-CNN model for object detection.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class FasterRCNNBase(hub.Module):
    """Base PaddleHub module bundling the Faster R-CNN building blocks.

    ``_initialize`` re-exports the helper functions and component classes
    imported from the ``faster_rcnn`` package as instance attributes, and
    ``context`` wires a backbone's feature maps to the RPN / RoI / bbox heads.
    """

    def _initialize(self):
        # data_feed
        self.test_reader = test_reader
        self.padding_minibatch = padding_minibatch
        # processor
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        # bbox_head
        self.MultiClassNMS = MultiClassNMS
        self.TwoFCHead = TwoFCHead
        self.BBoxHead = BBoxHead
        self.SmoothL1Loss = SmoothL1Loss
        # rpn_head
        self.AnchorGenerator = AnchorGenerator
        self.RPNTargetAssign = RPNTargetAssign
        self.GenerateProposals = GenerateProposals
        self.RPNHead = RPNHead
        self.FPNRPNHead = FPNRPNHead
        # bbox_assigner
        self.BBoxAssigner = BBoxAssigner
        # roi_extractor
        self.RoIAlign = RoIAlign
        self.FPNRoIAlign = FPNRoIAlign

    def context(self, body_feats, fpn, rpn_head, roi_extractor, bbox_head,
                bbox_assigner, image, trainable, var_prefix, phase):
        """Distill the Head Features, so as to perform transfer learning.

        :param body_feats: feature maps produced by the backbone, keyed by name.
        :type body_feats: dict-like (``keys()`` is used; the last key selects
            the map fed to RoIAlign when ``fpn`` is None)
        :param fpn: Feature Pyramid Network, or None to skip it.
        :type fpn: <class 'FPN' object>
        :param rpn_head: Head of Region Proposal Network.
        :type rpn_head: <class 'RPNHead' object> or <class 'FPNRPNHead' object>
        :param roi_extractor: RoI feature extractor. Without FPN its
            ``pooled_height``, ``pooled_width``, ``spatial_scale`` and
            ``sampling_ratio`` attributes are forwarded to
            ``fluid.layers.roi_align``; with FPN it is called directly.
        :type roi_extractor: <class 'RoIAlign' object> or <class 'FPNRoIAlign' object>
        :param bbox_head: Head of Bounding Box.
        :type bbox_head: <class 'BBoxHead' object>
        :param bbox_assigner: Parameters of fluid.layers.generate_proposal_labels.
        :type bbox_assigner: <class 'BBoxAssigner' object>
        :param image: image tensor.
        :type image: <class 'paddle.fluid.framework.Variable'>
        :param trainable: whether to set parameters trainable.
        :type trainable: bool
        :param var_prefix: the prefix of variables in faster_rcnn
        :type var_prefix: str
        :param phase: Optional Choice: 'predict', 'train'
        :type phase: str
        :return: ``(inputs, outputs, context_prog)`` where ``inputs`` and
            ``outputs`` map names to (prefixed) variables of ``context_prog``.
        :raises ValueError: if ``phase`` is neither 'train' nor 'predict'.
        """
        # Fail fast: any other value would build part of the graph and then
        # crash later because `inputs`/`outputs` were never assigned.
        if phase not in ('train', 'predict'):
            raise ValueError(
                "phase must be 'train' or 'predict', got {!r}".format(phase))

        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_info = fluid.layers.data(
                name='im_info', shape=[3], dtype='float32', lod_level=0)
            im_shape = fluid.layers.data(
                name='im_shape', shape=[3], dtype='float32', lod_level=0)
            # body_feats = backbone(image)
            # Remember the backbone's own keys before FPN replaces body_feats.
            body_feat_names = list(body_feats.keys())
            # fpn
            if fpn is not None:
                body_feats, spatial_scale = fpn.get_output(body_feats)
            # rpn_head: RPNHead
            rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
            # train
            if phase == 'train':
                gt_bbox = fluid.layers.data(
                    name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
                is_crowd = fluid.layers.data(
                    name='is_crowd', shape=[1], dtype='int32', lod_level=1)
                gt_class = fluid.layers.data(
                    name='gt_class', shape=[1], dtype='int32', lod_level=1)
                rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
                # bbox_assigner: BBoxAssigner
                outs = fluid.layers.generate_proposal_labels(
                    rpn_rois=rois,
                    gt_classes=gt_class,
                    is_crowd=is_crowd,
                    gt_boxes=gt_bbox,
                    im_info=im_info,
                    batch_size_per_im=bbox_assigner.batch_size_per_im,
                    fg_fraction=bbox_assigner.fg_fraction,
                    fg_thresh=bbox_assigner.fg_thresh,
                    bg_thresh_hi=bbox_assigner.bg_thresh_hi,
                    bg_thresh_lo=bbox_assigner.bg_thresh_lo,
                    bbox_reg_weights=bbox_assigner.bbox_reg_weights,
                    class_nums=bbox_assigner.class_nums,
                    use_random=bbox_assigner.use_random)
                # The first output replaces the RPN proposals as the RoIs.
                rois = outs[0]
            if fpn is None:
                body_feat = body_feats[body_feat_names[-1]]
                # roi_extractor: RoIAlign
                roi_feat = fluid.layers.roi_align(
                    input=body_feat,
                    rois=rois,
                    pooled_height=roi_extractor.pooled_height,
                    pooled_width=roi_extractor.pooled_width,
                    spatial_scale=roi_extractor.spatial_scale,
                    sampling_ratio=roi_extractor.sampling_ratio)
            else:
                # roi_extractor: FPNRoIAlign
                roi_feat = roi_extractor(
                    head_inputs=body_feats,
                    rois=rois,
                    spatial_scale=spatial_scale)
            # head_feat
            head_feat = bbox_head.head(roi_feat)
            if isinstance(head_feat, OrderedDict):
                head_feat = list(head_feat.values())[0]
            # Names are recorded with the prefix already applied because
            # add_vars_prefix() below renames the program's variables.
            if phase == 'train':
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name,
                    'gt_class': var_prefix + gt_class.name,
                    'gt_bbox': var_prefix + gt_bbox.name,
                    'is_crowd': var_prefix + is_crowd.name
                }
                outputs = {
                    'head_feat':
                    var_prefix + head_feat.name,
                    'rpn_cls_loss':
                    var_prefix + rpn_loss['rpn_cls_loss'].name,
                    'rpn_reg_loss':
                    var_prefix + rpn_loss['rpn_reg_loss'].name,
                    'generate_proposal_labels':
                    [var_prefix + var.name for var in outs]
                }
            else:  # phase == 'predict'
                pred = bbox_head.get_prediction(roi_feat, rois, im_info,
                                                im_shape)
                inputs = {
                    'image': var_prefix + image.name,
                    'im_info': var_prefix + im_info.name,
                    'im_shape': var_prefix + im_shape.name
                }
                outputs = {
                    'head_feat': var_prefix + head_feat.name,
                    'rois': var_prefix + rois.name,
                    'bbox_out': var_prefix + pred.name
                }
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            # Swap the recorded names for the actual (renamed) variables.
            global_vars = context_prog.global_block().vars
            inputs = {key: global_vars[value] for key, value in inputs.items()}
            outputs = {
                key: global_vars[value] if not isinstance(value, list) else
                [global_vars[var] for var in value]
                for key, value in outputs.items()
            }
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            return inputs, outputs, context_prog
```shell
$ hub install faster_rcnn_resnet50_coco2017==1.1.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
* num\_classes (int): 类别数;
* trainable(bool): 将参数的trainable 属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
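* inputs (dict): 模型的输入,keys 包括 'image', 'im\_info', 'im\_shape'(phase 为 'train' 时还包括 'gt\_class', 'gt\_bbox', 'is\_crowd'),其中:
* image (Variable): 图像变量
* im\_info (Variable): 预处理后图片的信息
* im\_shape (Variable): 原始图片的尺寸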
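* outputs (dict): 模型的输出。phase 为 'train' 时,keys 包括 'head\_feat', 'rpn\_cls\_loss', 'rpn\_reg\_loss', 'generate\_proposal\_labels';phase 为 'predict' 时,keys 包括 'head\_feat', 'rois', 'bbox\_out'。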
* context\_prog (Program): 用于迁移学习的 Program。
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="faster_rcnn_resnet50_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m faster_rcnn_resnet50_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
class BBoxAssigner(object):
    """Plain holder for the RoI sampling hyper-parameters.

    The attributes are read by the module's ``context`` and forwarded as
    keyword arguments to ``fluid.layers.generate_proposal_labels``.

    Args:
        batch_size_per_im (int): stored as ``batch_size_per_im``.
        fg_fraction (float): stored as ``fg_fraction``.
        fg_thresh (float): stored as ``fg_thresh``.
        bg_thresh_hi (float): stored as ``bg_thresh_hi``.
        bg_thresh_lo (float): stored as ``bg_thresh_lo``.
        bbox_reg_weights (list[float] | None): stored as ``bbox_reg_weights``;
            ``None`` selects ``[0.1, 0.1, 0.2, 0.2]``.
        class_nums (int): stored as ``class_nums``.
        shuffle_before_sample (bool): stored as ``use_random``.
    """

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=None,
                 class_nums=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        # Build the default per instance: a list literal in the signature
        # would be one object shared (and mutable) across all instances.
        if bbox_reg_weights is None:
            bbox_reg_weights = [0.1, 0.1, 0.2, 0.2]
        self.bbox_reg_weights = bbox_reg_weights
        self.class_nums = class_nums
        # Exposed under the keyword name generate_proposal_labels expects.
        self.use_random = shuffle_before_sample
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal, Xavier
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.initializer import MSRA
class MultiClassNMS(object):
    """Settings record for multi-class non-maximum suppression.

    Every constructor argument is stored unchanged under an attribute of the
    same name; ``BBoxHead.get_prediction`` reads them when it calls
    ``fluid.layers.multiclass_nms``.
    """

    # __op__ = fluid.layers.multiclass_nms

    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        # Thresholds.
        self.score_threshold, self.nms_threshold = score_threshold, nms_threshold
        # Candidate-count limits.
        self.nms_top_k, self.keep_top_k = nms_top_k, keep_top_k
        # Remaining switches.
        self.normalized = normalized
        self.nms_eta = nms_eta
        self.background_label = background_label
class SmoothL1Loss(object):
    """
    Callable wrapper around ``fluid.layers.smooth_l1`` with a fixed sigma.

    Args:
        sigma (float): hyper param in smooth l1 loss
    """

    def __init__(self, sigma=1.0):
        super(SmoothL1Loss, self).__init__()
        self.sigma = sigma

    def __call__(self, x, y, inside_weight=None, outside_weight=None):
        """Return ``fluid.layers.smooth_l1(x, y, ...)`` using the stored sigma."""
        weights = dict(
            inside_weight=inside_weight, outside_weight=outside_weight)
        loss = fluid.layers.smooth_l1(x, y, sigma=self.sigma, **weights)
        return loss
class BoxCoder(object):
    """Plain holder for the ``fluid.layers.box_coder`` settings.

    ``BBoxHead.get_prediction`` forwards each attribute as the keyword
    argument of the same name.

    Args:
        prior_box_var (list[float] | None): stored as ``prior_box_var``;
            ``None`` selects ``[0.1, 0.1, 0.2, 0.2]``.
        code_type (str): stored as ``code_type``.
        box_normalized (bool): stored as ``box_normalized``.
        axis (int): stored as ``axis``.
    """

    def __init__(self,
                 prior_box_var=None,
                 code_type='decode_center_size',
                 box_normalized=False,
                 axis=1):
        super(BoxCoder, self).__init__()
        # Build the default per instance: a list literal in the signature
        # would be one object shared (and mutable) across all instances.
        if prior_box_var is None:
            prior_box_var = [0.1, 0.1, 0.2, 0.2]
        self.prior_box_var = prior_box_var
        self.code_type = code_type
        self.box_normalized = box_normalized
        self.axis = axis
class TwoFCHead(object):
    """
    RCNN head made of two ReLU fully connected layers, ``fc6`` then ``fc7``.

    Args:
        mlp_dim (int): num of filters for the fc layers
    """

    def __init__(self, mlp_dim=1024):
        super(TwoFCHead, self).__init__()
        self.mlp_dim = mlp_dim

    def __call__(self, roi_feat):
        """Apply fc6 and fc7 to ``roi_feat`` and return the fc7 output."""
        dims = roi_feat.shape
        # Product of dims 1..3 of the RoI feature, used as Xavier's fan_out.
        fan = dims[1] * dims[2] * dims[3]

        fc6_weight = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
        fc6_bias = ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
        fc6 = fluid.layers.fc(
            input=roi_feat,
            size=self.mlp_dim,
            act='relu',
            name='fc6',
            param_attr=fc6_weight,
            bias_attr=fc6_bias)

        fc7_weight = ParamAttr(name='fc7_w', initializer=Xavier())
        fc7_bias = ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
        return fluid.layers.fc(
            input=fc6,
            size=self.mlp_dim,
            act='relu',
            name='fc7',
            param_attr=fc7_weight,
            bias_attr=fc7_bias)
class BBoxHead(object):
    """
    RCNN bbox head

    Args:
        head (object): the head module instance, e.g., `ResNetC5`, `TwoFCHead`
        box_coder (object): `BoxCoder` instance; a fresh `BoxCoder()` when None
        nms (object): `MultiClassNMS` instance; a fresh `MultiClassNMS()` when
            None
        bbox_loss (object): callable loss, e.g. `SmoothL1Loss`; a fresh
            `SmoothL1Loss()` when None
        num_classes: number of output classes
    """
    __inject__ = ['head', 'box_coder', 'nms', 'bbox_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 head,
                 box_coder=None,
                 nms=None,
                 bbox_loss=None,
                 num_classes=81):
        super(BBoxHead, self).__init__()
        self.head = head
        self.num_classes = num_classes
        # Defaults are created per instance. Instantiating them in the
        # signature would make every BBoxHead share the same three objects,
        # so tweaking one head's settings would silently change all of them.
        self.box_coder = BoxCoder() if box_coder is None else box_coder
        self.nms = MultiClassNMS() if nms is None else nms
        self.bbox_loss = SmoothL1Loss() if bbox_loss is None else bbox_loss
        self.head_feat = None

    def get_head_feat(self, input=None):
        """
        Get the bbox head feature map.

        When ``input`` is given, it is run through ``self.head`` and the
        result is cached (the first value is taken if the head returns an
        OrderedDict). Without ``input`` the cached feature is returned, which
        is None until the first computation.
        """
        if input is not None:
            feat = self.head(input)
            if isinstance(feat, OrderedDict):
                feat = list(feat.values())[0]
            self.head_feat = feat
        return self.head_feat

    def _get_output(self, roi_feat):
        """
        Get bbox head output.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.

        Returns:
            cls_score(Variable): Output of the `cls_score` fc layer, which has
                `num_classes` units.
            bbox_pred(Variable): Output of the `bbox_pred` fc layer, which has
                `4 * num_classes` units.
        """
        head_feat = self.get_head_feat(roi_feat)
        # when ResNetC5 output a single feature map
        if not isinstance(self.head, TwoFCHead):
            head_feat = fluid.layers.pool2d(
                head_feat, pool_type='avg', global_pooling=True)
        cls_score = fluid.layers.fc(
            input=head_feat,
            size=self.num_classes,
            act=None,
            name='cls_score',
            param_attr=ParamAttr(
                name='cls_score_w', initializer=Normal(loc=0.0, scale=0.01)),
            bias_attr=ParamAttr(
                name='cls_score_b', learning_rate=2., regularizer=L2Decay(0.)))
        bbox_pred = fluid.layers.fc(
            input=head_feat,
            size=4 * self.num_classes,
            act=None,
            name='bbox_pred',
            param_attr=ParamAttr(
                name='bbox_pred_w', initializer=Normal(loc=0.0, scale=0.001)),
            bias_attr=ParamAttr(
                name='bbox_pred_b', learning_rate=2., regularizer=L2Decay(0.)))
        return cls_score, bbox_pred

    def get_loss(self, roi_feat, labels_int32, bbox_targets,
                 bbox_inside_weights, bbox_outside_weights):
        """
        Get bbox_head loss.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            labels_int32(Variable): Class label of a RoI with shape [P, 1].
                P is the number of RoI.
            bbox_targets(Variable): Box label of a RoI with shape
                [P, 4 * class_nums].
            bbox_inside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.
            bbox_outside_weights(Variable): Indicates whether a box should
                contribute to loss. Same shape as bbox_targets.

        Return:
            Type: Dict
                loss_cls(Variable): mean softmax cross-entropy of cls_score.
                loss_bbox(Variable): mean of `self.bbox_loss` on bbox_pred.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)
        # Labels are cast to int64 and excluded from back-propagation.
        labels_int64 = fluid.layers.cast(x=labels_int32, dtype='int64')
        labels_int64.stop_gradient = True
        loss_cls = fluid.layers.softmax_with_cross_entropy(
            logits=cls_score, label=labels_int64, numeric_stable_mode=True)
        loss_cls = fluid.layers.reduce_mean(loss_cls)
        loss_bbox = self.bbox_loss(
            x=bbox_pred,
            y=bbox_targets,
            inside_weight=bbox_inside_weights,
            outside_weight=bbox_outside_weights)
        loss_bbox = fluid.layers.reduce_mean(loss_bbox)
        return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox}

    def get_prediction(self,
                       roi_feat,
                       rois,
                       im_info,
                       im_shape,
                       return_box_score=False):
        """
        Get prediction bounding box in test stage.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            rois (Variable): Output of generate_proposals in rpn head.
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists of im_height,
                im_width, im_scale.
            im_shape (Variable): Actual shape of original image with shape
                [B, 3]. B is the number of images, each element consists of
                original_height, original_width, 1
            return_box_score (bool): if True, skip NMS and return the clipped
                boxes and class probabilities instead.

        Returns:
            pred_result(Variable): Prediction result with shape [N, 6]. Each
                row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
                N is the total number of prediction.
                When `return_box_score` is True, a dict
                `{'bbox': clipped boxes, 'score': class probabilities}` is
                returned instead.
        """
        cls_score, bbox_pred = self._get_output(roi_feat)
        # Column 2 of im_info is im_scale; expand it to one value per RoI and
        # divide so the boxes are expressed at the original image scale.
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale = fluid.layers.sequence_expand(im_scale, rois)
        boxes = rois / im_scale
        cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
        bbox_pred = fluid.layers.reshape(bbox_pred, (-1, self.num_classes, 4))
        # self.box_coder
        decoded_box = fluid.layers.box_coder(
            prior_box=boxes,
            target_box=bbox_pred,
            prior_box_var=self.box_coder.prior_box_var,
            code_type=self.box_coder.code_type,
            box_normalized=self.box_coder.box_normalized,
            axis=self.box_coder.axis)
        cliped_box = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
        if return_box_score:
            return {'bbox': cliped_box, 'score': cls_prob}
        # self.nms
        pred_result = fluid.layers.multiclass_nms(
            bboxes=cliped_box,
            scores=cls_prob,
            score_threshold=self.nms.score_threshold,
            nms_top_k=self.nms.nms_top_k,
            keep_top_k=self.nms.keep_top_k,
            nms_threshold=self.nms.nms_threshold,
            normalized=self.nms.normalized,
            nms_eta=self.nms.nms_eta,
            background_label=self.nms.background_label)
        return pred_result
...@@ -15,14 +15,19 @@ __all__ = ['test_reader'] ...@@ -15,14 +15,19 @@ __all__ = ['test_reader']
def test_reader(paths=None, images=None): def test_reader(paths=None, images=None):
"""data generator """
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images. Yield:
:type paths: list, each element is a str res (dict): key contains 'image' and 'im_info', the corresponding values is:
:param images: data of images, [N, H, W, C] image (numpy.ndarray): the image to be fed into network
:type images: numpy.ndarray im_info (numpy.ndarray): the info about the preprocessed.
""" """
img_list = [] img_list = list()
if paths: if paths:
for img_path in paths: for img_path in paths:
assert os.path.isfile( assert os.path.isfile(
...@@ -87,9 +92,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): ...@@ -87,9 +92,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else: else:
max_shape = max_shape_org.astype('int32') max_shape = max_shape_org.astype('int32')
padding_image = [] padding_image = list()
padding_info = [] padding_info = list()
padding_shape = [] padding_shape = list()
for data in batch_data: for data in batch_data:
im_c, im_h, im_w = data['image'].shape im_c, im_h, im_w = data['image'].shape
......
...@@ -13,11 +13,18 @@ from math import ceil ...@@ -13,11 +13,18 @@ from math import ceil
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn_resnet50_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5 from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
from faster_rcnn_resnet50_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, RPNHead
from faster_rcnn_resnet50_coco2017.bbox_head import MultiClassNMS, BBoxHead, SmoothL1Loss
from faster_rcnn_resnet50_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_coco2017.roi_extractor import RoIAlign
@moduleinfo( @moduleinfo(
...@@ -27,17 +34,14 @@ from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5 ...@@ -27,17 +34,14 @@ from faster_rcnn_resnet50_coco2017.resnet import ResNet, ResNetC5
summary= summary=
"Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017", "Baidu's Faster R-CNN model for object detection with backbone ResNet50, trained with dataset COCO2017",
author="paddlepaddle", author="paddlepaddle",
author_email="paddle-dev@baidu.com") author_email="")
class FasterRCNNResNet50(hub.Module): class FasterRCNNResNet50(hub.Module):
def _initialize(self): def _initialize(self):
self.faster_rcnn = hub.Module(name="faster_rcnn")
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join( self.default_pretrained_model_path = os.path.join(
self.directory, "faster_rcnn_resnet50_model") self.directory, "faster_rcnn_resnet50_model")
self.label_names = self.faster_rcnn.load_label_info( self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt")) os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None
self.bbox_out = None
self._set_config() self._set_config()
def _set_config(self): def _set_config(self):
...@@ -66,20 +70,23 @@ class FasterRCNNResNet50(hub.Module): ...@@ -66,20 +70,23 @@ class FasterRCNNResNet50(hub.Module):
trainable=True, trainable=True,
pretrained=True, pretrained=True,
phase='train'): phase='train'):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param param_prefix: the prefix of parameters in neural network.
:type param_prefix: str
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
""" """
wrapped_prog = fluid.Program() Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program): with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
image = fluid.layers.data( image = fluid.layers.data(
name='image', shape=[3, 800, 1333], dtype='float32') name='image', shape=[3, 800, 1333], dtype='float32')
...@@ -91,18 +98,106 @@ class FasterRCNNResNet50(hub.Module): ...@@ -91,18 +98,106 @@ class FasterRCNNResNet50(hub.Module):
freeze_at=2) freeze_at=2)
body_feats = backbone(image) body_feats = backbone(image)
# Base Class # var_prefix
inputs, outputs, context_prog = self.faster_rcnn.context( var_prefix = '@HUB_{}@'.format(self.name)
body_feats=body_feats, im_info = fluid.layers.data(
fpn=None, name='im_info', shape=[3], dtype='float32', lod_level=0)
rpn_head=self.rpn_head(), im_shape = fluid.layers.data(
roi_extractor=self.roi_extractor(), name='im_shape', shape=[3], dtype='float32', lod_level=0)
bbox_head=self.bbox_head(num_classes), body_feat_names = list(body_feats.keys())
bbox_assigner=self.bbox_assigner(num_classes), # rpn_head: RPNHead
image=image, rpn_head = self.rpn_head()
trainable=trainable, rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
var_prefix='@HUB_{}@'.format(self.name), # train
phase=phase) if phase == 'train':
gt_bbox = fluid.layers.data(
name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
is_crowd = fluid.layers.data(
name='is_crowd', shape=[1], dtype='int32', lod_level=1)
gt_class = fluid.layers.data(
name='gt_class', shape=[1], dtype='int32', lod_level=1)
rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
# bbox_assigner: BBoxAssigner
bbox_assigner = self.bbox_assigner(num_classes)
outs = fluid.layers.generate_proposal_labels(
rpn_rois=rois,
gt_classes=gt_class,
is_crowd=is_crowd,
gt_boxes=gt_bbox,
im_info=im_info,
batch_size_per_im=bbox_assigner.batch_size_per_im,
fg_fraction=bbox_assigner.fg_fraction,
fg_thresh=bbox_assigner.fg_thresh,
bg_thresh_hi=bbox_assigner.bg_thresh_hi,
bg_thresh_lo=bbox_assigner.bg_thresh_lo,
bbox_reg_weights=bbox_assigner.bbox_reg_weights,
class_nums=bbox_assigner.class_nums,
use_random=bbox_assigner.use_random)
rois = outs[0]
body_feat = body_feats[body_feat_names[-1]]
# roi_extractor: RoIAlign
roi_extractor = self.roi_extractor()
roi_feat = fluid.layers.roi_align(
input=body_feat,
rois=rois,
pooled_height=roi_extractor.pooled_height,
pooled_width=roi_extractor.pooled_width,
spatial_scale=roi_extractor.spatial_scale,
sampling_ratio=roi_extractor.sampling_ratio)
# head_feat
bbox_head = self.bbox_head(num_classes)
head_feat = bbox_head.head(roi_feat)
if isinstance(head_feat, OrderedDict):
head_feat = list(head_feat.values())[0]
if phase == 'train':
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name,
'gt_class': var_prefix + gt_class.name,
'gt_bbox': var_prefix + gt_bbox.name,
'is_crowd': var_prefix + is_crowd.name
}
outputs = {
'head_feat':
var_prefix + head_feat.name,
'rpn_cls_loss':
var_prefix + rpn_loss['rpn_cls_loss'].name,
'rpn_reg_loss':
var_prefix + rpn_loss['rpn_reg_loss'].name,
'generate_proposal_labels':
[var_prefix + var.name for var in outs]
}
elif phase == 'predict':
pred = bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape)
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name
}
outputs = {
'head_feat': var_prefix + head_feat.name,
'rois': var_prefix + rois.name,
'bbox_out': var_prefix + pred.name
}
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(startup_program, var_prefix)
global_vars = context_prog.global_block().vars
inputs = {
key: global_vars[value]
for key, value in inputs.items()
}
outputs = {
key: global_vars[value] if not isinstance(value, list) else
[global_vars[var] for var in value]
for key, value in outputs.items()
}
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -122,43 +217,42 @@ class FasterRCNNResNet50(hub.Module): ...@@ -122,43 +217,42 @@ class FasterRCNNResNet50(hub.Module):
return inputs, outputs, context_prog return inputs, outputs, context_prog
def rpn_head(self): def rpn_head(self):
return self.faster_rcnn.RPNHead( return RPNHead(
anchor_generator=self.faster_rcnn.AnchorGenerator( anchor_generator=AnchorGenerator(
anchor_sizes=[32, 64, 128, 256, 512], anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0], aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0], stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]), variance=[1.0, 1.0, 1.0, 1.0]),
rpn_target_assign=self.faster_rcnn.RPNTargetAssign( rpn_target_assign=RPNTargetAssign(
rpn_batch_size_per_im=256, rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5, rpn_fg_fraction=0.5,
rpn_negative_overlap=0.3, rpn_negative_overlap=0.3,
rpn_positive_overlap=0.7, rpn_positive_overlap=0.7,
rpn_straddle_thresh=0.0), rpn_straddle_thresh=0.0),
train_proposal=self.faster_rcnn.GenerateProposals( train_proposal=GenerateProposals(
min_size=0.0, min_size=0.0,
nms_thresh=0.7, nms_thresh=0.7,
post_nms_top_n=12000, post_nms_top_n=12000,
pre_nms_top_n=2000), pre_nms_top_n=2000),
test_proposal=self.faster_rcnn.GenerateProposals( test_proposal=GenerateProposals(
min_size=0.0, min_size=0.0,
nms_thresh=0.7, nms_thresh=0.7,
post_nms_top_n=6000, post_nms_top_n=6000,
pre_nms_top_n=1000)) pre_nms_top_n=1000))
def roi_extractor(self): def roi_extractor(self):
return self.faster_rcnn.RoIAlign( return RoIAlign(resolution=14, sampling_ratio=0, spatial_scale=0.0625)
resolution=14, sampling_ratio=0, spatial_scale=0.0625)
def bbox_head(self, num_classes): def bbox_head(self, num_classes):
return self.faster_rcnn.BBoxHead( return BBoxHead(
head=ResNetC5(depth=50, norm_type='affine_channel'), head=ResNetC5(depth=50, norm_type='affine_channel'),
nms=self.faster_rcnn.MultiClassNMS( nms=MultiClassNMS(
keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
bbox_loss=self.faster_rcnn.SmoothL1Loss(), bbox_loss=SmoothL1Loss(),
num_classes=num_classes) num_classes=num_classes)
def bbox_assigner(self, num_classes): def bbox_assigner(self, num_classes):
return self.faster_rcnn.BBoxAssigner( return BBoxAssigner(
batch_size_per_im=512, batch_size_per_im=512,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
bg_thresh_hi=0.5, bg_thresh_hi=0.5,
...@@ -178,26 +272,32 @@ class FasterRCNNResNet50(hub.Module): ...@@ -178,26 +272,32 @@ class FasterRCNNResNet50(hub.Module):
visualization=True): visualization=True):
"""API of Object Detection. """API of Object Detection.
:param paths: the path of images. Args:
:type paths: list, each element is correspond to the path of an image. paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C] images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray batch_size (int): batch size.
:param use_gpu: whether to use gpu or not. use_gpu (bool): Whether to use gpu.
:type use_gpu: bool output_dir (str): The path to store output images.
:param batch_size: bathc size. visualization (bool): Whether to save image or not.
:type batch_size: int score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str Returns:
:param score_thresh: the threshold of detection confidence. res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw box and save images. left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
""" """
paths = paths if paths else list()
if data and 'image' in data: if data and 'image' in data:
paths = data['image'] if not paths else paths + data['image'] paths += data['image']
all_images = []
paths = paths if paths else [] all_images = list()
for yield_return in self.faster_rcnn.test_reader(paths, images): for yield_return in test_reader(paths, images):
all_images.append(yield_return) all_images.append(yield_return)
images_num = len(all_images) images_num = len(all_images)
...@@ -211,7 +311,8 @@ class FasterRCNNResNet50(hub.Module): ...@@ -211,7 +311,8 @@ class FasterRCNNResNet50(hub.Module):
batch_data.append(all_images[handle_id + image_id]) batch_data.append(all_images[handle_id + image_id])
except: except:
pass pass
padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data) batch_data)
padding_image_tensor = PaddleTensor(padding_image.copy()) padding_image_tensor = PaddleTensor(padding_image.copy())
padding_info_tensor = PaddleTensor(padding_info.copy()) padding_info_tensor = PaddleTensor(padding_info.copy())
...@@ -223,7 +324,7 @@ class FasterRCNNResNet50(hub.Module): ...@@ -223,7 +324,7 @@ class FasterRCNNResNet50(hub.Module):
data_out = self.gpu_predictor.run(feed_list) data_out = self.gpu_predictor.run(feed_list)
else: else:
data_out = self.cpu_predictor.run(feed_list) data_out = self.cpu_predictor.run(feed_list)
output = self.faster_rcnn.postprocess( output = postprocess(
paths=paths, paths=paths,
images=images, images=images,
data_out=data_out, data_out=data_out,
...@@ -275,6 +376,15 @@ class FasterRCNNResNet50(hub.Module): ...@@ -275,6 +376,15 @@ class FasterRCNNResNet50(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True) input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images; each element is decoded
            with `base64_to_cv2` before detection.
        **kwargs: forwarded unchanged to `self.object_detection`.

    Returns:
        The value returned by `self.object_detection` for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # Pass the decoded arrays by keyword. Handing them over positionally
    # binds them to the first parameter of object_detection, which is
    # `paths` in the object_detection signature visible for the FPN variant
    # of this module (NOTE(review): confirm for this class), so the arrays
    # would be treated as file paths.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable @runnable
def run_cmd(self, argvs): def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser( self.parser = argparse.ArgumentParser(
......
# coding=utf-8 # coding=utf-8
import base64
import os import os
import cv2
import numpy as np import numpy as np
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
__all__ = [ __all__ = [
'get_save_image_name', 'draw_bounding_box_on_image', 'clip_bbox', 'base64_to_cv2',
'load_label_info' 'load_label_info',
'postprocess',
] ]
def base64_to_cv2(b64str):
    """
    Decode a base64-encoded image string into an image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of `cv2.imdecode(..., cv2.IMREAD_COLOR)` on the decoded
        bytes.
    """
    raw = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported way to view raw bytes as uint8;
    # np.fromstring on binary data is deprecated.
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path): def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path. """Get save image name from source image path.
""" """
...@@ -90,24 +100,29 @@ def postprocess(paths, ...@@ -90,24 +100,29 @@ def postprocess(paths,
output_dir, output_dir,
handle_id, handle_id,
visualization=True): visualization=True):
"""postprocess the lod_tensor produced by fluid.Executor.run """
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str Args:
:param images: data of images, [N, H, W, C] paths (list[str]): the path of images.
:type images: numpy.ndarray images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box. label_names (list[str]): label names.
:type score_thresh: float output_dir (str): output directory.
:param label_names: label names handle_id (int): The number of images that have been handled.
:type label_names: list visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str Returns:
:param handle_id: The number of images that have been handled. res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox and save images. left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
""" """
lod_tensor = data_out[0] lod_tensor = data_out[0]
lod = lod_tensor.lod[0] lod = lod_tensor.lod[0]
......
# coding=utf-8
__all__ = ['RoIAlign']
class RoIAlign(object):
    """
    Settings holder for RoI Align.

    Args:
        resolution (int|sequence): pooled output size. An int is used for
            both height and width; otherwise element 0 is the height and
            element 1 is the width.
        spatial_scale (float): stored as `spatial_scale`.
        sampling_ratio (int): stored as `sampling_ratio`.
    """

    def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        # A bare int means a square pooling window.
        if isinstance(resolution, int):
            height, width = resolution, resolution
        else:
            height, width = resolution[0], resolution[1]
        self.pooled_height = height
        self.pooled_width = width
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
__all__ = ['AnchorGenerator', 'RPNTargetAssign', 'GenerateProposals', 'RPNHead']
class AnchorGenerator(object):
    """
    Settings holder for anchor generation.

    The stored values are read by `RPNHead._get_output`, which passes them
    to `fluid.layers.anchor_generator`.

    Args:
        stride (list[float]): anchor stride.
        anchor_sizes (list): anchor sizes.
        aspect_ratios (list[float]): anchor aspect ratios.
        variance (list[float]): anchor variances.
    """

    def __init__(self,
                 stride=[16.0, 16.0],
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1., 2.],
                 variance=[1., 1., 1., 1.]):
        super(AnchorGenerator, self).__init__()
        # Copy every sequence: the defaults are single list objects shared
        # by all calls, so storing them directly would let a change made
        # through one instance leak into every other default-built instance.
        self.anchor_sizes = list(anchor_sizes)
        self.aspect_ratios = list(aspect_ratios)
        self.variance = list(variance)
        self.stride = list(stride)
class RPNTargetAssign(object):
    """
    Settings holder for RPN target assignment.

    Every constructor argument is stored unchanged under an attribute of
    the same name; `RPNHead.get_loss` reads them when it calls
    `fluid.layers.rpn_target_assign`.
    """

    def __init__(self,
                 rpn_batch_size_per_im=256,
                 rpn_straddle_thresh=0.,
                 rpn_fg_fraction=0.5,
                 rpn_positive_overlap=0.7,
                 rpn_negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        settings = (
            ('rpn_batch_size_per_im', rpn_batch_size_per_im),
            ('rpn_straddle_thresh', rpn_straddle_thresh),
            ('rpn_fg_fraction', rpn_fg_fraction),
            ('rpn_positive_overlap', rpn_positive_overlap),
            ('rpn_negative_overlap', rpn_negative_overlap),
            ('use_random', use_random),
        )
        for field, value in settings:
            setattr(self, field, value)
class GenerateProposals(object):
    """
    Settings holder for proposal generation.

    Every constructor argument is stored unchanged under an attribute of
    the same name; `RPNHead.get_proposals` reads them when it calls
    `fluid.layers.generate_proposals`.
    """

    def __init__(self,
                 pre_nms_top_n=6000,
                 post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(GenerateProposals, self).__init__()
        self.pre_nms_top_n, self.post_nms_top_n = pre_nms_top_n, post_nms_top_n
        self.nms_thresh, self.min_size = nms_thresh, min_size
        self.eta = eta
class RPNHead(object):
    """
    RPN Head

    Args:
        anchor_generator (object): `AnchorGenerator` instance
        rpn_target_assign (object): `RPNTargetAssign` instance
        train_proposal (object): `GenerateProposals` instance for training
        test_proposal (object): `GenerateProposals` instance for testing
        num_classes (int): number of classes in rpn output
    """
    __inject__ = [
        'anchor_generator', 'rpn_target_assign', 'train_proposal',
        'test_proposal'
    ]

    def __init__(self,
                 anchor_generator,
                 rpn_target_assign,
                 train_proposal,
                 test_proposal,
                 num_classes=1):
        super(RPNHead, self).__init__()
        self.anchor_generator = anchor_generator
        self.rpn_target_assign = rpn_target_assign
        self.train_proposal = train_proposal
        self.test_proposal = test_proposal
        self.num_classes = num_classes

    def _get_output(self, input):
        """
        Get anchor and RPN head output.

        Besides returning the two head outputs, this stores them on the
        instance (`rpn_cls_score`, `rpn_bbox_pred`) together with the
        generated `anchor` and `anchor_var`, which `get_proposals` and
        `_get_loss_input` read later.

        Args:
            input(Variable): feature map from backbone with shape of [N, C, H, W]

        Returns:
            rpn_cls_score(Variable): Output of rpn head with shape of [N, num_anchors, H, W].
            rpn_bbox_pred(Variable): Output of rpn head with shape of [N, num_anchors * 4, H, W].
        """
        dim_out = input.shape[1]
        rpn_conv = fluid.layers.conv2d(
            input=input,
            num_filters=dim_out,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            name='conv_rpn',
            param_attr=ParamAttr(
                name="conv_rpn_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="conv_rpn_b", learning_rate=2., regularizer=L2Decay(0.)))
        # Generate anchors from the settings held by self.anchor_generator
        self.anchor, self.anchor_var = fluid.layers.anchor_generator(
            input=rpn_conv,
            anchor_sizes=self.anchor_generator.anchor_sizes,
            aspect_ratios=self.anchor_generator.aspect_ratios,
            variance=self.anchor_generator.variance,
            stride=self.anchor_generator.stride)
        num_anchor = self.anchor.shape[2]
        # Proposal classification scores
        self.rpn_cls_score = fluid.layers.conv2d(
            rpn_conv,
            num_filters=num_anchor * self.num_classes,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_cls_score',
            param_attr=ParamAttr(
                name="rpn_cls_logits_w", initializer=Normal(loc=0.,
                                                            scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_cls_logits_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        # Proposal bbox regression deltas
        self.rpn_bbox_pred = fluid.layers.conv2d(
            rpn_conv,
            num_filters=4 * num_anchor,
            filter_size=1,
            stride=1,
            padding=0,
            act=None,
            name='rpn_bbox_pred',
            param_attr=ParamAttr(
                name="rpn_bbox_pred_w", initializer=Normal(loc=0., scale=0.01)),
            bias_attr=ParamAttr(
                name="rpn_bbox_pred_b",
                learning_rate=2.,
                regularizer=L2Decay(0.)))
        return self.rpn_cls_score, self.rpn_bbox_pred

    def get_proposals(self, body_feats, im_info, mode='train'):
        """
        Get proposals according to the output of backbone.

        Args:
            body_feats (dict): The dictionary of feature maps from backbone.
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            mode (str): 'train' selects `self.train_proposal`; any other
                value selects `self.test_proposal`.

        Returns:
            rpn_rois(Variable): Output proposals with shape of (rois_num, 4).
        """
        # In RPN Heads, only the last feature map of backbone is used,
        # i.e. the last value of the body_feats dictionary.
        body_feat = list(body_feats.values())[-1]
        rpn_cls_score, rpn_bbox_pred = self._get_output(body_feat)

        if self.num_classes == 1:
            rpn_cls_prob = fluid.layers.sigmoid(
                rpn_cls_score, name='rpn_cls_prob')
        else:
            rpn_cls_score = fluid.layers.transpose(
                rpn_cls_score, perm=[0, 2, 3, 1])
            rpn_cls_score = fluid.layers.reshape(
                rpn_cls_score, shape=(0, 0, 0, -1, self.num_classes))
            rpn_cls_prob_tmp = fluid.layers.softmax(
                rpn_cls_score, use_cudnn=False, name='rpn_cls_prob')
            # Drop class index 0 and keep the top score of the remaining
            # classes for each anchor.
            rpn_cls_prob_slice = fluid.layers.slice(
                rpn_cls_prob_tmp, axes=[4], starts=[1], ends=[self.num_classes])
            rpn_cls_prob, _ = fluid.layers.topk(rpn_cls_prob_slice, 1)
            rpn_cls_prob = fluid.layers.reshape(
                rpn_cls_prob, shape=(0, 0, 0, -1))
            rpn_cls_prob = fluid.layers.transpose(
                rpn_cls_prob, perm=[0, 3, 1, 2])
        prop_op = self.train_proposal if mode == 'train' else self.test_proposal
        # The selected settings object supplies the proposal parameters.
        rpn_rois, rpn_roi_probs = fluid.layers.generate_proposals(
            scores=rpn_cls_prob,
            bbox_deltas=rpn_bbox_pred,
            im_info=im_info,
            anchors=self.anchor,
            variances=self.anchor_var,
            pre_nms_top_n=prop_op.pre_nms_top_n,
            post_nms_top_n=prop_op.post_nms_top_n,
            nms_thresh=prop_op.nms_thresh,
            min_size=prop_op.min_size,
            eta=prop_op.eta)
        return rpn_rois

    def _transform_input(self, rpn_cls_score, rpn_bbox_pred, anchor,
                         anchor_var):
        """
        Rearrange the head outputs and anchors for the loss computation.

        Scores and box predictions are transposed with perm [0, 2, 3, 1]
        and reshaped to (0, -1, num_classes) and (0, -1, 4); anchors and
        their variances are reshaped to (-1, 4).

        Returns:
            tuple: (rpn_cls_score, rpn_bbox_pred, anchor, anchor_var)
        """
        rpn_cls_score = fluid.layers.transpose(rpn_cls_score, perm=[0, 2, 3, 1])
        rpn_bbox_pred = fluid.layers.transpose(rpn_bbox_pred, perm=[0, 2, 3, 1])
        anchor = fluid.layers.reshape(anchor, shape=(-1, 4))
        anchor_var = fluid.layers.reshape(anchor_var, shape=(-1, 4))
        rpn_cls_score = fluid.layers.reshape(
            x=rpn_cls_score, shape=(0, -1, self.num_classes))
        rpn_bbox_pred = fluid.layers.reshape(x=rpn_bbox_pred, shape=(0, -1, 4))
        return rpn_cls_score, rpn_bbox_pred, anchor, anchor_var

    def _get_loss_input(self):
        """
        Return the transformed head outputs needed by `get_loss`.

        Raises:
            ValueError: if any of `rpn_cls_score`, `rpn_bbox_pred`, `anchor`
                or `anchor_var` has not been set on the instance yet (they
                are set by `_get_output`, which `get_proposals` calls).
        """
        for attr in ['rpn_cls_score', 'rpn_bbox_pred', 'anchor', 'anchor_var']:
            # Test for None explicitly (a truthiness test would depend on
            # how the stored object defines truth) and build one message
            # string instead of passing two separate exception arguments.
            if getattr(self, attr, None) is None:
                raise ValueError(
                    "self.{} should not be None, "
                    "call RPNHead.get_proposals first".format(attr))
        return self._transform_input(self.rpn_cls_score, self.rpn_bbox_pred,
                                     self.anchor, self.anchor_var)

    def get_loss(self, im_info, gt_box, is_crowd, gt_label=None):
        """
        Sample proposals and Calculate rpn loss.

        Args:
            im_info(Variable): The information of image with shape [N, 3] with
                shape (height, width, scale).
            gt_box(Variable): The ground-truth bounding boxes with shape [M, 4].
                M is the number of groundtruth.
            is_crowd(Variable): Indicates groud-truth is crowd or not with
                shape [M, 1]. M is the number of groundtruth.
            gt_label(Variable): ground-truth labels; only used when
                `num_classes` is not 1.

        Returns:
            Type: dict
                rpn_cls_loss(Variable): RPN classification loss.
                rpn_reg_loss(Variable): RPN bounding box regression loss.
        """
        rpn_cls, rpn_bbox, anchor, anchor_var = self._get_loss_input()
        if self.num_classes == 1:
            # Parameters come from the self.rpn_target_assign settings object.
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                fluid.layers.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    is_crowd=is_crowd,
                    im_info=im_info,
                    rpn_batch_size_per_im=self.rpn_target_assign.rpn_batch_size_per_im,
                    rpn_straddle_thresh=self.rpn_target_assign.rpn_straddle_thresh,
                    rpn_fg_fraction=self.rpn_target_assign.rpn_fg_fraction,
                    rpn_positive_overlap=self.rpn_target_assign.rpn_positive_overlap,
                    rpn_negative_overlap=self.rpn_target_assign.rpn_negative_overlap,
                    use_random=self.rpn_target_assign.use_random)
            score_tgt = fluid.layers.cast(x=score_tgt, dtype='float32')
            score_tgt.stop_gradient = True
            rpn_cls_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
                x=score_pred, label=score_tgt)
        else:
            # NOTE(review): this branch calls self.rpn_target_assign as a
            # function, but the RPNTargetAssign class in this module is a
            # plain settings object with no __call__; a callable
            # target-assign object must be supplied for num_classes != 1.
            score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight = \
                self.rpn_target_assign(
                    bbox_pred=rpn_bbox,
                    cls_logits=rpn_cls,
                    anchor_box=anchor,
                    anchor_var=anchor_var,
                    gt_boxes=gt_box,
                    gt_labels=gt_label,
                    is_crowd=is_crowd,
                    num_classes=self.num_classes,
                    im_info=im_info)
            labels_int64 = fluid.layers.cast(x=score_tgt, dtype='int64')
            labels_int64.stop_gradient = True
            rpn_cls_loss = fluid.layers.softmax_with_cross_entropy(
                logits=score_pred, label=labels_int64, numeric_stable_mode=True)

        rpn_cls_loss = fluid.layers.reduce_mean(
            rpn_cls_loss, name='loss_rpn_cls')
        loc_tgt = fluid.layers.cast(x=loc_tgt, dtype='float32')
        loc_tgt.stop_gradient = True
        rpn_reg_loss = fluid.layers.smooth_l1(
            x=loc_pred,
            y=loc_tgt,
            sigma=3.0,
            inside_weight=bbox_weight,
            outside_weight=bbox_weight)
        rpn_reg_loss = fluid.layers.reduce_sum(
            rpn_reg_loss, name='loss_rpn_bbox')
        # Normalise the summed regression loss by the number of elements
        # in the score target (product of its shape).
        score_shape = fluid.layers.shape(score_tgt)
        score_shape = fluid.layers.cast(x=score_shape, dtype='float32')
        norm = fluid.layers.reduce_prod(score_shape)
        norm.stop_gradient = True
        rpn_reg_loss = rpn_reg_loss / norm

        return {'rpn_cls_loss': rpn_cls_loss, 'rpn_reg_loss': rpn_reg_loss}
```shell
$ hub install faster_rcnn_resnet50_fpn_coco2017==1.0.0
```
## 命令行预测
```
hub run faster_rcnn_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(num_classes=81,
trainable=True,
pretrained=True,
phase='train')
```
提取头部特征,用于迁移学习。
**参数**
* num\_classes (int): 类别数;
* trainable(bool): 将参数的trainable 属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* phase (str): 可选值为 'train'/'predict','train' 用于训练,'predict' 用于预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。phase 为 'train' 时,keys 包括 'head\_feat'、'rpn\_cls\_loss'、'rpn\_reg\_loss'、'generate\_proposal\_labels';phase 为 'predict' 时,keys 包括 'head\_feat'、'rois'、'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program。
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="faster_rcnn_resnet50_fpn_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving 可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m faster_rcnn_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;使用CPU预测则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/faster_rcnn_resnet50_fpn_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
...@@ -53,7 +53,6 @@ class SmoothL1Loss(object): ...@@ -53,7 +53,6 @@ class SmoothL1Loss(object):
class BoxCoder(object): class BoxCoder(object):
# __op__ = fluid.layers.box_coder
def __init__(self, def __init__(self,
prior_box_var=[0.1, 0.1, 0.2, 0.2], prior_box_var=[0.1, 0.1, 0.2, 0.2],
code_type='decode_center_size', code_type='decode_center_size',
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from paddle import fluid
__all__ = ['test_reader']
def test_reader(paths=None, images=None):
    """
    data generator

    Images loaded from `paths` come first, followed by the entries of
    `images`. Each one is converted BGR -> RGB, scaled to [0, 1],
    normalised with a fixed per-channel mean/std, resized so that the
    short side targets 800 while the long side does not exceed 1333, and
    transposed from HWC to CHW.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]

    Yield:
        res (dict): key contains 'image', 'im_info', 'im_shape', the corresponding values is:
            image (numpy.ndarray): the image to be fed into network
            im_info (numpy.ndarray): [resized height, resized width, scale].
            im_shape (numpy.ndarray): [original height, original width, 1.0].

    Raises:
        AssertionError: if an entry of `paths` is not an existing file.
        ValueError: if a file exists but cannot be decoded as an image.
    """
    # Loop-invariant settings, computed once instead of once per image.
    mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
    std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
    target_size = 800
    max_size = 1333

    img_list = list()
    if paths:
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            # cv2.imread signals an unreadable image by returning None;
            # report that clearly instead of failing on `.astype`.
            if img is None:
                raise ValueError(
                    "Failed to decode the image file {}.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    for im in img_list:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = im.astype(np.float32, copy=False)
        im = im / 255.0
        im -= mean
        im /= std

        shape = im.shape
        # im_shape holds the original shape of image.
        im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32')
        im_size_min = np.min(shape[0:2])
        im_size_max = np.max(shape[0:2])
        im_scale = float(target_size) / float(im_size_min)
        # Fall back to scaling by the long side when the short-side scale
        # would push the long side past max_size.
        if np.round(im_scale * im_size_max) > max_size:
            im_scale = float(max_size) / float(im_size_max)
        resize_w = np.round(im_scale * float(shape[1]))
        resize_h = np.round(im_scale * float(shape[0]))
        # im_info holds the resize info of image.
        im_info = np.array([resize_h, resize_w, im_scale]).astype('float32')
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale,
            fy=im_scale,
            interpolation=cv2.INTER_LINEAR)
        # HWC --> CHW
        im = np.swapaxes(im, 1, 2)
        im = np.swapaxes(im, 1, 0)
        yield {'image': im, 'im_info': im_info, 'im_shape': im_shape}
def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
    """
    Zero-pad every image of a batch to a common height and width.

    Args:
        batch_data (list[dict]): items with keys 'image' (C, H, W array),
            'im_info' and 'im_shape'.
        coarsest_stride (int): when positive, the common height and width
            are rounded up to a multiple of this value.
        use_padded_im_info (bool): whether the height/width written into
            'im_info' are the (possibly stride-rounded) padded sizes or the
            un-rounded batch maxima.

    Returns:
        tuple: float32 arrays (padding_image, padding_info, padding_shape)
        stacked over the batch.

    Note:
        Elements 0 and 1 of each item's 'im_info' are overwritten in place.
    """
    all_shapes = np.array([item['image'].shape for item in batch_data])
    max_shape_org = all_shapes.max(axis=0)
    if coarsest_stride > 0:
        max_shape = np.zeros((3)).astype('int32')
        # Only the spatial axes (1: height, 2: width) are rounded up.
        for axis in (1, 2):
            max_shape[axis] = int(
                np.ceil(max_shape_org[axis] / coarsest_stride) *
                coarsest_stride)
    else:
        max_shape = max_shape_org.astype('int32')

    if use_padded_im_info:
        info_h, info_w = max_shape[1], max_shape[2]
    else:
        info_h, info_w = max_shape_org[1], max_shape_org[2]

    padding_image, padding_info, padding_shape = [], [], []
    for item in batch_data:
        image = item['image']
        im_c, im_h, im_w = image.shape
        # image: paste into the top-left corner of a zero canvas
        canvas = np.zeros((im_c, max_shape[1], max_shape[2]),
                          dtype=np.float32)
        canvas[:, 0:im_h, 0:im_w] = image
        padding_image.append(canvas)
        # im_info: updated in place on the caller's array
        im_info = item['im_info']
        im_info[0] = info_h
        im_info[1] = info_w
        padding_info.append(im_info)
        padding_shape.append(item['im_shape'])

    return (np.array(padding_image).astype('float32'),
            np.array(padding_info).astype('float32'),
            np.array(padding_shape).astype('float32'))
...@@ -13,12 +13,19 @@ from math import ceil ...@@ -13,12 +13,19 @@ from math import ceil
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser from paddlehub.io.parser import txt_parser
from paddlehub.common.paddle_helper import add_vars_prefix
from faster_rcnn_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from faster_rcnn_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN from faster_rcnn_resnet50_fpn_coco2017.fpn import FPN
from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5 from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet
from faster_rcnn_resnet50_fpn_coco2017.rpn_head import AnchorGenerator, RPNTargetAssign, GenerateProposals, FPNRPNHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_head import MultiClassNMS, BBoxHead, TwoFCHead
from faster_rcnn_resnet50_fpn_coco2017.bbox_assigner import BBoxAssigner
from faster_rcnn_resnet50_fpn_coco2017.roi_extractor import FPNRoIAlign
@moduleinfo( @moduleinfo(
...@@ -28,18 +35,15 @@ from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5 ...@@ -28,18 +35,15 @@ from faster_rcnn_resnet50_fpn_coco2017.resnet import ResNet, ResNetC5
summary= summary=
"Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks", "Baidu's Faster-RCNN model for object detection, whose backbone is ResNet50, processed with Feature Pyramid Networks",
author="paddlepaddle", author="paddlepaddle",
author_email="paddle-dev@baidu.com") author_email="")
class FasterRCNNResNet50RPN(hub.Module): class FasterRCNNResNet50RPN(hub.Module):
def _initialize(self): def _initialize(self):
self.faster_rcnn = hub.Module(name="faster_rcnn")
# default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333] # default pretrained model, Faster-RCNN with backbone ResNet50, shape of input tensor is [3, 800, 1333]
self.default_pretrained_model_path = os.path.join( self.default_pretrained_model_path = os.path.join(
self.directory, "faster_rcnn_resnet50_fpn_model") self.directory, "faster_rcnn_resnet50_fpn_model")
self.label_names = self.faster_rcnn.load_label_info( self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt")) os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None # self._set_config()
self.bbox_out = None
self._set_config()
def _set_config(self): def _set_config(self):
""" """
...@@ -67,18 +71,23 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -67,18 +71,23 @@ class FasterRCNNResNet50RPN(hub.Module):
trainable=True, trainable=True,
pretrained=True, pretrained=True,
phase='train'): phase='train'):
"""Distill the Head Features, so as to perform transfer learning.
:param trainable: whether to set parameters trainable.
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param phase: Optional Choice: 'predict', 'train'
:type phase: str
""" """
wrapped_prog = fluid.Program() Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
phase (str): optional choices are 'train' and 'predict'.
Returns:
inputs (dict): the input variables.
outputs (dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program): with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
image = fluid.layers.data( image = fluid.layers.data(
name='image', shape=[3, 800, 1333], dtype='float32') name='image', shape=[3, 800, 1333], dtype='float32')
...@@ -89,25 +98,107 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -89,25 +98,107 @@ class FasterRCNNResNet50RPN(hub.Module):
feature_maps=[2, 3, 4, 5], feature_maps=[2, 3, 4, 5],
freeze_at=2) freeze_at=2)
body_feats = backbone(image) body_feats = backbone(image)
# fpn: FPN # fpn
fpn = FPN( fpn = FPN(
max_level=6, max_level=6,
min_level=2, min_level=2,
num_chan=256, num_chan=256,
spatial_scale=[0.03125, 0.0625, 0.125, 0.25]) spatial_scale=[0.03125, 0.0625, 0.125, 0.25])
var_prefix = '@HUB_{}@'.format(self.name)
im_info = fluid.layers.data(
name='im_info', shape=[3], dtype='float32', lod_level=0)
im_shape = fluid.layers.data(
name='im_shape', shape=[3], dtype='float32', lod_level=0)
body_feat_names = list(body_feats.keys())
body_feats, spatial_scale = fpn.get_output(body_feats)
# rpn_head: RPNHead
rpn_head = self.rpn_head()
rois = rpn_head.get_proposals(body_feats, im_info, mode=phase)
# train
if phase == 'train':
gt_bbox = fluid.layers.data(
name='gt_bbox', shape=[4], dtype='float32', lod_level=1)
is_crowd = fluid.layers.data(
name='is_crowd', shape=[1], dtype='int32', lod_level=1)
gt_class = fluid.layers.data(
name='gt_class', shape=[1], dtype='int32', lod_level=1)
rpn_loss = rpn_head.get_loss(im_info, gt_bbox, is_crowd)
# bbox_assigner: BBoxAssigner
bbox_assigner = self.bbox_assigner(num_classes)
outs = fluid.layers.generate_proposal_labels(
rpn_rois=rois,
gt_classes=gt_class,
is_crowd=is_crowd,
gt_boxes=gt_bbox,
im_info=im_info,
batch_size_per_im=bbox_assigner.batch_size_per_im,
fg_fraction=bbox_assigner.fg_fraction,
fg_thresh=bbox_assigner.fg_thresh,
bg_thresh_hi=bbox_assigner.bg_thresh_hi,
bg_thresh_lo=bbox_assigner.bg_thresh_lo,
bbox_reg_weights=bbox_assigner.bbox_reg_weights,
class_nums=bbox_assigner.class_nums,
use_random=bbox_assigner.use_random)
rois = outs[0]
roi_extractor = self.roi_extractor()
roi_feat = roi_extractor(
head_inputs=body_feats,
rois=rois,
spatial_scale=spatial_scale)
# head_feat
bbox_head = self.bbox_head(num_classes)
head_feat = bbox_head.head(roi_feat)
if isinstance(head_feat, OrderedDict):
head_feat = list(head_feat.values())[0]
if phase == 'train':
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name,
'gt_class': var_prefix + gt_class.name,
'gt_bbox': var_prefix + gt_bbox.name,
'is_crowd': var_prefix + is_crowd.name
}
outputs = {
'head_feat':
var_prefix + head_feat.name,
'rpn_cls_loss':
var_prefix + rpn_loss['rpn_cls_loss'].name,
'rpn_reg_loss':
var_prefix + rpn_loss['rpn_reg_loss'].name,
'generate_proposal_labels':
[var_prefix + var.name for var in outs]
}
elif phase == 'predict':
pred = bbox_head.get_prediction(roi_feat, rois, im_info,
im_shape)
inputs = {
'image': var_prefix + image.name,
'im_info': var_prefix + im_info.name,
'im_shape': var_prefix + im_shape.name
}
outputs = {
'head_feat': var_prefix + head_feat.name,
'rois': var_prefix + rois.name,
'bbox_out': var_prefix + pred.name
}
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(startup_program, var_prefix)
# Base Class global_vars = context_prog.global_block().vars
inputs, outputs, context_prog = self.faster_rcnn.context( inputs = {
body_feats=body_feats, key: global_vars[value]
fpn=fpn, for key, value in inputs.items()
rpn_head=self.rpn_head(), }
roi_extractor=self.roi_extractor(), outputs = {
bbox_head=self.bbox_head(num_classes), key: global_vars[value] if not isinstance(value, list) else
bbox_assigner=self.bbox_assigner(num_classes), [global_vars[var] for var in value]
image=image, for key, value in outputs.items()
trainable=trainable, }
var_prefix='@HUB_{}@'.format(self.name),
phase=phase) for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
...@@ -127,24 +218,24 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -127,24 +218,24 @@ class FasterRCNNResNet50RPN(hub.Module):
return inputs, outputs, context_prog return inputs, outputs, context_prog
def rpn_head(self): def rpn_head(self):
return self.faster_rcnn.FPNRPNHead( return FPNRPNHead(
anchor_generator=self.faster_rcnn.AnchorGenerator( anchor_generator=AnchorGenerator(
anchor_sizes=[32, 64, 128, 256, 512], anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1.0, 2.0], aspect_ratios=[0.5, 1.0, 2.0],
stride=[16.0, 16.0], stride=[16.0, 16.0],
variance=[1.0, 1.0, 1.0, 1.0]), variance=[1.0, 1.0, 1.0, 1.0]),
rpn_target_assign=self.faster_rcnn.RPNTargetAssign( rpn_target_assign=RPNTargetAssign(
rpn_batch_size_per_im=256, rpn_batch_size_per_im=256,
rpn_fg_fraction=0.5, rpn_fg_fraction=0.5,
rpn_negative_overlap=0.3, rpn_negative_overlap=0.3,
rpn_positive_overlap=0.7, rpn_positive_overlap=0.7,
rpn_straddle_thresh=0.0), rpn_straddle_thresh=0.0),
train_proposal=self.faster_rcnn.GenerateProposals( train_proposal=GenerateProposals(
min_size=0.0, min_size=0.0,
nms_thresh=0.7, nms_thresh=0.7,
post_nms_top_n=2000, post_nms_top_n=2000,
pre_nms_top_n=2000), pre_nms_top_n=2000),
test_proposal=self.faster_rcnn.GenerateProposals( test_proposal=GenerateProposals(
min_size=0.0, min_size=0.0,
nms_thresh=0.7, nms_thresh=0.7,
post_nms_top_n=1000, post_nms_top_n=1000,
...@@ -155,7 +246,7 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -155,7 +246,7 @@ class FasterRCNNResNet50RPN(hub.Module):
max_level=6) max_level=6)
def roi_extractor(self): def roi_extractor(self):
return self.faster_rcnn.FPNRoIAlign( return FPNRoIAlign(
canconical_level=4, canconical_level=4,
canonical_size=224, canonical_size=224,
max_level=5, max_level=5,
...@@ -164,14 +255,14 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -164,14 +255,14 @@ class FasterRCNNResNet50RPN(hub.Module):
sampling_ratio=2) sampling_ratio=2)
def bbox_head(self, num_classes): def bbox_head(self, num_classes):
return self.faster_rcnn.BBoxHead( return BBoxHead(
head=self.faster_rcnn.TwoFCHead(mlp_dim=1024), head=TwoFCHead(mlp_dim=1024),
nms=self.faster_rcnn.MultiClassNMS( nms=MultiClassNMS(
keep_top_k=100, nms_threshold=0.5, score_threshold=0.05), keep_top_k=100, nms_threshold=0.5, score_threshold=0.05),
num_classes=num_classes) num_classes=num_classes)
def bbox_assigner(self, num_classes): def bbox_assigner(self, num_classes):
return self.faster_rcnn.BBoxAssigner( return BBoxAssigner(
batch_size_per_im=512, batch_size_per_im=512,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2], bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
bg_thresh_hi=0.5, bg_thresh_hi=0.5,
...@@ -183,6 +274,7 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -183,6 +274,7 @@ class FasterRCNNResNet50RPN(hub.Module):
def object_detection(self, def object_detection(self,
paths=None, paths=None,
images=None, images=None,
data=None,
use_gpu=False, use_gpu=False,
batch_size=1, batch_size=1,
output_dir='detection_result', output_dir='detection_result',
...@@ -190,37 +282,49 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -190,37 +282,49 @@ class FasterRCNNResNet50RPN(hub.Module):
visualization=True): visualization=True):
"""API of Object Detection. """API of Object Detection.
:param paths: the path of images. Args:
:type paths: list, each element is correspond to the path of an image. paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C] images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray batch_size (int): batch size.
:param use_gpu: whether to use gpu or not. use_gpu (bool): Whether to use gpu.
:type use_gpu: bool output_dir (str): The path to store output images.
:param batch_size: bathc size. visualization (bool): Whether to save image or not.
:type batch_size: int score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str Returns:
:param score_thresh: the threshold of detection confidence. res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw box and save images. left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
""" """
all_images = [] paths = paths if paths else list()
paths = paths if paths else [] if data and 'image' in data:
for yield_data in self.faster_rcnn.test_reader(paths, images): paths += data['image']
all_images = list()
for yield_data in test_reader(paths, images):
all_images.append(yield_data) all_images.append(yield_data)
images_num = len(all_images) images_num = len(all_images)
loop_num = ceil(images_num / batch_size) loop_num = ceil(images_num / batch_size)
res = [] res = []
for iter_id in range(loop_num): for iter_id in range(loop_num):
batch_data = [] batch_data = []
handle_id = iter_id * batch_size handle_id = iter_id * batch_size
for image_id in range(batch_size): for image_id in range(batch_size):
try: try:
batch_data.append(all_images[handle_id + image_id]) batch_data.append(all_images[handle_id + image_id])
except: except:
pass pass
padding_image, padding_info, padding_shape = self.faster_rcnn.padding_minibatch(
padding_image, padding_info, padding_shape = padding_minibatch(
batch_data, coarsest_stride=32, use_padded_im_info=True) batch_data, coarsest_stride=32, use_padded_im_info=True)
padding_image_tensor = PaddleTensor(padding_image.copy()) padding_image_tensor = PaddleTensor(padding_image.copy())
padding_info_tensor = PaddleTensor(padding_info.copy()) padding_info_tensor = PaddleTensor(padding_info.copy())
...@@ -228,12 +332,13 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -228,12 +332,13 @@ class FasterRCNNResNet50RPN(hub.Module):
feed_list = [ feed_list = [
padding_image_tensor, padding_info_tensor, padding_shape_tensor padding_image_tensor, padding_info_tensor, padding_shape_tensor
] ]
if use_gpu: if use_gpu:
data_out = self.gpu_predictor.run(feed_list) data_out = self.gpu_predictor.run(feed_list)
else: else:
data_out = self.cpu_predictor.run(feed_list) data_out = self.cpu_predictor.run(feed_list)
output = self.faster_rcnn.postprocess( output = postprocess(
paths=paths, paths=paths,
images=images, images=images,
data_out=data_out, data_out=data_out,
...@@ -243,6 +348,7 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -243,6 +348,7 @@ class FasterRCNNResNet50RPN(hub.Module):
handle_id=handle_id, handle_id=handle_id,
visualization=visualization) visualization=visualization)
res += output res += output
return res return res
def add_module_config_arg(self): def add_module_config_arg(self):
...@@ -269,7 +375,7 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -269,7 +375,7 @@ class FasterRCNNResNet50RPN(hub.Module):
'--input_path', type=str, default=None, help="input data") '--input_path', type=str, default=None, help="input data")
self.arg_input_group.add_argument( self.arg_input_group.add_argument(
'--input_path', '--input_file',
type=str, type=str,
default=None, default=None,
help="file contain input data") help="file contain input data")
...@@ -285,6 +391,15 @@ class FasterRCNNResNet50RPN(hub.Module): ...@@ -285,6 +391,15 @@ class FasterRCNNResNet50RPN(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True) input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images, one string per image.
        **kwargs: forwarded unchanged to ``object_detection``.

    Returns:
        The value returned by ``object_detection`` for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # The first positional parameter of object_detection is ``paths``; the
    # decoded arrays have to be passed by keyword so they reach ``images``
    # instead of being treated as file paths.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable @runnable
def run_cmd(self, argvs): def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser( self.parser = argparse.ArgumentParser(
......
# coding=utf-8 # coding=utf-8
import base64
import os import os
import cv2
import numpy as np import numpy as np
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
__all__ = [ __all__ = [
'get_save_image_name', 'draw_bounding_box_on_image', 'clip_bbox', 'base64_to_cv2',
'load_label_info' 'load_label_info',
'postprocess',
] ]
def base64_to_cv2(b64str):
    """Decode a base64 string holding an encoded image into an OpenCV image.

    Args:
        b64str (str): base64 text of the encoded image bytes.

    Returns:
        The result of ``cv2.imdecode`` called with ``cv2.IMREAD_COLOR`` on
        the decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported replacement for the deprecated binary
    # mode of np.fromstring; it yields the same uint8 view of the bytes.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path): def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path. """Get save image name from source image path.
""" """
...@@ -62,7 +72,6 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): ...@@ -62,7 +72,6 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir):
os.remove(save_name) os.remove(save_name)
image.save(save_name) image.save(save_name)
return save_name return save_name
...@@ -91,28 +100,34 @@ def postprocess(paths, ...@@ -91,28 +100,34 @@ def postprocess(paths,
output_dir, output_dir,
handle_id, handle_id,
visualization=True): visualization=True):
"""postprocess the lod_tensor produced by fluid.Executor.run """
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str Args:
:param images: data of images, [N, H, W, C] paths (list[str]): the path of images.
:type images: numpy.ndarray images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box. label_names (list[str]): label names.
:type score_thresh: float output_dir (str): output directory.
:param label_names: label names handle_id (int): The number of images that have been handled.
:type label_names: list visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str Returns:
:param handle_id: The number of images that have been handled. res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox. left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
""" """
lod_tensor = data_out[0] lod_tensor = data_out[0]
lod = lod_tensor.lod[0] lod = lod_tensor.lod[0]
results = lod_tensor.as_ndarray() results = lod_tensor.as_ndarray()
if handle_id < len(paths): if handle_id < len(paths):
unhandled_paths = paths[handle_id:] unhandled_paths = paths[handle_id:]
unhandled_paths_num = len(unhandled_paths) unhandled_paths_num = len(unhandled_paths)
...@@ -146,10 +161,6 @@ def postprocess(paths, ...@@ -146,10 +161,6 @@ def postprocess(paths,
category_id = int(row[0]) category_id = int(row[0])
confidence = row[1] confidence = row[1]
bbox = row[2:] bbox = row[2:]
bbox[0] = bbox[0] * org_img_width
bbox[1] = bbox[1] * org_img_height
bbox[2] = bbox[2] * org_img_width
bbox[3] = bbox[3] * org_img_height
dt = {} dt = {}
dt['label'] = label_names[category_id] dt['label'] = label_names[category_id]
dt['confidence'] = confidence dt['confidence'] = confidence
......
# coding=utf-8 # coding=utf-8
import paddle.fluid as fluid import paddle.fluid as fluid
__all__ = ['RoIAlign', 'FPNRoIAlign'] __all__ = ['FPNRoIAlign']
class RoIAlign(object):
# __op__ = fluid.layers.roi_align
def __init__(self, resolution=7, spatial_scale=0.0625, sampling_ratio=0):
super(RoIAlign, self).__init__()
if isinstance(resolution, int):
resolution = [resolution, resolution]
self.pooled_height = resolution[0]
self.pooled_width = resolution[1]
self.spatial_scale = spatial_scale
self.sampling_ratio = sampling_ratio
class FPNRoIAlign(object): class FPNRoIAlign(object):
......
```shell
$ hub install retinanet_resnet50_fpn_coco2017==1.0.0
```
## 命令行预测
```
hub run retinanet_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 将参数的trainable属性设为trainable;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="retinanet_resnet50_fpn_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m retinanet_resnet50_fpn_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/retinanet_resnet50_fpn_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
...@@ -15,13 +15,19 @@ __all__ = ['test_reader', 'padding_minibatch'] ...@@ -15,13 +15,19 @@ __all__ = ['test_reader', 'padding_minibatch']
def test_reader(paths=None, images=None): def test_reader(paths=None, images=None):
"""data generator
:param paths: path to images.
:type paths: list, each element is a str
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
""" """
img_list = [] data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
Yield:
res (dict): key contains 'image' and 'im_info', the corresponding values is:
image (numpy.ndarray): the image to be fed into network
im_info (numpy.ndarray): the info about the preprocessed.
"""
img_list = list()
if paths: if paths:
for img_path in paths: for img_path in paths:
assert os.path.isfile( assert os.path.isfile(
...@@ -83,9 +89,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): ...@@ -83,9 +89,9 @@ def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True):
else: else:
max_shape = max_shape_org.astype('int32') max_shape = max_shape_org.astype('int32')
padding_image = [] padding_image = list()
padding_info = [] padding_info = list()
padding_shape = [] padding_shape = list()
for data in batch_data: for data in batch_data:
im_c, im_h, im_w = data['image'].shape im_c, im_h, im_w = data['image'].shape
......
...@@ -11,13 +11,13 @@ from functools import partial ...@@ -11,13 +11,13 @@ from functools import partial
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable from paddlehub.module.module import moduleinfo, runnable, serving
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser from paddlehub.io.parser import txt_parser
from retinanet_resnet50_fpn_coco2017.fpn import FPN from retinanet_resnet50_fpn_coco2017.fpn import FPN
from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead
from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch
from retinanet_resnet50_fpn_coco2017.resnet import ResNet from retinanet_resnet50_fpn_coco2017.resnet import ResNet
...@@ -29,7 +29,7 @@ from retinanet_resnet50_fpn_coco2017.resnet import ResNet ...@@ -29,7 +29,7 @@ from retinanet_resnet50_fpn_coco2017.resnet import ResNet
summary= summary=
"Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.", "Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.",
author="paddlepaddle", author="paddlepaddle",
author_email="paddle-dev@baidu.com") author_email="")
class RetinaNetResNet50FPN(hub.Module): class RetinaNetResNet50FPN(hub.Module):
def _initialize(self): def _initialize(self):
# default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608) # default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608)
...@@ -69,16 +69,19 @@ class RetinaNetResNet50FPN(hub.Module): ...@@ -69,16 +69,19 @@ class RetinaNetResNet50FPN(hub.Module):
trainable=True, trainable=True,
pretrained=True, pretrained=True,
get_prediction=False): get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning. """
Distill the Head Features, so as to perform transfer learning.
Args:
num_classes (int): number of classes.
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable. Returns:
:type trainable: bool inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model. outputs(dict): the output variables.
:type pretrained: bool context_prog (Program): the program to execute transfer learning.
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
""" """
context_prog = fluid.Program() context_prog = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
...@@ -166,31 +169,38 @@ class RetinaNetResNet50FPN(hub.Module): ...@@ -166,31 +169,38 @@ class RetinaNetResNet50FPN(hub.Module):
score_thresh=0.5, score_thresh=0.5,
visualization=True): visualization=True):
"""API of Object Detection. """API of Object Detection.
:param paths: the path of images.
:type paths: list, each element is correspond to the path of an image. Args:
:param images: data of images, [N, H, W, C] paths (list[str]): The paths of images.
:type images: numpy.ndarray images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:param use_gpu: whether to use gpu or not. batch_size (int): batch size.
:type use_gpu: bool use_gpu (bool): Whether to use gpu.
:param batch_size: bathc size. output_dir (str): The path to store output images.
:type batch_size: int visualization (bool): Whether to save image or not.
:param output_dir: the directory to store the detection result. score_thresh (float): threshold for object detecion.
:type output_dir: str visualization (bool): whether to save result as images.
:param score_thresh: the threshold of detection confidence.
:type score_thresh: float Returns:
:param visualization: whether to draw bounding box and save images. res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type visualization: bool data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
left (float): The X coordinate of the upper left corner of the bounding box;
top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
""" """
all_images = [] all_images = list()
paths = paths if paths else [] paths = paths if paths else list()
for yield_data in test_reader(paths, images): for yield_data in test_reader(paths, images):
all_images.append(yield_data) all_images.append(yield_data)
images_num = len(all_images) images_num = len(all_images)
loop_num = int(np.ceil(images_num / batch_size)) loop_num = int(np.ceil(images_num / batch_size))
res = [] res = list()
for iter_id in range(loop_num): for iter_id in range(loop_num):
batch_data = [] batch_data = list()
handle_id = iter_id * batch_size handle_id = iter_id * batch_size
for image_id in range(batch_size): for image_id in range(batch_size):
try: try:
...@@ -248,7 +258,7 @@ class RetinaNetResNet50FPN(hub.Module): ...@@ -248,7 +258,7 @@ class RetinaNetResNet50FPN(hub.Module):
help="file contain input data") help="file contain input data")
def check_input_data(self, args): def check_input_data(self, args):
input_data = [] input_data = list()
if args.input_path: if args.input_path:
input_data = [args.input_path] input_data = [args.input_path]
elif args.input_file: elif args.input_file:
...@@ -258,6 +268,15 @@ class RetinaNetResNet50FPN(hub.Module): ...@@ -258,6 +268,15 @@ class RetinaNetResNet50FPN(hub.Module):
input_data = txt_parser.parse(args.input_file, use_strip=True) input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data return input_data
@serving
def serving_method(self, images, **kwargs):
    """
    Run as a service.

    Args:
        images (list[str]): base64-encoded images, one string per image.
        **kwargs: forwarded unchanged to ``object_detection``.

    Returns:
        The value returned by ``object_detection`` for the decoded images.
    """
    images_decode = [base64_to_cv2(image) for image in images]
    # object_detection takes ``paths`` as its first parameter, so the decoded
    # arrays are passed by keyword to make sure they are used as ``images``.
    results = self.object_detection(images=images_decode, **kwargs)
    return results
@runnable @runnable
def run_cmd(self, argvs): def run_cmd(self, argvs):
self.parser = argparse.ArgumentParser( self.parser = argparse.ArgumentParser(
......
# coding=utf-8 # coding=utf-8
import base64
import os import os
import cv2
import numpy as np import numpy as np
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
__all__ = [ __all__ = [
'get_save_image_name', 'draw_bounding_box_on_image', 'clip_bbox', 'base64_to_cv2',
'load_label_info' 'load_label_info',
'postprocess',
] ]
def base64_to_cv2(b64str):
    """Decode a base64 string holding an encoded image into an OpenCV image.

    Args:
        b64str (str): base64 text of the encoded image bytes.

    Returns:
        The result of ``cv2.imdecode`` called with ``cv2.IMREAD_COLOR`` on
        the decoded bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the supported replacement for the deprecated binary
    # mode of np.fromstring; it yields the same uint8 view of the bytes.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path): def get_save_image_name(img, output_dir, image_path):
"""Get save image name from source image path. """Get save image name from source image path.
""" """
...@@ -80,24 +90,29 @@ def load_label_info(file_path): ...@@ -80,24 +90,29 @@ def load_label_info(file_path):
def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, def postprocess(paths, images, data_out, score_thresh, label_names, output_dir,
handle_id, visualization): handle_id, visualization):
"""postprocess the lod_tensor produced by fluid.Executor.run """
postprocess the lod_tensor produced by fluid.Executor.run
:param paths: the path of images.
:type paths: list, each element is a str Args:
:param images: data of images, [N, H, W, C] paths (list[str]): the path of images.
:type images: numpy.ndarray images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
:param data_out: data produced by executor.run data_out (lod_tensor): data produced by executor.run.
:type data_out: lod_tensor score_thresh (float): the low limit of bounding box.
:param score_thresh: the low limit of bounding box. label_names (list[str]): label names.
:type score_thresh: float output_dir (str): output directory.
:param label_names: label names handle_id (int): The number of images that have been handled.
:type label_names: list visualization (bool): whether to save as images.
:param output_dir: output directory.
:type output_dir: str Returns:
:param handle_id: The number of images that have been handled. res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is:
:type handle_id: int data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bbox. left (float): The X coordinate of the upper left corner of the bounding box;
:param visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str): The path to save output images.
""" """
lod_tensor = data_out[0] lod_tensor = data_out[0]
lod = lod_tensor.lod[0] lod = lod_tensor.lod[0]
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from ssd.data_feed import reader, DecodeImage, ResizeImage, NormalizeImage, Permute
from ssd.processor import load_label_info, postprocess
from ssd.multi_box_head import MultiBoxHead
from ssd.output_decoder import SSDOutputDecoder
@moduleinfo(
    name="ssd",
    version="1.0.0",
    type="cv/object_detection",
    summary=
    "SSD (Single Shot MultiBox Detector) is a object detection model, which trained with PASCAL VOC dataset.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class SSD(hub.Module):
    # Shared SSD building blocks: exposes the reader, processors and head
    # configuration classes as attributes and builds the transfer-learning
    # context on top of a backbone's feature maps.

    def _initialize(self):
        # Re-export the helpers imported at module level as instance
        # attributes (presumably so dependent modules can reach them through
        # the loaded hub module -- confirm against callers).
        self.reader = reader
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        self.MultiBoxHead = MultiBoxHead
        self.SSDOutputDecoder = SSDOutputDecoder
        self.DecodeImage = DecodeImage
        self.ResizeImage = ResizeImage
        self.NormalizeImage = NormalizeImage
        self.Permute = Permute

    def context(self,
                body_feats,
                multi_box_head,
                ssd_output_decoder,
                image,
                trainable=True,
                var_prefix='',
                get_prediction=False):
        """Distill the Head Features, so as to perform transfer learning.

        Args:
            body_feats (list): feature maps of the backbone outputs.
            multi_box_head (MultiBoxHead): SSD head configuration.
            ssd_output_decoder (SSDOutputDecoder): SSD output decoder
                configuration.
            image (paddle.fluid.framework.Variable): image tensor.
            trainable (bool): whether to set parameters trainable.
            var_prefix (str): the prefix of variables in ssd.
            get_prediction (bool): whether to get prediction. If True the
                outputs key is 'bbox_out', otherwise 'body_features'.

        Returns:
            inputs (dict): variables keyed by 'image' and 'im_size'.
            outputs (dict): maps 'body_features' or 'bbox_out' to a list of
                variables.
            context_prog (Program): the program that owns ``image``.
        """
        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            # Names are built with the prefix up front; the variables are
            # looked up by these names after add_vars_prefix has run below.
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            if not get_prediction:
                outputs = {
                    'body_features':
                    [var_prefix + var.name for var in body_feats]
                }
            else:
                locs, confs, box, box_var = fluid.layers.multi_box_head(
                    inputs=body_feats,
                    image=image,
                    base_size=multi_box_head.base_size,
                    num_classes=multi_box_head.num_classes,
                    aspect_ratios=multi_box_head.aspect_ratios,
                    min_ratio=multi_box_head.min_ratio,
                    max_ratio=multi_box_head.max_ratio,
                    min_sizes=multi_box_head.min_sizes,
                    max_sizes=multi_box_head.max_sizes,
                    steps=multi_box_head.steps,
                    offset=multi_box_head.offset,
                    flip=multi_box_head.flip,
                    kernel_size=multi_box_head.kernel_size,
                    pad=multi_box_head.pad,
                    min_max_aspect_ratios_order=multi_box_head.
                    min_max_aspect_ratios_order)
                pred = fluid.layers.detection_output(
                    loc=locs,
                    scores=confs,
                    prior_box=box,
                    prior_box_var=box_var,
                    nms_threshold=ssd_output_decoder.nms_threshold,
                    nms_top_k=ssd_output_decoder.nms_top_k,
                    keep_top_k=ssd_output_decoder.keep_top_k,
                    score_threshold=ssd_output_decoder.score_threshold,
                    nms_eta=ssd_output_decoder.nms_eta,
                    background_label=ssd_output_decoder.background_label)
                # Must be a list: the lookup below iterates each outputs
                # value as a sequence of variable names, and a bare string
                # would be walked character by character.
                outputs = {'bbox_out': [var_prefix + pred.name]}

            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            # Swap the collected names for the actual program variables.
            inputs = {
                key: context_prog.global_block().vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: [
                    context_prog.global_block().vars[varname]
                    for varname in value
                ]
                for key, value in outputs.items()
            }

            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            return inputs, outputs, context_prog
# coding=utf-8
class MultiBoxHead(object):
    """Plain container for the SSD multi-box head settings.

    Every constructor argument is stored unchanged on the instance under the
    same name; no validation or computation is performed. The original note
    on this class points at ``fluid.layers.multi_box_head`` as the operator
    these settings are meant for.

    Args:
        base_size: stored as ``base_size``.
        num_classes: stored as ``num_classes``.
        aspect_ratios: stored as ``aspect_ratios``.
        min_ratio: stored as ``min_ratio`` (default None).
        max_ratio: stored as ``max_ratio`` (default None).
        min_sizes: stored as ``min_sizes`` (default None).
        max_sizes: stored as ``max_sizes`` (default None).
        steps: stored as ``steps`` (default None).
        offset: stored as ``offset`` (default 0.5).
        flip: stored as ``flip`` (default True).
        kernel_size: stored as ``kernel_size`` (default 1).
        pad: stored as ``pad`` (default 0).
        min_max_aspect_ratios_order: stored under the same name
            (default False).
    """

    # __op__ = fluid.layers.multi_box_head

    def __init__(self,
                 base_size,
                 num_classes,
                 aspect_ratios,
                 min_ratio=None,
                 max_ratio=None,
                 min_sizes=None,
                 max_sizes=None,
                 steps=None,
                 offset=0.5,
                 flip=True,
                 kernel_size=1,
                 pad=0,
                 min_max_aspect_ratios_order=False):
        # Required settings.
        self.base_size, self.num_classes, self.aspect_ratios = (
            base_size, num_classes, aspect_ratios)
        # Optional ratio / size bounds.
        self.min_ratio, self.max_ratio = min_ratio, max_ratio
        self.min_sizes, self.max_sizes = min_sizes, max_sizes
        # Remaining options, kept exactly as given.
        self.steps, self.offset, self.flip = steps, offset, flip
        self.kernel_size, self.pad = kernel_size, pad
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
class SSDOutputDecoder(object):
    """Plain container for the SSD output-decoding settings.

    Every constructor argument is stored unchanged on the instance under the
    same name. The original note on this class points at
    ``fluid.layers.detection_output`` as the operator these settings are
    meant for.

    Args:
        nms_threshold: stored as ``nms_threshold`` (default 0.3).
        nms_top_k: stored as ``nms_top_k`` (default 400).
        keep_top_k: stored as ``keep_top_k`` (default 200).
        score_threshold: stored as ``score_threshold`` (default 0.01).
        nms_eta: stored as ``nms_eta`` (default 1.0).
        background_label: stored as ``background_label`` (default 0).
    """

    # __op__ = fluid.layers.detection_output

    def __init__(self,
                 nms_threshold=0.3,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0):
        self.nms_threshold, self.background_label = (nms_threshold,
                                                     background_label)
        self.nms_top_k, self.keep_top_k = nms_top_k, keep_top_k
        self.score_threshold, self.nms_eta = score_threshold, nms_eta
```shell
$ hub install ssd_mobilenet_v1_pascal==1.1.0
```
## 命令行预测
```
hub run ssd_mobilenet_v1_pascal --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'body\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_mobilenet_v1_pascal")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_mobilenet_v1_pascal
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_mobilenet_v1_pascal"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
MobileNet:
norm_decay: 0.
conv_group_scale: 1
conv_learning_rate: 0.1
extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
with_extra_blocks: True
SSDOutputDecoder: SSDOutputDecoder:
background_label: 0 background_label: 0
keep_top_k: 200 keep_top_k: 200
...@@ -9,7 +16,7 @@ SSDOutputDecoder: ...@@ -9,7 +16,7 @@ SSDOutputDecoder:
MultiBoxHead: MultiBoxHead:
aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
base_size: 300 base_size: 300
flip: true flip: True
max_ratio: 90 max_ratio: 90
max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0] max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
min_ratio: 20 min_ratio: 20
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import random
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
from paddle import fluid
__all__ = ['reader']
class DecodeImage(object):
    """First preprocessing step: optional BGR -> RGB colour conversion."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """Store the decoding options.

        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): whether or not to mixup image and
                gt_bbox/gt_score; the flag is only stored, ``__call__``
                never reads it.
        """
        self.with_mixup = with_mixup
        self.to_rgb = to_rgb

    def __call__(self, im):
        """Return ``im`` converted with ``cv2.COLOR_BGR2RGB`` when ``to_rgb``
        is truthy; otherwise return ``im`` itself, untouched."""
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Resize step of the preprocessing pipeline for H x W x C numpy images."""

    def __init__(self,
                 target_size=0,
                 max_size=0,
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        Rescale image to the specified target size, and capped at max_size
        if max_size != 0.
        If target_size is list, selected a scale randomly as the specified
        target size.

        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
            max_size (int): the max size of image; 0 disables the cap and
                switches to a square ``target_size x target_size`` resize.
            interp (int): the interpolation method; passed as
                ``interpolation`` to ``cv2.resize`` or as the resample
                argument of ``PIL.Image.resize``.
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        """Resize ``im`` and return the resized array.

        Args:
            im (numpy.ndarray): 3-dimensional image; the first two axes are
                read as height and width.

        Returns:
            numpy.ndarray: the resized image.

        Raises:
            TypeError: ``im`` is not a numpy array, or ``max_size`` is set
                while ``use_cv2`` is False.
            ValueError: ``im`` is not 3-dimensional.
            ZeroDivisionError: height or width of ``im`` is 0.
        """
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if isinstance(self.target_size, list):
            # Case for multi-scale training
            selected_size = random.choice(self.target_size)
        else:
            selected_size = self.target_size
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))
        if self.max_size != 0:
            # Aspect-ratio preserving mode: scale the short side to
            # selected_size ...
            im_scale = float(selected_size) / float(im_size_min)
            # ... but prevent the biggest axis from being more than max_size
            if np.round(im_scale * im_size_max) > self.max_size:
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
            resize_w = im_scale_x * float(im_shape[1])
            resize_h = im_scale_y * float(im_shape[0])
        else:
            # Square mode: each axis gets its own scale factor so that both
            # sides end up at selected_size.
            im_scale_x = float(selected_size) / float(im_shape[1])
            im_scale_y = float(selected_size) / float(im_shape[0])
            resize_w = selected_size
            resize_h = selected_size
        if self.use_cv2:
            im = cv2.resize(
                im,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
        else:
            if self.max_size != 0:
                raise TypeError(
                    'If you set max_size to cap the maximum size of image,'
                    'please set use_cv2 to True to resize the image.')
            # PIL path: the array is cast to uint8 before conversion.
            im = im.astype('uint8')
            im = Image.fromarray(im)
            im = im.resize((int(resize_w), int(resize_h)), self.interp)
            im = np.array(im)
        return im
class NormalizeImage(object):
    """Normalisation step: optional 1/255 scaling, then (x - mean) / std."""

    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): per-channel value subtracted from the image
            std (list): per-channel value the image is divided by
            is_scale (bool): whether to divide the image by 255 first
            is_channel_first (bool): True when the image layout is
                (C, H, W), False when it is (H, W, C); decides along which
                axis mean/std are broadcast.
        """
        # The list defaults are only read, never mutated, so sharing them
        # between instances is harmless.
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """Normalize the image.
        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std

        Returns a new float32 array; the input array is never modified.
        """
        # Always take a private float32 copy: with copy=False a float32
        # input would be aliased and the in-place operations below would
        # overwrite the caller's data.
        im = im.astype(np.float32)
        if self.is_channel_first:
            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
            std = np.array(self.std)[:, np.newaxis, np.newaxis]
        else:
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            im /= 255.0
        im -= mean
        im /= std
        return im
class Permute(object):
    """Layout step: optional HWC -> CHW transpose and channel-order reversal."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.

        Args:
            to_bgr (bool): confirm whether to convert RGB to BGR
            channel_first (bool): confirm whether to change channel
        """
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im):
        """Return the permuted image.

        With ``channel_first`` the (H, W, C) input becomes (C, H, W). With
        ``to_bgr`` the three channels are reversed along whichever axis
        holds them after the optional transpose. When both flags are False
        the input object is returned unchanged.
        """
        if self.channel_first:
            im = np.swapaxes(im, 1, 2)
            im = np.swapaxes(im, 1, 0)
        if self.to_bgr:
            if self.channel_first:
                im = im[[2, 1, 0], :, :]
            else:
                # Channels are still on the last axis; indexing axis 0 here
                # would shuffle image rows instead of channels.
                im = im[:, :, [2, 1, 0]]
        return im
def reader(paths=None,
           images=None,
           decode_image=None,
           resize_image=None,
           permute_image=None,
           normalize_image=None):
    """
    data generator

    Loads the images named by ``paths``, appends the in-memory ``images``,
    runs every image through decode -> resize -> permute -> normalize and
    yields one single-element list per image.

    Each transform argument may be any callable taking and returning an
    image. A transform left as ``None`` falls back to the pipeline this
    reader has always applied: BGR->RGB decode, 300x300 PIL resize,
    CHW permute with channel reversal, and (x - 127.5) / 127.502231
    normalization. Explicitly passed transforms are honored; they used to
    be overwritten by those fixed transforms.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (class object): instance of <class 'DecodeImage' object>
        resize_image (class object): instance of <class 'ResizeImage' object>
        permute_image (class object): instance of <class 'Permute' object>
        normalize_image (class object): instance of <class 'NormalizeImage' object>

    Yields:
        list: ``[preprocessed_img]`` for each input image, paths first.

    Raises:
        AssertionError: ``paths`` is not a list or an entry is not a file.
        ValueError: a file exists but ``cv2.imread`` cannot decode it.
    """
    # Build the fallback transforms lazily so explicit arguments win and no
    # transform object is created at import time.
    if decode_image is None:
        decode_image = DecodeImage(to_rgb=True, with_mixup=False)
    if resize_image is None:
        resize_image = ResizeImage(
            target_size=300, interp=1, max_size=0, use_cv2=False)
    if permute_image is None:
        permute_image = Permute()
    if normalize_image is None:
        normalize_image = NormalizeImage(
            mean=[127.5, 127.5, 127.5],
            std=[127.502231, 127.502231, 127.502231],
            is_scale=False)

    img_list = []
    if paths is not None:
        # Assertions are kept so the exception type seen by existing
        # callers does not change.
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an undecodable file by returning None.
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
# coding=utf-8 # coding=utf-8
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import ast import ast
import argparse import argparse
import os
from functools import partial from functools import partial
import yaml
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser from paddlehub.module.module import moduleinfo, runnable, serving
import yaml from paddlehub.common.paddle_helper import add_vars_prefix
from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
from ssd_mobilenet_v1_pascal.processor import load_label_info, postprocess, base64_to_cv2
from ssd_mobilenet_v1_pascal.data_feed import reader
@moduleinfo( @moduleinfo(
...@@ -25,25 +25,18 @@ from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet ...@@ -25,25 +25,18 @@ from ssd_mobilenet_v1_pascal.mobilenet_v1 import MobileNet
type="cv/object_detection", type="cv/object_detection",
summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.", summary="SSD with backbone MobileNet_V1, trained with dataset Pasecal VOC.",
author="paddlepaddle", author="paddlepaddle",
author_email="paddle-dev@baidu.com") author_email="")
class SSDMobileNetv1(hub.Module): class SSDMobileNetv1(hub.Module):
def _initialize(self): def _initialize(self):
self.ssd = hub.Module(name="ssd")
# default pretrained model of SSD_MobileNet_V1_VOC, the shape of image tensor is (3, 300, 300)
self.default_pretrained_model_path = os.path.join( self.default_pretrained_model_path = os.path.join(
self.directory, "ssd_mobilenet_v1_model") self.directory, "ssd_mobilenet_v1_model")
self.label_names = self.ssd.load_label_info( self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt")) os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None self.model_config = None
self.image = None
self.bbox_out = None
self._set_config() self._set_config()
self._config = None
def _set_config(self): def _set_config(self):
""" # predictor config setting.
predictor config setting
"""
cpu_config = AnalysisConfig(self.default_pretrained_model_path) cpu_config = AnalysisConfig(self.default_pretrained_model_path)
cpu_config.disable_glog_info() cpu_config.disable_glog_info()
cpu_config.disable_gpu() cpu_config.disable_gpu()
...@@ -62,51 +55,92 @@ class SSDMobileNetv1(hub.Module): ...@@ -62,51 +55,92 @@ class SSDMobileNetv1(hub.Module):
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config) self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self, # model config setting.
num_classes=21, if not self.model_config:
trainable=True, with open(os.path.join(self.directory, 'config.yml')) as fp:
pretrained=True, self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)
get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning. self.multi_box_head_config = self.model_config['MultiBoxHead']
self.output_decoder_config = self.model_config['SSDOutputDecoder']
self.mobilenet_config = self.model_config['MobileNet']
:param trainable: whether to set parameters trainable. def context(self, trainable=True, pretrained=True, get_prediction=False):
:type trainable: bool
:param pretrained: whether to load default pretrained model.
:type pretrained: bool
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'body_features': body_features}.
:type get_prediction: bool
""" """
wrapped_prog = fluid.Program() Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
Returns:
inputs(dict): the input variables.
outputs(dict): the output variables.
context_prog (Program): the program to execute transfer learning.
"""
context_prog = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program): with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
# image # image
image = fluid.layers.data( image = fluid.layers.data(
name='image', shape=[3, 300, 300], dtype='float32') name='image', shape=[3, 300, 300], dtype='float32')
backbone = MobileNet( # backbone
norm_decay=0., backbone = MobileNet(**self.mobilenet_config)
conv_group_scale=1, # body_feats
conv_learning_rate=0.1,
extra_block_filters=[[256, 512], [128, 256], [128, 256],
[64, 128]],
with_extra_blocks=True)
body_feats = backbone(image) body_feats = backbone(image)
# call ssd.context # im_size
inputs, outputs, context_prog = self.ssd.context( im_size = fluid.layers.data(
body_feats=body_feats, name='im_size', shape=[2], dtype='int32')
multi_box_head=self.ssd.MultiBoxHead( # var_prefix
num_classes=num_classes, **self.multi_box_head_config), var_prefix = '@HUB_{}@'.format(self.name)
ssd_output_decoder=self.ssd.SSDOutputDecoder( # names of inputs
**self.output_decoder_config), inputs = {
'image': var_prefix + image.name,
'im_size': var_prefix + im_size.name
}
# names of outputs
if get_prediction:
locs, confs, box, box_var = fluid.layers.multi_box_head(
inputs=body_feats,
image=image, image=image,
trainable=trainable, num_classes=21,
var_prefix='@HUB_{}@'.format(self.name), **self.multi_box_head_config)
get_prediction=get_prediction) pred = fluid.layers.detection_output(
loc=locs,
scores=confs,
prior_box=box,
prior_box_var=box_var,
**self.output_decoder_config)
outputs = {'bbox_out': [var_prefix + pred.name]}
else:
outputs = {
'body_features':
[var_prefix + var.name for var in body_feats]
}
# add_vars_prefix
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(fluid.default_startup_program(), var_prefix)
# inputs
inputs = {
key: context_prog.global_block().vars[value]
for key, value in inputs.items()
}
outputs = {
out_key: [
context_prog.global_block().vars[varname]
for varname in out_value
]
for out_key, out_value in outputs.items()
}
# trainable
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# pretrained
if pretrained: if pretrained:
def _if_exist(var): def _if_exist(var):
...@@ -120,82 +154,56 @@ class SSDMobileNetv1(hub.Module): ...@@ -120,82 +154,56 @@ class SSDMobileNetv1(hub.Module):
predicate=_if_exist) predicate=_if_exist)
else: else:
exe.run(startup_program) exe.run(startup_program)
return inputs, outputs, context_prog
@property return inputs, outputs, context_prog
def config(self):
if not self._config:
with open(os.path.join(self.directory, 'config.yml')) as file:
self._config = yaml.load(file.read(), Loader=yaml.FullLoader)
return self._config
@property
def multi_box_head_config(self):
return self.config['MultiBoxHead']
@property
def output_decoder_config(self):
return self.config['SSDOutputDecoder']
def object_detection(self, def object_detection(self,
paths=None, paths=None,
images=None, images=None,
data=None, data=None,
use_gpu=False,
batch_size=1, batch_size=1,
use_gpu=False,
output_dir='detection_result', output_dir='detection_result',
score_thresh=0.5, score_thresh=0.5,
visualization=True): visualization=True):
"""API of Object Detection. """API of Object Detection.
:param paths: the path of images. Args:
:type paths: list, each element is correspond to the path of an image. paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C] images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray batch_size (int): batch size.
:param use_gpu: whether to use gpu or not. use_gpu (bool): Whether to use gpu.
:type use_gpu: bool output_dir (str): The path to store output images.
:param batch_size: bathc size. visualization (bool): Whether to save image or not.
:type batch_size: int score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str Returns:
:param score_thresh: the threshold of detection confidence. res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images. left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
""" """
paths = paths if paths else list()
if data and 'image' in data: if data and 'image' in data:
paths = data['image'] if not paths else paths + data['image'] paths += data['image']
decode_image = self.ssd.DecodeImage(to_rgb=True, with_mixup=False)
resize_image = self.ssd.ResizeImage( data_reader = partial(reader, paths, images)
target_size=300, interp=1, max_size=0, use_cv2=False)
permute_image = self.ssd.Permute()
normalize_image = self.ssd.NormalizeImage(
mean=[127.5, 127.5, 127.5],
std=[127.502231, 127.502231, 127.502231],
is_scale=False)
data_reader = partial(
self.ssd.reader,
paths,
images,
decode_image=decode_image,
resize_image=resize_image,
permute_image=permute_image,
normalize_image=normalize_image)
batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
paths = paths if paths else []
res = [] res = []
for iter_id, feed_data in enumerate(batch_reader()): for iter_id, feed_data in enumerate(batch_reader()):
np_data = np.array(feed_data).astype('float32') feed_data = np.array(feed_data)
if np_data.shape == 1: image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
np_data = np_data[0]
else:
np_data = np.squeeze(np_data, axis=1)
data_tensor = PaddleTensor(np_data.copy())
if use_gpu: if use_gpu:
data_out = self.gpu_predictor.run([data_tensor]) data_out = self.gpu_predictor.run([image_tensor])
else: else:
data_out = self.cpu_predictor.run([data_tensor]) data_out = self.cpu_predictor.run([image_tensor])
output = self.ssd.postprocess(
output = postprocess(
paths=paths, paths=paths,
images=images, images=images,
data_out=data_out, data_out=data_out,
...@@ -204,55 +212,49 @@ class SSDMobileNetv1(hub.Module): ...@@ -204,55 +212,49 @@ class SSDMobileNetv1(hub.Module):
output_dir=output_dir, output_dir=output_dir,
handle_id=iter_id * batch_size, handle_id=iter_id * batch_size,
visualization=visualization) visualization=visualization)
res += output res.extend(output)
return res return res
def add_module_config_arg(self): def save_inference_model(self,
""" dirname,
Add the command config options model_filename=None,
""" params_filename=None,
self.arg_config_group.add_argument( combined=True):
'--use_gpu', if combined:
type=ast.literal_eval, model_filename = "__model__" if not model_filename else model_filename
default=False, params_filename = "__params__" if not params_filename else params_filename
help="whether use GPU or not") place = fluid.CPUPlace()
exe = fluid.Executor(place)
self.arg_config_group.add_argument( program, feeded_var_names, target_vars = fluid.io.load_inference_model(
'--batch_size', dirname=self.default_pretrained_model_path, executor=exe)
type=int,
default=1,
help="batch size for prediction")
def add_module_input_arg(self): fluid.io.save_inference_model(
dirname=dirname,
main_program=program,
executor=exe,
feeded_var_names=feeded_var_names,
target_vars=target_vars,
model_filename=model_filename,
params_filename=params_filename)
@serving
def serving_method(self, images, **kwargs):
""" """
Add the command input options Run as a service.
""" """
self.arg_input_group.add_argument( images_decode = [base64_to_cv2(image) for image in images]
'--input_path', type=str, default=None, help="input data") results = self.object_detection(images_decode, **kwargs)
return results
self.arg_input_group.add_argument(
'--input_file',
type=str,
default=None,
help="file contain input data")
def check_input_data(self, args):
input_data = []
if args.input_path:
input_data = [args.input_path]
elif args.input_file:
if not os.path.exists(args.input_file):
raise RuntimeError("File %s is not exist." % args.input_file)
else:
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
@runnable @runnable
def run_cmd(self, argvs): def run_cmd(self, argvs):
"""
Run as a command.
"""
self.parser = argparse.ArgumentParser( self.parser = argparse.ArgumentParser(
description="Run the {}".format(self.name), description="Run the {} module.".format(self.name),
prog="hub run {}".format(self.name), prog='hub run {}'.format(self.name),
usage='%(prog)s', usage='%(prog)s',
add_help=True) add_help=True)
self.arg_input_group = self.parser.add_argument_group( self.arg_input_group = self.parser.add_argument_group(
...@@ -262,17 +264,50 @@ class SSDMobileNetv1(hub.Module): ...@@ -262,17 +264,50 @@ class SSDMobileNetv1(hub.Module):
description= description=
"Run configuration for controlling module behavior, not required.") "Run configuration for controlling module behavior, not required.")
self.add_module_config_arg() self.add_module_config_arg()
self.add_module_input_arg() self.add_module_input_arg()
args = self.parser.parse_args(argvs) args = self.parser.parse_args(argvs)
input_data = self.check_input_data(args) results = self.face_detection(
if len(input_data) == 0: paths=[args.input_path],
self.parser.print_help() batch_size=args.batch_size,
exit(1) use_gpu=args.use_gpu,
else: output_dir=args.output_dir,
for image_path in input_data: visualization=args.visualization,
if not os.path.exists(image_path): score_thresh=args.score_thresh)
raise RuntimeError( return results
"File %s or %s is not exist." % image_path)
return self.object_detection( def add_module_config_arg(self):
paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) """
Add the command config options.
"""
self.arg_config_group.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=False,
help="whether use GPU or not")
self.arg_config_group.add_argument(
'--output_dir',
type=str,
default='detection_result',
help="The directory to save output images.")
self.arg_config_group.add_argument(
'--visualization',
type=ast.literal_eval,
default=False,
help="whether to save output as images.")
def add_module_input_arg(self):
"""
Add the command input options.
"""
self.arg_input_group.add_argument(
'--input_path', type=str, help="path to image.")
self.arg_input_group.add_argument(
'--batch_size',
type=ast.literal_eval,
default=1,
help="batch size.")
self.arg_input_group.add_argument(
'--score_thresh',
type=ast.literal_eval,
default=0.5,
help="threshold for object detecion.")
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """Decode a base64 text string holding an encoded image file.

    Args:
        b64str (str): base64 text of the image bytes (e.g. a JPEG/PNG file).

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the decoded
        bytes.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated.
    buffer = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(buffer, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Get save image name from source image path.

    Creates ``output_dir`` when it does not exist, then returns
    ``output_dir/<stem><ext>``. ``<stem>`` is the base name of
    ``image_path`` without extension. ``<ext>`` is picked from
    ``img.format`` (PNG/JPEG/BMP), otherwise from ``img.mode``
    (RGB, L -> .jpg; RGBA, P -> .png), otherwise the extension of
    ``image_path`` is kept.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    stem, suffix = os.path.splitext(os.path.basename(image_path))

    suffix_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    suffix_by_mode = {'RGB': ".jpg", 'L': ".jpg", 'RGBA': '.png', 'P': '.png'}

    if img.format in suffix_by_format:
        suffix = suffix_by_format[img.format]
    else:
        # Unknown or missing format: fall back to the pixel mode and keep
        # the original extension when the mode is not listed either.
        suffix = suffix_by_mode.get(img.mode, suffix)

    return os.path.join(output_dir, "{}".format(stem)) + suffix
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """Draw detection boxes (and labels for RGB images) and save the result.

    Args:
        image_path (str): path of the image to open and draw on.
        data_list (list[dict]): detections; each dict is read for the keys
            'left', 'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image; an existing file with that name is
        removed first.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textsize'):
                textsize_width, textsize_height = draw.textsize(text=text)
            else:
                # ImageDraw.textsize no longer exists in recent Pillow
                # releases; measure the text through textbbox instead.
                box_left, box_top, box_right, box_bottom = draw.textbbox(
                    (0, 0), text)
                textsize_width = box_right - box_left
                textsize_height = box_bottom - box_top
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    if os.path.exists(save_name):
        os.remove(save_name)

    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """Clamp a box to the image rectangle.

    ``bbox`` is indexed as (xmin, ymin, xmax, ymax). The x values are
    limited to ``[0, img_width]`` and the y values to ``[0, img_height]``.
    Returns the four clamped values as a tuple in the same order.
    """
    upper_bounds = (img_width, img_height, img_width, img_height)
    return tuple(
        max(min(bbox[axis], bound), 0.)
        for axis, bound in enumerate(upper_bounds))
def load_label_info(file_path):
    """Read a label file and return its lines, each stripped of surrounding
    whitespace, in file order (blank lines become empty strings)."""
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    postprocess the lod_tensor produced by fluid.Executor.run

    Inputs are treated as one sequence: every entry of ``paths`` first,
    then every entry of ``images``. ``handle_id`` is the position in that
    sequence of the first image of this batch.

    Args:
        paths (list[str]): the path of images.
        images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
        data_out (lod_tensor): data produced by executor.run.
        score_thresh (float): the low limit of bounding box.
        label_names (list[str]): label names.
        output_dir (str): output directory.
        handle_id (int): The number of images that have been handled.
        visualization (bool): whether to save as images.

    Returns:
        res (list[dict]): The result of object detection. keys include 'data', 'save_path', the corresponding value is:
            data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            save_path (str): The path to save output images (only when visualization is True).
            path (str): The source image path (only for images given by path).
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    paths = paths if paths else []
    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image in the combined paths + images sequence.
        # Using the batch-local index alone would pick images[0..] again
        # for every batch after all paths have been consumed.
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the last axis (BGR -> RGB for cv2-style arrays).
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # Persist the in-memory image so it can be reopened and
                # annotated by path below.
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # Each valid row is (category_id, score, xmin, ymin, xmax, ymax).
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            # The box values are multiplied by the image size to obtain
            # pixel coordinates.
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
```shell
$ hub install ssd_vgg16_300_coco2017==1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_300_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_fatures',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 存放模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_vgg16_300_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_vgg16_300_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量,否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_vgg16_300_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
# coding=utf-8
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import random
from collections import OrderedDict
import cv2
import numpy as np
from PIL import Image
from paddle import fluid
__all__ = ['reader']
class DecodeImage(object):
    """First preprocessing step: optional BGR -> RGB colour conversion."""

    def __init__(self, to_rgb=True, with_mixup=False):
        """Store the decoding options.

        Args:
            to_rgb (bool): whether to convert BGR to RGB
            with_mixup (bool): whether or not to mixup image and
                gt_bbox/gt_score; the flag is only stored, ``__call__``
                never reads it.
        """
        self.with_mixup = with_mixup
        self.to_rgb = to_rgb

    def __call__(self, im):
        """Return ``im`` converted with ``cv2.COLOR_BGR2RGB`` when ``to_rgb``
        is truthy; otherwise return ``im`` itself, untouched."""
        if not self.to_rgb:
            return im
        return cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
class ResizeImage(object):
    """Resize step of the preprocessing pipeline for H x W x C numpy images."""

    def __init__(self,
                 target_size=0,
                 max_size=0,
                 interp=cv2.INTER_LINEAR,
                 use_cv2=True):
        """
        Rescale image to the specified target size, and capped at max_size
        if max_size != 0.
        If target_size is list, selected a scale randomly as the specified
        target size.

        Args:
            target_size (int|list): the target size of image's short side,
                multi-scale training is adopted when type is list.
            max_size (int): the max size of image; 0 disables the cap and
                switches to a square ``target_size x target_size`` resize.
            interp (int): the interpolation method; passed as
                ``interpolation`` to ``cv2.resize`` or as the resample
                argument of ``PIL.Image.resize``.
            use_cv2 (bool): use the cv2 interpolation method or use PIL
                interpolation method
        """
        self.max_size = int(max_size)
        self.interp = int(interp)
        self.use_cv2 = use_cv2
        self.target_size = target_size

    def __call__(self, im):
        """Resize ``im`` and return the resized array.

        Args:
            im (numpy.ndarray): 3-dimensional image; the first two axes are
                read as height and width.

        Returns:
            numpy.ndarray: the resized image.

        Raises:
            TypeError: ``im`` is not a numpy array, or ``max_size`` is set
                while ``use_cv2`` is False.
            ValueError: ``im`` is not 3-dimensional.
            ZeroDivisionError: height or width of ``im`` is 0.
        """
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            raise ValueError('{}: image is not 3-dimensional.'.format(self))
        im_shape = im.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        if isinstance(self.target_size, list):
            # Case for multi-scale training
            selected_size = random.choice(self.target_size)
        else:
            selected_size = self.target_size
        if float(im_size_min) == 0:
            raise ZeroDivisionError('{}: min size of image is 0'.format(self))
        if self.max_size != 0:
            # Aspect-ratio preserving mode: scale the short side to
            # selected_size ...
            im_scale = float(selected_size) / float(im_size_min)
            # ... but prevent the biggest axis from being more than max_size
            if np.round(im_scale * im_size_max) > self.max_size:
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
            resize_w = im_scale_x * float(im_shape[1])
            resize_h = im_scale_y * float(im_shape[0])
        else:
            # Square mode: each axis gets its own scale factor so that both
            # sides end up at selected_size.
            im_scale_x = float(selected_size) / float(im_shape[1])
            im_scale_y = float(selected_size) / float(im_shape[0])
            resize_w = selected_size
            resize_h = selected_size
        if self.use_cv2:
            im = cv2.resize(
                im,
                None,
                None,
                fx=im_scale_x,
                fy=im_scale_y,
                interpolation=self.interp)
        else:
            if self.max_size != 0:
                raise TypeError(
                    'If you set max_size to cap the maximum size of image,'
                    'please set use_cv2 to True to resize the image.')
            # PIL path: the array is cast to uint8 before conversion.
            im = im.astype('uint8')
            im = Image.fromarray(im)
            im = im.resize((int(resize_w), int(resize_h)), self.interp)
            im = np.array(im)
        return im
class NormalizeImage(object):
    """Per-channel normalisation: optional /255 scaling, then (im - mean) / std."""

    def __init__(self,
                 mean=[0.485, 0.456, 0.406],
                 std=[1, 1, 1],
                 is_scale=True,
                 is_channel_first=True):
        """
        Args:
            mean (list): the per-channel pixel mean that is subtracted
            std (list): the per-channel pixel standard deviation (divisor)
            is_scale (bool): whether to divide the pixels by 255 first
            is_channel_first (bool): True if the image layout is [C, H, W],
                False if it is [H, W, C]
        """
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.is_channel_first = is_channel_first

    def __call__(self, im):
        """Normalize the image.

        Operators:
            1.(optional) Scale the image to [0,1]
            2. Each pixel minus mean and is divided by std

        Returns:
            numpy.ndarray: a new float32 array; the input is left untouched.
        """
        # Always take a private float32 copy. With ``copy=False`` a float32
        # input was returned as-is and the in-place operations below then
        # silently modified the caller's array.
        im = im.astype(np.float32)
        if self.is_channel_first:
            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
            std = np.array(self.std)[:, np.newaxis, np.newaxis]
        else:
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
        if self.is_scale:
            im /= 255.0
        im -= mean
        im /= std
        return im
class Permute(object):
    """Optionally move channels to the front and/or reverse the channel order."""

    def __init__(self, to_bgr=True, channel_first=True):
        """
        Change the channel.

        Args:
            to_bgr (bool): confirm whether to convert RGB to BGR
            channel_first (bool): confirm whether to change channel
                (i.e. transpose [H, W, C] to [C, H, W])
        """
        self.to_bgr = to_bgr
        self.channel_first = channel_first

    def __call__(self, im):
        """Apply the configured permutation to a 3-dimensional image array."""
        if self.channel_first:
            # [H, W, C] -> [H, C, W] -> [C, H, W]
            im = np.swapaxes(im, 1, 2)
            im = np.swapaxes(im, 1, 0)
        if self.to_bgr:
            # Reverse the channel axis, wherever it currently lives. The
            # previous code always indexed axis 0, which reversed image rows
            # (or raised IndexError) when the layout was still [H, W, C].
            if self.channel_first:
                im = im[[2, 1, 0], :, :]
            else:
                im = im[:, :, [2, 1, 0]]
        return im
def reader(paths=None,
           images=None,
           decode_image=DecodeImage(to_rgb=True, with_mixup=False),
           resize_image=ResizeImage(
               target_size=300, interp=1, max_size=0, use_cv2=False),
           permute_image=Permute(to_bgr=False),
           normalize_image=NormalizeImage(
               mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
    """
    data generator

    Images given by ``paths`` are yielded first, followed by ``images``.

    Args:
        paths (list[str]): paths to images.
        images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
        decode_image (class object): instance of <class 'DecodeImage' object>
        resize_image (class object): instance of <class 'ResizeImage' object>;
            defaults to a 300x300 PIL resize. Earlier revisions silently
            replaced whatever was passed here with that same 300x300 resizer,
            so default behaviour is unchanged while an explicit argument is
            now honoured.
        permute_image (class object): instance of <class 'Permute' object>
        normalize_image (class object): instance of <class 'NormalizeImage' object>

    Yields:
        list: a one-element list holding the preprocessed image.

    Raises:
        AssertionError: ``paths`` is not a list or contains a non-file path.
        ValueError: an image file exists but cannot be decoded by OpenCV.
    """
    img_list = []
    if paths is not None:
        assert type(paths) is list, "type(paths) is not list."
        for img_path in paths:
            assert os.path.isfile(
                img_path), "The {} isn't a valid file path.".format(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None; fail with a
                # readable message instead of an AttributeError on None.
                raise ValueError(
                    "The {} can't be decoded as an image.".format(img_path))
            img_list.append(img.astype('float32'))
    if images is not None:
        img_list.extend(images)

    for img in img_list:
        preprocessed_img = decode_image(img)
        preprocessed_img = resize_image(preprocessed_img)
        preprocessed_img = permute_image(preprocessed_img)
        preprocessed_img = normalize_image(preprocessed_img)
        yield [preprocessed_img]
# coding=utf-8 # coding=utf-8
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import ast import ast
import argparse import argparse
import os
from functools import partial from functools import partial
import yaml
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddlehub as hub import paddlehub as hub
from paddlehub.module.module import moduleinfo, runnable
from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor
from paddlehub.io.parser import txt_parser from paddlehub.module.module import moduleinfo, runnable, serving
import yaml from paddlehub.common.paddle_helper import add_vars_prefix
from ssd_vgg16_300_coco2017.vgg import VGG from ssd_vgg16_300_coco2017.vgg import VGG
from ssd_vgg16_300_coco2017.processor import load_label_info, postprocess, base64_to_cv2
from ssd_vgg16_300_coco2017.data_feed import reader
@moduleinfo( @moduleinfo(
...@@ -25,28 +25,22 @@ from ssd_vgg16_300_coco2017.vgg import VGG ...@@ -25,28 +25,22 @@ from ssd_vgg16_300_coco2017.vgg import VGG
type="cv/object_detection", type="cv/object_detection",
summary="SSD with backbone VGG16, trained with dataset COCO.", summary="SSD with backbone VGG16, trained with dataset COCO.",
author="paddlepaddle", author="paddlepaddle",
author_email="paddle-dev@baidu.com") author_email="")
class SSDVGG16(hub.Module): class SSDVGG16(hub.Module):
def _initialize(self): def _initialize(self):
self.ssd = hub.Module(name="ssd")
# default pretrained model of SSD_VGG16, the shape of image tensor is (3, 300, 300)
self.default_pretrained_model_path = os.path.join( self.default_pretrained_model_path = os.path.join(
self.directory, "ssd_vgg16_300_model") self.directory, "ssd_vgg16_300_model")
self.label_names = self.ssd.load_label_info( self.label_names = load_label_info(
os.path.join(self.directory, "label_file.txt")) os.path.join(self.directory, "label_file.txt"))
self.infer_prog = None self.model_config = None
self.image = None
self.bbox_out = None
self._set_config() self._set_config()
self._config = None
def _set_config(self): def _set_config(self):
""" # predictor config setting.
predictor config setting
"""
cpu_config = AnalysisConfig(self.default_pretrained_model_path) cpu_config = AnalysisConfig(self.default_pretrained_model_path)
cpu_config.disable_glog_info() cpu_config.disable_glog_info()
cpu_config.disable_gpu() cpu_config.disable_gpu()
cpu_config.switch_ir_optim(False)
self.cpu_predictor = create_paddle_predictor(cpu_config) self.cpu_predictor = create_paddle_predictor(cpu_config)
try: try:
...@@ -61,25 +55,31 @@ class SSDVGG16(hub.Module): ...@@ -61,25 +55,31 @@ class SSDVGG16(hub.Module):
gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0)
self.gpu_predictor = create_paddle_predictor(gpu_config) self.gpu_predictor = create_paddle_predictor(gpu_config)
def context(self, # model config setting.
num_classes=81, if not self.model_config:
trainable=True, with open(os.path.join(self.directory, 'config.yml')) as fp:
pretrained=True, self.model_config = yaml.load(fp.read(), Loader=yaml.FullLoader)
get_prediction=False):
"""Distill the Head Features, so as to perform transfer learning. self.multi_box_head_config = self.model_config['MultiBoxHead']
self.output_decoder_config = self.model_config['SSDOutputDecoder']
def context(self, trainable=True, pretrained=True, get_prediction=False):
"""
Distill the Head Features, so as to perform transfer learning.
Args:
trainable (bool): whether to set parameters trainable.
pretrained (bool): whether to load default pretrained model.
get_prediction (bool): whether to get prediction.
:param trainable: whether to set parameters trainable. Returns:
:type trainable: bool inputs(dict): the input variables.
:param pretrained: whether to load default pretrained model. outputs(dict): the output variables.
:type pretrained: bool context_prog (Program): the program to execute transfer learning.
:param get_prediction: whether to get prediction,
if True, outputs is {'bbox_out': bbox_out},
if False, outputs is {'head_features': head_features}.
:type get_prediction: bool
""" """
wrapped_prog = fluid.Program() context_prog = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
with fluid.program_guard(wrapped_prog, startup_program): with fluid.program_guard(context_prog, startup_program):
with fluid.unique_name.guard(): with fluid.unique_name.guard():
# image # image
image = fluid.layers.data( image = fluid.layers.data(
...@@ -89,21 +89,60 @@ class SSDVGG16(hub.Module): ...@@ -89,21 +89,60 @@ class SSDVGG16(hub.Module):
depth=16, depth=16,
with_extra_blocks=True, with_extra_blocks=True,
normalizations=[20., -1, -1, -1, -1, -1]) normalizations=[20., -1, -1, -1, -1, -1])
# body_feats
body_feats = backbone(image) body_feats = backbone(image)
# call ssd.context # im_size
inputs, outputs, context_prog = self.ssd.context( im_size = fluid.layers.data(
body_feats=body_feats, name='im_size', shape=[2], dtype='int32')
multi_box_head=self.ssd.MultiBoxHead( # var_prefix
num_classes=num_classes, **self.multi_box_head_config), var_prefix = '@HUB_{}@'.format(self.name)
ssd_output_decoder=self.ssd.SSDOutputDecoder( # names of inputs
**self.output_decoder_config), inputs = {
'image': var_prefix + image.name,
'im_size': var_prefix + im_size.name
}
# names of outputs
if get_prediction:
locs, confs, box, box_var = fluid.layers.multi_box_head(
inputs=body_feats,
image=image, image=image,
trainable=trainable, num_classes=81,
var_prefix='@HUB_{}@'.format(self.name), **self.multi_box_head_config)
get_prediction=get_prediction) pred = fluid.layers.detection_output(
loc=locs,
scores=confs,
prior_box=box,
prior_box_var=box_var,
**self.output_decoder_config)
outputs = {'bbox_out': [var_prefix + pred.name]}
else:
outputs = {
'body_features':
[var_prefix + var.name for var in body_feats]
}
# add_vars_prefix
add_vars_prefix(context_prog, var_prefix)
add_vars_prefix(fluid.default_startup_program(), var_prefix)
# inputs
inputs = {
key: context_prog.global_block().vars[value]
for key, value in inputs.items()
}
outputs = {
out_key: [
context_prog.global_block().vars[varname]
for varname in out_value
]
for out_key, out_value in outputs.items()
}
# trainable
for param in context_prog.global_block().iter_parameters():
param.trainable = trainable
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
# pretrained
if pretrained: if pretrained:
def _if_exist(var): def _if_exist(var):
...@@ -117,67 +156,52 @@ class SSDVGG16(hub.Module): ...@@ -117,67 +156,52 @@ class SSDVGG16(hub.Module):
predicate=_if_exist) predicate=_if_exist)
else: else:
exe.run(startup_program) exe.run(startup_program)
return inputs, outputs, context_prog
@property
def config(self):
if not self._config:
with open(os.path.join(self.directory, 'config.yml')) as file:
self._config = yaml.load(file.read(), Loader=yaml.FullLoader)
return self._config
@property return inputs, outputs, context_prog
def multi_box_head_config(self):
return self.config['MultiBoxHead']
@property
def output_decoder_config(self):
return self.config['SSDOutputDecoder']
def object_detection(self, def object_detection(self,
paths=None, paths=None,
images=None, images=None,
use_gpu=False,
batch_size=1, batch_size=1,
use_gpu=False,
output_dir='detection_result', output_dir='detection_result',
score_thresh=0.5, score_thresh=0.5,
visualization=True): visualization=True):
"""API of Object Detection. """API of Object Detection.
:param paths: the path of images. Args:
:type paths: list, each element is correspond to the path of an image. paths (list[str]): The paths of images.
:param images: data of images, [N, H, W, C] images (list(numpy.ndarray)): images data, shape of each is [H, W, C]
:type images: numpy.ndarray batch_size (int): batch size.
:param use_gpu: whether to use gpu or not. use_gpu (bool): Whether to use gpu.
:type use_gpu: bool output_dir (str): The path to store output images.
:param batch_size: bathc size. visualization (bool): Whether to save image or not.
:type batch_size: int score_thresh (float): threshold for object detecion.
:param output_dir: the directory to store the detection result.
:type output_dir: str Returns:
:param score_thresh: the threshold of detection confidence. res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is:
:type score_thresh: float data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is:
:param visualization: whether to draw bounding box and save images. left (float): The X coordinate of the upper left corner of the bounding box;
:type visualization: bool top (float): The Y coordinate of the upper left corner of the bounding box;
right (float): The X coordinate of the lower right corner of the bounding box;
bottom (float): The Y coordinate of the lower right corner of the bounding box;
label (str): The label of detection result;
confidence (float): The confidence of detection result.
save_path (str, optional): The path to save output images.
""" """
resize_image = self.ssd.ResizeImage( paths = paths if paths else list()
target_size=300, interp=1, max_size=0, use_cv2=False) data_reader = partial(reader, paths, images)
data_reader = partial(
self.ssd.reader, paths, images, resize_image=resize_image)
batch_reader = fluid.io.batch(data_reader, batch_size=batch_size) batch_reader = fluid.io.batch(data_reader, batch_size=batch_size)
paths = paths if paths else []
res = [] res = []
for iter_id, feed_data in enumerate(batch_reader()): for iter_id, feed_data in enumerate(batch_reader()):
np_data = np.array(feed_data).astype('float32') feed_data = np.array(feed_data)
if np_data.shape == 1: image_tensor = PaddleTensor(np.array(list(feed_data[:, 0])).copy())
np_data = np_data[0]
else:
np_data = np.squeeze(np_data, axis=1)
data_tensor = PaddleTensor(np_data.copy())
if use_gpu: if use_gpu:
data_out = self.gpu_predictor.run([data_tensor]) data_out = self.gpu_predictor.run([image_tensor])
else: else:
data_out = self.cpu_predictor.run([data_tensor]) data_out = self.cpu_predictor.run([image_tensor])
output = self.ssd.postprocess(
output = postprocess(
paths=paths, paths=paths,
images=images, images=images,
data_out=data_out, data_out=data_out,
...@@ -186,53 +210,49 @@ class SSDVGG16(hub.Module): ...@@ -186,53 +210,49 @@ class SSDVGG16(hub.Module):
output_dir=output_dir, output_dir=output_dir,
handle_id=iter_id * batch_size, handle_id=iter_id * batch_size,
visualization=visualization) visualization=visualization)
res += output res.extend(output)
return res return res
def add_module_config_arg(self): def save_inference_model(self,
""" dirname,
Add the command config options model_filename=None,
""" params_filename=None,
self.arg_config_group.add_argument( combined=True):
'--use_gpu', if combined:
type=ast.literal_eval, model_filename = "__model__" if not model_filename else model_filename
default=False, params_filename = "__params__" if not params_filename else params_filename
help="whether use GPU or not") place = fluid.CPUPlace()
exe = fluid.Executor(place)
self.arg_config_group.add_argument( program, feeded_var_names, target_vars = fluid.io.load_inference_model(
'--batch_size', dirname=self.default_pretrained_model_path, executor=exe)
type=int,
default=1,
help="batch size for prediction")
def add_module_input_arg(self): fluid.io.save_inference_model(
dirname=dirname,
main_program=program,
executor=exe,
feeded_var_names=feeded_var_names,
target_vars=target_vars,
model_filename=model_filename,
params_filename=params_filename)
@serving
def serving_method(self, images, **kwargs):
""" """
Add the command input options Run as a service.
""" """
self.arg_input_group.add_argument( images_decode = [base64_to_cv2(image) for image in images]
'--input_path', type=str, default=None, help="input data") results = self.object_detection(images_decode, **kwargs)
self.arg_input_group.add_argument( return results
'--input_file',
type=str,
default=None,
help="file contain input data")
def check_input_data(self, args):
input_data = []
if args.input_path:
input_data = [args.input_path]
elif args.input_file:
if not os.path.exists(args.input_file):
raise RuntimeError("File %s is not exist." % args.input_file)
else:
input_data = txt_parser.parse(args.input_file, use_strip=True)
return input_data
@runnable @runnable
def run_cmd(self, argvs): def run_cmd(self, argvs):
"""
Run as a command.
"""
self.parser = argparse.ArgumentParser( self.parser = argparse.ArgumentParser(
description="Run the {}".format(self.name), description="Run the {} module.".format(self.name),
prog="hub run {}".format(self.name), prog='hub run {}'.format(self.name),
usage='%(prog)s', usage='%(prog)s',
add_help=True) add_help=True)
self.arg_input_group = self.parser.add_argument_group( self.arg_input_group = self.parser.add_argument_group(
...@@ -242,18 +262,50 @@ class SSDVGG16(hub.Module): ...@@ -242,18 +262,50 @@ class SSDVGG16(hub.Module):
description= description=
"Run configuration for controlling module behavior, not required.") "Run configuration for controlling module behavior, not required.")
self.add_module_config_arg() self.add_module_config_arg()
self.add_module_input_arg() self.add_module_input_arg()
args = self.parser.parse_args(argvs) args = self.parser.parse_args(argvs)
input_path = args.input_path results = self.face_detection(
input_data = self.check_input_data(args) paths=[args.input_path],
if len(input_data) == 0: batch_size=args.batch_size,
self.parser.print_help() use_gpu=args.use_gpu,
exit(1) output_dir=args.output_dir,
else: visualization=args.visualization,
for image_path in input_data: score_thresh=args.score_thresh)
if not os.path.exists(image_path): return results
raise RuntimeError(
"File %s or %s is not exist." % image_path) def add_module_config_arg(self):
return self.object_detection( """
paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) Add the command config options.
"""
self.arg_config_group.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=False,
help="whether use GPU or not")
self.arg_config_group.add_argument(
'--output_dir',
type=str,
default='detection_result',
help="The directory to save output images.")
self.arg_config_group.add_argument(
'--visualization',
type=ast.literal_eval,
default=False,
help="whether to save output as images.")
def add_module_input_arg(self):
"""
Add the command input options.
"""
self.arg_input_group.add_argument(
'--input_path', type=str, help="path to image.")
self.arg_input_group.add_argument(
'--batch_size',
type=ast.literal_eval,
default=1,
help="batch size.")
self.arg_input_group.add_argument(
'--score_thresh',
type=ast.literal_eval,
default=0.5,
help="threshold for object detecion.")
# coding=utf-8
import base64
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
__all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string into an OpenCV image array.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The result of ``cv2.imdecode(..., cv2.IMREAD_COLOR)`` on the decoded
        bytes.
    """
    raw = base64.b64decode(b64str.encode('utf8'))
    # np.fromstring is deprecated for binary input; frombuffer is the
    # documented replacement and avoids an extra copy.
    buf = np.frombuffer(raw, np.uint8)
    return cv2.imdecode(buf, cv2.IMREAD_COLOR)
def get_save_image_name(img, output_dir, image_path):
    """
    Build the output file path for ``img`` inside ``output_dir``.

    The base name comes from ``image_path``. The extension is chosen from
    ``img.format`` when it is PNG/JPEG/BMP, otherwise from ``img.mode``; if
    neither is recognised the extension of ``image_path`` is kept.
    ``output_dir`` is created when it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    name, ext = os.path.splitext(os.path.basename(image_path))

    ext_by_format = {'PNG': '.png', 'JPEG': '.jpg', 'BMP': '.bmp'}
    ext_by_mode = {'RGB': '.jpg', 'L': '.jpg', 'RGBA': '.png', 'P': '.png'}
    if img.format in ext_by_format:
        ext = ext_by_format[img.format]
    else:
        ext = ext_by_mode.get(img.mode, ext)

    return os.path.join(output_dir, "{}".format(name)) + ext
def draw_bounding_box_on_image(image_path, data_list, save_dir):
    """Draw detection boxes (and labels for RGB images) and save the result.

    Args:
        image_path (str): path of the image to annotate.
        data_list (list[dict]): detections; each dict provides 'left',
            'right', 'top', 'bottom', 'label' and 'confidence'.
        save_dir (str): directory the annotated image is written to.

    Returns:
        str: path of the saved image. An existing file at that path is
        replaced.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for data in data_list:
        left, right, top, bottom = data['left'], data['right'], data[
            'top'], data['bottom']
        # draw bbox
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=2,
                  fill='red')
        # draw label
        if image.mode == 'RGB':
            text = data['label'] + ": %.2f%%" % (100 * data['confidence'])
            if hasattr(draw, 'textsize'):
                textsize_width, textsize_height = draw.textsize(text=text)
            else:
                # ImageDraw.textsize was removed in Pillow 10; textbbox is
                # the replacement and returns (left, top, right, bottom).
                text_box = draw.textbbox((0, 0), text)
                textsize_width = text_box[2] - text_box[0]
                textsize_height = text_box[3] - text_box[1]
            draw.rectangle(
                xy=(left, top - (textsize_height + 5),
                    left + textsize_width + 10, top),
                fill=(255, 255, 255))
            draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0))

    save_name = get_save_image_name(image, save_dir, image_path)
    if os.path.exists(save_name):
        os.remove(save_name)
    image.save(save_name)
    return save_name
def clip_bbox(bbox, img_width, img_height):
    """Clamp ``bbox`` (xmin, ymin, xmax, ymax) to the image rectangle.

    x values are limited to [0, img_width] and y values to [0, img_height].

    Returns:
        tuple: (xmin, ymin, xmax, ymax) after clamping.
    """

    def _clamp(value, upper):
        return max(min(value, upper), 0.)

    limits = (img_width, img_height, img_width, img_height)
    return tuple(_clamp(bbox[i], limits[i]) for i in range(4))
def load_label_info(file_path):
    """Read a label file and return its lines, each stripped of surrounding
    whitespace, in file order."""
    with open(file_path, 'r') as label_file:
        return [line.strip() for line in label_file.readlines()]
def postprocess(paths,
                images,
                data_out,
                score_thresh,
                label_names,
                output_dir,
                handle_id,
                visualization=True):
    """
    Convert one batch of raw detector output into per-image result dicts.

    Inputs are ordered as all of ``paths`` followed by all of ``images``;
    ``handle_id`` is the position of this batch's first image in that order.

    Args:
        paths (list[str]): the path of images.
        images (list(numpy.ndarray)): list of images, shape of each is [H, W, C].
        data_out (list): predictor output; the first element must expose
            ``lod`` and ``as_ndarray()``.
        score_thresh (float): detections scoring below this are dropped.
        label_names (list[str]): label names, indexed by category id.
        output_dir (str): output directory.
        handle_id (int): The number of images that have been handled.
        visualization (bool): whether to save as images.

    Returns:
        res (list[dict]): one dict per image in the batch, with keys:
            data (list[dict]): the detections, each with keys:
                left (float): The X coordinate of the upper left corner of the bounding box;
                top (float): The Y coordinate of the upper left corner of the bounding box;
                right (float): The X coordinate of the lower right corner of the bounding box;
                bottom (float): The Y coordinate of the lower right corner of the bounding box;
                label (str): The label of detection result;
                confidence (float): The confidence of detection result.
            path (str): the source path (only for images given by path).
            save_path (str): The path of the annotated image (only when
                visualization is True).
    """
    lod_tensor = data_out[0]
    lod = lod_tensor.lod[0]
    results = lod_tensor.as_ndarray()

    paths_num = len(paths)

    output = []
    for index in range(len(lod) - 1):
        output_i = {'data': []}
        # Position of this image among all inputs (paths first, then images).
        # Using the batch-local index for ``images`` made every batch after
        # the paths were exhausted start again from images[0].
        global_index = handle_id + index
        if global_index < paths_num:
            org_img_path = paths[global_index]
            org_img = Image.open(org_img_path)
            output_i['path'] = org_img_path
        else:
            org_img = images[global_index - paths_num]
            org_img = org_img.astype(np.uint8)
            # Reverse the channel order (BGR array -> RGB PIL image).
            org_img = Image.fromarray(org_img[:, :, ::-1])
            if visualization:
                # In-memory images have no file yet; write one so the boxes
                # can be drawn on it below.
                org_img_path = get_save_image_name(
                    org_img, output_dir, 'image_numpy_{}'.format(
                        (handle_id + index)))
                org_img.save(org_img_path)
        org_img_height = org_img.height
        org_img_width = org_img.width

        # lod[index]:lod[index + 1] is the row range of this image.
        result_i = results[lod[index]:lod[index + 1]]
        for row in result_i:
            # Each valid row is [category_id, score, xmin, ymin, xmax, ymax].
            if len(row) != 6:
                continue
            if row[1] < score_thresh:
                continue
            category_id = int(row[0])
            confidence = row[1]
            bbox = row[2:]
            # Scale the box coordinates by the original image size.
            bbox[0] = bbox[0] * org_img_width
            bbox[1] = bbox[1] * org_img_height
            bbox[2] = bbox[2] * org_img_width
            bbox[3] = bbox[3] * org_img_height
            dt = {}
            dt['label'] = label_names[category_id]
            dt['confidence'] = confidence
            dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(
                bbox, org_img_width, org_img_height)
            output_i['data'].append(dt)

        output.append(output_i)
        if visualization:
            output_i['save_path'] = draw_bounding_box_on_image(
                org_img_path, output_i['data'], output_dir)

    return output
```shell
$ hub install ssd_vgg16_512_coco2017==1.0.0
```
## 命令行预测
```
hub run ssd_vgg16_512_coco2017 --input_path "/PATH/TO/IMAGE"
```
## API
```
def context(trainable=True,
pretrained=True,
get_prediction=False)
```
特征提取,用于迁移学习。
**参数**
* trainable(bool): 设置参数的 trainable 属性;
* pretrained (bool): 是否加载预训练模型;
* get\_prediction (bool): 是否执行预测。
**返回**
* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为:
* image (Variable): 图像变量
* im\_size (Variable): 图片的尺寸
* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_features',否则输出 'bbox\_out'。
* context\_prog (Program): 用于迁移学习的 Program.
```python
def object_detection(paths=None,
images=None,
batch_size=1,
use_gpu=False,
output_dir='detection_result',
score_thresh=0.5,
visualization=True)
```
预测API,检测输入图片中的所有目标的位置。
**参数**
* paths (list\[str\]): 图片的路径;
* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式;
* batch\_size (int): batch 的大小;
* use\_gpu (bool): 是否使用 GPU;
* score\_thresh (float): 识别置信度的阈值;
* visualization (bool): 是否将识别结果保存为图片文件;
* output\_dir (str): 图片的保存路径,默认设为 detection\_result;
**返回**
* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为:
* data (list): 检测结果,list的每一个元素为 dict,各字段为:
* confidence (float): 识别的置信度;
* label (str): 标签;
* left (int): 边界框的左上角x坐标;
* top (int): 边界框的左上角y坐标;
* right (int): 边界框的右下角x坐标;
* bottom (int): 边界框的右下角y坐标;
* save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。
```python
def save_inference_model(dirname,
model_filename=None,
params_filename=None,
combined=True)
```
将模型保存到指定路径。
**参数**
* dirname: 保存模型的目录名称
* model\_filename: 模型文件名称,默认为\_\_model\_\_
* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效)
* combined: 是否将参数保存到统一的一个文件中
## 代码示例
```python
import paddlehub as hub
import cv2
object_detector = hub.Module(name="ssd_vgg16_512_coco2017")
result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')])
# or
# result = object_detector.object_detection(paths=['/PATH/TO/IMAGE'])
```
## 服务部署
PaddleHub Serving可以部署一个目标检测的在线服务。
## 第一步:启动PaddleHub Serving
运行启动命令:
```shell
$ hub serving start -m ssd_vgg16_512_coco2017
```
这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。
**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA\_VISIBLE\_DEVICES环境变量;否则无需设置。
## 第二步:发送预测请求
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
```python
import requests
import json
import cv2
import base64
def cv2_to_base64(image):
data = cv2.imencode('.jpg', image)[1]
return base64.b64encode(data.tostring()).decode('utf8')
# 发送HTTP请求
data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]}
headers = {"Content-type": "application/json"}
url = "http://127.0.0.1:8866/predict/ssd_vgg16_512_coco2017"
r = requests.post(url=url, headers=headers, data=json.dumps(data))
# 打印预测结果
print(r.json()["results"])
```
### 依赖
paddlepaddle >= 1.6.2
paddlehub >= 1.6.0
...@@ -9,10 +9,10 @@ from collections import OrderedDict ...@@ -9,10 +9,10 @@ from collections import OrderedDict
import cv2 import cv2
import numpy as np import numpy as np
from PIL import Image, ImageEnhance from PIL import Image
from paddle import fluid from paddle import fluid
__all__ = ['reader', 'DecodeImage', 'ResizeImage', 'NormalizeImage', 'Permute'] __all__ = ['reader']
class DecodeImage(object): class DecodeImage(object):
...@@ -59,8 +59,6 @@ class ResizeImage(object): ...@@ -59,8 +59,6 @@ class ResizeImage(object):
self.target_size = target_size self.target_size = target_size
def __call__(self, im): def __call__(self, im):
""" Resize the image numpy.
"""
if not isinstance(im, np.ndarray): if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.".format(self)) raise TypeError("{}: image type is not numpy.".format(self))
if len(im.shape) != 3: if len(im.shape) != 3:
...@@ -132,6 +130,7 @@ class NormalizeImage(object): ...@@ -132,6 +130,7 @@ class NormalizeImage(object):
def __call__(self, im): def __call__(self, im):
"""Normalize the image. """Normalize the image.
Operators: Operators:
1.(optional) Scale the image to [0,1] 1.(optional) Scale the image to [0,1]
2. Each pixel minus mean and is divided by std 2. Each pixel minus mean and is divided by std
...@@ -154,6 +153,7 @@ class Permute(object): ...@@ -154,6 +153,7 @@ class Permute(object):
def __init__(self, to_bgr=True, channel_first=True): def __init__(self, to_bgr=True, channel_first=True):
""" """
Change the channel. Change the channel.
Args: Args:
to_bgr (bool): confirm whether to convert RGB to BGR to_bgr (bool): confirm whether to convert RGB to BGR
channel_first (bool): confirm whether to change channel channel_first (bool): confirm whether to change channel
...@@ -178,16 +178,16 @@ def reader(paths=[], ...@@ -178,16 +178,16 @@ def reader(paths=[],
permute_image=Permute(to_bgr=False), permute_image=Permute(to_bgr=False),
normalize_image=NormalizeImage( normalize_image=NormalizeImage(
mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)): mean=[104, 117, 123], std=[1, 1, 1], is_scale=False)):
"""data generator """
data generator
:param paths: path to images.
:type paths: list, each element is a str Args:
:param images: data of images, [N, H, W, C] paths (list[str]): paths to images.
:type images: numpy.ndarray images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param decode_image: instance of <class 'DecodeImage' object> decode_image (class object): instance of <class 'DecodeImage' object>
:param resize_image: instance of <class 'ResizeImage' object> resize_image (class object): instance of <class 'ResizeImage' object>
:param permute_image: instance of <class 'Permute' object> permute_image (class object): instance of <class 'Permute' object>
:param normalize_image: instance of <class 'NormalizeImage' object> normalize_image (class object): instance of <class 'NormalizeImage' object>
""" """
img_list = [] img_list = []
if paths is not None: if paths is not None:
......
# coding=utf-8
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
import paddlehub as hub
from paddlehub.module.module import moduleinfo
from paddlehub.common.paddle_helper import add_vars_prefix
from yolov3.data_feed import reader
from yolov3.processor import load_label_info, postprocess
from yolov3.yolo_head import MultiClassNMS, YOLOv3Head
@moduleinfo(
    name="yolov3",
    version="1.0.0",
    type="cv/object_detection",
    summary="Baidu's YOLOv3 model for object detection.",
    author="paddlepaddle",
    author_email="paddle-dev@baidu.com")
class YOLOv3(hub.Module):
    """Shared YOLOv3 hub module: exposes the common reader, processor and
    head classes as attributes and builds the transfer-learning program."""

    def _initialize(self):
        # Re-export the helpers imported at the top of this file as instance
        # attributes, so they are reachable through a module instance.
        self.reader = reader
        self.load_label_info = load_label_info
        self.postprocess = postprocess
        self.MultiClassNMS = MultiClassNMS
        self.YOLOv3Head = YOLOv3Head

    def context(self,
                body_feats,
                yolo_head,
                image,
                trainable=True,
                var_prefix='',
                get_prediction=False):
        """
        Distill the Head Features, so as to perform transfer learning.

        Args:
            body_feats (feature maps of backbone): feature maps of backbone.
            yolo_head (<class 'YOLOv3Head' object>): yolo_head of YOLOv3
            image (Variable): image tensor.
            trainable (bool): whether to set parameters trainable.
            var_prefix (str): the prefix of variables in yolo_head and backbone.
            get_prediction (bool): whether to get prediction or not.

        Returns:
            inputs(dict): the input variables, keyed 'image' and 'im_size'.
            outputs(dict): the output variables; key 'bbox_out' when
                get_prediction is True, otherwise 'head_features'. Each value
                is a list of variables.
            context_prog (Program): the program to execute transfer learning.
        """
        # Build on the program that already owns the image variable.
        context_prog = image.block.program
        with fluid.program_guard(context_prog):
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            head_features = yolo_head._get_outputs(
                body_feats, is_train=trainable)
            # Record the prefixed *names* for now; they are resolved to
            # variables after add_vars_prefix has been applied below.
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            if get_prediction:
                bbox_out = yolo_head.get_prediction(head_features, im_size)
                outputs = {'bbox_out': [var_prefix + bbox_out.name]}
            else:
                outputs = {
                    'head_features':
                    [var_prefix + var.name for var in head_features]
                }

            # Run the default startup program on CPU before the variables
            # are prefixed.
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            # Apply var_prefix to both the context program and the default
            # startup program.
            add_vars_prefix(context_prog, var_prefix)
            add_vars_prefix(fluid.default_startup_program(), var_prefix)
            # Swap the recorded names for the variables of the global block.
            inputs = {
                key: context_prog.global_block().vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: [
                    context_prog.global_block().vars[varname]
                    for varname in value
                ]
                for key, value in outputs.items()
            }

            # Apply the requested trainable flag to every parameter.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable

            return inputs, outputs, context_prog
...@@ -15,6 +15,7 @@ __all__ = ['DarkNet'] ...@@ -15,6 +15,7 @@ __all__ = ['DarkNet']
class DarkNet(object): class DarkNet(object):
"""DarkNet, see https://pjreddie.com/darknet/yolo/ """DarkNet, see https://pjreddie.com/darknet/yolo/
Args: Args:
depth (int): network depth, currently only darknet 53 is supported depth (int): network depth, currently only darknet 53 is supported
norm_type (str): normalization type, 'bn' and 'sync_bn' are supported norm_type (str): normalization type, 'bn' and 'sync_bn' are supported
...@@ -120,11 +121,8 @@ class DarkNet(object): ...@@ -120,11 +121,8 @@ class DarkNet(object):
return out return out
def __call__(self, input): def __call__(self, input):
"""Get the backbone of DarkNet, that is output for the 5 stages. """
Get the backbone of DarkNet, that is output for the 5 stages.
:param input: Variable of input image
:type input: Variable
:Returns: The last variables of each stage.
""" """
stages, block_func = self.depth_cfg[self.depth] stages, block_func = self.depth_cfg[self.depth]
stages = stages[0:5] stages = stages[0:5]
......
...@@ -12,12 +12,15 @@ __all__ = ['reader'] ...@@ -12,12 +12,15 @@ __all__ = ['reader']
def reader(paths=[], images=None): def reader(paths=[], images=None):
"""data generator """
data generator
Args:
paths (list[str]): paths to images.
images (list(numpy.ndarray)): data of images, shape of each is [H, W, C]
:param paths: path to images. Yield:
:type paths: list, each element is a str res (list): preprocessed image and the size of original image.
:param images: data of images, [N, H, W, C]
:type images: numpy.ndarray
""" """
img_list = [] img_list = []
if paths: if paths:
......
# coding=utf-8 # coding=utf-8
import base64
import os import os
import cv2
import numpy as np import numpy as np
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
__all__ = ['load_label_info', 'postprocess'] __all__ = ['base64_to_cv2', 'load_label_info', 'postprocess']
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string with OpenCV.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode`` for the decoded bytes, read
        with ``cv2.IMREAD_COLOR``.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the documented replacement for the deprecated binary
    # mode of np.fromstring; it views the bytes as uint8 without copying.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
def check_dir(dir_path): def check_dir(dir_path):
......
import base64
import cv2
import numpy as np
def base64_to_cv2(b64str):
    """Decode a base64-encoded image string with OpenCV.

    Args:
        b64str (str): base64 text of an encoded image file.

    Returns:
        The value returned by ``cv2.imdecode`` for the decoded bytes, read
        with ``cv2.IMREAD_COLOR``.
    """
    raw_bytes = base64.b64decode(b64str.encode('utf8'))
    # np.frombuffer is the documented replacement for the deprecated binary
    # mode of np.fromstring; it views the bytes as uint8 without copying.
    encoded = np.frombuffer(raw_bytes, np.uint8)
    return cv2.imdecode(encoded, cv2.IMREAD_COLOR)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册