diff --git a/paddle/fluid/operators/prior_box_op.cc b/paddle/fluid/operators/prior_box_op.cc index ed48603e17f38f89705186fb9fb992f69d26d2ff..1385a6cdce838b7f376cd784a8eaa63f591c7ef2 100644 --- a/paddle/fluid/operators/prior_box_op.cc +++ b/paddle/fluid/operators/prior_box_op.cc @@ -38,8 +38,8 @@ class PriorBoxOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_LT(input_dims[3], image_dims[3], "The width of input must smaller than image."); - auto min_sizes = ctx->Attrs().Get>("min_sizes"); - auto max_sizes = ctx->Attrs().Get>("max_sizes"); + auto min_sizes = ctx->Attrs().Get>("min_sizes"); + auto max_sizes = ctx->Attrs().Get>("max_sizes"); auto variances = ctx->Attrs().Get>("variances"); auto aspect_ratios = ctx->Attrs().Get>("aspect_ratios"); bool flip = ctx->Attrs().Get("flip"); @@ -47,15 +47,15 @@ class PriorBoxOp : public framework::OperatorWithKernel { std::vector aspect_ratios_vec; ExpandAspectRatios(aspect_ratios, flip, aspect_ratios_vec); - int num_priors = aspect_ratios_vec.size() * min_sizes.size(); + size_t num_priors = aspect_ratios_vec.size() * min_sizes.size(); if (max_sizes.size() > 0) { PADDLE_ENFORCE_EQ(max_sizes.size(), min_sizes.size(), "The number of min_size and max_size must be equal."); - for (size_t i = 0; i < min_sizes.size(); ++i) { + num_priors += max_sizes.size(); + for (size_t i = 0; i < max_sizes.size(); ++i) { PADDLE_ENFORCE_GT(max_sizes[i], min_sizes[i], "max_size[%d] must be greater than min_size[%d].", i, i); - num_priors += 1; } } @@ -90,20 +90,20 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { "H is the height of input, W is the width of input, num_priors " "is the box count of each position."); - AddAttr>("min_sizes", - "(vector) List of min sizes " - "of generated prior boxes.") - .AddCustomChecker([](const std::vector& min_sizes) { + AddAttr>("min_sizes", + "(vector) List of min sizes " + "of generated prior boxes.") + .AddCustomChecker([](const std::vector& min_sizes) { PADDLE_ENFORCE_GT(min_sizes.size(), 0, "Size of min_sizes must be at least 1."); for (size_t i = 0; i < min_sizes.size(); ++i) { - PADDLE_ENFORCE_GT(min_sizes[i], 0, + PADDLE_ENFORCE_GT(min_sizes[i], 0.0, "min_sizes[%d] must be positive.", i); } }); - AddAttr>( + AddAttr>( "max_sizes", - "(vector) List of max sizes of generated prior boxes."); + "(vector) List of max sizes of generated prior boxes."); AddAttr>( "aspect_ratios", "(vector) List of aspect ratios of generated prior boxes."); @@ -125,16 +125,16 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(true); AddAttr("step_w", - "Prior boxes step across width, 0 for auto calculation.") + "Prior boxes step across width, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_w) { - PADDLE_ENFORCE_GT(step_w, 0.0, "step_w should be larger than 0."); + PADDLE_ENFORCE_GE(step_w, 0.0, "step_w should be larger than 0."); }); AddAttr("step_h", - "Prior boxes step across height, 0 for auto calculation.") + "Prior boxes step across height, 0.0 for auto calculation.") .SetDefault(0.0) .AddCustomChecker([](const float& step_h) { - PADDLE_ENFORCE_GT(step_h, 0.0, "step_h should be larger than 0."); + PADDLE_ENFORCE_GE(step_h, 0.0, "step_h should be larger than 0."); }); AddAttr("offset", diff --git a/paddle/fluid/operators/prior_box_op.h b/paddle/fluid/operators/prior_box_op.h index fd07041233495660605e9cf9acb33d57eb57bc30..e2c9514ed0814f21fec6c4184b7e971c4528d489 100644 --- a/paddle/fluid/operators/prior_box_op.h +++ b/paddle/fluid/operators/prior_box_op.h @@ -60,8 +60,8 @@ class PriorBoxOpKernel : public framework::OpKernel { auto* boxes = ctx.Output("Boxes"); auto* vars = ctx.Output("Variances"); - auto min_sizes = ctx.Attr>("min_sizes"); - auto max_sizes = ctx.Attr>("max_sizes"); + auto min_sizes = ctx.Attr>("min_sizes"); + auto max_sizes = ctx.Attr>("max_sizes"); auto input_aspect_ratio = ctx.Attr>("aspect_ratios"); auto variances = ctx.Attr>("variances"); auto flip = ctx.Attr("flip"); @@ -108,7 +108,7 @@ class PriorBoxOpKernel : public framework::OpKernel { T box_width, box_height; int idx = 0; for (size_t s = 0; s < min_sizes.size(); ++s) { - int min_size = min_sizes[s]; + auto min_size = min_sizes[s]; // first prior: aspect_ratio = 1, size = min_size box_width = box_height = min_size; // xmin @@ -124,7 +124,7 @@ class PriorBoxOpKernel : public framework::OpKernel { idx++; if (max_sizes.size() > 0) { - int max_size = max_sizes[s]; + auto max_size = max_sizes[s]; // second prior: aspect_ratio = 1, // size = sqrt(min_size * max_size) box_width = box_height = sqrt(min_size * max_size); diff --git a/python/paddle/v2/fluid/layers/__init__.py b/python/paddle/v2/fluid/layers/__init__.py index 89b9f30668ee3ed84a9b728932c4ba0227e454b3..cfbbf710b6ac63b9a0fe7d51b0d1940532e948fc 100644 --- a/python/paddle/v2/fluid/layers/__init__.py +++ b/python/paddle/v2/fluid/layers/__init__.py @@ -28,8 +28,11 @@ import device from device import * import math_op_patch from math_op_patch import * +import detection +from detection import * __all__ = [] +__all__ += math_op_patch.__all__ __all__ += detection.__all__ __all__ += nn.__all__ __all__ += io.__all__ @@ -37,4 +40,4 @@ __all__ += tensor.__all__ __all__ += control_flow.__all__ __all__ += ops.__all__ __all__ += device.__all__ -__all__ += math_op_patch.__all__ +__all__ += detection.__all__ diff --git a/python/paddle/v2/fluid/layers/detection.py b/python/paddle/v2/fluid/layers/detection.py index 054443cb435fa2578b3056995e8a7b7b7eba83ff..0f3256d7652b25845fe577838030009a45ed16cd 100644 --- a/python/paddle/v2/fluid/layers/detection.py +++ b/python/paddle/v2/fluid/layers/detection.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,8 +16,15 @@ All layers just related to the detection neural network. """ from ..layer_helper import LayerHelper +from ..framework import Variable +from tensor import concat +from ops import reshape +import math -__all__ = ['detection_output', ] +__all__ = [ + 'detection_output', + 'prior_box', +] def detection_output(scores, @@ -114,3 +121,208 @@ def detection_output(scores, 'nms_eta': 1.0 }) return nmsed_outs + + +def prior_box(inputs, + image, + min_ratio, + max_ratio, + aspect_ratios, + base_size, + steps=None, + step_w=None, + step_h=None, + offset=0.5, + variance=[0.1, 0.1, 0.1, 0.1], + flip=False, + clip=False, + min_sizes=None, + max_sizes=None, + name=None): + """ + **Prior_boxes** + + Generate prior boxes for SSD(Single Shot MultiBox Detector) + algorithm. The details of this algorithm, please refer the + section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector) + `_ . + + Args: + inputs(list): The list of input Variables, the format + of all Variables is NCHW. + image(Variable): The input image data of PriorBoxOp, + the layout is NCHW. + min_ratio(int): the min ratio of generated prior boxes. + max_ratio(int): the max ratio of generated prior boxes. + aspect_ratios(list): the aspect ratios of generated prior + boxes. The length of input and aspect_ratios must be equal. + base_size(int): the base_size is used to get min_size + and max_size according to min_ratio and max_ratio. + step_w(list, optional, default=None): Prior boxes step + across width. If step_w[i] == 0.0, the prior boxes step + across width of the inputs[i] will be automatically calculated. + step_h(list, optional, default=None): Prior boxes step + across height, If step_h[i] == 0.0, the prior boxes + step across height of the inputs[i] will be automatically calculated. + offset(float, optional, default=0.5): Prior boxes center offset. + variance(list, optional, default=[0.1, 0.1, 0.1, 0.1]): the variances + to be encoded in prior boxes. + flip(bool, optional, default=False): Whether to flip + aspect ratios. + clip(bool, optional, default=False): Whether to clip + out-of-boundary boxes. + min_sizes(list, optional, default=None): If `len(inputs) <=2`, + min_sizes must be set up, and the length of min_sizes + should equal to the length of inputs. + max_sizes(list, optional, default=None): If `len(inputs) <=2`, + max_sizes must be set up, and the length of min_sizes + should equal to the length of inputs. + name(str, optional, None): Name of the prior box layer. + + Returns: + boxes(Variable): the output prior boxes of PriorBoxOp. + The layout is [num_priors, 4]. num_priors is the total + box count of each position of inputs. + Variances(Variable): the expanded variances of PriorBoxOp. + The layout is [num_priors, 4]. num_priors is the total + box count of each position of inputs + + Examples: + .. code-block:: python + + prior_box( + inputs = [conv1, conv2, conv3, conv4, conv5, conv6], + image = data, + min_ratio = 20, # 0.20 + max_ratio = 90, # 0.90 + offset = 0.5, + base_size = 300, + variance = [0.1,0.1,0.1,0.1], + aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], + flip=True, + clip=True) + """ + + def _prior_box_(input, + image, + min_sizes, + max_sizes, + aspect_ratios, + variance, + flip=False, + clip=False, + step_w=0.0, + step_h=0.0, + offset=0.5, + name=None): + helper = LayerHelper("prior_box", **locals()) + dtype = helper.input_dtype() + + box = helper.create_tmp_variable(dtype) + var = helper.create_tmp_variable(dtype) + helper.append_op( + type="prior_box", + inputs={"Input": input, + "Image": image}, + outputs={"Boxes": box, + "Variances": var}, + attrs={ + 'min_sizes': min_sizes, + 'max_sizes': max_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'flip': flip, + 'clip': clip, + 'step_w': step_w, + 'step_h': step_h, + 'offset': offset + }) + return box, var + + def _reshape_with_axis_(input, axis=1): + if not (axis > 0 and axis < len(input.shape)): + raise ValueError("The axis should be smaller than " + "the arity of input and bigger than 0.") + new_shape = [ + -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)]) + ] + out = reshape(x=input, shape=new_shape) + return out + + assert isinstance(inputs, list), 'inputs should be a list.' + num_layer = len(inputs) + + if num_layer <= 2: + assert min_sizes is not None and max_sizes is not None + assert len(min_sizes) == num_layer and len(max_sizes) == num_layer + else: + min_sizes = [] + max_sizes = [] + step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2))) + for ratio in xrange(min_ratio, max_ratio + 1, step): + min_sizes.append(base_size * ratio / 100.) + max_sizes.append(base_size * (ratio + step) / 100.) + min_sizes = [base_size * .10] + min_sizes + max_sizes = [base_size * .20] + max_sizes + + if aspect_ratios: + if not (isinstance(aspect_ratios, list) and + len(aspect_ratios) == num_layer): + raise ValueError( + 'aspect_ratios should be list and the length of inputs ' + 'and aspect_ratios should be the same.') + if step_h: + if not (isinstance(step_h, list) and len(step_h) == num_layer): + raise ValueError( + 'step_h should be list and the length of inputs and ' + 'step_h should be the same.') + if step_w: + if not (isinstance(step_w, list) and len(step_w) == num_layer): + raise ValueError( + 'step_w should be list and the length of inputs and ' + 'step_w should be the same.') + if steps: + if not (isinstance(steps, list) and len(steps) == num_layer): + raise ValueError( + 'steps should be list and the length of inputs and ' + 'step_w should be the same.') + step_w = steps + step_h = steps + + box_results = [] + var_results = [] + for i, input in enumerate(inputs): + min_size = min_sizes[i] + max_size = max_sizes[i] + aspect_ratio = [] + if not isinstance(min_size, list): + min_size = [min_size] + if not isinstance(max_size, list): + max_size = [max_size] + if aspect_ratios: + aspect_ratio = aspect_ratios[i] + if not isinstance(aspect_ratio, list): + aspect_ratio = [aspect_ratio] + + box, var = _prior_box_(input, image, min_size, max_size, aspect_ratio, + variance, flip, clip, step_w[i] + if step_w else 0.0, step_h[i] + if step_w else 0.0, offset) + + box_results.append(box) + var_results.append(var) + + if len(box_results) == 1: + box = box_results[0] + var = var_results[0] + else: + reshaped_boxes = [] + reshaped_vars = [] + for i in range(len(box_results)): + reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3)) + reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3)) + + box = concat(reshaped_boxes) + var = concat(reshaped_vars) + + return box, var diff --git a/python/paddle/v2/fluid/tests/test_detection.py b/python/paddle/v2/fluid/tests/test_detection.py index 75498ad7703614d2438ce7a521b8d1bc53c70f4b..fecc2a6226f5655adcb60ac3efef8fc26eec8ba2 100644 --- a/python/paddle/v2/fluid/tests/test_detection.py +++ b/python/paddle/v2/fluid/tests/test_detection.py @@ -13,10 +13,13 @@ # limitations under the License. from __future__ import print_function -import unittest - +import paddle.v2.fluid as fluid +import paddle.v2.fluid.core as core import paddle.v2.fluid.layers as layers +import paddle.v2.fluid.layers.detection as detection from paddle.v2.fluid.framework import Program, program_guard +import unittest +import numpy as np class TestBook(unittest.TestCase): @@ -49,5 +52,62 @@ class TestBook(unittest.TestCase): print(str(program)) +class TestPriorBox(unittest.TestCase): + def test_prior_box(self): + data_shape = [3, 224, 224] + box, var = self.prior_box_output(data_shape) + + assert len(box.shape) == 2 + assert box.shape == var.shape + assert box.shape[1] == 4 + + def prior_box_output(self, data_shape): + images = fluid.layers.data( + name='pixel', shape=data_shape, dtype='float32') + conv1 = fluid.layers.conv2d( + input=images, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv2 = fluid.layers.conv2d( + input=conv1, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv3 = fluid.layers.conv2d( + input=conv2, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv4 = fluid.layers.conv2d( + input=conv3, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + conv5 = fluid.layers.conv2d( + input=conv4, + num_filters=3, + filter_size=3, + stride=2, + use_cudnn=False) + + box, var = detection.prior_box( + inputs=[conv1, conv2, conv3, conv4, conv5, conv5], + image=images, + min_ratio=20, + max_ratio=90, + # steps=[8, 16, 32, 64, 100, 300], + aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], + base_size=300, + offset=0.5, + flip=True, + clip=True) + return box, var + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/v2/fluid/tests/test_prior_box_op.py b/python/paddle/v2/fluid/tests/test_prior_box_op.py index ca8d2bca74ce2d4be8160c8851e393489691ae56..a6c21af49f63269720156ec833c94688d0e3230e 100644 --- a/python/paddle/v2/fluid/tests/test_prior_box_op.py +++ b/python/paddle/v2/fluid/tests/test_prior_box_op.py @@ -65,9 +65,9 @@ class TestPriorBoxOp(OpTest): self.batch_size = 10 self.min_sizes = [2, 4] - self.min_sizes = np.array(self.min_sizes).astype('int64') + self.min_sizes = np.array(self.min_sizes).astype('float32').tolist() self.max_sizes = [5, 10] - self.max_sizes = np.array(self.max_sizes).astype('int64') + self.max_sizes = np.array(self.max_sizes).astype('float32').tolist() self.aspect_ratios = [2.0, 3.0] self.flip = True self.real_aspect_ratios = [1, 2.0, 1.0 / 2.0, 3.0, 1.0 / 3.0]