# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
All layers just related to the detection neural network.
"""

import math
from functools import reduce
from operator import mul

from ..layer_helper import LayerHelper
from ..framework import Variable
from tensor import concat
from ops import reshape

# NOTE(review): 'prior_box' is exported here but no such top-level name is
# defined in the part of the file visible to this review -- confirm it exists.
__all__ = [
    'prior_box',
    'prior_boxes',
]


def prior_boxes(inputs,
                image,
                min_ratio,
                max_ratio,
                aspect_ratios,
                base_size,
                steps=None,
                step_w=None,
                step_h=None,
                offset=0.5,
                variance=[0.1, 0.1, 0.1, 0.1],
                flip=False,
                clip=False,
                min_sizes=None,
                max_sizes=None,
                name=None):
    """
    **Prior_boxes**

    Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
    For the details of this algorithm, please refer to section 2.2 of the
    SSD paper (SSD: Single Shot MultiBox Detector).

    One `prior_box` op is appended per element of `inputs`. When there is
    more than one input, every op output is reshaped to two dimensions
    (keeping the last dimension) and the results are concatenated.

    Args:
        inputs(list): The list of input Variables, the format of all
            Variables is NCHW.
        image(Variable): The input image data of PriorBoxOp, the layout is
            NCHW.
        min_ratio(int): the min ratio of generated prior boxes.
        max_ratio(int): the max ratio of generated prior boxes.
        aspect_ratios(list): the aspect ratios of generated prior boxes.
            The length of inputs and aspect_ratios must be equal.
        base_size(int): the base_size is used to get min_size and max_size
            according to min_ratio and max_ratio.
        steps(list, optional, default=None): Prior boxes step used across
            both width and height. When given, its length must equal the
            length of inputs and it overrides both step_w and step_h.
        step_w(list, optional, default=None): Prior boxes step across
            width. If step_w[i] == 0.0, the prior boxes step across width
            of the inputs[i] will be automatically calculated.
        step_h(list, optional, default=None): Prior boxes step across
            height. If step_h[i] == 0.0, the prior boxes step across height
            of the inputs[i] will be automatically calculated.
        offset(float, optional, default=0.5): Prior boxes center offset.
        variance(list, optional, default=[0.1, 0.1, 0.1, 0.1]): the
            variances to be encoded in prior boxes.
        flip(bool, optional, default=False): Whether to flip aspect ratios.
        clip(bool, optional, default=False): Whether to clip
            out-of-boundary boxes.
        min_sizes(list, optional, default=None): If `len(inputs) <= 2`,
            min_sizes must be set up, and the length of min_sizes should
            equal the length of inputs. With more than two inputs the value
            is ignored and recomputed from min_ratio, max_ratio and
            base_size.
        max_sizes(list, optional, default=None): If `len(inputs) <= 2`,
            max_sizes must be set up, and the length of max_sizes should
            equal the length of inputs. With more than two inputs the value
            is ignored and recomputed from min_ratio, max_ratio and
            base_size.
        name(str, optional, None): Name of the prior box layer. It is
            accepted for interface compatibility but is currently not
            forwarded to the generated ops.

    Returns:
        boxes(Variable): the output prior boxes of PriorBoxOp. The layout
            is [num_priors, 4]. num_priors is the total box count of each
            position of inputs. When inputs holds a single Variable, the
            op output is returned as is, without reshaping.
        Variances(Variable): the expanded variances of PriorBoxOp. The
            layout is [num_priors, 4]. num_priors is the total box count
            of each position of inputs. When inputs holds a single
            Variable, the op output is returned as is, without reshaping.

    Raises:
        ValueError: if aspect_ratios, steps, step_w or step_h is set but is
            not a list with one entry per input, or if the ratio step
            derived from min_ratio, max_ratio and len(inputs) is not
            positive.

    Examples:
        .. code-block:: python

          prior_boxes(
             inputs = [conv1, conv2, conv3, conv4, conv5, conv6],
             image = data,
             min_ratio = 20, # 0.20
             max_ratio = 90, # 0.90
             steps = [8., 16., 32., 64., 100., 300.],
             aspect_ratios = [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
             base_size = 300,
             offset = 0.5,
             variance = [0.1,0.1,0.1,0.1],
             flip=True,
             clip=True)
    """

    def _prior_box_(input,
                    image,
                    min_sizes,
                    max_sizes,
                    aspect_ratios,
                    variance,
                    flip=False,
                    clip=False,
                    step_w=0.0,
                    step_h=0.0,
                    offset=0.5,
                    name=None):
        # Append one `prior_box` op for a single input and return its
        # (boxes, variances) outputs.
        # The parameter names (including the builtin-shadowing `input`) are
        # forwarded verbatim to LayerHelper through **locals(), so they must
        # not be renamed and no local may be created before this call.
        helper = LayerHelper("prior_box", **locals())
        dtype = helper.input_dtype()

        box = helper.create_tmp_variable(dtype)
        var = helper.create_tmp_variable(dtype)
        helper.append_op(
            type="prior_box",
            inputs={"Input": input,
                    "Image": image},
            outputs={"Boxes": box,
                     "Variances": var},
            attrs={
                'min_sizes': min_sizes,
                'max_sizes': max_sizes,
                'aspect_ratios': aspect_ratios,
                'variances': variance,
                'flip': flip,
                'clip': clip,
                'step_w': step_w,
                'step_h': step_h,
                'offset': offset
            })
        return box, var

    def _reshape_with_axis_(input, axis=1):
        # Collapse `input` to 2-D: every dimension from `axis` onwards is
        # multiplied into the second dimension, the first one is inferred.
        if not (axis > 0 and axis < len(input.shape)):
            raise ValueError(
                "The axis should be smaller than the arity of input's shape.")
        new_shape = [-1, reduce(mul, input.shape[axis:len(input.shape)], 1)]
        out = reshape([input], shape=new_shape)
        return out

    assert isinstance(inputs, list), 'inputs should be a list.'
    num_layer = len(inputs)

    if num_layer <= 2:
        # Too few layers to interpolate sizes from the ratios; the caller
        # has to provide them explicitly.
        assert min_sizes is not None and max_sizes is not None
        assert len(min_sizes) == num_layer and len(max_sizes) == num_layer
    else:
        # Spread the ratios evenly over all layers but the first one, which
        # always gets the fixed 10% / 20% of base_size pair.
        step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2)))
        if step <= 0:
            raise ValueError(
                'max_ratio - min_ratio should be at least len(inputs) - 2 '
                'so that the ratio step is positive.')
        min_sizes = [base_size * .10]
        max_sizes = [base_size * .20]
        for ratio in range(min_ratio, max_ratio + 1, step):
            min_sizes.append(base_size * ratio / 100.)
            max_sizes.append(base_size * (ratio + step) / 100.)

    if aspect_ratios:
        if not (isinstance(aspect_ratios, list) and
                len(aspect_ratios) == num_layer):
            raise ValueError(
                'aspect_ratios should be list and the length of inputs '
                'and aspect_ratios should be the same.')
    if step_h:
        if not (isinstance(step_h, list) and len(step_h) == num_layer):
            raise ValueError(
                'step_h should be list and the length of inputs and '
                'step_h should be the same.')
    if step_w:
        if not (isinstance(step_w, list) and len(step_w) == num_layer):
            raise ValueError(
                'step_w should be list and the length of inputs and '
                'step_w should be the same.')
    if steps:
        if not (isinstance(steps, list) and len(steps) == num_layer):
            raise ValueError(
                'steps should be list and the length of inputs and '
                'steps should be the same.')
        # `steps` takes precedence over the per-axis arguments.
        step_w = steps
        step_h = steps

    box_results = []
    var_results = []
    for i, layer_input in enumerate(inputs):
        min_size = min_sizes[i]
        max_size = max_sizes[i]
        aspect_ratio = []
        # The op attributes are lists, so wrap scalar entries.
        if not isinstance(min_size, list):
            min_size = [min_size]
        if not isinstance(max_size, list):
            max_size = [max_size]
        if aspect_ratios:
            aspect_ratio = aspect_ratios[i]
            if not isinstance(aspect_ratio, list):
                aspect_ratio = [aspect_ratio]

        # Each step falls back to 0.0 independently when its own list is
        # not provided.
        box, var = _prior_box_(layer_input, image, min_size, max_size,
                               aspect_ratio, variance, flip, clip,
                               step_w[i] if step_w else 0.0,
                               step_h[i] if step_h else 0.0, offset)

        box_results.append(box)
        var_results.append(var)

    if len(box_results) == 1:
        box = box_results[0]
        var = var_results[0]
    else:
        # Flatten every per-layer output to 2-D (keeping the last
        # dimension) so that they can be concatenated into one Variable.
        reshaped_boxes = [
            _reshape_with_axis_(layer_box, axis=3)
            for layer_box in box_results
        ]
        reshaped_vars = [
            _reshape_with_axis_(layer_var, axis=3)
            for layer_var in var_results
        ]
        box = concat(reshaped_boxes)
        var = concat(reshaped_vars)

    return box, var