diff --git a/python/paddle/v2/fluid/layers/detection.py b/python/paddle/v2/fluid/layers/detection.py index b0c25c11debe811ace3854cd5e48e58944196497..cc38796042227a84bd3f1c227d342a8d0872e61e 100644 --- a/python/paddle/v2/fluid/layers/detection.py +++ b/python/paddle/v2/fluid/layers/detection.py @@ -17,11 +17,8 @@ All layers just related to the detection neural network. from ..layer_helper import LayerHelper from ..framework import Variable -from ..param_attr import ParamAttr -from ..framework import Variable -from layer_function_generator import autodoc from tensor import concat -from nn import flatten +from ops import reshape import math __all__ = [ @@ -30,91 +27,6 @@ __all__ = [ ] -def prior_box(input, - image, - min_sizes, - max_sizes, - aspect_ratios, - variance, - flip=False, - clip=False, - step_w=0.0, - step_h=0.0, - offset=0.5, - name=None): - """ - **Prior_box** - - Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. - Each position of the input produce N prior boxes, N is determined by - the count of min_sizes, max_sizes and aspect_ratios, The size of the - box is in range(min_size, max_size) interval, which is generated in - sequence according to the aspect_ratios. - - Args: - input(variable): The input feature data of PriorBox, - the layout is NCHW. - image(variable): The input image data of PriorBox, the - layout is NCHW. - min_sizes(list): the min sizes of generated prior boxes. - max_sizes(list): the max sizes of generated prior boxes. - aspect_ratios(list): the aspect ratios of generated prior boxes. - variance(list): the variances to be encoded in prior boxes. - flip(bool, optional, default=False): Whether to flip aspect ratios. - clip(bool, optional, default=False)): Whether to clip - out-of-boundary boxes. - step_w(int, optional, default=0.0): Prior boxes step across - width, 0.0 for auto calculation. - step_h(int, optional, default=0.0): Prior boxes step across - height, 0.0 for auto calculation. - offset(float, optional, default=0.5): Prior boxes center offset. - name(str, optional, default=None): Name of the prior box layer. - - Returns: - boxes(variable): the output prior boxes of PriorBoxOp. The layout is - [H, W, num_priors, 4]. H is the height of input, W is the width - of input, num_priors is the box count of each position. Where num_priors = - len(aspect_ratios) * len(min_sizes) + len(max_sizes) - Variances(variable): the expanded variances of PriorBoxOp. The layout - is [H, W, num_priors, 4]. H is the height of input, W is the width - of input, num_priors is the box count of each position. Where num_priors = - len(aspect_ratios) * len(min_sizes) + len(max_sizes) - Examples: - .. code-block:: python - - data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") - conv2d = fluid.layers.conv2d( - input=data, num_filters=2, filter_size=3) - box, var = fluid.layers.prior_box(conv2d, data, - min_size, max_size, aspect_ratio, - variance, flip, clip, - step_w, step_h, offset) - """ - helper = LayerHelper("prior_box", **locals()) - dtype = helper.input_dtype() - - box = helper.create_tmp_variable(dtype) - var = helper.create_tmp_variable(dtype) - helper.append_op( - type="prior_box", - inputs={"Input": input, - "Image": image}, - outputs={"Boxes": box, - "Variances": var}, - attrs={ - 'min_sizes': min_sizes, - 'max_sizes': max_sizes, - 'aspect_ratios': aspect_ratios, - 'variances': variance, - 'flip': flip, - 'clip': clip, - 'step_w': step_w, - 'step_h': step_h, - 'offset': offset - }) - return box, var - - def prior_boxes(inputs, image, min_ratio, @@ -128,20 +40,19 @@ def prior_boxes(inputs, variance=[0.1, 0.1, 0.1, 0.1], flip=False, clip=False, + min_sizes=None, + max_sizes=None, name=None): """ **Prior_boxes** Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. - Each position of the inputs produces many prior boxes respectly, the number - of prior boxes which is produced by inputs respectly is determined by - the count of min_ratio, max_ratio and aspect_ratios, The size of the - box is in range(min_ratio, max_ratio) interval, which is generated in - sequence according to the aspect_ratios. + The details of this algorithm, please refer the section 2.2 of SSD paper + (SSD: Single Shot MultiBox Detector)`_ . Args: - inputs(list): The list of input variables, the format of all variables is NCHW. - image(variable): The input image data of PriorBoxOp, the layout is NCHW. + inputs(list): The list of input Variables, the format of all Variables is NCHW. + image(Variable): The input image data of PriorBoxOp, the layout is NCHW. min_ratio(int): the min ratio of generated prior boxes. max_ratio(int): the max ratio of generated prior boxes. aspect_ratios(list): the aspect ratios of generated prior boxes. @@ -159,13 +70,17 @@ def prior_boxes(inputs, to be encoded in prior boxes. flip(bool, optional, default=False): Whether to flip aspect ratios. clip(bool, optional, default=False): Whether to clip out-of-boundary boxes. + min_sizes(list, optional, default=None): If `len(inputs) <=2`, min_sizes must + be set up, and the length of min_sizes should equal to the length of inputs. + max_sizes(list, optional, default=None): If `len(inputs) <=2`, max_sizes must + be set up, and the length of min_sizes should equal to the length of inputs. name(str, optional, None): Name of the prior box layer. Returns: - boxes(variable): the output prior boxes of PriorBoxOp. The layout is + boxes(Variable): the output prior boxes of PriorBoxOp. The layout is [num_priors, 4]. num_priors is the total box count of each position of inputs. - Variances(variable): the expanded variances of PriorBoxOp. The layout + Variances(Variable): the expanded variances of PriorBoxOp. The layout is [num_priors, 4]. num_priors is the total box count of each position of inputs @@ -185,13 +100,60 @@ def prior_boxes(inputs, flip=True, clip=True) """ + + def _prior_box_(input, + image, + min_sizes, + max_sizes, + aspect_ratios, + variance, + flip=False, + clip=False, + step_w=0.0, + step_h=0.0, + offset=0.5, + name=None): + helper = LayerHelper("prior_box", **locals()) + dtype = helper.input_dtype() + + box = helper.create_tmp_variable(dtype) + var = helper.create_tmp_variable(dtype) + helper.append_op( + type="prior_box", + inputs={"Input": input, + "Image": image}, + outputs={"Boxes": box, + "Variances": var}, + attrs={ + 'min_sizes': min_sizes, + 'max_sizes': max_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'flip': flip, + 'clip': clip, + 'step_w': step_w, + 'step_h': step_h, + 'offset': offset + }) + return box, var + + def _reshape_with_axis_(input, axis=1): + if not (axis > 0 and axis < len(input.shape)): + raise ValueError( + "The axis should be smaller than the arity of input's shape.") + new_shape = [-1, reduce(mul, input.shape[axis:len(input.shape)], 1)] + out = reshape([input], shape=new_shape) + return out + assert isinstance(inputs, list), 'inputs should be a list.' num_layer = len(inputs) - assert num_layer > 2 # TODO(zcd): currently, num_layer must be bigger than two. - min_sizes = [] - max_sizes = [] - if num_layer > 2: + if num_layer <= 2: + assert min_sizes is not None and max_sizes is not None + assert len(min_sizes) == num_layer and len(max_sizes) == num_layer + else: + min_sizes = [] + max_sizes = [] step = int(math.floor(((max_ratio - min_ratio)) / (num_layer - 2))) for ratio in xrange(min_ratio, max_ratio + 1, step): min_sizes.append(base_size * ratio / 100.) @@ -199,21 +161,29 @@ def prior_boxes(inputs, min_sizes = [base_size * .10] + min_sizes max_sizes = [base_size * .20] + max_sizes + if aspect_ratios: + if not (isinstance(aspect_ratios, list) and + len(aspect_ratios) == num_layer): + raise ValueError( + 'aspect_ratios should be list and the length of inputs ' + 'and aspect_ratios should be the same.') if step_h: - assert isinstance(step_h,list) and len(step_h) == num_layer, \ - 'step_h should be list and inputs and step_h should have same length' + if not (isinstance(step_h, list) and len(step_h) == num_layer): + raise ValueError( + 'step_h should be list and the length of inputs and ' + 'step_h should be the same.') if step_w: - assert isinstance(step_w,list) and len(step_w) == num_layer, \ - 'step_w should be list and inputs and step_w should have same length' + if not (isinstance(step_w, list) and len(step_w) == num_layer): + raise ValueError( + 'step_w should be list and the length of inputs and ' + 'step_w should be the same.') if steps: - assert isinstance(steps,list) and len(steps) == num_layer, \ - 'steps should be list and inputs and step_w should have same length' + if not (isinstance(steps, list) and len(steps) == num_layer): + raise ValueError( + 'steps should be list and the length of inputs and ' + 'step_w should be the same.') step_w = steps step_h = steps - if aspect_ratios: - assert isinstance(aspect_ratios, list) and len(aspect_ratios) == num_layer, \ - 'aspect_ratios should be list and inputs and aspect_ratios should ' \ - 'have same length' box_results = [] var_results = [] @@ -230,10 +200,10 @@ def prior_boxes(inputs, if not isinstance(aspect_ratio, list): aspect_ratio = [aspect_ratio] - box, var = prior_box(input, image, min_size, max_size, aspect_ratio, - variance, flip, clip, step_w[i] - if step_w else 0.0, step_h[i] - if step_w else 0.0, offset) + box, var = _prior_box_(input, image, min_size, max_size, aspect_ratio, + variance, flip, clip, step_w[i] + if step_w else 0.0, step_h[i] + if step_w else 0.0, offset) box_results.append(box) var_results.append(var) @@ -242,17 +212,11 @@ def prior_boxes(inputs, box = box_results[0] var = var_results[0] else: - axis = 3 reshaped_boxes = [] reshaped_vars = [] for i in range(len(box_results)): - reshaped_boxes += [flatten(box_results[i], axis=3)] - reshaped_vars += [flatten(var_results[i], axis=3)] - - helper = LayerHelper("concat", **locals()) - dtype = helper.input_dtype() - box = helper.create_tmp_variable(dtype) - var = helper.create_tmp_variable(dtype) + reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3)) + reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3)) box = concat(reshaped_boxes) var = concat(reshaped_vars) diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 4d2de38c35ed60755c1b9f53e8dacafe615fee07..5ebd329fc0285a39111a23b3c58c80944cfe23f6 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -21,8 +21,6 @@ from ..framework import Variable from ..param_attr import ParamAttr from layer_function_generator import autodoc from tensor import concat -import math -from operator import mul __all__ = [ 'fc', @@ -66,8 +64,6 @@ __all__ = [ 'nce', 'beam_search', 'row_conv', - 'reshape_with_axis', - 'flatten', 'multiplex', 'layer_norm', ] @@ -3095,86 +3091,3 @@ def multiplex(inputs, index): 'Ids': index}, outputs={'Out': [out]}) return out - - -def reshape_with_axis(input, axis): - """ - **ReshapeWithAxis Layer** - - ReshapeWithAxis is used to merge adjacent dimensions according to axis. - - Args: - input(variable): The input tensor. - axis(list): The axis which is used to merge the adjacent dimensions. - - Returns: - Variable: A tensor variable. - - Examples: - .. code-block:: python - - x = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") - reshaped = fluid.layers.reshape_with_axis(input=x, axis=[2]) - reshaped.shape - >> [-1, 1024] - reshaped = fluid.layers.reshape_with_axis(input=x, axis=[1,3]) - reshaped.shape - >> [-1, 96, 32] - """ - assert isinstance(axis, list), "axis should be list." - assert len(input.shape) > len( - axis), "the length of axis should be litter than input.shape's." - input_shape = input.shape - temp = 0 - for ax in axis: - assert ax < len(input.shape) and ax > 0, \ - 'The data of Axis should be between 1 and len(input.shape)' - assert ax > temp, 'Axis should be incremented sequence' - temp = ax - axis += [len(input.shape)] - - new_shape = [] - for i in range(len(axis) - 1): - new_shape += [reduce(mul, input_shape[axis[i]:axis[i + 1]], 1)] - new_shape = [-1] + new_shape - - helper = LayerHelper('reshape', **locals()) - out = helper.create_tmp_variable(helper.input_dtype()) - helper.append_op( - type='reshape', - inputs={'X': [input]}, - outputs={'Out': [out]}, - attrs={'shape': new_shape}) - return out - - -def flatten(input, axis=1): - """ - **Flatten Layer** - ReshapeWithAxis is used to merge adjacent dimensions according to axis. - Args: - input(variable): The input tensor. - axis(int): - Returns: - Variable: A tensor variable. - Examples: - .. code-block:: python - x = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") - reshaped = fluid.layers.reshape_with_axis(input=x, axis=2) - reshaped.shape - >> [-1, 1024] - """ - assert len(input.shape) > axis and axis > 0, \ - "the axis should be litter than input.shape's." - input_shape = input.shape - - new_shape = [-1, reduce(mul, input_shape[axis:len(input_shape)], 1)] - - helper = LayerHelper('reshape', **locals()) - out = helper.create_tmp_variable(helper.input_dtype()) - helper.append_op( - type='reshape', - inputs={'X': [input]}, - outputs={'Out': [out]}, - attrs={'shape': new_shape}) - return out diff --git a/python/paddle/v2/fluid/tests/object_detection/test_prior_boxes.py b/python/paddle/v2/fluid/tests/object_detection/test_prior_boxes.py index 50b5249d985569ac657ded5407f91f9a466c966b..1b093c6463e038fc58a1bb920e2056ed3b27e540 100644 --- a/python/paddle/v2/fluid/tests/object_detection/test_prior_boxes.py +++ b/python/paddle/v2/fluid/tests/object_detection/test_prior_boxes.py @@ -51,15 +51,15 @@ def main(use_cuda): if use_cuda: # prior_box only support CPU. return - box, var = prior_box_output(data_shape=[3, 224, 224]) + data_shape = [3, 224, 224] + box, var = prior_box_output(data_shape) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) batch = [128] - for i in range(1): - # print("iteration : %d" % i) + for _ in range(1): x = np.random.random(batch + data_shape).astype("float32") tensor_x = core.LoDTensor() tensor_x.set(x, place) @@ -75,12 +75,10 @@ def main(use_cuda): class TestFitALine(unittest.TestCase): def test_cpu(self): - with self.program_scope_guard(): - main(use_cuda=False) + main(use_cuda=False) def test_cuda(self): - with self.program_scope_guard(): - main(use_cuda=True) + main(use_cuda=True) if __name__ == '__main__':