From e9fa7a7b3a29f68c374e239054f261b41c03c985 Mon Sep 17 00:00:00 2001 From: chengduoZH Date: Mon, 12 Feb 2018 14:03:59 +0800 Subject: [PATCH] follow comments of qingqing and code refine --- python/paddle/v2/fluid/layers/detection.py | 74 ++++++++++++------ .../paddle/v2/fluid/tests/test_detection.py | 75 ++++--------------- 2 files changed, 66 insertions(+), 83 deletions(-) diff --git a/python/paddle/v2/fluid/layers/detection.py b/python/paddle/v2/fluid/layers/detection.py index b045e1c56c..aab9f032bd 100644 --- a/python/paddle/v2/fluid/layers/detection.py +++ b/python/paddle/v2/fluid/layers/detection.py @@ -18,10 +18,9 @@ All layers just related to the detection neural network. from ..layer_helper import LayerHelper from ..param_attr import ParamAttr from ..framework import Variable -from ..nets import img_conv_with_bn -from tensor import concat -from ops import reshape -from nn import transpose +import tensor +import ops +import nn import math __all__ = [ @@ -184,10 +183,10 @@ def prior_box(inputs, name(str, optional, None): Name of the prior box layer. Returns: - boxes(Variable): the output prior boxes of PriorBoxOp. + boxes(Variable): the output prior boxes of PriorBox. The layout is [num_priors, 4]. num_priors is the total box count of each position of inputs. - Variances(Variable): the expanded variances of PriorBoxOp. + Variances(Variable): the expanded variances of PriorBox. The layout is [num_priors, 4]. num_priors is the total box count of each position of inputs @@ -250,7 +249,7 @@ def prior_box(inputs, new_shape = [ -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)]) ] - out = reshape(x=input, shape=new_shape) + out = ops.reshape(x=input, shape=new_shape) return out assert isinstance(inputs, list), 'inputs should be a list.' 
@@ -326,8 +325,8 @@ def prior_box(inputs, reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3)) reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3)) - box = concat(reshaped_boxes) - var = concat(reshaped_vars) + box = tensor.concat(reshaped_boxes) + var = tensor.concat(reshaped_vars) return box, var @@ -345,12 +344,14 @@ def multi_box_head(inputs, pad=1, stride=1, use_batchnorm=False, - base_size=None, - name=None): + base_size=None): """ **Multi Box Head** - input many Variable, and return mbox_loc, mbox_conf + Generate prior boxes' location and confidence for SSD (Single + Shot MultiBox Detector) algorithm. For details of this algorithm, + please refer to section 2.1 of the SSD paper (SSD: Single Shot + MultiBox Detector). Args: inputs(list): The list of input Variables, the format @@ -376,12 +377,12 @@ def multi_box_head(inputs, Returns: - mbox_loc(list): the output prior boxes of PriorBoxOp. The layout is - [num_priors, 4]. num_priors is the total box count of each - position of inputs. - mbox_conf(list): the expanded variances of PriorBoxOp. The layout - is [num_priors, 4]. num_priors is the total box count of each - position of inputs + mbox_loc(list): The predicted boxes' location of the inputs. + The layout of each element is [N, H, W, Priors]. Priors + is the number of predicted box of each position of each input. + mbox_conf(list): The predicted boxes' confidence of the inputs. + The layout of each element is [N, H, W, Priors]. Priors + is the number of predicted box of each position of each input. Examples: .. 
code-block:: python @@ -396,6 +397,35 @@ def multi_box_head(inputs, flip=True) """ + def _conv_with_bn_(input, + conv_num_filter, + conv_padding=1, + conv_filter_size=3, + conv_stride=1, + conv_act=None, + param_attr=None, + conv_with_batchnorm=False, + conv_batchnorm_drop_rate=0.0, + use_cudnn=True): + + conv2d = nn.conv2d( + input=input, + num_filters=conv_num_filter, + filter_size=conv_filter_size, + padding=conv_padding, + stride=conv_stride, + param_attr=param_attr, + act=conv_act, + use_cudnn=use_cudnn) + + if conv_with_batchnorm: + conv2d = nn.batch_norm(input=conv2d) + drop_rate = conv_batchnorm_drop_rate + if abs(drop_rate) > 1e-5: + conv2d = nn.dropout(x=conv2d, dropout_prob=drop_rate) + + return conv2d + if not (isinstance(inputs, list)): raise ValueError('inputs should be a list.') @@ -469,26 +499,26 @@ def multi_box_head(inputs, if share_location: num_loc_output *= num_classes - mbox_loc = img_conv_with_bn( + mbox_loc = _conv_with_bn_( input=input, conv_num_filter=num_loc_output, conv_padding=pad, conv_stride=stride, conv_filter_size=kernel_size, conv_with_batchnorm=use_batchnorm) - mbox_loc = transpose(mbox_loc, perm=[0, 2, 3, 1]) + mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1]) mbox_locs.append(mbox_loc) # get conf_loc num_conf_output = num_priors_per_location * num_classes - conf_loc = img_conv_with_bn( + conf_loc = _conv_with_bn_( input=input, conv_num_filter=num_conf_output, conv_padding=pad, conv_stride=stride, conv_filter_size=kernel_size, conv_with_batchnorm=use_batchnorm) - conf_loc = transpose(conf_loc, perm=[0, 2, 3, 1]) + conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1]) mbox_confs.append(conf_loc) return mbox_locs, mbox_confs diff --git a/python/paddle/v2/fluid/tests/test_detection.py b/python/paddle/v2/fluid/tests/test_detection.py index d50efb3f74..2f1ecd6677 100644 --- a/python/paddle/v2/fluid/tests/test_detection.py +++ b/python/paddle/v2/fluid/tests/test_detection.py @@ -47,7 +47,7 @@ class TestBook(unittest.TestCase): out = 
layers.detection_output( scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv) self.assertIsNotNone(out) - print(str(program)) + # print(str(program)) class TestPriorBox(unittest.TestCase): @@ -62,36 +62,11 @@ class TestPriorBox(unittest.TestCase): def prior_box_output(self, data_shape): images = fluid.layers.data( name='pixel', shape=data_shape, dtype='float32') - conv1 = fluid.layers.conv2d( - input=images, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv2 = fluid.layers.conv2d( - input=conv1, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv3 = fluid.layers.conv2d( - input=conv2, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv4 = fluid.layers.conv2d( - input=conv3, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv5 = fluid.layers.conv2d( - input=conv4, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) + conv1 = fluid.layers.conv2d(images, 3, 3, 2) + conv2 = fluid.layers.conv2d(conv1, 3, 3, 2) + conv3 = fluid.layers.conv2d(conv2, 3, 3, 2) + conv4 = fluid.layers.conv2d(conv3, 3, 3, 2) + conv5 = fluid.layers.conv2d(conv4, 3, 3, 2) box, var = detection.prior_box( inputs=[conv1, conv2, conv3, conv4, conv5, conv5], @@ -112,39 +87,17 @@ class TestMultiBoxHead(unittest.TestCase): data_shape = [3, 224, 224] mbox_locs, mbox_confs = self.multi_box_output(data_shape) + for loc, conf in zip(mbox_locs, mbox_confs): + assert loc.shape[1:3] == conf.shape[1:3] + def multi_box_output(self, data_shape): images = fluid.layers.data( name='pixel', shape=data_shape, dtype='float32') - conv1 = fluid.layers.conv2d( - input=images, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv2 = fluid.layers.conv2d( - input=conv1, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv3 = fluid.layers.conv2d( - input=conv2, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) - conv4 = fluid.layers.conv2d( - input=conv3, - num_filters=3, - 
filter_size=3, - stride=2, - use_cudnn=False) - conv5 = fluid.layers.conv2d( - input=conv4, - num_filters=3, - filter_size=3, - stride=2, - use_cudnn=False) + conv1 = fluid.layers.conv2d(images, 3, 3, 2) + conv2 = fluid.layers.conv2d(conv1, 3, 3, 2) + conv3 = fluid.layers.conv2d(conv2, 3, 3, 2) + conv4 = fluid.layers.conv2d(conv3, 3, 3, 2) + conv5 = fluid.layers.conv2d(conv4, 3, 3, 2) mbox_locs, mbox_confs = detection.multi_box_head( inputs=[conv1, conv2, conv3, conv4, conv5, conv5], -- GitLab