Commit e9fa7a7b authored by chengduoZH

Follow qingqing's review comments and refine the code

Parent 99c9dbf5
@@ -18,10 +18,9 @@ All layers just related to the detection neural network.
 from ..layer_helper import LayerHelper
 from ..param_attr import ParamAttr
 from ..framework import Variable
-from ..nets import img_conv_with_bn
-from tensor import concat
-from ops import reshape
-from nn import transpose
+import tensor
+import ops
+import nn
 import math

 __all__ = [
@@ -184,10 +183,10 @@ def prior_box(inputs,
         name(str, optional, None): Name of the prior box layer.

     Returns:
-        boxes(Variable): the output prior boxes of PriorBoxOp.
+        boxes(Variable): the output prior boxes of PriorBox.
             The layout is [num_priors, 4]. num_priors is the total
             box count of each position of inputs.
-        Variances(Variable): the expanded variances of PriorBoxOp.
+        Variances(Variable): the expanded variances of PriorBox.
             The layout is [num_priors, 4]. num_priors is the total
             box count of each position of inputs
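To make the [num_priors, 4] layout described above concrete: num_priors is the total number of prior boxes summed over every position of every input feature map. A minimal arithmetic sketch, using made-up SSD-style feature-map sizes and priors-per-location counts (not values from this commit):

    # Hypothetical (height, width, priors per location) for each input feature map.
    feature_maps = [(38, 38, 4), (19, 19, 6), (10, 10, 6)]
    num_priors = sum(h * w * ppl for h, w, ppl in feature_maps)
    print(num_priors)  # 8542, so the boxes output would have shape [8542, 4]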
@@ -250,7 +249,7 @@ def prior_box(inputs,
         new_shape = [
             -1, reduce(lambda x, y: x * y, input.shape[axis:len(input.shape)])
         ]
-        out = reshape(x=input, shape=new_shape)
+        out = ops.reshape(x=input, shape=new_shape)
         return out

     assert isinstance(inputs, list), 'inputs should be a list.'
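For reference, the new_shape computed inside _reshape_with_axis_ flattens every dimension from axis onward into a single trailing dimension and folds the rest into -1. A small sketch of that computation with an assumed input shape (illustrative values only):

    from functools import reduce

    shape = [2, 8, 8, 6, 4]  # assumed 5-D input shape
    axis = 3
    new_shape = [-1, reduce(lambda x, y: x * y, shape[axis:len(shape)])]
    print(new_shape)  # [-1, 24]; ops.reshape would then produce a [128, 24] tensor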
@@ -326,8 +325,8 @@ def prior_box(inputs,
         reshaped_boxes.append(_reshape_with_axis_(box_results[i], axis=3))
         reshaped_vars.append(_reshape_with_axis_(var_results[i], axis=3))

-    box = concat(reshaped_boxes)
-    var = concat(reshaped_vars)
+    box = tensor.concat(reshaped_boxes)
+    var = tensor.concat(reshaped_vars)

     return box, var
@@ -345,12 +344,14 @@ def multi_box_head(inputs,
                    pad=1,
                    stride=1,
                    use_batchnorm=False,
-                   base_size=None,
-                   name=None):
+                   base_size=None):
     """
     **Multi Box Head**

-    input many Variable, and return mbox_loc, mbox_conf
+    Generate prior boxes' locations and confidences for the SSD (Single
+    Shot MultiBox Detector) algorithm. For details of this algorithm,
+    please refer to Section 2.1 of the SSD paper `SSD: Single Shot
+    MultiBox Detector <https://arxiv.org/abs/1512.02325>`_ .

     Args:
         inputs(list): The list of input Variables, the format
@@ -376,12 +377,12 @@ def multi_box_head(inputs,

     Returns:
-        mbox_loc(list): the output prior boxes of PriorBoxOp. The layout is
-            [num_priors, 4]. num_priors is the total box count of each
-            position of inputs.
-        mbox_conf(list): the expanded variances of PriorBoxOp. The layout
-            is [num_priors, 4]. num_priors is the total box count of each
-            position of inputs
+        mbox_loc(list): The predicted box locations of the inputs.
+            The layout of each element is [N, H, W, Priors]. Priors
+            is the number of predicted boxes at each position of each input.
+        mbox_conf(list): The predicted box confidences of the inputs.
+            The layout of each element is [N, H, W, Priors]. Priors
+            is the number of predicted boxes at each position of each input.

     Examples:
         .. code-block:: python
@@ -396,6 +397,35 @@ def multi_box_head(inputs,
                           flip=True)
     """

+    def _conv_with_bn_(input,
+                       conv_num_filter,
+                       conv_padding=1,
+                       conv_filter_size=3,
+                       conv_stride=1,
+                       conv_act=None,
+                       param_attr=None,
+                       conv_with_batchnorm=False,
+                       conv_batchnorm_drop_rate=0.0,
+                       use_cudnn=True):
+        conv2d = nn.conv2d(
+            input=input,
+            num_filters=conv_num_filter,
+            filter_size=conv_filter_size,
+            padding=conv_padding,
+            stride=conv_stride,
+            param_attr=param_attr,
+            act=conv_act,
+            use_cudnn=use_cudnn)
+
+        if conv_with_batchnorm:
+            conv2d = nn.batch_norm(input=conv2d)
+            drop_rate = conv_batchnorm_drop_rate
+            if abs(drop_rate) > 1e-5:
+                conv2d = nn.dropout(x=conv2d, dropout_prob=drop_rate)
+
+        return conv2d
+
     if not (isinstance(inputs, list)):
         raise ValueError('inputs should be a list.')
@@ -469,26 +499,26 @@ def multi_box_head(inputs,
         if share_location:
             num_loc_output *= num_classes

-        mbox_loc = img_conv_with_bn(
+        mbox_loc = _conv_with_bn_(
             input=input,
             conv_num_filter=num_loc_output,
             conv_padding=pad,
             conv_stride=stride,
             conv_filter_size=kernel_size,
             conv_with_batchnorm=use_batchnorm)
-        mbox_loc = transpose(mbox_loc, perm=[0, 2, 3, 1])
+        mbox_loc = nn.transpose(mbox_loc, perm=[0, 2, 3, 1])
         mbox_locs.append(mbox_loc)

         # get conf_loc
         num_conf_output = num_priors_per_location * num_classes
-        conf_loc = img_conv_with_bn(
+        conf_loc = _conv_with_bn_(
             input=input,
             conv_num_filter=num_conf_output,
             conv_padding=pad,
             conv_stride=stride,
             conv_filter_size=kernel_size,
             conv_with_batchnorm=use_batchnorm)
-        conf_loc = transpose(conf_loc, perm=[0, 2, 3, 1])
+        conf_loc = nn.transpose(conf_loc, perm=[0, 2, 3, 1])
         mbox_confs.append(conf_loc)

     return mbox_locs, mbox_confs
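The nn.transpose calls above permute the conv output from NCHW to NHWC, moving channels to the last dimension as in the [N, H, W, Priors] layout described in the docstring. A numpy sketch of the same permutation, with made-up shapes:

    import numpy as np

    nchw = np.zeros((8, 24, 19, 19), dtype='float32')  # assumed conv output: N=8, C=24, H=W=19
    nhwc = np.transpose(nchw, axes=[0, 2, 3, 1])
    print(nhwc.shape)  # (8, 19, 19, 24)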
@@ -47,7 +47,7 @@ class TestBook(unittest.TestCase):
         out = layers.detection_output(
             scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv)
         self.assertIsNotNone(out)
-        print(str(program))
+        # print(str(program))


 class TestPriorBox(unittest.TestCase):
@@ -62,36 +62,11 @@ class TestPriorBox(unittest.TestCase):
     def prior_box_output(self, data_shape):
         images = fluid.layers.data(
             name='pixel', shape=data_shape, dtype='float32')
-        conv1 = fluid.layers.conv2d(
-            input=images,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv2 = fluid.layers.conv2d(
-            input=conv1,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv3 = fluid.layers.conv2d(
-            input=conv2,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv4 = fluid.layers.conv2d(
-            input=conv3,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv5 = fluid.layers.conv2d(
-            input=conv4,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
+        conv1 = fluid.layers.conv2d(images, 3, 3, 2)
+        conv2 = fluid.layers.conv2d(conv1, 3, 3, 2)
+        conv3 = fluid.layers.conv2d(conv2, 3, 3, 2)
+        conv4 = fluid.layers.conv2d(conv3, 3, 3, 2)
+        conv5 = fluid.layers.conv2d(conv4, 3, 3, 2)

         box, var = detection.prior_box(
             inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
@@ -112,39 +87,17 @@ class TestMultiBoxHead(unittest.TestCase):
         data_shape = [3, 224, 224]
         mbox_locs, mbox_confs = self.multi_box_output(data_shape)

+        for loc, conf in zip(mbox_locs, mbox_confs):
+            assert loc.shape[1:3] == conf.shape[1:3]
+
     def multi_box_output(self, data_shape):
         images = fluid.layers.data(
             name='pixel', shape=data_shape, dtype='float32')
-        conv1 = fluid.layers.conv2d(
-            input=images,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv2 = fluid.layers.conv2d(
-            input=conv1,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv3 = fluid.layers.conv2d(
-            input=conv2,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv4 = fluid.layers.conv2d(
-            input=conv3,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
-        conv5 = fluid.layers.conv2d(
-            input=conv4,
-            num_filters=3,
-            filter_size=3,
-            stride=2,
-            use_cudnn=False)
+        conv1 = fluid.layers.conv2d(images, 3, 3, 2)
+        conv2 = fluid.layers.conv2d(conv1, 3, 3, 2)
+        conv3 = fluid.layers.conv2d(conv2, 3, 3, 2)
+        conv4 = fluid.layers.conv2d(conv3, 3, 3, 2)
+        conv5 = fluid.layers.conv2d(conv4, 3, 3, 2)

         mbox_locs, mbox_confs = detection.multi_box_head(
             inputs=[conv1, conv2, conv3, conv4, conv5, conv5],
......