提交 ebe2fc68 编写于 作者: G Guanghua Yu 提交者: qingqing01

Add FaceBoxes and Blazeface model config and WIDERFace dataloader. (#3436)

* Add backbone and architecture for FaceBoxes and Blazeface.
* Add dataloader for WIDERFace dataset.
上级 af232bf3
......@@ -120,6 +120,10 @@ def load(fname,
elif fname.endswith('.json'):
from . import coco_loader
records, cname2cid = coco_loader.load(fname, samples, with_background)
elif "wider_face" in fname:
from . import widerface_loader
records = widerface_loader.load(fname, samples)
return records
elif os.path.isfile(fname):
from . import voc_loader
if use_default_label is None or cname2cid is not None:
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import logging
logger = logging.getLogger(__name__)
def load(anno_path,
         sample_num=-1,
         cname2cid=None,
         with_background=True):
    """
    Load WIDER FACE records from the annotation file 'anno_path'.

    Args:
        anno_path (str): path to the WIDER FACE annotation txt file
        sample_num (int): number of samples to load, -1 means all
        cname2cid (dict|None): unused; kept for interface compatibility
            with the other dataset loaders
        with_background (bool): whether load background as a class.
            if True, total class number will be 2. default True

    Returns:
        (records, cname2cid)
        'records' is list of dict whose structure is:
        {
            'im_file': im_fname,   # image file name
            'im_id': im_id,        # image id
            'gt_class': gt_class,  # class ids, shape (N, 1)
            'gt_bbox': gt_bbox,    # boxes as [xmin, ymin, xmax, ymax], (N, 4)
        }
        'cname2cid' is a dict to map category name to class id
    """
    records = []
    ct = 0
    file_lists = _load_file_list(anno_path)
    cname2cid = widerface_label(with_background)

    for item in file_lists:
        # item[0] is the image path, item[1] the face count; the
        # remaining entries are "xmin ymin w h ..." box lines.
        im_fname = item[0]
        im_id = np.array([ct])
        valid_boxes = []
        for box_line in item[2:]:
            fields = box_line.split(' ')
            xmin = float(fields[0])
            ymin = float(fields[1])
            w = float(fields[2])
            h = float(fields[3])
            # Filter out wrong labels (negative width/height).
            # BUG FIX: the old code `continue`d but left an all-zero
            # row in the preallocated gt_bbox, feeding degenerate
            # boxes as ground truth; skipped boxes are now dropped.
            if w < 0 or h < 0:
                continue
            xmin = max(0, xmin)
            ymin = max(0, ymin)
            valid_boxes.append([xmin, ymin, xmin + w, ymin + h])
        gt_bbox = np.array(valid_boxes, dtype=np.float32).reshape(-1, 4)
        gt_class = np.ones((len(valid_boxes), 1), dtype=np.int32)

        widerface_rec = {
            'im_file': im_fname,
            'im_id': im_id,
            'gt_bbox': gt_bbox,
            'gt_class': gt_class,
        }
        records.append(widerface_rec)
        ct += 1
        if sample_num > 0 and ct >= sample_num:
            break
    assert len(records) > 0, 'not found any widerface in %s' % (anno_path)
    logger.info('{} samples in file {}'.format(ct, anno_path))
    return records, cname2cid
def _load_file_list(input_txt):
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
file_dict = {}
num_class = 0
for i in range(len(lines_input_txt)):
line_txt = lines_input_txt[i].strip('\n\t\r')
if '.jpg' in line_txt:
if i != 0:
num_class += 1
file_dict[num_class] = []
file_dict[num_class].append(line_txt)
if '.jpg' not in line_txt:
if len(line_txt) > 6:
split_str = line_txt.split(' ')
x1_min = float(split_str[0])
y1_min = float(split_str[1])
x2_max = float(split_str[2])
y2_max = float(split_str[3])
line_txt = str(x1_min) + ' ' + str(y1_min) + ' ' + str(
x2_max) + ' ' + str(y2_max)
file_dict[num_class].append(line_txt)
else:
file_dict[num_class].append(line_txt)
return list(file_dict.values())
def widerface_label(with_background=True):
    """Return the WIDER FACE category-name to class-id mapping.

    With background, class id 0 is reserved and 'face' gets id 1;
    otherwise ids are shifted down so 'face' is 0.
    """
    face_id = 1 if with_background else 0
    return {'face': face_id}
......@@ -21,6 +21,8 @@ from . import cascade_mask_rcnn
from . import yolov3
from . import ssd
from . import retinanet
from . import blazeface
from . import faceboxes
from .faster_rcnn import *
from .mask_rcnn import *
......@@ -29,3 +31,5 @@ from .cascade_mask_rcnn import *
from .yolov3 import *
from .ssd import *
from .retinanet import *
from .blazeface import *
from .faceboxes import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling.ops import SSDOutputDecoder
__all__ = ['BlazeFace']
@register
class BlazeFace(object):
    """
    BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
    see https://arxiv.org/abs/1907.05047

    Args:
        backbone (object): backbone instance
        output_decoder (object): `SSDOutputDecoder` instance
        min_sizes (list|None): min sizes of generated prior boxes.
        max_sizes (list|None): max sizes of generated prior boxes. Default: None.
        steps (list|None): stride of the feature map each head is applied
            to, one entry per head.
        num_classes (int): number of output classes
        use_density_prior_box (bool): whether or not use density_prior_box
            instead of prior_box
        densities (list|None): the densities of generated density prior boxes,
            this attribute should be a list or tuple of integers
    """
    __category__ = 'architecture'
    __inject__ = ['backbone', 'output_decoder']
    __shared__ = ['num_classes']

    def __init__(self,
                 backbone="BlazeNet",
                 output_decoder=SSDOutputDecoder().__dict__,
                 min_sizes=[[16., 24.], [32., 48., 64., 80., 96., 128.]],
                 max_sizes=None,
                 steps=[8., 16.],
                 num_classes=2,
                 use_density_prior_box=False,
                 densities=[[2, 2], [2, 1, 1, 1, 1, 1]]):
        # NOTE: the mutable list defaults serve as config templates for
        # the workspace registry and are never mutated by this class.
        super(BlazeFace, self).__init__()
        self.backbone = backbone
        self.num_classes = num_classes
        self.output_decoder = output_decoder
        if isinstance(output_decoder, dict):
            self.output_decoder = SSDOutputDecoder(**output_decoder)
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.steps = steps
        self.use_density_prior_box = use_density_prior_box
        self.densities = densities

    def build(self, feed_vars, mode='train'):
        """Build the network for the given mode.

        Args:
            feed_vars (dict): feed variables; must contain 'image', plus
                'gt_box' and 'gt_label' when mode is 'train'.
            mode (str): one of 'train', 'eval', 'test'.

        Returns:
            dict: {'loss': loss} in train mode, {'bbox': pred} otherwise.
        """
        im = feed_vars['image']
        if mode == 'train':
            gt_box = feed_vars['gt_box']
            gt_label = feed_vars['gt_label']

        body_feats = self.backbone(im)
        locs, confs, box, box_var = self._multi_box_head(
            inputs=body_feats,
            image=im,
            num_classes=self.num_classes,
            use_density_prior_box=self.use_density_prior_box)

        if mode == 'train':
            loss = fluid.layers.ssd_loss(
                locs,
                confs,
                gt_box,
                gt_label,
                box,
                box_var,
                overlap_threshold=0.35,
                neg_overlap=0.35)
            loss = fluid.layers.reduce_sum(loss)
            loss.persistable = True
            return {'loss': loss}
        else:
            pred = self.output_decoder(locs, confs, box, box_var)
            return {'bbox': pred}

    def _multi_box_head(self,
                        inputs,
                        image,
                        num_classes=2,
                        use_density_prior_box=False):
        """Create per-feature-map loc/conf heads and prior boxes."""

        def permute_and_reshape(input, last_dim):
            # NCHW -> NHWC, then flatten so each row is one prediction
            # of width `last_dim`.
            trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
            compile_shape = [
                trans.shape[0], np.prod(trans.shape[1:]) // last_dim, last_dim
            ]
            return fluid.layers.reshape(trans, shape=compile_shape)

        locs, confs = [], []
        boxes, vars = [], []
        b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))

        for i, input in enumerate(inputs):
            min_size = self.min_sizes[i]

            if use_density_prior_box:
                densities = self.densities[i]
                box, var = fluid.layers.density_prior_box(
                    input,
                    image,
                    densities=densities,
                    fixed_sizes=min_size,
                    fixed_ratios=[1.],
                    clip=False,
                    offset=0.5)
            else:
                # NOTE(review): self.max_sizes is never forwarded here
                # (always None); confirm intended per-head indexing
                # before wiring it through.
                box, var = fluid.layers.prior_box(
                    input,
                    image,
                    min_sizes=min_size,
                    max_sizes=None,
                    steps=[self.steps[i]] * 2,
                    aspect_ratios=[1.],
                    clip=False,
                    flip=False,
                    offset=0.5)

            num_boxes = box.shape[2]

            box = fluid.layers.reshape(box, shape=[-1, 4])
            var = fluid.layers.reshape(var, shape=[-1, 4])
            num_loc_output = num_boxes * 4
            num_conf_output = num_boxes * num_classes
            # get loc
            mbox_loc = fluid.layers.conv2d(
                input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
            loc = permute_and_reshape(mbox_loc, 4)
            # get conf
            # BUG FIX: the last dim was hardcoded to 2; use num_classes
            # so it stays consistent with num_conf_output above.
            mbox_conf = fluid.layers.conv2d(
                input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
            conf = permute_and_reshape(mbox_conf, num_classes)

            locs.append(loc)
            confs.append(conf)
            boxes.append(box)
            vars.append(var)

        face_mbox_loc = fluid.layers.concat(locs, axis=1)
        face_mbox_conf = fluid.layers.concat(confs, axis=1)
        prior_boxes = fluid.layers.concat(boxes)
        box_vars = fluid.layers.concat(vars)
        return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars

    def train(self, feed_vars):
        return self.build(feed_vars, 'train')

    def eval(self, feed_vars):
        return self.build(feed_vars, 'eval')

    def test(self, feed_vars):
        return self.build(feed_vars, 'test')

    def is_bbox_normalized(self):
        # Predicted boxes are in normalized [0, 1] coordinates.
        return True
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling.ops import SSDOutputDecoder
__all__ = ['FaceBoxes']
@register
class FaceBoxes(object):
    """
    FaceBoxes: A CPU Real-time Face Detector with High Accuracy,
    see https://arxiv.org/abs/1708.05234

    Args:
        backbone (object): backbone instance
        output_decoder (object): `SSDOutputDecoder` instance
        densities (list|None): the densities of generated density prior boxes,
            this attribute should be a list or tuple of integers.
        fixed_sizes (list|None): the fixed sizes of generated density prior boxes,
            this attribute should a list or tuple of same length with `densities`.
        num_classes (int): number of output classes
    """
    __category__ = 'architecture'
    __inject__ = ['backbone', 'output_decoder']
    __shared__ = ['num_classes']

    def __init__(self,
                 backbone="FaceBoxNet",
                 output_decoder=SSDOutputDecoder().__dict__,
                 densities=[[4, 2, 1], [1], [1]],
                 fixed_sizes=[[32., 64., 128.], [256.], [512.]],
                 num_classes=2):
        # NOTE: the mutable list defaults serve as config templates for
        # the workspace registry and are never mutated by this class.
        super(FaceBoxes, self).__init__()
        self.backbone = backbone
        self.num_classes = num_classes
        self.output_decoder = output_decoder
        if isinstance(output_decoder, dict):
            self.output_decoder = SSDOutputDecoder(**output_decoder)
        self.densities = densities
        self.fixed_sizes = fixed_sizes

    def build(self, feed_vars, mode='train'):
        """Build the network for the given mode.

        Args:
            feed_vars (dict): feed variables; must contain 'image', plus
                'gt_box' and 'gt_label' when mode is 'train'.
            mode (str): one of 'train', 'eval', 'test'.

        Returns:
            dict: {'loss': loss} in train mode, {'bbox': pred} otherwise.
        """
        im = feed_vars['image']
        if mode == 'train':
            gt_box = feed_vars['gt_box']
            gt_label = feed_vars['gt_label']

        body_feats = self.backbone(im)
        locs, confs, box, box_var = self._multi_box_head(
            inputs=body_feats, image=im, num_classes=self.num_classes)

        if mode == 'train':
            loss = fluid.layers.ssd_loss(
                locs,
                confs,
                gt_box,
                gt_label,
                box,
                box_var,
                overlap_threshold=0.35,
                neg_overlap=0.35)
            loss = fluid.layers.reduce_sum(loss)
            loss.persistable = True
            return {'loss': loss}
        else:
            pred = self.output_decoder(locs, confs, box, box_var)
            return {'bbox': pred}

    def _multi_box_head(self, inputs, image, num_classes=2):
        """Create per-feature-map loc/conf heads and density prior boxes."""

        def permute_and_reshape(input, last_dim):
            # NCHW -> NHWC, then flatten so each row is one prediction
            # of width `last_dim`.
            trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
            compile_shape = [
                trans.shape[0], np.prod(trans.shape[1:]) // last_dim, last_dim
            ]
            return fluid.layers.reshape(trans, shape=compile_shape)

        locs, confs = [], []
        boxes, vars = [], []
        b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))

        for i, input in enumerate(inputs):
            densities = self.densities[i]
            fixed_sizes = self.fixed_sizes[i]
            box, var = fluid.layers.density_prior_box(
                input,
                image,
                densities=densities,
                fixed_sizes=fixed_sizes,
                fixed_ratios=[1.],
                clip=False,
                offset=0.5)

            num_boxes = box.shape[2]

            box = fluid.layers.reshape(box, shape=[-1, 4])
            var = fluid.layers.reshape(var, shape=[-1, 4])
            num_loc_output = num_boxes * 4
            num_conf_output = num_boxes * num_classes
            # get loc
            mbox_loc = fluid.layers.conv2d(
                input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
            loc = permute_and_reshape(mbox_loc, 4)
            # get conf
            # BUG FIX: the last dim was hardcoded to 2; use num_classes
            # so it stays consistent with num_conf_output above.
            mbox_conf = fluid.layers.conv2d(
                input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
            conf = permute_and_reshape(mbox_conf, num_classes)

            locs.append(loc)
            confs.append(conf)
            boxes.append(box)
            vars.append(var)

        face_mbox_loc = fluid.layers.concat(locs, axis=1)
        face_mbox_conf = fluid.layers.concat(confs, axis=1)
        prior_boxes = fluid.layers.concat(boxes)
        box_vars = fluid.layers.concat(vars)
        return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars

    def train(self, feed_vars):
        return self.build(feed_vars, 'train')

    def eval(self, feed_vars):
        return self.build(feed_vars, 'eval')

    def test(self, feed_vars):
        return self.build(feed_vars, 'test')

    def is_bbox_normalized(self):
        # Predicted boxes are in normalized [0, 1] coordinates.
        return True
......@@ -21,6 +21,8 @@ from . import mobilenet
from . import senet
from . import fpn
from . import vgg
from . import blazenet
from . import faceboxnet
from .resnet import *
from .resnext import *
......@@ -29,3 +31,5 @@ from .mobilenet import *
from .senet import *
from .fpn import *
from .vgg import *
from .blazenet import *
from .faceboxnet import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from ppdet.core.workspace import register
__all__ = ['BlazeNet']
@register
class BlazeNet(object):
    """
    Backbone for BlazeFace, see https://arxiv.org/abs/1907.05047

    Args:
        blaze_filters (list): number of filter for each blaze block
        double_blaze_filters (list): number of filter for each double_blaze block
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether or not is blazeface-lite
    """

    def __init__(
            self,
            blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]],
            double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
                                  [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]],
            with_extra_blocks=True,
            lite_edition=False):
        # NOTE: the mutable list defaults serve as config templates for
        # the workspace registry and are never mutated by this class.
        super(BlazeNet, self).__init__()
        self.blaze_filters = blaze_filters
        self.double_blaze_filters = double_blaze_filters
        self.with_extra_blocks = with_extra_blocks
        self.lite_edition = lite_edition

    def __call__(self, input):
        """Build the backbone; returns one feature map, or two when
        with_extra_blocks is set (lite edition always returns two)."""
        if not self.lite_edition:
            conv1_num_filters = self.blaze_filters[0][0]
            conv = self._conv_norm(
                input=input,
                num_filters=conv1_num_filters,
                filter_size=3,
                stride=2,
                padding=1,
                act='relu',
                name="conv1")

            for k, v in enumerate(self.blaze_filters):
                # BUG FIX: the assert messages below previously lacked
                # .format(v), so the '{}' placeholder was never filled.
                assert len(v) in [2, 3], \
                    "blaze_filters {} not in [2, 3]".format(v)
                if len(v) == 2:
                    conv = self.BlazeBlock(
                        conv, v[0], v[1], name='blaze_{}'.format(k))
                elif len(v) == 3:
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        stride=v[2],
                        name='blaze_{}'.format(k))

            layers = []
            for k, v in enumerate(self.double_blaze_filters):
                # BUG FIX: message previously said "blaze_filters".
                assert len(v) in [3, 4], \
                    "double_blaze_filters {} not in [3, 4]".format(v)
                if len(v) == 3:
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        double_channels=v[2],
                        name='double_blaze_{}'.format(k))
                elif len(v) == 4:
                    # A strided block starts a new stage; remember the
                    # previous stage's output as a detection feature map.
                    layers.append(conv)
                    conv = self.BlazeBlock(
                        conv,
                        v[0],
                        v[1],
                        double_channels=v[2],
                        stride=v[3],
                        name='double_blaze_{}'.format(k))
            layers.append(conv)

            if not self.with_extra_blocks:
                return layers[-1]
            return layers[-2], layers[-1]
        else:
            conv1 = self._conv_norm(
                input=input,
                num_filters=24,
                filter_size=5,
                stride=2,
                padding=2,
                act='relu',
                name="conv1")
            conv2 = self.Blaze_lite(conv1, 24, 24, 1, 'conv2')
            conv3 = self.Blaze_lite(conv2, 24, 28, 1, 'conv3')
            conv4 = self.Blaze_lite(conv3, 28, 32, 2, 'conv4')
            conv5 = self.Blaze_lite(conv4, 32, 36, 1, 'conv5')
            conv6 = self.Blaze_lite(conv5, 36, 42, 1, 'conv6')
            conv7 = self.Blaze_lite(conv6, 42, 48, 2, 'conv7')
            in_ch = 48
            # conv8..conv12: widen by 8 channels per block.
            for i in range(5):
                conv7 = self.Blaze_lite(conv7, in_ch, in_ch + 8, 1,
                                        'conv{}'.format(8 + i))
                in_ch += 8
            assert in_ch == 88

            conv13 = self.Blaze_lite(conv7, 88, 96, 2, 'conv13')
            # conv14..conv17: four more blocks at 96 channels.
            for i in range(4):
                conv13 = self.Blaze_lite(conv13, 96, 96, 1,
                                         'conv{}'.format(14 + i))

            return conv7, conv13

    def BlazeBlock(self,
                   input,
                   in_channels,
                   out_channels,
                   double_channels=None,
                   stride=1,
                   use_5x5kernel=True,
                   name=None):
        """Depthwise-separable residual block; a second dw/pw pair is
        appended when double_channels is given (double BlazeBlock)."""
        assert stride in [1, 2]
        use_pool = not stride == 1
        use_double_block = double_channels is not None
        act = 'relu' if use_double_block else None

        if use_5x5kernel:
            conv_dw = self._conv_norm(
                input=input,
                filter_size=5,
                num_filters=in_channels,
                stride=stride,
                padding=2,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw")
        else:
            # Two stacked 3x3 dw convs emulate the 5x5 receptive field.
            conv_dw_1 = self._conv_norm(
                input=input,
                filter_size=3,
                num_filters=in_channels,
                stride=1,
                padding=1,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw_1")
            conv_dw = self._conv_norm(
                input=conv_dw_1,
                filter_size=3,
                num_filters=in_channels,
                stride=stride,
                padding=1,
                num_groups=in_channels,
                use_cudnn=False,
                name=name + "1_dw_2")

        conv_pw = self._conv_norm(
            input=conv_dw,
            filter_size=1,
            num_filters=out_channels,
            stride=1,
            padding=0,
            act=act,
            name=name + "1_sep")

        if use_double_block:
            if use_5x5kernel:
                # NOTE(review): unlike every other dw conv here this one
                # has no num_groups=out_channels, making it a dense 5x5
                # conv — confirm whether depthwise was intended.
                conv_dw = self._conv_norm(
                    input=conv_pw,
                    filter_size=5,
                    num_filters=out_channels,
                    stride=1,
                    padding=2,
                    use_cudnn=False,
                    name=name + "2_dw")
            else:
                conv_dw_1 = self._conv_norm(
                    input=conv_pw,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=False,
                    name=name + "2_dw_1")
                conv_dw = self._conv_norm(
                    input=conv_dw_1,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=False,
                    name=name + "2_dw_2")

            conv_pw = self._conv_norm(
                input=conv_dw,
                filter_size=1,
                num_filters=double_channels,
                stride=1,
                padding=0,
                name=name + "2_sep")

        # shortcut: when strided, downsample the input by max-pool and
        # project to the output channel count before the residual add.
        if use_pool:
            shortcut_channel = double_channels or out_channels
            shortcut_pool = self._pooling_block(input, stride, stride)
            channel_pad = self._conv_norm(
                input=shortcut_pool,
                filter_size=1,
                num_filters=shortcut_channel,
                stride=1,
                padding=0,
                name="shortcut" + name)
            return fluid.layers.elementwise_add(
                x=channel_pad, y=conv_pw, act='relu')
        return fluid.layers.elementwise_add(x=input, y=conv_pw, act='relu')

    def Blaze_lite(self, input, in_channels, out_channels, stride=1, name=None):
        """Lite depthwise-separable residual block."""
        assert stride in [1, 2]
        use_pool = not stride == 1
        use_pad = not in_channels == out_channels

        conv_dw = self._conv_norm(
            input=input,
            filter_size=3,
            num_filters=in_channels,
            stride=stride,
            padding=1,
            num_groups=in_channels,
            name=name + "_dw")

        conv_pw = self._conv_norm(
            input=conv_dw,
            filter_size=1,
            num_filters=out_channels,
            stride=1,
            padding=0,
            name=name + "_sep")

        if use_pool:
            shortcut_pool = self._pooling_block(input, stride, stride)
        # NOTE(review): when stride == 2 but channels are equal, the
        # residual add below uses the un-pooled input (spatial mismatch).
        # No default config exercises that path — confirm before use.
        if use_pad:
            conv_pad = shortcut_pool if use_pool else input
            channel_pad = self._conv_norm(
                input=conv_pad,
                filter_size=1,
                num_filters=out_channels,
                stride=1,
                padding=0,
                name="shortcut" + name)
            return fluid.layers.elementwise_add(
                x=channel_pad, y=conv_pw, act='relu')
        return fluid.layers.elementwise_add(x=input, y=conv_pw, act='relu')

    def _conv_norm(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
        """conv2d (no bias) followed by batch_norm with optional act."""
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        # The activation is applied by batch_norm, not by the conv.
        return fluid.layers.batch_norm(input=conv, act=act)

    def _pooling_block(self,
                       conv,
                       pool_size,
                       pool_stride,
                       pool_padding=0,
                       ceil_mode=True):
        """Max-pool helper used for the residual shortcut downsampling."""
        pool = fluid.layers.pool2d(
            input=conv,
            pool_size=pool_size,
            pool_type='max',
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            ceil_mode=ceil_mode)
        return pool
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from ppdet.core.workspace import register
__all__ = ['FaceBoxNet']
@register
class FaceBoxNet(object):
    """
    Backbone for FaceBoxes: A CPU Real-time Face Detector with High
    Accuracy, see https://arxiv.org/abs/1708.05234

    Args:
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether or not is FaceBoxes-lite
    """

    def __init__(self,
                 with_extra_blocks=True,
                 lite_edition=False):
        super(FaceBoxNet, self).__init__()
        self.with_extra_blocks = with_extra_blocks
        self.lite_edition = lite_edition

    def __call__(self, input):
        """Build the backbone and return its detection feature maps."""
        if self.lite_edition:
            return self._simplified_edition(input)
        else:
            return self._original_edition(input)

    def _simplified_edition(self, input):
        """Lite variant: shallow CReLU stem + inception blocks."""
        conv_1_1 = self._conv_norm_crelu(
            input=input,
            num_filters=8,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_1_1")

        conv_1_2 = self._conv_norm_crelu(
            input=conv_1_1,
            num_filters=24,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_1_2")

        pool1 = fluid.layers.pool2d(
            input=conv_1_2,
            pool_size=3,
            pool_padding=1,
            pool_type='avg',
            name="pool_1")

        conv_2_1 = self._conv_norm(
            input=pool1,
            num_filters=48,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_2_1")

        conv_2_2 = self._conv_norm(
            input=conv_2_1,
            num_filters=64,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_2_2")

        conv_inception = conv_2_2
        for i in range(3):
            conv_inception = self._inceptionA(conv_inception, i)

        layers = []
        layers.append(conv_inception)

        conv_3_1 = self._conv_norm(
            input=conv_inception,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_3_1")

        conv_3_2 = self._conv_norm(
            input=conv_3_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_3_2")

        layers.append(conv_3_2)

        if not self.with_extra_blocks:
            return layers[-1]
        return layers[-2], layers[-1]

    def _original_edition(self, input):
        """Paper architecture: RDCL stem, inception blocks, extra convs."""
        conv_1 = self._conv_norm_crelu(
            input=input,
            num_filters=24,
            filter_size=7,
            stride=4,
            padding=3,
            act='relu',
            name="conv_1")

        pool_1 = fluid.layers.pool2d(
            input=conv_1,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            name="pool_1")

        conv_2 = self._conv_norm_crelu(
            input=pool_1,
            num_filters=64,
            filter_size=5,
            stride=2,
            padding=2,
            act='relu',
            name="conv_2")

        # BUG FIX: pool_2 previously pooled conv_1, which silently
        # discarded conv_2 from the graph; pool the output of conv_2.
        pool_2 = fluid.layers.pool2d(
            input=conv_2,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max',
            name="pool_2")

        conv_inception = pool_2
        for i in range(3):
            conv_inception = self._inceptionA(conv_inception, i)

        layers = []
        layers.append(conv_inception)

        conv_3_1 = self._conv_norm(
            input=conv_inception,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_3_1")

        conv_3_2 = self._conv_norm(
            input=conv_3_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_3_2")

        layers.append(conv_3_2)

        conv_4_1 = self._conv_norm(
            input=conv_3_2,
            num_filters=128,
            filter_size=1,
            stride=1,
            padding=0,
            act='relu',
            name="conv_4_1")

        conv_4_2 = self._conv_norm(
            input=conv_4_1,
            num_filters=256,
            filter_size=3,
            stride=2,
            padding=1,
            act='relu',
            name="conv_4_2")

        layers.append(conv_4_2)

        if not self.with_extra_blocks:
            return layers[-1]
        return layers[-3], layers[-2], layers[-1]

    def _conv_norm(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
        """conv2d (no bias) followed by batch_norm with optional act."""
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        # BUG FIX: removed leftover debug print of the conv shape.
        return fluid.layers.batch_norm(input=conv, act=act)

    def _conv_norm_crelu(
            self,
            input,
            filter_size,
            num_filters,
            stride,
            padding,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
        """conv + bn, then concat the result with its negation along the
        channel axis (CReLU-style), doubling the channel count."""
        parameter_attr = ParamAttr(
            learning_rate=0.1,
            initializer=fluid.initializer.MSRA(),
            name=name + "_weights")
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=parameter_attr,
            bias_attr=False)
        conv_a = fluid.layers.batch_norm(input=conv, act=act)
        conv_b = fluid.layers.scale(conv_a, -1)
        concat = fluid.layers.concat([conv_a, conv_b], axis=1)
        return concat

    def _pooling_block(self,
                       conv,
                       pool_size,
                       pool_stride,
                       pool_padding=0,
                       ceil_mode=True):
        """Max-pool helper."""
        pool = fluid.layers.pool2d(
            input=conv,
            pool_size=pool_size,
            pool_type='max',
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            ceil_mode=ceil_mode)
        return pool

    def _inceptionA(self, data, idx):
        """Inception block: avg-pool+1x1, 1x1, 1x1->3x3, 1x1->3x3->3x3
        branches concatenated along the channel axis (32 ch each)."""
        idx = str(idx)
        pool1 = fluid.layers.pool2d(
            input=data,
            pool_size=3,
            pool_padding=1,
            pool_type='avg',
            name='inceptionA_' + idx + '_pool1')
        conv1 = self._conv_norm(
            input=pool1,
            filter_size=1,
            num_filters=32,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv1')

        conv2 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=32,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv2')

        conv3 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=24,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv3_1')
        conv3 = self._conv_norm(
            input=conv3,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv3_2')

        conv4 = self._conv_norm(
            input=data,
            filter_size=1,
            num_filters=24,
            stride=1,
            padding=0,
            act='relu',
            name='inceptionA_' + idx + '_conv4_1')
        conv4 = self._conv_norm(
            input=conv4,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv4_2')
        conv4 = self._conv_norm(
            input=conv4,
            filter_size=3,
            num_filters=32,
            stride=1,
            padding=1,
            act='relu',
            name='inceptionA_' + idx + '_conv4_3')

        concat = fluid.layers.concat(
            [conv1, conv2, conv3, conv4], axis=1)

        return concat
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册