Commit 773cbfb3 authored by breezedeus

add shorter mode to pool images to 1/8 width

Parent 3f3caf6e
@@ -7,7 +7,15 @@ from .__version__ import __version__
 # e.g. for any __version__ = '1.2.*', the corresponding MODEL_VERSION is '1.2.0'
 MODEL_VERSION = '.'.join(__version__.split('.', maxsplit=2)[:2]) + '.0'
-EMB_MODEL_TYPES = ['conv', 'conv-lite', 'densenet', 'densenet-lite']
+EMB_MODEL_TYPES = [
+    'conv',  # seq_len == 35, deprecated
+    'conv-lite',  # seq_len == 69
+    'conv-lite-s',  # seq_len == 35
+    'densenet',  # seq_len == 70, deprecated
+    'densenet-lite',  # seq_len == 70
+    'densenet-s',  # seq_len == 35
+    'densenet-lite-s',  # seq_len == 35
+]
 SEQ_MODEL_TYPES = ['lstm', 'gru', 'fc']

 root_url = (
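The new `-s` ("shorter") embedding variants pool the image width down to 1/8 instead of 1/4, halving the output sequence length (see the seq_len comments above). A minimal sketch of the naming rule, not part of this commit, assuming full model names are built as '<emb_model_type>-<seq_model_type>' (e.g. 'densenet-lite-s-fc') — which would explain why gen_network below matches prefixes with a trailing hyphen such as 'densenet-s-':

    def is_shorter(model_name: str) -> bool:
        # the trailing '-' keeps 'densenet-s-*' from also matching
        # names that merely start with 'densenet-'
        return model_name.startswith(
            ('conv-lite-s-', 'densenet-s-', 'densenet-lite-s-')
        )

    assert is_shorter('densenet-lite-s-fc')
    assert not is_shorter('densenet-lite-fc')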
@@ -35,19 +35,25 @@ def gen_network(model_name, hp):
     model_name = model_name.lower()
     if model_name.startswith('densenet'):
         hp.seq_len_cmpr_ratio = 4
-        hp.set_seq_length(hp.img_width // 4)
         layer_channels = (
             (32, 64, 128, 256)
             if model_name.startswith('densenet-lite')
             else (64, 128, 256, 512)
         )
-        densenet = DenseNet(layer_channels)
+        shorter = model_name.startswith('densenet-s-') or model_name.startswith(
+            'densenet-lite-s-'
+        )
+        seq_len = hp.img_width // 8 if shorter else hp.img_width // 4
+        hp.set_seq_length(seq_len)
+        densenet = DenseNet(layer_channels, shorter=shorter)
         densenet.hybridize()
         model = CRnn(hp, densenet)
     elif model_name.startswith('conv-lite'):
         hp.seq_len_cmpr_ratio = 4
-        hp.set_seq_length(hp.img_width // 4 - 1)
-        model = lambda data: crnn_lstm_lite(hp, data)
+        shorter = model_name.startswith('conv-lite-s-')
+        seq_len = hp.img_width // 8 if shorter else hp.img_width // 4 - 1
+        hp.set_seq_length(seq_len)
+        model = lambda data: crnn_lstm_lite(hp, data, shorter=shorter)
     elif model_name.startswith('conv'):
         hp.seq_len_cmpr_ratio = 8
         hp.set_seq_length(hp.img_width // 8)
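For the default img_width of 280 (the width also used in the tests below), the branches above yield the following sequence lengths; a quick arithmetic check, not part of this commit:

    width = 280
    # densenet / densenet-lite:      width // 4     == 70
    # densenet-s / densenet-lite-s:  width // 8     == 35
    # conv-lite:                     width // 4 - 1 == 69  (its pool-2 uses no padding)
    # conv-lite-s / conv:            width // 8     == 35
    assert width // 4 == 70 and width // 4 - 1 == 69 and width // 8 == 35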
@@ -254,7 +260,7 @@ def crnn_lstm(hp, data):
     return hidden_concat


-def crnn_lstm_lite(hp, data):
+def crnn_lstm_lite(hp, data, *, shorter=False):
     kernel_size = [(3, 3), (3, 3), (3, 3), (3, 3), (3, 3), (3, 3)]
     padding_size = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (1, 1)]
     layer_size = [min(32 * 2 ** (i + 1), 512) for i in range(len(kernel_size))]
@@ -289,9 +295,11 @@ def crnn_lstm_lite(hp, data):
     # print('4', net.infer_shape()[1])
     net = bottle_conv(4, net, kernel_size[4], layer_size[4], padding_size[4])
     net = bottle_conv(5, net, kernel_size[5], layer_size[5], padding_size[5], True) + x
-    # res: bz x 512 x 4 x 69; the length drops from 70 to 69 because the pooling uses no padding
+    width_stride = 2 if shorter else 1
+    # res: bz x 512 x 4 x 69 or bz x 512 x 4 x 35
+    # the length drops from 70 to 69 because the pooling uses no padding
     net = mx.symbol.Pooling(
-        data=net, name='pool-2', pool_type='max', kernel=(2, 2), stride=(2, 1)
+        data=net, name='pool-2', pool_type='max', kernel=(2, 2), stride=(2, width_stride)
     )
     # print('5', net.infer_shape()[1])
     # net = mx.symbol.Convolution(name='conv-%d' % 6, data=net, kernel=(4, 1), num_filter=layer_size[5])
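The width arithmetic behind those shape comments: a pooling layer with kernel k, stride s and no padding produces floor((in - k) / s) + 1 outputs. A quick check for the pool-2 layer above, not part of this commit:

    def pooled_width(in_w, kernel_w=2, stride_w=1):
        # output width of pooling with no padding
        return (in_w - kernel_w) // stride_w + 1

    assert pooled_width(70, stride_w=1) == 69  # default mode
    assert pooled_width(70, stride_w=2) == 35  # shorter mode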
@@ -72,29 +72,23 @@ def _make_residual(cell_net):
 class DenseNet(HybridBlock):
-    r"""Densenet-BC model from the
+    r"""DenseNet model adapted from Gluon's implementation
+    ("from gluoncv.model_zoo.densenet import DenseNet") of the
     `"Densely Connected Convolutional Networks" <https://arxiv.org/pdf/1608.06993.pdf>`_ paper.

     Parameters
     ----------
-    num_init_features : int
-        Number of filters to learn in the first convolution layer.
-    growth_rate : int
-        Number of filters to add each layer (`k` in the paper).
-    block_config : list of int
-        List of integers for numbers of layers in each pooling block.
-    bn_size : int, default 4
-        Multiplicative factor for number of bottle neck layers.
-        (i.e. bn_size * k features in the bottleneck layer)
-    dropout : float, default 0
-        Rate of dropout after each dense layer.
-    classes : int, default 1000
-        Number of classification classes.
+    layer_channels : tuple or list of int, with length 4,
+        such as `layer_channels = (64, 128, 256, 512)`
+    shorter : bool, default False
+        pool the width down to 1/8 of the input if True, else to 1/4
     """

-    def __init__(self, layer_channels, **kwargs):
+    def __init__(self, layer_channels, *, shorter=False, **kwargs):
         assert len(layer_channels) == 4
         super(DenseNet, self).__init__(**kwargs)
+        self.shorter = shorter
         with self.name_scope():
             # Stage 0
             self.features = nn.HybridSequential(prefix='')
@@ -123,7 +117,12 @@ class DenseNet(HybridBlock):
             self.features.add(_make_last_transition(layer_channels[3]))
             # Stage 3
-            self.features.add(_make_final_stage_net(3, out_channels=layer_channels[3]))
+            pool_size = strides = (2, 2) if self.shorter else (2, 1)
+            self.features.add(
+                _make_final_stage_net(
+                    3, pool_size, strides, out_channels=layer_channels[3]
+                )
+            )

             # num_features = num_init_features
             # for i, num_layers in enumerate(block_config):
@@ -147,8 +146,12 @@ class DenseNet(HybridBlock):
         :return: with shape (batch_size, embed_size, 1, img_width // 4)
         """
         x = self.features(x)  # res: (batch_size, embed_size, 2, img_width // 4)
-        x = F.reshape(x, (0, -3, 0))  # res: (batch_size, embed_size * 2, img_width // 4)
-        x = F.expand_dims(x, axis=2)  # res: (batch_size, embed_size * 2, 1, img_width // 4)
+        x = F.reshape(
+            x, (0, -3, 0)
+        )  # res: (batch_size, embed_size * 2, img_width // 4)
+        x = F.expand_dims(
+            x, axis=2
+        )  # res: (batch_size, embed_size * 2, 1, img_width // 4)
         return x
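The shape comments above describe the default mode; with shorter=True the final pooling halves the width again, so each img_width // 4 becomes img_width // 8. The reshape spec (0, -3, 0) folds the height-2 axis into the channel axis: in MXNet's reshape, 0 copies an input dimension and -3 merges the next two. A quick check, not part of this commit:

    import mxnet as mx

    x = mx.nd.zeros((128, 512, 2, 70))  # (batch, embed, 2, img_width // 4)
    y = x.reshape((0, -3, 0))           # 0: keep dim; -3: merge dims 1 and 2
    assert y.shape == (128, 1024, 70)   # (batch, embed * 2, img_width // 4)
    assert y.expand_dims(axis=2).shape == (128, 1024, 1, 70)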
@@ -212,7 +215,7 @@ def _make_last_transition(num_output_features):
     return out


-def _make_final_stage_net(stage_index, out_channels):
+def _make_final_stage_net(stage_index, pool_size, strides, out_channels):
     features = nn.HybridSequential(prefix='stage%d_' % stage_index)
     with features.name_scope():
         features.add(nn.BatchNorm())
@@ -225,5 +228,5 @@ def _make_final_stage_net(stage_index, out_channels):
         # )
         # features.add(nn.BatchNorm())
         # features.add(nn.Activation('relu'))
-        features.add(nn.MaxPool2D(pool_size=(2, 1), strides=(2, 1)))
+        features.add(nn.MaxPool2D(pool_size=pool_size, strides=strides))
     return features
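A minimal shape check for the parametrized pooling, not part of this commit (it exercises only the new MaxPool2D configuration; the full stage also contains BatchNorm and convolution layers):

    from mxnet import nd
    from mxnet.gluon import nn

    x = nd.zeros((1, 512, 4, 70))
    for shorter, expected_w in ((False, 70), (True, 35)):
        pool_size = strides = (2, 2) if shorter else (2, 1)
        pool = nn.MaxPool2D(pool_size=pool_size, strides=strides)
        assert pool(x).shape == (1, 512, 2, expected_w)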
@@ -37,15 +37,19 @@ def test_dense_layer():
 def test_densenet():
-    x = nd.random.randn(128, 64, 32, 280)
+    width = 280
+    x = nd.random.randn(128, 64, 32, width)
     layer_channels = (64, 128, 256, 512)
-    net = DenseNet(layer_channels)
-    net.initialize()
-    y = net(x)
-    logger.info(net)
-    logger.info(y.shape)  # (128, 512, 1, 70)
-    assert y.shape[2] == 1
-    logger.info('number of parameters: %d', cal_num_params(net))  # 1748224
+    for shorter in (False, True):
+        net = DenseNet(layer_channels, shorter=shorter)
+        net.initialize()
+        y = net(x)
+        logger.info(net)
+        logger.info(y.shape)  # (128, 512, 1, 70) or (128, 512, 1, 35)
+        assert y.shape[2] == 1
+        expected_seq_len = width // 8 if shorter else width // 4
+        assert y.shape[3] == expected_seq_len
+        logger.info('number of parameters: %d', cal_num_params(net))  # 1748224


 def test_crnn():
@@ -77,17 +81,19 @@ def test_crnn_lstm():
 def test_crnn_lstm_lite():
     hp = deepcopy(HP)
-    hp.set_seq_length(hp.img_width // 4 - 1)
-    data = mx.sym.Variable('data', shape=(128, 1, 32, 280))
-    pred = crnn_lstm_lite(HP, data)
-    pred_shape = pred.infer_shape()[1][0]
-    logger.info('shape of pred: %s', pred_shape)
-    assert pred_shape == (hp.seq_length, hp.batch_size, 2 * hp.num_hidden)
+    width = hp.img_width  # 280
+    data = mx.sym.Variable('data', shape=(128, 1, 32, width))
+    for shorter in (False, True):
+        pred = crnn_lstm_lite(HP, data, shorter=shorter)
+        pred_shape = pred.infer_shape()[1][0]
+        logger.info('shape of pred: %s', pred_shape)
+        seq_len = hp.img_width // 8 if shorter else hp.img_width // 4 - 1
+        assert pred_shape == (seq_len, hp.batch_size, 2 * hp.num_hidden)


 def test_pipline():
     hp = deepcopy(HP)
-    hp.set_seq_length(hp.img_width // 4 - 1)
+    hp.set_seq_length(hp.img_width // 4)
     hp._loss_type = None  # infer mode
     layer_channels_list = [(64, 128, 256, 512), (32, 64, 128, 256)]
     for layer_channels in layer_channels_list: