未验证 提交 9bfa3013 编写于 作者: W Wang Meng 提交者: GitHub

Merge pull request #6631 from will-am/img_separable_conv

Add separable convolution
...@@ -25,10 +25,10 @@ from paddle.trainer.config_parser import * ...@@ -25,10 +25,10 @@ from paddle.trainer.config_parser import *
__all__ = [ __all__ = [
'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool", 'sequence_conv_pool', 'simple_lstm', "simple_img_conv_pool",
"img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg', "img_conv_bn_pool", 'lstmemory_group', 'lstmemory_unit', 'small_vgg',
'img_conv_group', 'vgg_16_network', 'gru_unit', 'gru_group', 'simple_gru', 'img_conv_group', 'img_separable_conv', 'vgg_16_network', 'gru_unit',
'simple_attention', 'dot_product_attention', 'multi_head_attention', 'gru_group', 'simple_gru', 'simple_attention', 'dot_product_attention',
'simple_gru2', 'bidirectional_gru', 'text_conv_pool', 'bidirectional_lstm', 'multi_head_attention', 'simple_gru2', 'bidirectional_gru',
'inputs', 'outputs' 'text_conv_pool', 'bidirectional_lstm', 'inputs', 'outputs'
] ]
###################################################### ######################################################
...@@ -251,13 +251,13 @@ def img_conv_bn_pool(input, ...@@ -251,13 +251,13 @@ def img_conv_bn_pool(input,
pool_layer_attr=None): pool_layer_attr=None):
""" """
Convolution, batch normalization, pooling group. Convolution, batch normalization, pooling group.
Img input => Conv => BN => Pooling => Output. Img input => Conv => BN => Pooling => Output.
:param name: group name. :param name: group name.
:type name: basestring :type name: basestring
:param input: input layer. :param input: input layer.
:type input: LayerOutput :type input: LayerOutput
:param filter_size: see img_conv_layer for details. :param filter_size: see img_conv_layer for details.
:type filter_size: int :type filter_size: int
:param num_filters: see img_conv_layer for details. :param num_filters: see img_conv_layer for details.
...@@ -435,6 +435,85 @@ def img_conv_group(input, ...@@ -435,6 +435,85 @@ def img_conv_group(input,
input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type) input=tmp, stride=pool_stride, pool_size=pool_size, pool_type=pool_type)
@wrap_name_default("separable_conv")
def img_separable_conv(input,
                       num_channels,
                       num_out_channels,
                       filter_size,
                       stride=1,
                       padding=0,
                       depth_multiplier=1,
                       act=None,
                       bias_attr=None,
                       param_attr=None,
                       shared_bias=True,
                       layer_type='exconv',
                       name=None):
    """
    Separable Convolution.

    A separable convolution consists of a depthwise convolution that acts
    on each input channel independently (implemented as a grouped
    convolution with ``groups == num_channels``), followed by a pointwise
    convolution with 1*1 kernels that mixes the channels. It is the
    building block used in Xception:
    https://arxiv.org/pdf/1610.02357.pdf

    :param name: group name; sub-layers are named
                 "<name>_depthwise_conv" and "<name>_pointwise_conv".
    :type name: basestring
    :param input: input layer.
    :type input: LayerOutput
    :param num_channels: the number of input channels.
    :type num_channels: int
    :param num_out_channels: the number of output channels.
    :type num_out_channels: int
    :param filter_size: the filter size for the depthwise convolution.
    :type filter_size: int|tuple
    :param stride: the stride size for the depthwise convolution.
    :type stride: int|tuple
    :param padding: the padding size for the depthwise convolution.
    :type padding: int|tuple
    :param depth_multiplier: the number of filters applied per input
                             channel in the depthwise convolution.
    :type depth_multiplier: int
    :param act: the activation function for the output; the depthwise
                stage itself is kept linear.
    :type act: BaseActivation
    :param bias_attr: see img_conv_layer for details.
    :type bias_attr: ParameterAttribute
    :param param_attr: see img_conv_layer for details.
    :type param_attr: ParameterAttribute
    :param shared_bias: see img_conv_layer for details.
    :type shared_bias: bool
    :param layer_type: see img_conv_layer for details.
    :type layer_type: basestring
    :return: layer's output
    :rtype: LayerOutput
    """
    # Depthwise stage: each input channel convolved separately, producing
    # depth_multiplier feature maps per channel. No activation here — the
    # requested activation is applied only after the pointwise mix.
    depthwise = img_conv_layer(
        name="%s_depthwise_conv" % name,
        input=input,
        num_channels=num_channels,
        num_filters=num_channels * depth_multiplier,
        groups=num_channels,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        act=LinearActivation(),
        bias_attr=bias_attr,
        param_attr=param_attr,
        shared_biases=shared_bias,
        layer_type=layer_type)
    # Pointwise stage: 1x1 convolution mixing the depthwise outputs into
    # num_out_channels channels, with the user-supplied activation.
    return img_conv_layer(
        name="%s_pointwise_conv" % name,
        input=depthwise,
        num_channels=num_channels * depth_multiplier,
        num_filters=num_out_channels,
        filter_size=1,
        stride=1,
        padding=0,
        act=act,
        bias_attr=bias_attr,
        param_attr=param_attr,
        shared_biases=shared_bias)
def small_vgg(input_image, num_channels, num_classes): def small_vgg(input_image, num_channels, num_classes):
def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None): def __vgg__(ipt, num_filter, times, dropouts, num_channels_=None):
return img_conv_group( return img_conv_group(
...@@ -648,7 +727,7 @@ def lstmemory_unit(input, ...@@ -648,7 +727,7 @@ def lstmemory_unit(input,
lstm_bias_attr=None, lstm_bias_attr=None,
lstm_layer_attr=None): lstm_layer_attr=None):
""" """
lstmemory_unit defines the calculation process of a LSTM unit during a lstmemory_unit defines the calculation process of a LSTM unit during a
single time step. This function is not a recurrent layer, so it can not be single time step. This function is not a recurrent layer, so it can not be
directly used to process sequence input. This function is always used in directly used to process sequence input. This function is always used in
recurrent_group (see layers.py for more details) to implement attention recurrent_group (see layers.py for more details) to implement attention
...@@ -869,7 +948,7 @@ def gru_unit(input, ...@@ -869,7 +948,7 @@ def gru_unit(input,
gru_layer_attr=None, gru_layer_attr=None,
naive=False): naive=False):
""" """
gru_unit defines the calculation process of a gated recurrent unit during a single gru_unit defines the calculation process of a gated recurrent unit during a single
time step. This function is not a recurrent layer, so it can not be time step. This function is not a recurrent layer, so it can not be
directly used to process sequence input. This function is always used in directly used to process sequence input. This function is always used in
the recurrent_group (see layers.py for more details) to implement attention the recurrent_group (see layers.py for more details) to implement attention
...@@ -1012,7 +1091,7 @@ def simple_gru(input, ...@@ -1012,7 +1091,7 @@ def simple_gru(input,
simple_gru in network.py. The reason why there are so many interfaces is simple_gru in network.py. The reason why there are so many interfaces is
that we have two ways to implement recurrent neural network. One way is to that we have two ways to implement recurrent neural network. One way is to
use one complete layer to implement rnn (including simple rnn, gru and lstm) use one complete layer to implement rnn (including simple rnn, gru and lstm)
with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But with multiple time steps, such as recurrent_layer, lstmemory, grumemory. But
the multiplication operation :math:`W x_t` is not computed in these layers. the multiplication operation :math:`W x_t` is not computed in these layers.
See details in their interfaces in layers.py. See details in their interfaces in layers.py.
The other implementation is to use an recurrent group which can ensemble a The other implementation is to use an recurrent group which can ensemble a
...@@ -1116,7 +1195,7 @@ def simple_gru2(input, ...@@ -1116,7 +1195,7 @@ def simple_gru2(input,
:type act: BaseActivation :type act: BaseActivation
:param gate_act: gate activiation type of gru :param gate_act: gate activiation type of gru
:type gate_act: BaseActivation :type gate_act: BaseActivation
:param gru_bias_attr: bias parameter attribute of gru layer, :param gru_bias_attr: bias parameter attribute of gru layer,
False means no bias, None means default bias. False means no bias, None means default bias.
:type gru_bias_attr: ParameterAttribute|False|None :type gru_bias_attr: ParameterAttribute|False|None
:param gru_param_attr: param parameter attribute of gru layer, :param gru_param_attr: param parameter attribute of gru layer,
...@@ -1189,7 +1268,7 @@ def bidirectional_gru(input, ...@@ -1189,7 +1268,7 @@ def bidirectional_gru(input,
:type size: int :type size: int
:param return_seq: If set False, the last time step of output are :param return_seq: If set False, the last time step of output are
concatenated and returned. concatenated and returned.
If set True, the entire output sequences in forward If set True, the entire output sequences in forward
and backward directions are concatenated and returned. and backward directions are concatenated and returned.
:type return_seq: bool :type return_seq: bool
:return: LayerOutput object. :return: LayerOutput object.
...@@ -1278,7 +1357,7 @@ def bidirectional_lstm(input, ...@@ -1278,7 +1357,7 @@ def bidirectional_lstm(input,
:type size: int :type size: int
:param return_seq: If set False, the last time step of output are :param return_seq: If set False, the last time step of output are
concatenated and returned. concatenated and returned.
If set True, the entire output sequences in forward If set True, the entire output sequences in forward
and backward directions are concatenated and returned. and backward directions are concatenated and returned.
:type return_seq: bool :type return_seq: bool
:return: LayerOutput object. :return: LayerOutput object.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册