diff --git a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst index 79048e92482851af6c2dd7d055868ebcaa7a298b..e05173c2006ff47ecb6ca5a4fe1502de750acc59 100644 --- a/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst +++ b/doc/howto/deep_model/rnn/hierarchical_layer_cn.rst @@ -28,17 +28,17 @@ pooling 的使用示例如下,详细见 :ref:`api_v2.layer_pooling` 配置API seq_pool = pooling(input=layer, pooling_type=pooling.Max(), - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_SEQUENCE) - `pooling_type` 目前支持两种,分别是:pooling.Max()和pooling.Avg()。 -- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): - 作用:双层序列经过运算变成一个0层序列,或单层序列经过运算变成一个0层序列 - 输入:一个双层序列,或一个单层序列 - 输出:一个0层序列,即整个输入序列(单层或双层)的平均值(或最大值) -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: - 作用:一个双层序列经过运算变成一个单层序列 - 输入:必须是一个双层序列 @@ -52,15 +52,15 @@ last_seq 的使用示例如下( :ref:`api_v2.layer_first_seq` 类似),详 .. code-block:: bash last = last_seq(input=layer, - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_SEQUENCE) -- `agg_level=AggregateLevel.EACH_TIMESTEP` 时(默认值): +- `agg_level=AggregateLevel.TO_NO_SEQUENCE` 时(默认值): - 作用:一个双层序列经过运算变成一个0层序列,或一个单层序列经过运算变成一个0层序列 - 输入:一个双层序列或一个单层序列 - 输出:一个0层序列,即整个输入序列(双层或者单层)最后一个,或第一个元素。 -- `agg_level=AggregateLevel.EACH_SEQUENCE` 时: +- `agg_level=AggregateLevel.TO_SEQUENCE` 时: - 作用:一个双层序列经过运算变成一个单层序列 - 输入:必须是一个双层序列 - 输出:一个单层序列,其中每个元素是双层序列中每个subseq最后一个(或第一个)元素。 @@ -74,9 +74,9 @@ expand 的使用示例如下,详细见 :ref:`api_v2.layer_expand` 配置API。 ex = expand(input=layer1, expand_as=layer2, - expand_level=ExpandLevel.FROM_TIMESTEP) + expand_level=ExpandLevel.FROM_NO_SEQUENCE) -- `expand_level=ExpandLevel.FROM_TIMESTEP` 时(默认值): +- `expand_level=ExpandLevel.FROM_NO_SEQUENCE` 时(默认值): - 作用:一个0层序列经过运算扩展成一个单层序列,或者一个双层序列 - 输入:layer1必须是一个0层序列,是待扩展的数据;layer2 可以是一个单层序列,或者是一个双层序列,提供扩展的长度信息 diff --git a/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst index 96e52b910a22576fd75c9d4e1bef6e2cf74bc84f..efdc44455ea4dc81a87b4d4fc8a81e78b15cb06a 100644 --- a/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst +++ b/doc/howto/deep_model/rnn/hrnn_rnn_api_compare_cn.rst @@ -81,7 +81,7 @@ * 在本例中,我们将原始数据的每一组,通过\ :code:`recurrent_group`\ 进行拆解,拆解成的每一句话再通过一个LSTM网络。这和单层RNN的配置是等价的。 -* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.EACH_SEQUENCE`\ 。 +* 与单层RNN的配置类似,我们只需要使用LSTM encode成的最后一个向量。所以对\ :code:`recurrent_group`\ 进行了\ :code:`last_seq`\ 操作。但和单层RNN不同,我们是对每一个子序列取最后一个元素,因此\ :code:`agg_level=AggregateLevel.TO_SEQUENCE`\ 。 * 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。 diff --git a/paddle/gserver/tests/sequence_nest_layer_group.conf b/paddle/gserver/tests/sequence_nest_layer_group.conf index c01b95f7a29ae73c2b3ccd5b56ad1d316cbc72ec..71ef53d08a2cea070806afb2c65ef15c4dd28f31 100644 --- a/paddle/gserver/tests/sequence_nest_layer_group.conf +++ b/paddle/gserver/tests/sequence_nest_layer_group.conf @@ -59,7 +59,7 @@ lstm_nest_group = recurrent_group( input=SubsequenceInput(emb_group), step=lstm_group, name="lstm_nest_group") # hasSubseq ->(seqlastins) seq lstm_last = last_seq( - input=lstm_nest_group, agg_level=AggregateLevel.EACH_SEQUENCE) + input=lstm_nest_group, agg_level=AggregateLevel.TO_SEQUENCE) # seq ->(expand) hasSubseq lstm_expand = expand_layer( @@ -71,7 +71,7 @@ lstm_expand = expand_layer( lstm_average = pooling_layer( input=lstm_expand, pooling_type=AvgPooling(), - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_SEQUENCE) with mixed_layer( size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output: diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 27fba42c70a312845fd4ad867329c77485817f8f..81cce31feccf0f1b26888738c1a7e1e3504c7616 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -237,16 +237,19 @@ class AggregateLevel(object): Accordingly, AggregateLevel supports two modes: - - :code:`AggregateLevel.EACH_TIMESTEP` means the aggregation acts on each + - :code:`AggregateLevel.TO_NO_SEQUENCE` means the aggregation acts on each timestep of a sequence, both :code:`SUB_SEQUENCE` and :code:`SEQUENCE` will be aggregated to :code:`NO_SEQUENCE`. - - :code:`AggregateLevel.EACH_SEQUENCE` means the aggregation acts on each + - :code:`AggregateLevel.TO_SEQUENCE` means the aggregation acts on each sequence of a nested sequence, :code:`SUB_SEQUENCE` will be aggregated to :code:`SEQUENCE`. """ - EACH_TIMESTEP = 'non-seq' - EACH_SEQUENCE = 'seq' + TO_NO_SEQUENCE = 'non-seq' + TO_SEQUENCE = 'seq' + # compatible with previous configuration + EACH_TIMESTEP = TO_NO_SEQUENCE + EACH_SEQUENCE = TO_SEQUENCE class LayerOutput(object): @@ -1083,7 +1086,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None, - agg_level=AggregateLevel.EACH_TIMESTEP, + agg_level=AggregateLevel.TO_NO_SEQUENCE, layer_attr=None): """ Pooling layer for sequence inputs, not used for Image. @@ -1094,10 +1097,10 @@ def pooling_layer(input, seq_pool = pooling_layer(input=layer, pooling_type=AvgPooling(), - agg_level=AggregateLevel.EACH_SEQUENCE) + agg_level=AggregateLevel.TO_NO_SEQUENCE) - :param agg_level: AggregateLevel.EACH_TIMESTEP or - AggregateLevel.EACH_SEQUENCE + :param agg_level: AggregateLevel.TO_NO_SEQUENCE or + AggregateLevel.TO_SEQUENCE :type agg_level: AggregateLevel :param name: layer name. :type name: basestring @@ -1367,7 +1370,7 @@ def grumemory(input, @layer_support() def last_seq(input, name=None, - agg_level=AggregateLevel.EACH_TIMESTEP, + agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=-1, layer_attr=None): """ @@ -1402,7 +1405,7 @@ def last_seq(input, " series information at all. Maybe you want to use" " first_seq instead.") - if agg_level == AggregateLevel.EACH_SEQUENCE: + if agg_level == AggregateLevel.TO_SEQUENCE: assert stride == -1 Layer( @@ -1423,7 +1426,7 @@ def last_seq(input, @layer_support() def first_seq(input, name=None, - agg_level=AggregateLevel.EACH_TIMESTEP, + agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=-1, layer_attr=None): """ @@ -1459,7 +1462,7 @@ def first_seq(input, ' time series information at all. Maybe you want to use' ' last_seq instead.') - if agg_level == AggregateLevel.EACH_SEQUENCE: + if agg_level == AggregateLevel.TO_SEQUENCE: assert stride == -1 Layer( @@ -1482,16 +1485,18 @@ class ExpandLevel(object): ExpandLevel supports two modes: - - :code:`ExpandLevel.FROM_TIMESTEP` means the expandation acts on each - timestep of a sequence, :code:`NO_SEQUENCE` will be expanded to + - :code:`ExpandLevel.FROM_NO_SEQUENCE` means the expansion acts on + :code:`NO_SEQUENCE`, which will be expanded to :code:`SEQUENCE` or :code:`SUB_SEQUENCE`. - - :code:`ExpandLevel.FROM_SEQUENCE` means the expandation acts on each - sequence of a nested sequence, :code:`SEQUENCE` will be expanded to + - :code:`ExpandLevel.FROM_SEQUENCE` means the expansion acts on + :code:`SEQUENCE`, which will be expanded to :code:`SUB_SEQUENCE`. """ - FROM_TIMESTEP = AggregateLevel.EACH_TIMESTEP - FROM_SEQUENCE = AggregateLevel.EACH_SEQUENCE + FROM_NO_SEQUENCE = AggregateLevel.TO_NO_SEQUENCE + FROM_SEQUENCE = AggregateLevel.TO_SEQUENCE + # compatible with previous configuration + FROM_TIMESTEP = FROM_NO_SEQUENCE @wrap_name_default() @@ -1500,7 +1505,7 @@ def expand_layer(input, expand_as, name=None, bias_attr=False, - expand_level=ExpandLevel.FROM_TIMESTEP, + expand_level=ExpandLevel.FROM_NO_SEQUENCE, layer_attr=None): """ A layer for "Expand Dense data or (sequence data where the length of each @@ -1512,7 +1517,7 @@ def expand_layer(input, expand = expand_layer(input=layer1, expand_as=layer2, - expand_level=ExpandLevel.FROM_TIMESTEP) + expand_level=ExpandLevel.FROM_NO_SEQUENCE) :param input: Input layer :type input: LayerOutput diff --git a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py index 3c6dbc95e54898ca1e44c3dc010c9fb73a3bee30..f87237f9b59a833825841bcdd605c2332c2d5941 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py +++ b/python/paddle/trainer_config_helpers/tests/configs/last_first_seq.py @@ -6,7 +6,7 @@ din = data_layer(name='data', size=30) seq_op = [first_seq, last_seq] -agg_level = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP] +agg_level = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE] opts = [] @@ -15,6 +15,7 @@ for op in seq_op: opts.append(op(input=din, agg_level=al)) for op in seq_op: - opts.append(op(input=din, agg_level=AggregateLevel.EACH_TIMESTEP, stride=5)) + opts.append( + op(input=din, agg_level=AggregateLevel.TO_NO_SEQUENCE, stride=5)) outputs(opts) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py index 81e5161ebc165f13ebb919fd3c0fe617167be048..c53f10e0a410b27d86b2415d98178c4790e0b0ba 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_expand_layer.py @@ -9,4 +9,6 @@ outputs( expand_layer( input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_SEQUENCE), expand_layer( - input=din, expand_as=data_seq, expand_level=ExpandLevel.FROM_TIMESTEP)) + input=din, + expand_as=data_seq, + expand_level=ExpandLevel.FROM_NO_SEQUENCE)) diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py index f67b6364d88560dfedb07428869482795be6af0c..3c49eb56c1363a6a3f365fe56e16a8b484c8a004 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py +++ b/python/paddle/trainer_config_helpers/tests/configs/test_sequence_pooling.py @@ -6,7 +6,7 @@ din = data_layer(name='dat_in', size=100) POOL_TYPE = [MaxPooling, AvgPooling, SumPooling] -AGG_LEVEL = [AggregateLevel.EACH_SEQUENCE, AggregateLevel.EACH_TIMESTEP] +AGG_LEVEL = [AggregateLevel.TO_SEQUENCE, AggregateLevel.TO_NO_SEQUENCE] opts = [] diff --git a/python/paddle/v2/tests/test_layer.py b/python/paddle/v2/tests/test_layer.py index 341da1c8520131645e5cd2ad64a6e1ab50458d2c..2d25b1a9dcaa12a7dfffe962ffab34edc0a95f1a 100644 --- a/python/paddle/v2/tests/test_layer.py +++ b/python/paddle/v2/tests/test_layer.py @@ -73,7 +73,7 @@ class AggregateLayerTest(unittest.TestCase): pool = layer.pooling( input=pixel, pooling_type=pooling.Avg(), - agg_level=layer.AggregateLevel.EACH_SEQUENCE) + agg_level=layer.AggregateLevel.TO_SEQUENCE) last_seq = layer.last_seq(input=pixel) first_seq = layer.first_seq(input=pixel) concat = layer.concat(input=[last_seq, first_seq]) @@ -109,7 +109,7 @@ class ReshapeLayerTest(unittest.TestCase): expand = layer.expand( input=weight, expand_as=pixel, - expand_level=layer.ExpandLevel.FROM_TIMESTEP) + expand_level=layer.ExpandLevel.FROM_NO_SEQUENCE) repeat = layer.repeat(input=pixel, num_repeats=4) reshape = layer.seq_reshape(input=pixel, reshape_size=4) rotate = layer.rotate(input=pixel, height=16, width=49)