From 226f810352db3211bdecaece8be1e189cbbba713 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Fri, 16 Jun 2017 09:59:41 -0700
Subject: [PATCH] Add activation for repeat_layer

Also remove the active_type argument for many layers in config_parser.py
because it is automatically handled by LayerBase.
---
 python/paddle/trainer/config_parser.py        | 57 ++++---------
 .../paddle/trainer_config_helpers/layers.py   | 12 +++-
 .../tests/configs/file_list.sh                |  2 +-
 .../configs/protostr/last_first_seq.protostr  | 12 ++--
 .../configs/protostr/shared_gru.protostr      |  4 +-
 .../configs/protostr/shared_lstm.protostr     |  4 +-
 .../protostr/simple_rnn_layers.protostr       | 12 ++--
 .../protostr/test_repeat_layer.protostr       | 42 ++++++++++++++
 .../configs/protostr/test_rnn_group.protostr  | 12 ++--
 .../protostr/test_seq_concat_reshape.protostr |  2 +-
 .../protostr/test_sequence_pooling.protostr   | 14 ++---
 11 files changed, 94 insertions(+), 79 deletions(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 8e3c3241623..86f091ab59d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1949,7 +1949,6 @@ class BatchNormLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 active_type="linear",
                  bias=True,
                  use_global_stats=True,
                  moving_average_fraction=0.9,
@@ -1987,12 +1986,7 @@ class BatchNormLayer(LayerBase):
             cudnn_version >= 4007
         self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
         super(BatchNormLayer, self).__init__(
-            name,
-            self.layer_type,
-            0,
-            active_type=active_type,
-            inputs=inputs,
-            **xargs)
+            name, self.layer_type, 0, inputs=inputs, **xargs)
 
         if use_global_stats is not None:
             self.config.use_global_stats = use_global_stats
@@ -2431,12 +2425,12 @@ class FeatMapExpandLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 device=None,
                  num_filters=None,
                  as_row_vector=True,
-                 bias=False):
+                 bias=False,
+                 **xargs):
         super(FeatMapExpandLayer, self).__init__(
-            name, 'featmap_expand', 0, inputs=inputs, device=device)
+            name, 'featmap_expand', 0, inputs=inputs, **xargs)
         config_assert(
             len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs')
         if num_filters is not None:
@@ -2454,14 +2448,12 @@ class MaxLayer(LayerBase):
                  name,
                  inputs,
                  trans_type='non-seq',
-                 active_type='linear',
                  bias=False,
                  output_max_index=None,
                  **xargs):
         super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
         config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
         self.config.trans_type = trans_type
-        self.config.active_type = active_type
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             self.set_layer_size(input_layer.size)
@@ -2503,18 +2495,12 @@ class SequenceLastInstanceLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 active_type='linear',
                  trans_type='non-seq',
                  bias=False,
                  stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
-            name,
-            'seqlastins',
-            0,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqlastins', 0, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
         if trans_type == 'seq':
@@ -2530,7 +2516,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
     def __init__(self,
                  name,
                  inputs,
-                 active_type='linear',
                  trans_type='non-seq',
                  bias=False,
                  stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
             name,
             inputs=inputs,
-            active_type=active_type,
             trans_type=trans_type,
             bias=bias,
             stride=stride,
             **xargs)
@@ -2548,14 +2532,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
 
 @config_layer('seqconcat')
 class SequenceConcatLayer(LayerBase):
-    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
+    def __init__(self, name, inputs, bias=False, **xargs):
         super(SequenceConcatLayer, self).__init__(
-            name,
-            'seqconcat',
-            0,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqconcat', 0, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
         for input_index in xrange(len(self.inputs)):
@@ -2566,20 +2545,9 @@ class SequenceConcatLayer(LayerBase):
 
 @config_layer('seqreshape')
 class SequenceReshapeLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 size,
-                 inputs,
-                 active_type='linear',
-                 bias=False,
-                 **xargs):
+    def __init__(self, name, size, inputs, bias=False, **xargs):
         super(SequenceReshapeLayer, self).__init__(
-            name,
-            'seqreshape',
-            size,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqreshape', size, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
         self.set_layer_size(size)
@@ -2588,9 +2556,9 @@ class SequenceReshapeLayer(LayerBase):
 
 @config_layer('subseq')
 class SubSequenceLayer(LayerBase):
-    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
+    def __init__(self, name, inputs, bias=False, **xargs):
         super(SubSequenceLayer, self).__init__(
-            name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
+            name, 'subseq', 0, inputs=inputs, **xargs)
         config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
         input_layer0 = self.get_input_layer(0)
         size = input_layer0.size
@@ -2746,11 +2714,10 @@ class AverageLayer(LayerBase):
                  inputs,
                  average_strategy='average',
                  trans_type='non-seq',
-                 active_type='linear',
                  bias=False,
                  **xargs):
         super(AverageLayer, self).__init__(
-            name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
+            name, 'average', 0, inputs=inputs, **xargs)
         self.config.average_strategy = average_strategy
         self.config.trans_type = trans_type
         config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f84b883bc2e..caa474e679a 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1565,10 +1565,12 @@ def expand_layer(input,
 
 
 @wrap_name_default()
+@wrap_act_default(act=IdentityActivation())
 @layer_support()
 def repeat_layer(input,
                  num_repeats,
                  as_row_vector=True,
+                 act=None,
                  name=None,
                  layer_attr=None):
     """
@@ -1599,6 +1601,8 @@ def repeat_layer(input,
                           False for treating input as column vector and repeating
                           in the row direction.
     :type as_row_vector: bool
+    :param act: Activation type.
+    :type act: BaseActivation
     :type name: basestring
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1609,6 +1613,7 @@ def repeat_layer(input,
     l = Layer(
         inputs=[input.name],
         name=name,
+        active_type=act.name,
         num_filters=num_repeats,
         as_row_vector=as_row_vector,
         type=LayerType.FEATURE_MAP_EXPAND_LAYER,
@@ -1617,6 +1622,7 @@ def repeat_layer(input,
         name=name,
         size=l.config.size,
         layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
+        activation=act,
         parents=[input])
 
 
@@ -2873,7 +2879,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None):
 
     .. code-block:: python
 
-        concat = seq_concat_layer(al=layer1, b=layer2)
+        concat = seq_concat_layer(a=layer1, b=layer2)
 
     :param name: Layer name.
     :type name: basestring
@@ -5625,13 +5631,13 @@ def row_conv_layer(input,
     to deploy in an online and low-latency setting. The lookahead convolution
     incorporates information from future subsequences in a computationally
     efficient manner to improve unidirectional recurrent neural networks.
-    
+
     The connection of row convolution is different form the 1D sequence
     convolution. Assumed that, the future context-length is k, that is to say,
     it can get the output at timestep t by using the the input feature from t-th
     timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
     activations are d, the activations r_t for the new layer at time-step t are:
-    
+
     .. math::
 
         r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index c24102255f5..c0e87d6de37 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-export configs=(test_fc layer_activations projections test_print_layer
+export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
 test_sequence_pooling test_lstmemory_layer test_grumemory_layer
 last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 12b2255f3a4..fee0f8e462b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -9,7 +9,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -21,7 +21,7 @@ layers {
   name: "__first_seq_1__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -33,7 +33,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -44,7 +44,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -55,7 +55,7 @@ layers {
   name: "__first_seq_2__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -67,7 +67,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index 64530146a14..712887447d9 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -123,7 +123,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__simple_gru_0__"
   }
@@ -134,7 +134,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__simple_gru_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 79fa4c74f08..b2a00ef225c 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -205,7 +205,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_0__"
   }
@@ -216,7 +216,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index 68fa881b4f1..0d51f70ee01 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -138,7 +138,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__recurrent_layer_0__"
   }
@@ -149,7 +149,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__recurrent_layer_1__"
   }
@@ -161,7 +161,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstmemory_0__"
   }
@@ -172,7 +172,7 @@ layers {
   name: "__first_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstmemory_1__"
   }
@@ -184,7 +184,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_0__"
   }
@@ -195,7 +195,7 @@ layers {
   name: "__first_seq_2__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr
new file mode 100644
index 00000000000..e012386ff95
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr
@@ -0,0 +1,42 @@
+type: "nn"
+layers {
+  name: "data"
+  type: "data"
+  size: 30
+  active_type: ""
+}
+layers {
+  name: "__repeat_layer_0__"
+  type: "featmap_expand"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "data"
+  }
+  num_filters: 10
+}
+layers {
+  name: "__repeat_layer_1__"
+  type: "featmap_expand"
+  size: 300
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "data"
+  }
+  num_filters: 10
+  user_arg: "as_col_vec"
+}
+input_layer_names: "data"
+output_layer_names: "__repeat_layer_0__"
+output_layer_names: "__repeat_layer_1__"
+sub_models {
+  name: "root"
+  layer_names: "data"
+  layer_names: "__repeat_layer_0__"
+  layer_names: "__repeat_layer_1__"
+  input_layer_names: "data"
+  output_layer_names: "__repeat_layer_0__"
+  output_layer_names: "__repeat_layer_1__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index 77b447aa9db..3a3e2c49398 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -91,7 +91,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_forward"
   }
@@ -140,7 +140,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_back"
   }
@@ -190,7 +190,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_subseq_forward"
   }
@@ -280,7 +280,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_0__"
   }
@@ -329,7 +329,7 @@ layers {
   name: "__last_seq_3__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_group_0__"
   }
@@ -378,7 +378,7 @@ layers {
   name: "__last_seq_4__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__fc_layer_0__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
index 91284b4fb32..9d1b41c9d55 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
@@ -27,7 +27,7 @@ layers {
   name: "__seqreshape_0__"
   type: "seqreshape"
   size: 5
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data1"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
index 1999c006d23..5a217f5544a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
@@ -9,7 +9,7 @@ layers {
   name: "__seq_pooling_0__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -19,7 +19,7 @@ layers {
   name: "__seq_pooling_1__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -29,7 +29,7 @@ layers {
   name: "__seq_pooling_2__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -40,7 +40,7 @@ layers {
   name: "__seq_pooling_3__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -51,7 +51,7 @@ layers {
   name: "__seq_pooling_4__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -62,7 +62,7 @@ layers {
   name: "__seq_pooling_5__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -73,7 +73,7 @@ layers {
   name: "__seq_pooling_6__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
-- 
GitLab
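
Usage note: the Python test config registered in file_list.sh (test_repeat_layer)
is not included in this excerpt, only the protostr it generates. The sketch below
is a hypothetical reconstruction of such a config, illustrating the new act
argument of repeat_layer(); the variable name din and the exact layer wiring are
assumptions, not code from this patch.

    from paddle.trainer_config_helpers import *

    # A 30-wide data layer, matching the "data" layer in
    # test_repeat_layer.protostr above.
    din = data_layer(name='data', size=30)

    outputs(
        # act is omitted, so @wrap_act_default supplies IdentityActivation()
        # and the layer serializes with active_type: "".
        repeat_layer(input=din, num_repeats=10),
        # Explicit activation, repeating the input as a column vector; this
        # mirrors the second protostr layer (active_type: "tanh",
        # user_arg: "as_col_vec", size 30 * 10 = 300).
        repeat_layer(
            input=din,
            num_repeats=10,
            as_row_vector=False,
            act=TanhActivation()))

Because repeat_layer() is decorated with @wrap_act_default(act=IdentityActivation()),
act is never None when the body runs, so passing act.name as active_type to the
underlying featmap_expand Layer is always safe.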