From 226f810352db3211bdecaece8be1e189cbbba713 Mon Sep 17 00:00:00 2001
From: xuwei06
Date: Fri, 16 Jun 2017 09:59:41 -0700
Subject: [PATCH] Add activation for repeat_layer

Also remove the active_type argument for many layers in config_parser.py
because it is automatically handled by LayerBase.
---
 python/paddle/trainer/config_parser.py        | 57 ++++---------
 .../paddle/trainer_config_helpers/layers.py   | 12 +++-
 .../tests/configs/file_list.sh                |  2 +-
 .../configs/protostr/last_first_seq.protostr  | 12 ++--
 .../configs/protostr/shared_gru.protostr      |  4 +-
 .../configs/protostr/shared_lstm.protostr     |  4 +-
 .../protostr/simple_rnn_layers.protostr       | 12 ++--
 .../protostr/test_repeat_layer.protostr       | 42 ++++++++++++++
 .../configs/protostr/test_rnn_group.protostr  | 12 ++--
 .../protostr/test_seq_concat_reshape.protostr |  2 +-
 .../protostr/test_sequence_pooling.protostr   | 14 ++---
 11 files changed, 94 insertions(+), 79 deletions(-)
 create mode 100644 python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index 8e3c3241623..86f091ab59d 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -1949,7 +1949,6 @@ class BatchNormLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 active_type="linear",
                  bias=True,
                  use_global_stats=True,
                  moving_average_fraction=0.9,
@@ -1987,12 +1986,7 @@ class BatchNormLayer(LayerBase):
             cudnn_version >= 4007
         self.layer_type = "cudnn_batch_norm" if use_cudnn else "batch_norm"
         super(BatchNormLayer, self).__init__(
-            name,
-            self.layer_type,
-            0,
-            active_type=active_type,
-            inputs=inputs,
-            **xargs)
+            name, self.layer_type, 0, inputs=inputs, **xargs)
 
         if use_global_stats is not None:
             self.config.use_global_stats = use_global_stats
@@ -2431,12 +2425,12 @@ class FeatMapExpandLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 device=None,
                  num_filters=None,
                  as_row_vector=True,
-                 bias=False):
+                 bias=False,
+                 **xargs):
         super(FeatMapExpandLayer, self).__init__(
-            name, 'featmap_expand', 0, inputs=inputs, device=device)
+            name, 'featmap_expand', 0, inputs=inputs, **xargs)
         config_assert(
             len(self.inputs) == 1, 'ExpandLayer takes 1 and only 1 inputs')
         if num_filters is not None:
@@ -2454,14 +2448,12 @@ class MaxLayer(LayerBase):
                  name,
                  inputs,
                  trans_type='non-seq',
-                 active_type='linear',
                  bias=False,
                  output_max_index=None,
                  **xargs):
         super(MaxLayer, self).__init__(name, 'max', 0, inputs=inputs, **xargs)
         config_assert(len(self.inputs) == 1, 'MaxLayer must have 1 input')
         self.config.trans_type = trans_type
-        self.config.active_type = active_type
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             self.set_layer_size(input_layer.size)
@@ -2503,18 +2495,12 @@ class SequenceLastInstanceLayer(LayerBase):
     def __init__(self,
                  name,
                  inputs,
-                 active_type='linear',
                  trans_type='non-seq',
                  bias=False,
                  stride=-1,
                  **xargs):
         super(SequenceLastInstanceLayer, self).__init__(
-            name,
-            'seqlastins',
-            0,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqlastins', 0, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceLastInstanceLayer must have 1 input')
         if trans_type == 'seq':
@@ -2530,7 +2516,6 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
     def __init__(self,
                  name,
                  inputs,
-                 active_type='linear',
                  trans_type='non-seq',
                  bias=False,
                  stride=-1,
                  **xargs):
         super(SequenceFirstInstanceLayer, self).__init__(
             name,
             inputs=inputs,
-            active_type=active_type,
             trans_type=trans_type,
             bias=bias,
             stride=stride,
             **xargs)
@@ -2548,14 +2532,9 @@ class SequenceFirstInstanceLayer(SequenceLastInstanceLayer):
 
 @config_layer('seqconcat')
 class SequenceConcatLayer(LayerBase):
-    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
+    def __init__(self, name, inputs, bias=False, **xargs):
         super(SequenceConcatLayer, self).__init__(
-            name,
-            'seqconcat',
-            0,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqconcat', 0, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 2, 'SequenceConcatLayer must have 2 inputs')
         for input_index in xrange(len(self.inputs)):
@@ -2566,20 +2545,9 @@ class SequenceConcatLayer(LayerBase):
 
 @config_layer('seqreshape')
 class SequenceReshapeLayer(LayerBase):
-    def __init__(self,
-                 name,
-                 size,
-                 inputs,
-                 active_type='linear',
-                 bias=False,
-                 **xargs):
+    def __init__(self, name, size, inputs, bias=False, **xargs):
         super(SequenceReshapeLayer, self).__init__(
-            name,
-            'seqreshape',
-            size,
-            inputs=inputs,
-            active_type=active_type,
-            **xargs)
+            name, 'seqreshape', size, inputs=inputs, **xargs)
         config_assert(
             len(inputs) == 1, 'SequenceReshapeLayer must have 1 inputs')
         self.set_layer_size(size)
@@ -2588,9 +2556,9 @@ class SequenceReshapeLayer(LayerBase):
 
 @config_layer('subseq')
 class SubSequenceLayer(LayerBase):
-    def __init__(self, name, inputs, active_type='linear', bias=False, **xargs):
+    def __init__(self, name, inputs, bias=False, **xargs):
         super(SubSequenceLayer, self).__init__(
-            name, 'subseq', 0, inputs=inputs, active_type=active_type, **xargs)
+            name, 'subseq', 0, inputs=inputs, **xargs)
         config_assert(len(inputs) == 3, 'SubSequenceLayer must have 3 inputs')
         input_layer0 = self.get_input_layer(0)
         size = input_layer0.size
@@ -2746,11 +2714,10 @@ class AverageLayer(LayerBase):
                  inputs,
                  average_strategy='average',
                  trans_type='non-seq',
-                 active_type='linear',
                  bias=False,
                  **xargs):
         super(AverageLayer, self).__init__(
-            name, 'average', 0, inputs=inputs, active_type=active_type, **xargs)
+            name, 'average', 0, inputs=inputs, **xargs)
         self.config.average_strategy = average_strategy
         self.config.trans_type = trans_type
         config_assert(len(inputs) == 1, 'AverageLayer must have 1 input')
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index f84b883bc2e..caa474e679a 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -1565,10 +1565,12 @@ def expand_layer(input,
 
 
 @wrap_name_default()
+@wrap_act_default(act=IdentityActivation())
 @layer_support()
 def repeat_layer(input,
                  num_repeats,
                  as_row_vector=True,
+                 act=None,
                  name=None,
                  layer_attr=None):
     """
@@ -1599,6 +1601,8 @@ def repeat_layer(input,
                           False for treating input as column vector and repeating
                           in the row direction.
     :type as_row_vector: bool
+    :param act: Activation type.
+    :type act: BaseActivation
     :type name: basestring
     :param layer_attr: extra layer attributes.
     :type layer_attr: ExtraLayerAttribute.
@@ -1609,6 +1613,7 @@ def repeat_layer(input,
     l = Layer(
         inputs=[input.name],
         name=name,
+        active_type=act.name,
         num_filters=num_repeats,
         as_row_vector=as_row_vector,
         type=LayerType.FEATURE_MAP_EXPAND_LAYER,
@@ -1617,6 +1622,7 @@ def repeat_layer(input,
         name=name,
         size=l.config.size,
         layer_type=LayerType.FEATURE_MAP_EXPAND_LAYER,
+        activation=act,
         parents=[input])
 
 
@@ -2873,7 +2879,7 @@ def seq_concat_layer(a, b, act=None, name=None, layer_attr=None):
 
     .. code-block:: python
 
-        concat = seq_concat_layer(al=layer1, b=layer2)
+        concat = seq_concat_layer(a=layer1, b=layer2)
 
     :param name: Layer name.
     :type name: basestring
@@ -5625,13 +5631,13 @@ def row_conv_layer(input,
     to deploy in an online and low-latency setting. The lookahead convolution
     incorporates information from future subsequences in a computationally
     efficient manner to improve unidirectional recurrent neural networks.
-    
+
     The connection of row convolution is different form the 1D sequence
     convolution. Assumed that, the future context-length is k, that is to say,
     it can get the output at timestep t by using the the input feature from t-th
     timestep to (t+k+1)-th timestep. Assumed that the hidden dim of input
     activations are d, the activations r_t for the new layer at time-step t are:
-    
+
     .. math::
 
         r_{t,r} = \sum_{j=1}^{k + 1} {w_{i,j}h_{t+j-1, i}}
diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
index c24102255f5..c0e87d6de37 100755
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-export configs=(test_fc layer_activations projections test_print_layer
+export configs=(test_repeat_layer test_fc layer_activations projections test_print_layer
 test_sequence_pooling test_lstmemory_layer test_grumemory_layer
 last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
index 12b2255f3a4..fee0f8e462b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/last_first_seq.protostr
@@ -9,7 +9,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -21,7 +21,7 @@ layers {
   name: "__first_seq_1__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -33,7 +33,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -44,7 +44,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -55,7 +55,7 @@ layers {
   name: "__first_seq_2__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
@@ -67,7 +67,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 30
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
index 64530146a14..712887447d9 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_gru.protostr
@@ -123,7 +123,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__simple_gru_0__"
   }
@@ -134,7 +134,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__simple_gru_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
index 79fa4c74f08..b2a00ef225c 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/shared_lstm.protostr
@@ -205,7 +205,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_0__"
   }
@@ -216,7 +216,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
index 68fa881b4f1..0d51f70ee01 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/simple_rnn_layers.protostr
@@ -138,7 +138,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__recurrent_layer_0__"
   }
@@ -149,7 +149,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__recurrent_layer_1__"
   }
@@ -161,7 +161,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstmemory_0__"
   }
@@ -172,7 +172,7 @@ layers {
   name: "__first_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstmemory_1__"
   }
@@ -184,7 +184,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_0__"
   }
@@ -195,7 +195,7 @@ layers {
   name: "__first_seq_2__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_1__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr
new file mode 100644
index 00000000000..e012386ff95
--- /dev/null
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_repeat_layer.protostr
@@ -0,0 +1,42 @@
+type: "nn"
+layers {
+  name: "data"
+  type: "data"
+  size: 30
+  active_type: ""
+}
+layers {
+  name: "__repeat_layer_0__"
+  type: "featmap_expand"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "data"
+  }
+  num_filters: 10
+}
+layers {
+  name: "__repeat_layer_1__"
+  type: "featmap_expand"
+  size: 300
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "data"
+  }
+  num_filters: 10
+  user_arg: "as_col_vec"
+}
+input_layer_names: "data"
+output_layer_names: "__repeat_layer_0__"
+output_layer_names: "__repeat_layer_1__"
+sub_models {
+  name: "root"
+  layer_names: "data"
+  layer_names: "__repeat_layer_0__"
+  layer_names: "__repeat_layer_1__"
+  input_layer_names: "data"
+  output_layer_names: "__repeat_layer_0__"
+  output_layer_names: "__repeat_layer_1__"
+  is_recurrent_layer_group: false
+}
+
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
index 77b447aa9db..3a3e2c49398 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_rnn_group.protostr
@@ -91,7 +91,7 @@ layers {
   name: "__last_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_forward"
   }
@@ -140,7 +140,7 @@ layers {
   name: "__first_seq_0__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_back"
   }
@@ -190,7 +190,7 @@ layers {
   name: "__last_seq_1__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "rnn_subseq_forward"
   }
@@ -280,7 +280,7 @@ layers {
   name: "__last_seq_2__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__lstm_group_0__"
   }
@@ -329,7 +329,7 @@ layers {
   name: "__last_seq_3__"
   type: "seqlastins"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__gru_group_0__"
   }
@@ -378,7 +378,7 @@ layers {
   name: "__last_seq_4__"
   type: "seqlastins"
   size: 200
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "__fc_layer_0__"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
index 91284b4fb32..9d1b41c9d55 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_seq_concat_reshape.protostr
@@ -27,7 +27,7 @@ layers {
   name: "__seqreshape_0__"
   type: "seqreshape"
   size: 5
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "data1"
   }
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
index 1999c006d23..5a217f5544a 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_sequence_pooling.protostr
@@ -9,7 +9,7 @@ layers {
   name: "__seq_pooling_0__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -19,7 +19,7 @@ layers {
   name: "__seq_pooling_1__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -29,7 +29,7 @@ layers {
   name: "__seq_pooling_2__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -40,7 +40,7 @@ layers {
   name: "__seq_pooling_3__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -51,7 +51,7 @@ layers {
   name: "__seq_pooling_4__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -62,7 +62,7 @@ layers {
   name: "__seq_pooling_5__"
   type: "average"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
@@ -73,7 +73,7 @@ layers {
   name: "__seq_pooling_6__"
   type: "max"
   size: 100
-  active_type: "linear"
+  active_type: ""
   inputs {
     input_layer_name: "dat_in"
   }
-- 
GitLab
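
Usage note: the Python test config registered in file_list.sh (test_repeat_layer)
is not included in this excerpt, only the protostr it generates. The sketch below
is a hypothetical reconstruction of such a config, illustrating the new act
argument of repeat_layer(); the variable name din and the exact layer wiring are
assumptions, not code from this patch.

    from paddle.trainer_config_helpers import *

    # A 30-wide data layer, matching the "data" layer in
    # test_repeat_layer.protostr above.
    din = data_layer(name='data', size=30)

    outputs(
        # act is omitted, so @wrap_act_default supplies IdentityActivation()
        # and the layer serializes with active_type: "".
        repeat_layer(input=din, num_repeats=10),
        # Explicit activation, repeating the input as a column vector; this
        # mirrors the second protostr layer (active_type: "tanh",
        # user_arg: "as_col_vec", size 30 * 10 = 300).
        repeat_layer(
            input=din,
            num_repeats=10,
            as_row_vector=False,
            act=TanhActivation()))

Because repeat_layer() is decorated with @wrap_act_default(act=IdentityActivation()),
act is never None when the body runs, so passing act.name as active_type to the
underlying featmap_expand Layer is always safe.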