Merge pull request #2412 from lcy-seso/add_config_helper_for_prelu

add configuration helper for prelu layer.

Merge pull request #2412 from lcy-seso/add_config_helper_for_prelu
add configuration helper for prelu layer.
a3123e21 · Cao Ying · GitHub · f703b5b4 · 99661481 · a3123e21
6 changed files
--- a/doc/api/v2/config/layer.rst
+++ b/doc/api/v2/config/layer.rst
@@ -130,7 +130,7 @@ recurrent_group
 ---------------
 ..  autoclass:: paddle.v2.layer.recurrent_group
    :noindex:
 lstm_step
 ---------
 ..  autoclass:: paddle.v2.layer.lstm_step
@@ -145,12 +145,12 @@ beam_search
 ------------
 ..  autoclass:: paddle.v2.layer.beam_search
    :noindex:
 get_output
 ----------
 ..  autoclass:: paddle.v2.layer.get_output
    :noindex:
 Mixed Layer
 ===========
@@ -203,7 +203,7 @@ trans_full_matrix_projection
 ----------------------------
 ..  autoclass:: paddle.v2.layer.trans_full_matrix_projection
    :noindex:
 Aggregate Layers
 ================
@@ -434,10 +434,19 @@ smooth_l1_cost
 ..  autoclass:: paddle.v2.layer.smooth_l1_cost
    :noindex:
-Check Layer 
+Check Layer
 ============
 eos
 ---
 ..  autoclass:: paddle.v2.layer.eos
    :noindex:
+Activation with learnable parameter
+===================================
+prelu
+--------
+..  autoclass:: paddle.v2.layer.prelu
+    :noindex:
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with
 --config_args=extension_module_name=[MODULE_NAME]
 '''
 import copy
 import logging
 import os
@@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase):
    def __init__(self, name, inputs, partial_sum=1, **args):
        super(ParameterReluLayer, self).__init__(
            name, self.layer_type, 0, inputs=inputs, **args)
-        config_assert(len(self.inputs) == 1)
-        config_assert(self.input_layer.size % partial_sum == 0)
        input_layer = self.get_input_layer(0)
+        config_assert(len(self.inputs) == 1, "prelu layer has only one input.")
+        config_assert(input_layer.size % partial_sum == 0,
+                      "a wrong setting for partial_sum")
        self.set_layer_size(input_layer.size)
        self.create_input_parameter(0, input_layer.size / partial_sum)

--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -31,31 +31,31 @@ except ImportError:
 import copy
 __all__ = [
-    "full_matrix_projection",
+    'full_matrix_projection',
-    "AggregateLevel",
+    'AggregateLevel',
-    "ExpandLevel",
+    'ExpandLevel',
-    "identity_projection",
+    'identity_projection',
-    "dotmul_projection",
+    'dotmul_projection',
-    "dotmul_operator",
+    'dotmul_operator',
-    "repeat_layer",
+    'repeat_layer',
-    "seq_reshape_layer",
+    'seq_reshape_layer',
-    "table_projection",
+    'table_projection',
-    "mixed_layer",
+    'mixed_layer',
-    "data_layer",
+    'data_layer',
-    "embedding_layer",
+    'embedding_layer',
-    "fc_layer",
+    'fc_layer',
-    "grumemory",
+    'grumemory',
-    "pooling_layer",
+    'pooling_layer',
-    "lstmemory",
+    'lstmemory',
-    "last_seq",
+    'last_seq',
-    "first_seq",
+    'first_seq',
-    "cos_sim",
+    'cos_sim',
-    "hsigmoid",
+    'hsigmoid',
-    "conv_projection",
+    'conv_projection',
-    "mse_cost",
+    'mse_cost',
-    "regression_cost",
+    'regression_cost',
    'classification_cost',
-    "LayerOutput",
+    'LayerOutput',
    'img_conv_layer',
    'img_pool_layer',
    'batch_norm_layer',
@@ -121,6 +121,7 @@ __all__ = [
    'smooth_l1_cost',
    'layer_support',
    'multiplex_layer',
+    'prelu_layer',
 ]
@@ -129,26 +130,26 @@ class LayerType(object):
    Layer type enumerations.
    """
-    DATA = "data"
+    DATA = 'data'
-    MIXED_LAYER = "mixed"
+    MIXED_LAYER = 'mixed'
-    LSTMEMORY = "lstmemory"
+    LSTMEMORY = 'lstmemory'
-    GRUMEMORY = "gated_recurrent"
+    GRUMEMORY = 'gated_recurrent'
-    SEQUENCE_LAST_INSTANCE = "seqlastins"
+    SEQUENCE_LAST_INSTANCE = 'seqlastins'
-    SEQUENCE_FIRST_INSTANCE = "seqfirstins"
+    SEQUENCE_FIRST_INSTANCE = 'seqfirstins'
-    SEQUENCE_RESHAPE = "seqreshape"
+    SEQUENCE_RESHAPE = 'seqreshape'
-    POOLING_MAX = "max"
+    POOLING_MAX = 'max'
    POOLING_AVG = 'average'
-    FC_LAYER = "fc"
+    FC_LAYER = 'fc'
    COST = 'cost'
    COSINE_SIM_VEC = 'cos_vm'
    COSINE_SIM = 'cos'
    HSIGMOID = 'hsigmoid'
-    CONV_LAYER = "conv"
+    CONV_LAYER = 'conv'
-    CONVTRANS_LAYER = "convt"
+    CONVTRANS_LAYER = 'convt'
-    EXCONV_LAYER = "exconv"
+    EXCONV_LAYER = 'exconv'
-    EXCONVTRANS_LAYER = "exconvt"
+    EXCONVTRANS_LAYER = 'exconvt'
-    CUDNNCONV_LAYER = "cudnn_conv"
+    CUDNNCONV_LAYER = 'cudnn_conv'
-    POOL_LAYER = "pool"
+    POOL_LAYER = 'pool'
    BATCH_NORM_LAYER = 'batch_norm'
    NORM_LAYER = 'norm'
    SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
@@ -177,36 +178,38 @@ class LayerType(object):
    EOSID_LAYER = 'eos_id'
    RECURRENT_LAYER = 'recurrent'
-    CONV_SHIFT_LAYER = "conv_shift"
+    CONV_SHIFT_LAYER = 'conv_shift'
-    TENSOR_LAYER = "tensor"
+    TENSOR_LAYER = 'tensor'
-    SEL_FC_LAYER = "selective_fc"
+    SEL_FC_LAYER = 'selective_fc'
-    SAMPLING_ID_LAYER = "sampling_id"
+    SAMPLING_ID_LAYER = 'sampling_id'
-    SLOPE_INTERCEPT_LAYER = "slope_intercept"
+    SLOPE_INTERCEPT_LAYER = 'slope_intercept'
-    LINEAR_COMBINATION_LAYER = "convex_comb"
+    LINEAR_COMBINATION_LAYER = 'convex_comb'
-    BLOCK_EXPAND = "blockexpand"
+    BLOCK_EXPAND = 'blockexpand'
-    MAXOUT = "maxout"
+    MAXOUT = 'maxout'
-    SPP_LAYER = "spp"
+    SPP_LAYER = 'spp'
-    PAD_LAYER = "pad"
+    PAD_LAYER = 'pad'
-    MULTIPLEX_LAYER = "multiplex"
+    MULTIPLEX_LAYER = 'multiplex'
-    PRINT_LAYER = "print"
+    PRINT_LAYER = 'print'
-    PRIORBOX_LAYER = "priorbox"
+    PRIORBOX_LAYER = 'priorbox'
-    CTC_LAYER = "ctc"
+    CTC_LAYER = 'ctc'
-    WARP_CTC_LAYER = "warp_ctc"
+    WARP_CTC_LAYER = 'warp_ctc'
-    CRF_LAYER = "crf"
+    CRF_LAYER = 'crf'
-    CRF_DECODING_LAYER = "crf_decoding"
+    CRF_DECODING_LAYER = 'crf_decoding'
    NCE_LAYER = 'nce'
-    RANK_COST = "rank-cost"
+    RANK_COST = 'rank-cost'
-    LAMBDA_COST = "lambda_cost"
+    LAMBDA_COST = 'lambda_cost'
-    HUBER = "huber"
+    HUBER = 'huber'
-    CROSS_ENTROPY = "multi-class-cross-entropy"
+    CROSS_ENTROPY = 'multi-class-cross-entropy'
-    CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
+    CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
-    SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
+    SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
-    MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
+    MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
-    SUM_COST = "sum_cost"
+    SUM_COST = 'sum_cost'
-    SMOOTH_L1 = "smooth_l1"
+    SMOOTH_L1 = 'smooth_l1'
+    PRELU = 'prelu'
    @staticmethod
    def is_layer_type(type_name):
@@ -4722,7 +4725,7 @@ def ctc_layer(input,
        fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
        should also be num_classes + 1.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -4809,7 +4812,7 @@ def warp_ctc_layer(input,
        - As a native 'softmax' activation is interated to the warp-ctc library,
          'linear' activation is expected instead in the 'input' layer.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -4870,7 +4873,7 @@ def crf_layer(input,
    A layer for calculating the cost of sequential conditional random
    field model.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -4944,7 +4947,7 @@ def crf_decoding_layer(input,
    this layer will also calculate error. output.value[i] is 1 for incorrect
    decoding or 0 for correct decoding.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -5137,7 +5140,7 @@ def rank_cost(left,
      - :math:`o_i` and :math:`o_j`: the left output and right output.
        Their dimension is one.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -5194,7 +5197,7 @@ def lambda_cost(input,
    """
    lambdaCost for lambdaRank LTR approach.
-    The simple usage:
+    The example usage is:
    .. code-block:: python
@@ -5252,6 +5255,8 @@ def cross_entropy(input,
    """
    A loss layer for multi class entropy.
+    The example usage is:
    .. code-block:: python
       cost = cross_entropy(input=input_layer,
@@ -5298,6 +5303,8 @@ def cross_entropy_with_selfnorm(input,
    A loss layer for multi class entropy with selfnorm.
    Input should be a vector of positive numbers, without normalization.
+    The example usage is:
    .. code-block:: python
       cost = cross_entropy_with_selfnorm(input=input_layer,
@@ -5339,6 +5346,8 @@ def sum_cost(input, name=None, layer_attr=None):
    """
    A loss layer which calculate the sum of the input as loss
+    The example usage is:
    .. code-block:: python
       cost = sum_cost(input=input_layer)
@@ -5368,6 +5377,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
    """
    A loss layer for huber loss.
+    The example usage is:
    .. code-block:: python
       cost = huber_cost(input=input_layer,
@@ -5408,6 +5419,8 @@ def multi_binary_label_cross_entropy(input,
    """
    A loss layer for multi binary label cross entropy.
+    The example usage is:
    .. code-block:: python
       cost = multi_binary_label_cross_entropy(input=input_layer,
@@ -5467,6 +5480,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
    More details can be found by referring to `Fast R-CNN
    <https://arxiv.org/pdf/1504.08083v2.pdf>`_
+    The example usage is:
    .. code-block:: python
       cost = smooth_l1_cost(input=input_layer,
@@ -5516,6 +5531,8 @@ def multiplex_layer(input, name=None, layer_attr=None):
    where, y is output. :math:`x_{k}` is the k-th input layer and
    :math:`k = x_{0}[i] + 1`.
+    The example usage is:
    .. code-block:: python
       maxid = multiplex_layer(input=layers)
@@ -5548,3 +5565,64 @@ def multiplex_layer(input, name=None, layer_attr=None):
        layer_type=LayerType.MULTIPLEX_LAYER,
        parents=input,
        size=l.config.size)
+@layer_support()
+@wrap_name_default()
+@wrap_param_attr_default()
+def prelu_layer(input,
+                name=None,
+                partial_sum=1,
+                param_attr=None,
+                layer_attr=None):
+    """
+    The Parametric ReLU (PReLU) activation, which activates its outputs with a
+    learnable weight.
+    Reference:
+        Delving Deep into Rectifiers: Surpassing Human-Level Performance on
+        ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
+    .. math::
+       z_i &\\quad if \\quad z_i > 0 \\\\
+       a_i * z_i  &\\quad \\mathrm{otherwise}
+    The example usage is:
+    .. code-block:: python
+       prelu = prelu_layer(input=layers, partial_sum=1)
+    :param name: Name of this layer.
+    :type name: basestring
+    :param input: The input layer.
+    :type input: LayerOutput
+    :param partial_sum: This parameter makes a group of inputs share the same weight.
+        - partial_sum = 1 indicates element-wise activation: each element has its own weight.
+        - partial_sum = number of elements in one channel indicates channel-wise activation: the elements in a channel share the same weight.
+        - partial_sum = number of outputs indicates that all elements share the same weight.
+    :type partial_sum: int
+    :param param_attr: The parameter attribute. See ParameterAttribute for details.
+    :type param_attr: ParameterAttribute|None
+    :param layer_attr: Extra layer configurations. Default is None.
+    :type layer_attr: ExtraLayerAttribute|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+    # Exactly one input layer is supported, so a list of layers is rejected.
+    assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
+    # NOTE(review): a None param_attr is presumably replaced with a default
+    # ParameterAttribute by wrap_param_attr_default before this point -- confirm.
+    assert isinstance(param_attr, ParameterAttribute)
+    l = Layer(
+        name=name,
+        type=LayerType.PRELU,
+        inputs=Input(input.name, **param_attr.attr),
+        partial_sum=partial_sum,
+        **ExtraLayerAttribute.to_kwargs(layer_attr))
+    # The output size is read back from the generated layer config.
+    return LayerOutput(
+        name=name,
+        layer_type=LayerType.PRELU,
+        parents=input,
+        size=l.config.size)
--- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
+++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh
@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
 img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
 test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
-test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer)
+test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
+test_prelu_layer)
 export whole_configs=(test_split_datasource)
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 300
+  active_type: ""
+}
+layers {
+  name: "__prelu_layer_0__"
+  type: "prelu"
+  size: 300
+  active_type: ""
+  inputs {
+    input_layer_name: "input"
+    input_parameter_name: "___prelu_layer_0__.w0"
+  }
+}
+parameters {
+  name: "___prelu_layer_0__.w0"
+  size: 300
+  initial_mean: 0.0
+  initial_std: 0.057735026919
+  initial_strategy: 0
+  initial_smart: true
+}
+input_layer_names: "input"
+output_layer_names: "__prelu_layer_0__"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__prelu_layer_0__"
+  input_layer_names: "input"
+  output_layer_names: "__prelu_layer_0__"
+  is_recurrent_layer_group: false
+}
--- a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
+++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py
+from paddle.trainer_config_helpers import *
+data = data_layer(name='input', size=300)
+prelu = prelu_layer(input=data)
+outputs(prelu)