Commit a3123e21 authored by Cao Ying, committed by GitHub

Merge pull request #2412 from lcy-seso/add_config_helper_for_prelu

add configuration helper for prelu layer.
......@@ -130,7 +130,7 @@ recurrent_group
---------------
.. autoclass:: paddle.v2.layer.recurrent_group
:noindex:
lstm_step
---------
.. autoclass:: paddle.v2.layer.lstm_step
......@@ -145,12 +145,12 @@ beam_search
------------
.. autoclass:: paddle.v2.layer.beam_search
:noindex:
get_output
----------
.. autoclass:: paddle.v2.layer.get_output
:noindex:
Mixed Layer
===========
......@@ -203,7 +203,7 @@ trans_full_matrix_projection
----------------------------
.. autoclass:: paddle.v2.layer.trans_full_matrix_projection
:noindex:
Aggregate Layers
================
......@@ -434,10 +434,19 @@ smooth_l1_cost
.. autoclass:: paddle.v2.layer.smooth_l1_cost
:noindex:
Check Layer
Check Layer
============
eos
---
.. autoclass:: paddle.v2.layer.eos
:noindex:
Activation with learnable parameter
===================================
prelu
--------
.. autoclass:: paddle.v2.layer.prelu
:noindex:
......@@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with
--config_args=extension_module_name=[MODULE_NAME]
'''
import copy
import logging
import os
......@@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase):
def __init__(self, name, inputs, partial_sum=1, **args):
super(ParameterReluLayer, self).__init__(
name, self.layer_type, 0, inputs=inputs, **args)
config_assert(len(self.inputs) == 1)
config_assert(self.input_layer.size % partial_sum == 0)
input_layer = self.get_input_layer(0)
config_assert(len(self.inputs) == 1, "prelu layer accepts only one input.")
config_assert(input_layer.size % partial_sum == 0,
"partial_sum must evenly divide the input size.")
self.set_layer_size(input_layer.size)
self.create_input_parameter(0, input_layer.size / partial_sum)
......
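For reference, here is a minimal sketch (plain Python, with a hypothetical helper name) of the size bookkeeping this constructor performs: the layer keeps the input width, while the learnable slope parameter holds one weight per group of partial_sum elements.

.. code-block:: python

    def prelu_param_size(input_size, partial_sum=1):
        # Mirrors the config_assert above: partial_sum must evenly
        # divide the width of the single input layer.
        assert input_size % partial_sum == 0
        # The layer output keeps the input width (set_layer_size); the
        # slope parameter stores one weight per group of partial_sum
        # elements (create_input_parameter).
        return input_size // partial_sum

    # e.g. a 300-wide input keeps 300 slopes with partial_sum=1 and a
    # single shared slope with partial_sum=300.
    assert prelu_param_size(300, 1) == 300
    assert prelu_param_size(300, 300) == 1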
......@@ -31,31 +31,31 @@ except ImportError:
import copy
__all__ = [
"full_matrix_projection",
"AggregateLevel",
"ExpandLevel",
"identity_projection",
"dotmul_projection",
"dotmul_operator",
"repeat_layer",
"seq_reshape_layer",
"table_projection",
"mixed_layer",
"data_layer",
"embedding_layer",
"fc_layer",
"grumemory",
"pooling_layer",
"lstmemory",
"last_seq",
"first_seq",
"cos_sim",
"hsigmoid",
"conv_projection",
"mse_cost",
"regression_cost",
'full_matrix_projection',
'AggregateLevel',
'ExpandLevel',
'identity_projection',
'dotmul_projection',
'dotmul_operator',
'repeat_layer',
'seq_reshape_layer',
'table_projection',
'mixed_layer',
'data_layer',
'embedding_layer',
'fc_layer',
'grumemory',
'pooling_layer',
'lstmemory',
'last_seq',
'first_seq',
'cos_sim',
'hsigmoid',
'conv_projection',
'mse_cost',
'regression_cost',
'classification_cost',
"LayerOutput",
'LayerOutput',
'img_conv_layer',
'img_pool_layer',
'batch_norm_layer',
......@@ -121,6 +121,7 @@ __all__ = [
'smooth_l1_cost',
'layer_support',
'multiplex_layer',
'prelu_layer',
]
......@@ -129,26 +130,26 @@ class LayerType(object):
Layer type enumerations.
"""
DATA = "data"
MIXED_LAYER = "mixed"
LSTMEMORY = "lstmemory"
GRUMEMORY = "gated_recurrent"
SEQUENCE_LAST_INSTANCE = "seqlastins"
SEQUENCE_FIRST_INSTANCE = "seqfirstins"
SEQUENCE_RESHAPE = "seqreshape"
POOLING_MAX = "max"
DATA = 'data'
MIXED_LAYER = 'mixed'
LSTMEMORY = 'lstmemory'
GRUMEMORY = 'gated_recurrent'
SEQUENCE_LAST_INSTANCE = 'seqlastins'
SEQUENCE_FIRST_INSTANCE = 'seqfirstins'
SEQUENCE_RESHAPE = 'seqreshape'
POOLING_MAX = 'max'
POOLING_AVG = 'average'
FC_LAYER = "fc"
FC_LAYER = 'fc'
COST = 'cost'
COSINE_SIM_VEC = 'cos_vm'
COSINE_SIM = 'cos'
HSIGMOID = 'hsigmoid'
CONV_LAYER = "conv"
CONVTRANS_LAYER = "convt"
EXCONV_LAYER = "exconv"
EXCONVTRANS_LAYER = "exconvt"
CUDNNCONV_LAYER = "cudnn_conv"
POOL_LAYER = "pool"
CONV_LAYER = 'conv'
CONVTRANS_LAYER = 'convt'
EXCONV_LAYER = 'exconv'
EXCONVTRANS_LAYER = 'exconvt'
CUDNNCONV_LAYER = 'cudnn_conv'
POOL_LAYER = 'pool'
BATCH_NORM_LAYER = 'batch_norm'
NORM_LAYER = 'norm'
SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm'
......@@ -177,36 +178,38 @@ class LayerType(object):
EOSID_LAYER = 'eos_id'
RECURRENT_LAYER = 'recurrent'
CONV_SHIFT_LAYER = "conv_shift"
TENSOR_LAYER = "tensor"
SEL_FC_LAYER = "selective_fc"
SAMPLING_ID_LAYER = "sampling_id"
SLOPE_INTERCEPT_LAYER = "slope_intercept"
LINEAR_COMBINATION_LAYER = "convex_comb"
BLOCK_EXPAND = "blockexpand"
MAXOUT = "maxout"
SPP_LAYER = "spp"
PAD_LAYER = "pad"
MULTIPLEX_LAYER = "multiplex"
PRINT_LAYER = "print"
PRIORBOX_LAYER = "priorbox"
CTC_LAYER = "ctc"
WARP_CTC_LAYER = "warp_ctc"
CRF_LAYER = "crf"
CRF_DECODING_LAYER = "crf_decoding"
CONV_SHIFT_LAYER = 'conv_shift'
TENSOR_LAYER = 'tensor'
SEL_FC_LAYER = 'selective_fc'
SAMPLING_ID_LAYER = 'sampling_id'
SLOPE_INTERCEPT_LAYER = 'slope_intercept'
LINEAR_COMBINATION_LAYER = 'convex_comb'
BLOCK_EXPAND = 'blockexpand'
MAXOUT = 'maxout'
SPP_LAYER = 'spp'
PAD_LAYER = 'pad'
MULTIPLEX_LAYER = 'multiplex'
PRINT_LAYER = 'print'
PRIORBOX_LAYER = 'priorbox'
CTC_LAYER = 'ctc'
WARP_CTC_LAYER = 'warp_ctc'
CRF_LAYER = 'crf'
CRF_DECODING_LAYER = 'crf_decoding'
NCE_LAYER = 'nce'
RANK_COST = "rank-cost"
LAMBDA_COST = "lambda_cost"
HUBER = "huber"
CROSS_ENTROPY = "multi-class-cross-entropy"
CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm"
SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy"
MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy"
SUM_COST = "sum_cost"
SMOOTH_L1 = "smooth_l1"
RANK_COST = 'rank-cost'
LAMBDA_COST = 'lambda_cost'
HUBER = 'huber'
CROSS_ENTROPY = 'multi-class-cross-entropy'
CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm'
SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy'
MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy'
SUM_COST = 'sum_cost'
SMOOTH_L1 = 'smooth_l1'
PRELU = 'prelu'
@staticmethod
def is_layer_type(type_name):
......@@ -4722,7 +4725,7 @@ def ctc_layer(input,
fc_layer with softmax activation, should be num_classes + 1. The size of ctc_layer
should also be num_classes + 1.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4809,7 +4812,7 @@ def warp_ctc_layer(input,
- As a native 'softmax' activation is integrated into the warp-ctc library,
a 'linear' activation is expected in the 'input' layer instead.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4870,7 +4873,7 @@ def crf_layer(input,
A layer for calculating the cost of a sequential conditional random
field model.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -4944,7 +4947,7 @@ def crf_decoding_layer(input,
this layer will also calculate error. output.value[i] is 1 for incorrect
decoding or 0 for correct decoding.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5137,7 +5140,7 @@ def rank_cost(left,
- :math:`o_i` and :math:`o_j`: the left output and right output.
Their dimension is one.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5194,7 +5197,7 @@ def lambda_cost(input,
"""
lambdaCost for lambdaRank LTR approach.
The simple usage:
The example usage is:
.. code-block:: python
......@@ -5252,6 +5255,8 @@ def cross_entropy(input,
"""
A loss layer for multi-class cross entropy.
The example usage is:
.. code-block:: python
cost = cross_entropy(input=input_layer,
......@@ -5298,6 +5303,8 @@ def cross_entropy_with_selfnorm(input,
A loss layer for multi-class cross entropy with selfnorm.
Input should be a vector of positive numbers, without normalization.
The example usage is:
.. code-block:: python
cost = cross_entropy_with_selfnorm(input=input_layer,
......@@ -5339,6 +5346,8 @@ def sum_cost(input, name=None, layer_attr=None):
"""
A loss layer which calculates the sum of the input as the loss.
The example usage is:
.. code-block:: python
cost = sum_cost(input=input_layer)
......@@ -5368,6 +5377,8 @@ def huber_cost(input, label, name=None, coeff=1.0, layer_attr=None):
"""
A loss layer for Huber loss.
The example usage is:
.. code-block:: python
cost = huber_cost(input=input_layer,
......@@ -5408,6 +5419,8 @@ def multi_binary_label_cross_entropy(input,
"""
A loss layer for multi binary label cross entropy.
The example usage is:
.. code-block:: python
cost = multi_binary_label_cross_entropy(input=input_layer,
......@@ -5467,6 +5480,8 @@ def smooth_l1_cost(input, label, name=None, coeff=1.0, layer_attr=None):
More details can be found by referring to `Fast R-CNN
<https://arxiv.org/pdf/1504.08083v2.pdf>`_
The example usage is:
.. code-block:: python
cost = smooth_l1_cost(input=input_layer,
......@@ -5516,6 +5531,8 @@ def multiplex_layer(input, name=None, layer_attr=None):
where y is the output, :math:`x_{k}` is the k-th input layer, and
:math:`k = x_{0}[i] + 1`.
The example usage is:
.. code-block:: python
maxid = multiplex_layer(input=layers)
......@@ -5548,3 +5565,64 @@ def multiplex_layer(input, name=None, layer_attr=None):
layer_type=LayerType.MULTIPLEX_LAYER,
parents=input,
size=l.config.size)
@wrap_name_default()
@layer_support()
@wrap_param_attr_default()
def prelu_layer(input,
name=None,
partial_sum=1,
param_attr=None,
layer_attr=None):
"""
The Parametric ReLU activation, which scales negative inputs with a learnable weight.
Reference:
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf
.. math::
\\mathrm{prelu}(z_i) = \\begin{cases} z_i, & \\mathrm{if} \\quad z_i > 0 \\\\ a_i * z_i, & \\mathrm{otherwise} \\end{cases}
The example usage is:
.. code-block:: python
prelu = prelu_layer(input=layers, partial_sum=1)
:param name: Name of this layer.
:type name: basestring
:param input: The input layer.
:type input: LayerOutput
:param partial_sum: this parameter makes a group of elements in the input share the same weight.
- partial_sum = 1: element-wise activation, each element has its own weight.
- partial_sum = number of elements in one channel: channel-wise activation, all elements in a channel share the same weight.
- partial_sum = number of outputs: all elements share the same weight.
:type partial_sum: int
:param param_attr: The parameter attribute. See ParameterAttribute for details.
:type param_attr: ParameterAttribute|None
:param layer_attr: Extra layer configurations. Default is None.
:type layer_attr: ExtraLayerAttribute|None
:return: LayerOutput object.
:rtype: LayerOutput
"""
assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
assert isinstance(param_attr, ParameterAttribute)
l = Layer(
name=name,
type=LayerType.PRELU,
inputs=Input(input.name, **param_attr.attr),
partial_sum=partial_sum,
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name=name,
layer_type=LayerType.PRELU,
parents=input,
size=l.config.size)
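As a rough sketch of how the three sharing modes described for partial_sum above map onto concrete calls (the layer name, variable names, and the 3x8x8 input shape below are hypothetical):

.. code-block:: python

    from paddle.trainer_config_helpers import *

    # A hypothetical 3-channel 8x8 image flattened to 192 elements.
    img = data_layer(name='image', size=192)

    # Element-wise: every element owns its slope (192 weights).
    prelu_elem = prelu_layer(input=img, partial_sum=1)

    # Channel-wise: the 64 elements of one channel share a slope (3 weights).
    prelu_chan = prelu_layer(input=img, partial_sum=64)

    # Fully shared: all 192 elements use one slope (1 weight).
    prelu_all = prelu_layer(input=img, partial_sum=192)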
......@@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers
test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer)
test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
test_prelu_layer)
export whole_configs=(test_split_datasource)
type: "nn"
layers {
name: "input"
type: "data"
size: 300
active_type: ""
}
layers {
name: "__prelu_layer_0__"
type: "prelu"
size: 300
active_type: ""
inputs {
input_layer_name: "input"
input_parameter_name: "___prelu_layer_0__.w0"
}
}
parameters {
name: "___prelu_layer_0__.w0"
size: 300
initial_mean: 0.0
initial_std: 0.057735026919
initial_strategy: 0
initial_smart: true
}
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
sub_models {
name: "root"
layer_names: "input"
layer_names: "__prelu_layer_0__"
input_layer_names: "input"
output_layer_names: "__prelu_layer_0__"
is_recurrent_layer_group: false
}
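As a side note, the initial_std printed above appears to be 1/sqrt(300) for the 300-wide slope parameter; a quick check (plain Python, assuming that heuristic):

.. code-block:: python

    import math

    # 1 / sqrt(300) = 0.05773502691896258, matching initial_std above.
    print(1.0 / math.sqrt(300))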
from paddle.trainer_config_helpers import *
data = data_layer(name='input', size=300)
prelu = prelu_layer(input=data)
outputs(prelu)