diff --git a/doc/api/v2/config/layer.rst b/doc/api/v2/config/layer.rst index 1efa74ecda4170332d96603ca2253c68468474f9..46f81bf9a2d92154e1f90201ac0730fcb5a378d3 100644 --- a/doc/api/v2/config/layer.rst +++ b/doc/api/v2/config/layer.rst @@ -130,7 +130,7 @@ recurrent_group --------------- .. autoclass:: paddle.v2.layer.recurrent_group :noindex: - + lstm_step --------- .. autoclass:: paddle.v2.layer.lstm_step @@ -145,12 +145,12 @@ beam_search ------------ .. autoclass:: paddle.v2.layer.beam_search :noindex: - + get_output ---------- .. autoclass:: paddle.v2.layer.get_output :noindex: - + Mixed Layer =========== @@ -203,7 +203,7 @@ trans_full_matrix_projection ---------------------------- .. autoclass:: paddle.v2.layer.trans_full_matrix_projection :noindex: - + Aggregate Layers ================ @@ -434,10 +434,19 @@ smooth_l1_cost .. autoclass:: paddle.v2.layer.smooth_l1_cost :noindex: -Check Layer +Check Layer ============ eos --- .. autoclass:: paddle.v2.layer.eos :noindex: + +Activation with learnable parameter +=================== + +prelu +-------- +.. autoclass:: paddle.v2.layer.prelu + :noindex: + diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 5d540664a7f56b4fc27ecd5dc46bf36b0268eb98..3712135b69773deab90f22545d517255b769d5df 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -73,7 +73,6 @@ To use this from paddle_trainer, paddle_trainer should be called with --config_args=extension_module_name=[MODULE_NAME] ''' - import copy import logging import os @@ -1731,9 +1730,10 @@ class ParameterReluLayer(LayerBase): def __init__(self, name, inputs, partial_sum=1, **args): super(ParameterReluLayer, self).__init__( name, self.layer_type, 0, inputs=inputs, **args) - config_assert(len(self.inputs) == 1) - config_assert(self.input_layer.size % partial_sum == 0) input_layer = self.get_input_layer(0) + config_assert(len(self.inputs) == 1, "prelu layer has only one input.") + config_assert(input_layer.size % partial_sum == 0, + "a wrong setting for partial_sum") self.set_layer_size(input_layer.size) self.create_input_parameter(0, input_layer.size / partial_sum) diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 5667e5ff2bccd38f2da00a3b17ea8bc8e3a6fb8e..8044c7aa92a94f130863c8e349762e2d0d7799ac 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -31,31 +31,31 @@ except ImportError: import copy __all__ = [ - "full_matrix_projection", - "AggregateLevel", - "ExpandLevel", - "identity_projection", - "dotmul_projection", - "dotmul_operator", - "repeat_layer", - "seq_reshape_layer", - "table_projection", - "mixed_layer", - "data_layer", - "embedding_layer", - "fc_layer", - "grumemory", - "pooling_layer", - "lstmemory", - "last_seq", - "first_seq", - "cos_sim", - "hsigmoid", - "conv_projection", - "mse_cost", - "regression_cost", + 'full_matrix_projection', + 'AggregateLevel', + 'ExpandLevel', + 'identity_projection', + 'dotmul_projection', + 'dotmul_operator', + 'repeat_layer', + 'seq_reshape_layer', + 'table_projection', + 'mixed_layer', + 'data_layer', + 'embedding_layer', + 'fc_layer', + 'grumemory', + 'pooling_layer', + 'lstmemory', + 'last_seq', + 'first_seq', + 'cos_sim', + 'hsigmoid', + 'conv_projection', + 'mse_cost', + 'regression_cost', 'classification_cost', - "LayerOutput", + 'LayerOutput', 'img_conv_layer', 'img_pool_layer', 'batch_norm_layer', @@ -121,6 +121,7 @@ __all__ = [ 'smooth_l1_cost', 'layer_support', 'multiplex_layer', + 'prelu_layer', ] @@ -129,26 +130,26 @@ class LayerType(object): Layer type enumerations. """ - DATA = "data" - MIXED_LAYER = "mixed" - LSTMEMORY = "lstmemory" - GRUMEMORY = "gated_recurrent" - SEQUENCE_LAST_INSTANCE = "seqlastins" - SEQUENCE_FIRST_INSTANCE = "seqfirstins" - SEQUENCE_RESHAPE = "seqreshape" - POOLING_MAX = "max" + DATA = 'data' + MIXED_LAYER = 'mixed' + LSTMEMORY = 'lstmemory' + GRUMEMORY = 'gated_recurrent' + SEQUENCE_LAST_INSTANCE = 'seqlastins' + SEQUENCE_FIRST_INSTANCE = 'seqfirstins' + SEQUENCE_RESHAPE = 'seqreshape' + POOLING_MAX = 'max' POOLING_AVG = 'average' - FC_LAYER = "fc" + FC_LAYER = 'fc' COST = 'cost' COSINE_SIM_VEC = 'cos_vm' COSINE_SIM = 'cos' HSIGMOID = 'hsigmoid' - CONV_LAYER = "conv" - CONVTRANS_LAYER = "convt" - EXCONV_LAYER = "exconv" - EXCONVTRANS_LAYER = "exconvt" - CUDNNCONV_LAYER = "cudnn_conv" - POOL_LAYER = "pool" + CONV_LAYER = 'conv' + CONVTRANS_LAYER = 'convt' + EXCONV_LAYER = 'exconv' + EXCONVTRANS_LAYER = 'exconvt' + CUDNNCONV_LAYER = 'cudnn_conv' + POOL_LAYER = 'pool' BATCH_NORM_LAYER = 'batch_norm' NORM_LAYER = 'norm' SUM_TO_ONE_NORM_LAYER = 'sum_to_one_norm' @@ -177,36 +178,38 @@ class LayerType(object): EOSID_LAYER = 'eos_id' RECURRENT_LAYER = 'recurrent' - CONV_SHIFT_LAYER = "conv_shift" - TENSOR_LAYER = "tensor" - SEL_FC_LAYER = "selective_fc" - SAMPLING_ID_LAYER = "sampling_id" - SLOPE_INTERCEPT_LAYER = "slope_intercept" - LINEAR_COMBINATION_LAYER = "convex_comb" - BLOCK_EXPAND = "blockexpand" - MAXOUT = "maxout" - SPP_LAYER = "spp" - PAD_LAYER = "pad" - MULTIPLEX_LAYER = "multiplex" - - PRINT_LAYER = "print" - PRIORBOX_LAYER = "priorbox" - - CTC_LAYER = "ctc" - WARP_CTC_LAYER = "warp_ctc" - CRF_LAYER = "crf" - CRF_DECODING_LAYER = "crf_decoding" + CONV_SHIFT_LAYER = 'conv_shift' + TENSOR_LAYER = 'tensor' + SEL_FC_LAYER = 'selective_fc' + SAMPLING_ID_LAYER = 'sampling_id' + SLOPE_INTERCEPT_LAYER = 'slope_intercept' + LINEAR_COMBINATION_LAYER = 'convex_comb' + BLOCK_EXPAND = 'blockexpand' + MAXOUT = 'maxout' + SPP_LAYER = 'spp' + PAD_LAYER = 'pad' + MULTIPLEX_LAYER = 'multiplex' + + PRINT_LAYER = 'print' + PRIORBOX_LAYER = 'priorbox' + + CTC_LAYER = 'ctc' + WARP_CTC_LAYER = 'warp_ctc' + CRF_LAYER = 'crf' + CRF_DECODING_LAYER = 'crf_decoding' NCE_LAYER = 'nce' - RANK_COST = "rank-cost" - LAMBDA_COST = "lambda_cost" - HUBER = "huber" - CROSS_ENTROPY = "multi-class-cross-entropy" - CROSS_ENTROPY_WITH_SELFNORM = "multi_class_cross_entropy_with_selfnorm" - SOFT_BIN_CLASS_CROSS_ENTROPY = "soft_binary_class_cross_entropy" - MULTI_BIN_LABEL_CROSS_ENTROPY = "multi_binary_label_cross_entropy" - SUM_COST = "sum_cost" - SMOOTH_L1 = "smooth_l1" + RANK_COST = 'rank-cost' + LAMBDA_COST = 'lambda_cost' + HUBER = 'huber' + CROSS_ENTROPY = 'multi-class-cross-entropy' + CROSS_ENTROPY_WITH_SELFNORM = 'multi_class_cross_entropy_with_selfnorm' + SOFT_BIN_CLASS_CROSS_ENTROPY = 'soft_binary_class_cross_entropy' + MULTI_BIN_LABEL_CROSS_ENTROPY = 'multi_binary_label_cross_entropy' + SUM_COST = 'sum_cost' + SMOOTH_L1 = 'smooth_l1' + + PRELU = 'prelu' @staticmethod def is_layer_type(type_name): @@ -5551,3 +5554,69 @@ def multiplex_layer(input, name=None, layer_attr=None): layer_type=LayerType.MULTIPLEX_LAYER, parents=input, size=l.config.size) + + +@wrap_name_default() +@layer_support() +@wrap_name_default() +@wrap_param_attr_default() +def prelu_layer(input, + name=None, + partial_sum=1, + param_attr=None, + layer_attr=None): + """ + The Parameter Relu activation that actives outputs with a learnable weight. + + Reference: + Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification http://arxiv.org/pdf/1502.01852v1.pdf + + .. math:: + z_i &\\quad if \\quad z_i > 0 \\\\ + a_i * z_i &\\quad \\mathrm{otherwise} + + :param name: Name of this layer. + :type name: basestring + :param input: The input layer. + :type input: LayerOutput + :param partial_sum: this parameter makes a group of inputs share a same weight. + 1. partial_sum = 1 indicates the element-wise activation: + each element has a weight + 2. partial_sum = number of elements in one channel indicates the channel-wise + activation, elements in a channel share a same weight + 3. partial_sum = number of outputs indicates all elements share a same weight + :type int + :param param_attr: The parameter attribute. See ParameterAttribute for details. + :type param_attr: ParameterAttribute|None + :param layer_attr: Extra layer configurations. Default is None. + :type layer_attr: ExtraLayerAttribute|None + :return: LayerOutput object. + :rtype: LayerOutput + """ + + if isinstance(input, collections.Sequence): + assert len(input) == 1, 'prelu_layer only accepts one input' + else: + input = [input] + assert isinstance(input[0], LayerOutput) + + if isinstance(param_attr, collections.Sequence): + assert len(param_attr) == 1, ( + 'because prelu_layer only accepts one input ' + 'it requires only one parameter setting.') + else: + param_attr = [param_attr] + assert isinstance(param_attr[0], ParameterAttribute) + + l = Layer( + name=name, + type='prelu', + inputs=Input(input[0].name, **param_attr[0].attr), + partial_sum=partial_sum, + **ExtraLayerAttribute.to_kwargs(layer_attr)) + return LayerOutput( + name=name, + layer_type=LayerType.PRELU, + parents=input, + size=l.config.size) diff --git a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh index 981ccbf248391b5db4339570d918404df6033f3d..bef14bffaf648b92e608a6a18cd46d57e850550e 100755 --- a/python/paddle/trainer_config_helpers/tests/configs/file_list.sh +++ b/python/paddle/trainer_config_helpers/tests/configs/file_list.sh @@ -5,6 +5,7 @@ last_first_seq test_expand_layer test_ntm_layers test_hsigmoid img_layers img_trans_layers util_layers simple_rnn_layers unused_layers test_cost_layers test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops -test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer) +test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer +test_prelu_layer) export whole_configs=(test_split_datasource) diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr new file mode 100644 index 0000000000000000000000000000000000000000..64d227565f2b21ff43d4391c682ca90c0f47908e --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_prelu_layer.protostr @@ -0,0 +1,36 @@ +type: "nn" +layers { + name: "input" + type: "data" + size: 300 + active_type: "" +} +layers { + name: "__prelu_layer_0__" + type: "prelu" + size: 300 + active_type: "" + inputs { + input_layer_name: "input" + input_parameter_name: "___prelu_layer_0__.w0" + } +} +parameters { + name: "___prelu_layer_0__.w0" + size: 300 + initial_mean: 0.0 + initial_std: 0.057735026919 + initial_strategy: 0 + initial_smart: true +} +input_layer_names: "input" +output_layer_names: "__prelu_layer_0__" +sub_models { + name: "root" + layer_names: "input" + layer_names: "__prelu_layer_0__" + input_layer_names: "input" + output_layer_names: "__prelu_layer_0__" + is_recurrent_layer_group: false +} + diff --git a/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..2e3057f323db22ffc3911cce30ec2e8bb95e3dbe --- /dev/null +++ b/python/paddle/trainer_config_helpers/tests/configs/test_prelu_layer.py @@ -0,0 +1,6 @@ +from paddle.trainer_config_helpers import * + +data = data_layer(name='input', size=300) +prelu = prelu_layer(input=data) + +outputs(prelu)