diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 686704cb7c9b0bda20101f65b66f1657c9f63770..d45a9b53dcc94f0cfe3d402795c1d0f889853783 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2799,7 +2799,9 @@ def __cost_input__(input, label, weight=None): @wrap_name_default() -def regression_cost(input, label, weight=None, name=None): +@layer_support() +def regression_cost(input, label, weight=None, name=None, + layer_attr=None): """ Regression Layer. @@ -2814,12 +2816,15 @@ def regression_cost(input, label, weight=None, name=None): :param weight: The weight affects the cost, namely the scale of cost. It is an optional argument. :type weight: LayerOutput + :param layer_attr: layer's extra attribute. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ ipts, parents = __cost_input__(input, label, weight) - Layer(inputs=ipts, type="square_error", name=name) + Layer(inputs=ipts, type="square_error", name=name, + **ExtraLayerAttribute.to_kwargs(layer_attr)) return LayerOutput(name, LayerType.COST, parents=parents) @@ -2948,7 +2953,8 @@ def conv_operator(img, filter, filter_size, num_filters, @wrap_name_default() -def conv_shift_layer(a, b, name=None): +@layer_support() +def conv_shift_layer(a, b, name=None, layer_attr=None): """ This layer performs cyclic convolution for two input. For example: - a[in]: contains M elements. @@ -2977,6 +2983,8 @@ def conv_shift_layer(a, b, name=None): :type a: LayerOutput :param b: input layer b :type b: LayerOutput + :param layer_attr: layer's extra attribute. + :type layer_attr: ExtraLayerAttribute :return: LayerOutput object. :rtype: LayerOutput """ @@ -2986,6 +2994,7 @@ def conv_shift_layer(a, b, name=None): name=name, type=LayerType.CONV_SHIFT_LAYER, inputs=[a.name, b.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.CONV_SHIFT_LAYER, parents=[a, b], @@ -3059,6 +3068,7 @@ def tensor_layer(a, b, size, act=None, name=None, @wrap_param_attr_default() @wrap_bias_attr_default() @wrap_act_default() +@layer_support() def selective_fc_layer(input, select, size, act=None, name=None, pass_generation=False, has_selected_colums=True, @@ -3131,7 +3141,8 @@ def selective_fc_layer(input, select, size, act=None, name=None, @wrap_name_default() -def sampling_id_layer(input, name=None): +@layer_support() +def sampling_id_layer(input, name=None, layer_attr=None): """ A layer for sampling id from multinomial distribution from the input layer. Sampling one id for one sample. @@ -3146,6 +3157,8 @@ def sampling_id_layer(input, name=None): :type input: LayerOutput :param name: The Layer Name. :type name: basestring + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3153,12 +3166,15 @@ def sampling_id_layer(input, name=None): name=name, type=LayerType.SAMPLING_ID_LAYER, inputs=[Input(input.name)], + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.SAMPLING_ID_LAYER, input) @wrap_name_default() -def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0): +@layer_support() +def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0, + layer_attr=None): """ This layer for applying a slope and an intercept to the input element-wise. There is no activation and weight. @@ -3180,6 +3196,8 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0): :type slope: float. :param intercept: the offset. :type intercept: float. + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3189,12 +3207,15 @@ def slope_intercept_layer(input, name=None, slope=1.0, intercept=0.0): slope=slope, intercept=intercept, inputs=[Input(input.name)], + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.SLOPE_INTERCEPT_LAYER, input) @wrap_name_default() -def linear_comb_layer(weights, vectors, size=None, name=None): +@layer_support() +def linear_comb_layer(weights, vectors, size=None, name=None, + layer_attr=None): """ A layer for weighted sum of vectors takes two inputs. - Input: size of weights is M @@ -3235,6 +3256,8 @@ def linear_comb_layer(weights, vectors, size=None, name=None): :type size: int :param name: The Layer Name. :type name: basestring + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3250,6 +3273,7 @@ def linear_comb_layer(weights, vectors, size=None, name=None): type=LayerType.LINEAR_COMBINATION_LAYER, size=size, inputs=[Input(weights.name), Input(vectors.name)], + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.LINEAR_COMBINATION_LAYER, [weights, vectors], size=size) @@ -3259,6 +3283,7 @@ convex_comb_layer = linear_comb_layer @wrap_name_default() +@layer_support() def block_expand_layer(input, channel=0, block_x=0, @@ -3267,7 +3292,8 @@ def block_expand_layer(input, stride_y=0, padding_x=0, padding_y=0, - name=None): + name=None, + layer_attr=None): """ Expand feature map to minibatch matrix. - matrix width is: block_y * block_x * channel @@ -3314,6 +3340,8 @@ def block_expand_layer(input, :type padding_y: int :param name: The name of this layer, which can not specify. :type name: None|basestring. + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3328,13 +3356,16 @@ def block_expand_layer(input, padding_y=padding_y) ), type=LayerType.BLOCK_EXPAND, + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.BLOCK_EXPAND, parents=[input]) @wrap_name_default() -def ctc_layer(input, label, size=None, name=None, norm_by_times=False): +@layer_support() +def ctc_layer(input, label, size=None, name=None, norm_by_times=False, + layer_attr=None): """ Connectionist Temporal Classification (CTC) is designed for temporal classication task. That is, for sequence labeling problems where the @@ -3371,6 +3402,8 @@ def ctc_layer(input, label, size=None, name=None, norm_by_times=False): :type name: basestring|None :param norm_by_times: Whether to normalization by times. False by default. :type norm_by_times: bool + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3386,14 +3419,17 @@ def ctc_layer(input, label, size=None, name=None, norm_by_times=False): type=LayerType.CTC_LAYER, size=size, norm_by_times=norm_by_times, - inputs=[input.name, label.name] + inputs=[input.name, label.name], + **ExtraLayerAttribute.to_kwargs(layer_attr) ) return LayerOutput(name, LayerType.CTC_LAYER, [input, label], size=size) @wrap_name_default() @wrap_param_attr_default() -def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None): +@layer_support() +def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None, + layer_attr=None): """ A layer for calculating the cost of sequential conditional random field model. @@ -3419,6 +3455,8 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None): :type param_attr: ParameterAttribute :param name: The name of this layers. It is not necessary. :type name: None|basestring + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3442,6 +3480,7 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None): type=LayerType.CRF_LAYER, size=size, inputs=ipts, + **ExtraLayerAttribute.to_kwargs(layer_attr) ) parents = [input, label] if weight is not None: @@ -3451,7 +3490,9 @@ def crf_layer(input, label, size=None, weight=None, param_attr=None, name=None): @wrap_name_default() @wrap_param_attr_default() -def crf_decoding_layer(input, size, label=None, param_attr=None, name=None): +@layer_support() +def crf_decoding_layer(input, size, label=None, param_attr=None, name=None, + layer_attr=None): """ A layer for calculating the decoding sequence of sequential conditional random field model. The decoding sequence is stored in output.ids. @@ -3469,6 +3510,8 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None): :type param_attr: ParameterAttribute :param name: The name of this layers. It is not necessary. :type name: None|basestring + :param layer_attr: Extra Layer config. + :type layer_attr: ExtraLayerAttribute|None :return: LayerOutput object. :rtype: LayerOutput """ @@ -3485,6 +3528,7 @@ def crf_decoding_layer(input, size, label=None, param_attr=None, name=None): type=LayerType.CRF_DECODING_LAYER, size=size, inputs=ipts, + **ExtraLayerAttribute.to_kwargs(layer_attr) ) parents = [input] if label is not None: @@ -3575,7 +3619,8 @@ following are cost Layers. @wrap_name_default() -def rank_cost(left, right, label, weight=None, name=None, coeff=1.0): +@layer_support() +def rank_cost(left, right, label, weight=None, name=None, coeff=1.0, layer_attr=None): """ A cost Layer for learning to rank using gradient descent. Details can refer to `papers