Commit 8e957df4 authored by luotao1, committed by emailweixu

fix bug in dotmul_operator's api and annotation (#99)

* fix bug in dotmul_operator's api and annotation
* update rnn document
* remove redundant info of projection and operator in layers.py
Parent 98bc889c
@@ -142,12 +142,15 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
.. code-block:: python
group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
                StaticInput(input=encoded_proj, is_seq=True)]
trg_embedding = embedding_layer(
input=data_layer(name='target_language_word',
size=target_dict_dim),
size=word_vector_dim,
param_attr=ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training,
# target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
@@ -156,13 +159,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
# All sequence inputs should have the same length.
decoder = recurrent_group(name=decoder_group_name,
step=gru_decoder_with_attention,
input=[
StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True),
trg_embedding
])
input=group_inputs)
The implementation of the step function is listed below. First, it defines the **memory** of the decoder network. Then it defines attention, the gated recurrent unit step function, and the output function:
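As a rough illustration of what such a step function computes (this is a NumPy sketch with invented names and shapes, not PaddlePaddle's layer-based implementation, which lives in `paddle.trainer_config_helpers`): the decoder memory is scored against the encoded source to form a context vector, the context plus target embedding drive a GRU update, and the new state feeds an output distribution.

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def attention_context(h_prev, encoded):
    # Score every source position against the decoder state, then take
    # the weighted sum of encoder vectors (the "unbounded memory").
    weights = softmax(encoded @ h_prev)   # (src_len,)
    return weights @ encoded              # (hidden,)

def gru_decoder_step(trg_emb, h_prev, encoded, W):
    context = attention_context(h_prev, encoded)
    x = np.concatenate([context, trg_emb])        # step input
    z = sigmoid(W['z'] @ x + W['uz'] @ h_prev)    # update gate
    r = sigmoid(W['r'] @ x + W['ur'] @ h_prev)    # reset gate
    h_cand = np.tanh(W['h'] @ x + W['uh'] @ (r * h_prev))
    h = (1.0 - z) * h_prev + z * h_cand           # new decoder memory
    out = softmax(W['out'] @ h)                   # next-word distribution
    return h, out

# Toy shapes, chosen arbitrarily for the sketch.
rng = np.random.RandomState(0)
hidden, emb, src_len, vocab = 4, 3, 5, 7
W = {k: rng.randn(hidden, hidden + emb) * 0.1 for k in ('z', 'r', 'h')}
W.update({k: rng.randn(hidden, hidden) * 0.1 for k in ('uz', 'ur', 'uh')})
W['out'] = rng.randn(vocab, hidden) * 0.1
h, out = gru_decoder_step(rng.randn(emb), np.zeros(hidden),
                          rng.randn(src_len, hidden), W)
```

In the real config, `recurrent_group` applies one such step per target position and threads the decoder memory between steps automatically.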
@@ -217,10 +214,8 @@ The code is listed below:
.. code-block:: python
gen_inputs = [StaticInput(input=encoded_vector,
is_seq=True),
StaticInput(input=encoded_proj,
is_seq=True), ]
group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
                StaticInput(input=encoded_proj, is_seq=True)]
# In generation, decoder predicts a next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (encoder's output) must be specified by
@@ -231,10 +226,10 @@ The code is listed below:
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
gen_inputs.append(trg_embedding)
group_inputs.append(trg_embedding)
beam_gen = beam_search(name=decoder_group_name,
step=gru_decoder_with_attention,
input=gen_inputs,
input=group_inputs,
id_input=data_layer(name="sent_id",
size=1),
dict_file=trg_dict_path,
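At generation time, :code:`beam_search` drives the same step function: it expands the highest-scoring prefixes each step and retires those that emit the end-of-sequence token. A self-contained toy version of the algorithm (the step-function interface and toy model below are invented for illustration, not PaddlePaddle's API):

```python
import math

def beam_search(next_probs, max_len, beam_size=2, eos=0):
    """Generic beam search over a step function.

    next_probs(prefix) returns {token: prob} for the next token; it stands
    in for one decoder step (the role gru_decoder_with_attention plays in
    the config above).
    """
    beams = [([], 0.0)]  # (token prefix, log-probability)
    finished = []
    for _ in range(max_len):
        candidates = []
        for prefix, logp in beams:
            for tok, p in next_probs(prefix).items():
                candidates.append((prefix + [tok], logp + math.log(p)))
        candidates.sort(key=lambda c: c[1], reverse=True)
        beams = []
        for prefix, logp in candidates[:beam_size]:
            # Prefixes ending in <eos> are done; others survive to the
            # next step.
            (finished if prefix[-1] == eos else beams).append((prefix, logp))
        if not beams:
            break
    return sorted(finished + beams, key=lambda c: c[1], reverse=True)

# Toy model: prefers token 1 for two steps, then always ends.
def toy(prefix):
    return {1: 0.6, 2: 0.3, 0: 0.1} if len(prefix) < 2 else {0: 1.0}

best = beam_search(toy, max_len=4)[0]
# best prefix: [1, 1, 0]
```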
@@ -169,6 +169,12 @@ dotmul_projection
:members: dotmul_projection
:noindex:
dotmul_operator
---------------
.. automodule:: paddle.trainer_config_helpers.layers
:members: dotmul_operator
:noindex:
full_matrix_projection
----------------------
.. automodule:: paddle.trainer_config_helpers.layers
@@ -2464,11 +2464,11 @@ class MixedLayer(LayerBase):
if size != 0:
self.set_layer_size(size)
else:
size = operator.calc_output_size(operator_conf.input_sizes)
if size != 0:
config_assert(size == self.config.size,
sz = operator.calc_output_size(operator_conf.input_sizes)
if sz != 0:
config_assert(sz == self.config.size,
"different inputs have different size: %s vs. %s" %
(size, self.config.size))
(sz, self.config.size))
for input_index in xrange(len(self.inputs)):
input_layer = self.get_input_layer(input_index)
input = self.inputs[input_index]
@@ -286,7 +286,6 @@ def full_matrix_projection(input, size=0, param_attr=None):
size=size,
**param_attr.attr)
proj.origin = input
proj.origin.projection = "matrix"
return proj
@@ -333,7 +332,6 @@ def table_projection(input, size=0, param_attr=None):
size=size,
**param_attr.attr)
proj.origin = input
proj.origin.projection = "table"
return proj
@@ -377,17 +375,15 @@ def identity_projection(input, offset=None):
if offset is None:
proj = IdentityProjection(input_layer_name=input.name)
proj.origin = input
proj.origin.projection = 'identity'
else:
proj = IdentityOffsetProjection(input_layer_name=input.name,
offset=offset)
proj.origin = input
proj.origin.projection = 'identity_offset'
return proj
@wrap_param_attr_default()
def dotmul_projection(input, param_attr=None, scale=1):
def dotmul_projection(input, param_attr=None):
"""
DotMulProjection with a layer as input.
It performs element-wise multiplication with its weight.
@@ -407,30 +403,35 @@ def dotmul_projection(input, param_attr=None, scale=1):
:type input: LayerOutput
:param param_attr: Parameter config, None if use default.
:type param_attr: ParameterAttribute
:param scale: config scalar, default value is one.
:type scale: float
:return: A DotMulProjection Object.
:rtype: DotMulProjection
"""
proj = DotMulProjection(input_layer_name=input.name,
size=input.size,
**param_attr.attr)
proj.origin = input
size=input.size,
**param_attr.attr)
proj.origin = input
return proj
def dotmul_operator(x, y, scale=1):
"""
DotMulOperator takes two inputs and performs element-wise multiplication:
.. math::
out.row[i] += scale * (in1.row[i] .* in2.row[i])
out.row[i] += scale * (x.row[i] .* y.row[i])
where :math:`.*` means element-wise multiplication, and
scale is a config scalar whose default value is one.
The example usage is:
.. code-block:: python
op = dotmul_operator(x, y,
scale=1)
:param input: Input layer
:type input: LayerOutput
op = dotmul_operator(x=layer1, y=layer2, scale=0.5)
:param x: Input layer1
:type x: LayerOutput
:param y: Input layer2
:type y: LayerOutput
:param scale: config scalar, default value is one.
:type scale: float
:return: A DotMulOperator Object.
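The formula in the docstring above is plain element-wise multiplication of two equally sized rows, scaled by a constant. A minimal stand-alone sketch of the semantics (not the PaddlePaddle operator itself):

```python
def dotmul(x_row, y_row, scale=1.0):
    # out.row[i] += scale * (x.row[i] .* y.row[i]): multiply the two rows
    # element by element and scale the result.
    assert len(x_row) == len(y_row), "inputs must have the same size"
    return [scale * a * b for a, b in zip(x_row, y_row)]

row = dotmul([1.0, 2.0, 3.0], [4.0, 5.0, 6.0], scale=0.5)
# row == [2.0, 5.0, 9.0]
```

Unlike `dotmul_projection`, which multiplies one input by a learned weight vector, the operator takes two layer outputs and has no parameters of its own.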
@@ -487,7 +488,6 @@ def context_projection(input, context_len, context_start=None,
trainable_padding=trainable,
**extra_dict)
proj.origin = input
proj.origin.projection = 'context'
return proj
@@ -2728,7 +2728,6 @@ def conv_operator(img, filter, filter_size, num_filters,
stride_y=stride_y,
groups=groups))
op.origin = [img, filter]
op.origin.operator = "conv_op"
return op