From 8e957df4b2f74cdf6587f4877c26de6d71894d6d Mon Sep 17 00:00:00 2001 From: luotao1 Date: Thu, 22 Sep 2016 12:31:31 +0800 Subject: [PATCH] fix bug in dotmul_operator's api and annotation (#99) * fix bug in dotmul_operator's api and annotation * update rnn document * remove redundant info of projection and operator in layers.py --- doc/algorithm/rnn/rnn.rst | 23 +++++-------- doc/ui/api/trainer_config_helpers/layers.rst | 6 ++++ python/paddle/trainer/config_parser.py | 8 ++--- .../paddle/trainer_config_helpers/layers.py | 33 +++++++++---------- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/doc/algorithm/rnn/rnn.rst b/doc/algorithm/rnn/rnn.rst index 9653ddbf371..4753db450b7 100644 --- a/doc/algorithm/rnn/rnn.rst +++ b/doc/algorithm/rnn/rnn.rst @@ -142,12 +142,15 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`: .. code-block:: python - + group_inputs=[StaticInput(input=encoded_vector,is_seq=True), + StaticInput(input=encoded_proj,is_seq=True)] trg_embedding = embedding_layer( input=data_layer(name='target_language_word', size=target_dict_dim), size=word_vector_dim, param_attr=ParamAttr(name='_target_language_embedding')) + group_inputs.append(trg_embedding) + # For decoder equipped with attention mechanism, in training, # target embedding (the groudtruth) is the data input, # while encoded source sequence is accessed to as an unbounded memory. @@ -156,13 +159,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network. # All sequence inputs should have the same length. 
decoder = recurrent_group(name=decoder_group_name, step=gru_decoder_with_attention, - input=[ - StaticInput(input=encoded_vector, - is_seq=True), - StaticInput(input=encoded_proj, - is_seq=True), - trg_embedding - ]) + input=group_inputs) The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function: @@ -217,10 +214,8 @@ The code is listed below: .. code-block:: python - gen_inputs = [StaticInput(input=encoded_vector, - is_seq=True), - StaticInput(input=encoded_proj, - is_seq=True), ] + group_inputs=[StaticInput(input=encoded_vector,is_seq=True), + StaticInput(input=encoded_proj,is_seq=True)] # In generation, decoder predicts a next target word based on # the encoded source sequence and the last generated target word. # The encoded source sequence (encoder's output) must be specified by @@ -231,10 +226,10 @@ The code is listed below: size=target_dict_dim, embedding_name='_target_language_embedding', embedding_size=word_vector_dim) - gen_inputs.append(trg_embedding) + group_inputs.append(trg_embedding) beam_gen = beam_search(name=decoder_group_name, step=gru_decoder_with_attention, - input=gen_inputs, + input=group_inputs, id_input=data_layer(name="sent_id", size=1), dict_file=trg_dict_path, diff --git a/doc/ui/api/trainer_config_helpers/layers.rst b/doc/ui/api/trainer_config_helpers/layers.rst index f902d1c995b..c1d7a7ce815 100644 --- a/doc/ui/api/trainer_config_helpers/layers.rst +++ b/doc/ui/api/trainer_config_helpers/layers.rst @@ -169,6 +169,12 @@ dotmul_projection :members: dotmul_projection :noindex: +dotmul_operator +--------------- +.. automodule:: paddle.trainer_config_helpers.layers + :members: dotmul_operator + :noindex: + full_matrix_projection ---------------------- .. 
automodule:: paddle.trainer_config_helpers.layers diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 79b85214228..a57e9065c6f 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2464,11 +2464,11 @@ class MixedLayer(LayerBase): if size != 0: self.set_layer_size(size) else: - size = operator.calc_output_size(operator_conf.input_sizes) - if size != 0: - config_assert(size == self.config.size, + sz = operator.calc_output_size(operator_conf.input_sizes) + if sz != 0: + config_assert(sz == self.config.size, "different inputs have different size: %s vs. %s" % - (size, self.config.size)) + (sz, self.config.size)) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) input = self.inputs[input_index] diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 9963b381340..8b7cabf2fad 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -286,7 +286,6 @@ def full_matrix_projection(input, size=0, param_attr=None): size=size, **param_attr.attr) proj.origin = input - proj.origin.projection = "matrix" return proj @@ -333,7 +332,6 @@ def table_projection(input, size=0, param_attr=None): size=size, **param_attr.attr) proj.origin = input - proj.origin.projection = "table" return proj @@ -377,17 +375,15 @@ def identity_projection(input, offset=None): if offset is None: proj = IdentityProjection(input_layer_name=input.name) proj.origin = input - proj.origin.projection = 'identity' else: proj = IdentityOffsetProjection(input_layer_name=input.name, offset=offset) proj.origin = input - proj.origin.projection = 'identity_offset' return proj @wrap_param_attr_default() -def dotmul_projection(input, param_attr=None, scale=1): +def dotmul_projection(input, param_attr=None): """ DotMulProjection with a layer as input. 
It performs element-wise multiplication with weight. @@ -407,30 +403,35 @@ def dotmul_projection(input, param_attr=None, scale=1): :type input: LayerOutput :param param_attr: Parameter config, None if use default. :type param_attr: ParameterAttribute - :param scale: config scalar, default value is one. - :type scale: float :return: A DotMulProjection Object. :rtype: DotMulProjection """ proj = DotMulProjection(input_layer_name=input.name, - size=input.size, - **param_attr.attr) - proj.origin = input + size=input.size, + **param_attr.attr) + proj.origin = input return proj def dotmul_operator(x, y, scale=1): """ DotMulOperator takes two inputs and performs element-wise multiplication: + .. math:: - out.row[i] += scale * (in1.row[i] .* in2.row[i]) + out.row[i] += scale * (x.row[i] .* y.row[i]) + where :math:`.*` means element-wise multiplication, and scale is a config scalar, its default value is one. + The example usage is: + .. code-block:: python - op = dotmul_operator(x, y, - scale=1) - :param input: Input layer - :type input: LayerOutput + + op = dotmul_operator(x=layer1, y=layer2, scale=0.5) + + :param x: Input layer1 + :type x: LayerOutput + :param y: Input layer2 + :type y: LayerOutput :param scale: config scalar, default value is one. :type scale: float :return: A DotMulOperator Object. @@ -487,7 +488,6 @@ def context_projection(input, context_len, context_start=None, trainable_padding=trainable, **extra_dict) proj.origin = input - proj.origin.projection = 'context' return proj @@ -2728,7 +2728,6 @@ def conv_operator(img, filter, filter_size, num_filters, stride_y=stride_y, groups=groups)) op.origin = [img, filter] - op.origin.operator = "conv_op" return op -- GitLab