import paddle.v2 as paddle
import sys
import gzip


def seq2seq_net(source_dict_dim,
                target_dict_dim,
                word_vector_dim=620,
                rnn_hidden_size=1000,
                beam_size=1,
                max_length=50,
                is_generating=False):
    """
    Define the network structure of NMT, including encoder and decoder.

    :param source_dict_dim: size of source dictionary
    :type source_dict_dim: int
    :param target_dict_dim: size of target dictionary
    :type target_dict_dim: int
    :param word_vector_dim: size of source language word embedding
    :type word_vector_dim: int
    :param rnn_hidden_size: size of hidden state of encoder and decoder RNN
    :type rnn_hidden_size: int
    :param beam_size: expansion width in each step when generating
    :type beam_size: int
    :param max_length: max iteration number in generation
    :type max_length: int
    :param is_generating: whether to generate sequences (True) or to train (False)
    :type is_generating: bool
    """

    decoder_size = encoder_size = rnn_hidden_size

    src_word_id = paddle.layer.data(
        name="source_language_word",
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    src_embedding = paddle.layer.embedding(
        input=src_word_id, size=word_vector_dim)

    # use bidirectional_gru as the encoder
    encoded_vector = paddle.networks.bidirectional_gru(
        input=src_embedding,
        size=encoder_size,
        fwd_act=paddle.activation.Tanh(),
        fwd_gate_act=paddle.activation.Sigmoid(),
        bwd_act=paddle.activation.Tanh(),
        bwd_gate_act=paddle.activation.Sigmoid(),
        return_seq=True)
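    # A note on shapes (my reading of paddle.networks.bidirectional_gru):
    # with return_seq=True it returns the whole sequence of hidden states,
    # concatenating the forward and backward states at each time step, so
    # every step has width 2 * encoder_size.
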
    #### Decoder
    # The last hidden state of the encoder, projected to decoder_size
    # through a tanh fully connected layer, initializes the decoder's
    # hidden state.
    encoder_last = paddle.layer.last_seq(input=encoded_vector)
    encoder_last_projected = paddle.layer.fc(size=decoder_size,
                                             act=paddle.activation.Tanh(),
                                             input=encoder_last)

    # gru step
    def gru_decoder_without_attention(enc_vec, current_word):
        """
        Step function for gru decoder

        :param enc_vec: encoded vector of source language
        :type enc_vec: layer object
        :param current_word: current input of decoder
        :type current_word: layer object
        """
        decoder_mem = paddle.layer.memory(
            name="gru_decoder",
            size=decoder_size,
            boot_layer=encoder_last_projected)

        context = paddle.layer.last_seq(input=enc_vec)

        # Project [context, current_word] to decoder_size * 3: the GRU
        # step consumes one decoder_size slice each for the update gate,
        # the reset gate, and the candidate hidden state.
        decoder_inputs = paddle.layer.fc(size=decoder_size * 3,
                                         input=[context, current_word])

        gru_step = paddle.layer.gru_step(
            name="gru_decoder",
            act=paddle.activation.Tanh(),
            gate_act=paddle.activation.Sigmoid(),
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)

        # A softmax over the target dictionary yields the next-word
        # distribution for this time step.
        out = paddle.layer.fc(size=target_dict_dim,
                              bias_attr=True,
                              act=paddle.activation.Softmax(),
                              input=gru_step)
        return out

    # StaticInput exposes the full encoder output to every time step of
    # the decoder as a read-only input.
    group_input1 = paddle.layer.StaticInput(input=encoded_vector)
    group_inputs = [group_input1]

    decoder_group_name = "decoder_group"
    if is_generating:
        # In generation mode, the input at each time step is the embedding
        # of the word predicted at the previous step; GeneratedInput ties
        # this lookup to the same "_target_language_embedding" parameter
        # that is learned during training.
        trg_embedding = paddle.layer.GeneratedInput(
            size=target_dict_dim,
            embedding_name="_target_language_embedding",
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)

        # Beam search keeps beam_size candidates per step, starting from
        # the begin-of-sequence token (bos_id=0) and stopping when the
        # end-of-sequence token (eos_id=1) is produced or max_length is
        # reached.
        beam_gen = paddle.layer.beam_search(
            name=decoder_group_name,
            step=gru_decoder_without_attention,
            input=group_inputs,
            bos_id=0,
            eos_id=1,
            beam_size=beam_size,
            max_length=max_length)

        return beam_gen
    else:
        # In training mode, the target words are read from the data layer
        # and embedded with the "_target_language_embedding" parameter
        # shared with the generation network.
        trg_embedding = paddle.layer.embedding(
            input=paddle.layer.data(
                name="target_language_word",
                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
            size=word_vector_dim,
            param_attr=paddle.attr.ParamAttr(name="_target_language_embedding"))
        group_inputs.append(trg_embedding)

        decoder = paddle.layer.recurrent_group(
            name=decoder_group_name,
            step=gru_decoder_without_attention,
            input=group_inputs)

        # The label at each time step is the next word of the target
        # sentence; training minimizes the cross-entropy classification
        # cost between the decoder output and this label.
        lbl = paddle.layer.data(
            name="target_language_next_word",
            type=paddle.data_type.integer_value_sequence(target_dict_dim))
        cost = paddle.layer.classification_cost(input=decoder, label=lbl)

        return cost
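
# A minimal usage sketch (the dictionary sizes below are hypothetical;
# the data readers and the training loop are assumed to live elsewhere):
#
#   paddle.init(use_gpu=False, trainer_count=1)
#   # build the training network
#   cost = seq2seq_net(source_dict_dim=30000, target_dict_dim=30000)
#   parameters = paddle.parameters.create(cost)
#   # build the generation network
#   beam_gen = seq2seq_net(source_dict_dim=30000, target_dict_dim=30000,
#                          beam_size=3, is_generating=True)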