import paddle.v2 as paddle


def seq2seq_net(source_dict_dim,
                target_dict_dim,
                word_vector_dim=620,
                rnn_hidden_size=1000,
                beam_size=1,
                max_length=50,
                is_generating=False):
    """
    Define the network structure of NMT, including encoder and decoder.

    :param source_dict_dim: size of source dictionary
    :type source_dict_dim: int
    :param target_dict_dim: size of target dictionary
    :type target_dict_dim: int
    :param word_vector_dim: size of source language word embedding
    :type word_vector_dim: int
    :param rnn_hidden_size: size of hidden state of encoder and decoder RNN
    :type rnn_hidden_size: int
    :param beam_size: expansion width in each step when generating
    :type beam_size: int
    :param max_length: maximum number of decoding steps during generation
    :type max_length: int
    :param is_generating: whether the network is used to generate sequences or to train
    :type generating: bool
    """

    decoder_size = encoder_size = rnn_hidden_size

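    #### Encoder
    # Input layer that reads the source sentence as a sequence of word ids.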
    src_word_id = paddle.layer.data(
        name="source_language_word",
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    src_embedding = paddle.layer.embedding(
        input=src_word_id, size=word_vector_dim)

    # use bidirectional_gru as the encoder
    encoded_vector = paddle.networks.bidirectional_gru(
        input=src_embedding,
        size=encoder_size,
        fwd_act=paddle.activation.Tanh(),
        fwd_gate_act=paddle.activation.Sigmoid(),
        bwd_act=paddle.activation.Tanh(),
        bwd_gate_act=paddle.activation.Sigmoid(),
        return_seq=True)
    #### Decoder
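    # The encoder's final state, projected through a fully connected layer,
    # is used as the decoder's initial (boot) hidden state.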
    encoder_last = paddle.layer.last_seq(input=encoded_vector)
    encoder_last_projected = paddle.layer.fc(
        size=decoder_size, act=paddle.activation.Tanh(), input=encoder_last)

    # single step of the GRU decoder (without attention)
    def gru_decoder_without_attention(enc_vec, current_word):
        """
        Step function for gru decoder

        :param enc_vec: encoded vector of source language
        :type enc_vec: layer object
        :param current_word: current input of decoder
        :type current_word: layer object
        """
        decoder_mem = paddle.layer.memory(
            name="gru_decoder",
            size=decoder_size,
            boot_layer=encoder_last_projected)

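        # Without an attention mechanism, the context is fixed: the last
        # element of the encoded source sequence is used at every step.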
        context = paddle.layer.last_seq(input=enc_vec)

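        # Project [context, current_word] to 3 * decoder_size, the width
        # gru_step expects (update gate, reset gate, candidate state).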
        decoder_inputs = paddle.layer.fc(
            size=decoder_size * 3, input=[context, current_word])

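        # The shared name "gru_decoder" ties this step layer to the memory
        # declared above, so the hidden state is carried between time steps.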
        gru_step = paddle.layer.gru_step(
            name="gru_decoder",
            act=paddle.activation.Tanh(),
            gate_act=paddle.activation.Sigmoid(),
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)

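        # Softmax over the target vocabulary gives the next-word distribution.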
        out = paddle.layer.fc(
            size=target_dict_dim,
            bias_attr=True,
            act=paddle.activation.Softmax(),
            input=gru_step)
        return out

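    # The encoded source sequence is provided to every decoder step as a
    # read-only (static) input.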
    group_input1 = paddle.layer.StaticInput(input=encoded_vector)
    group_inputs = [group_input1]

    decoder_group_name = "decoder_group"
    if is_generating:
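        # In generation mode the decoder feeds its own predictions back in:
        # GeneratedInput looks up the embedding of the word predicted at the
        # previous step, reusing the embedding parameter learned in training.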
        trg_embedding = paddle.layer.GeneratedInput(
            size=target_dict_dim,
            embedding_name="_target_language_embedding",
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)

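        # Beam search starts from the start-of-sequence token (id 0) and stops
        # at the end-of-sequence token (id 1) or after max_length steps,
        # keeping beam_size candidates at each step.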
        beam_gen = paddle.layer.beam_search(
            name=decoder_group_name,
            step=gru_decoder_without_attention,
            input=group_inputs,
            bos_id=0,
            eos_id=1,
            beam_size=beam_size,
            max_length=max_length)

        return beam_gen
    else:
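        # In training mode the decoder is fed the ground-truth target words
        # (teacher forcing); the embedding parameter is given an explicit name
        # so that the generation network can reuse it.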
        trg_embedding = paddle.layer.embedding(
            input=paddle.layer.data(
                name="target_language_word",
                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
            size=word_vector_dim,
            param_attr=paddle.attr.ParamAttr(name="_target_language_embedding"))
        group_inputs.append(trg_embedding)

        decoder = paddle.layer.recurrent_group(
            name=decoder_group_name,
            step=gru_decoder_without_attention,
            input=group_inputs)

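        # The label is the target sentence shifted by one word, so the cost is
        # the per-step next-word classification error.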
        lbl = paddle.layer.data(
            name="target_language_next_word",
            type=paddle.data_type.integer_value_sequence(target_dict_dim))
        cost = paddle.layer.classification_cost(input=decoder, label=lbl)

        return cost
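

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only): the dictionary sizes below are
    # placeholder values, not part of the original configuration; in practice
    # they come from the data-preparation step.
    paddle.init(use_gpu=False, trainer_count=1)

    # Training network: returns the per-step next-word classification cost.
    cost = seq2seq_net(source_dict_dim=30000, target_dict_dim=30000)
    parameters = paddle.parameters.create(cost)

    # For generation, call the same function with is_generating=True (and an
    # optional beam_size); it then returns the beam-search layer instead:
    #     beam_gen = seq2seq_net(30000, 30000, beam_size=3,
    #                            is_generating=True)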