# test_machine_translation.py
import sys

import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor

# Vocabulary size: used for both embedding tables, for the decoder's softmax
# width (via target_dict_dim) and passed to the WMT14 dataset helpers.
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
# NOTE(review): this call runs at import time; src_dict / trg_dict are not
# referenced by the code visible in this file -- confirm whether still needed.
src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)

# Base width of the encoder/decoder layers (the encoder uses hidden_dim * 4).
hidden_dim = 32
# Second dimension of the embedding tables (size=[dict_size, word_dim]).
word_dim = 16
# Forwarded as `is_sparse` to both embedding layers.
IS_SPARSE = True
# Number of samples per mini-batch handed to paddle.batch.
batch_size = 10
# NOTE(review): the three values below are not referenced by the code visible
# in this file; presumably reserved for decoding/beam search -- confirm.
max_length = 50
topk_size = 50
trg_dic_size = 10000

# Width of the decoder's recurrent fully-connected layer.
decoder_size = hidden_dim


def encoder_decoder():
    """Build the encoder-decoder network in the default Fluid program.

    Encoder: the ``src_word_id`` data layer (int64, lod_level=1) is embedded,
    passed through a tanh fully-connected layer and a dynamic LSTM, and the
    last step of the LSTM hidden output is kept as the encoder summary.

    Decoder: the ``target_language_word`` data layer is embedded and fed
    step by step to a ``DynamicRNN`` whose memory is initialised with the
    encoder summary. Each step applies a tanh fully-connected layer over the
    current word and the memory, then a softmax layer of width
    ``target_dict_dim``.

    Returns:
        The value of calling the ``DynamicRNN`` object, i.e. the per-step
        softmax outputs registered through ``rnn.output``.
    """
    # encoder
    src_word_id = layers.data(
        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
    # Both embedding layers use the parameter name 'vemb'; presumably this
    # makes source and target share one embedding table -- confirm.
    src_embedding = layers.embedding(
        input=src_word_id,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
    # The second value returned by dynamic_lstm is not used here.
    lstm_hidden0, _ = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
    encoder_out = layers.sequence_last_step(input=lstm_hidden0)

    # decoder
    trg_language_word = layers.data(
        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
    trg_embedding = layers.embedding(
        input=trg_language_word,
        size=[dict_size, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr=fluid.ParamAttr(name='vemb'))

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        current_word = rnn.step_input(trg_embedding)
        mem = rnn.memory(init=encoder_out)
        # Named separately from the encoder's `fc1` to avoid shadowing it.
        decoder_state = fluid.layers.fc(input=[current_word, mem],
                                        size=decoder_size,
                                        act='tanh')
        out = fluid.layers.fc(
            input=decoder_state, size=target_dict_dim, act='softmax')
        # The new state becomes the memory seen by the next step.
        rnn.update_memory(mem, decoder_state)
        rnn.output(out)

    return rnn()


def to_lodtensor(data, place):
    """Pack a batch of variable-length sequences into a single LoDTensor.

    Args:
        data: Iterable of sequences (each supporting ``len`` and accepted by
            ``np.concatenate``). One-shot iterables such as generators or
            Python 3 ``map`` objects are accepted as well.
        place: Device place forwarded to ``LoDTensor.set``.

    Returns:
        A ``core.LoDTensor`` holding all items as an int64 array of shape
        ``(total_items, 1)``, with one LoD level containing the cumulative
        sequence offsets ``[0, len(s0), len(s0) + len(s1), ...]``.

    Raises:
        ValueError: Propagated from ``np.concatenate`` when ``data`` is empty.
    """
    # Materialise once: the batch is walked twice (lengths, then
    # concatenation), which would exhaust a one-shot iterator.
    sequences = list(data)

    offsets = [0]
    for seq in sequences:
        offsets.append(offsets[-1] + len(seq))

    flattened_data = np.concatenate(sequences, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])

    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([offsets])
    return res


def main():
    """Build the training program and run a few mini-batches on CPU.

    Adds a cross-entropy loss against the ``target_language_next_word``
    labels on top of ``encoder_decoder()``, minimises its mean with Adagrad,
    then feeds shuffled WMT14 training batches through an ``Executor``.
    After each batch the average cost is printed.

    The process terminates with exit status 0 as soon as a batch finishes
    while ``batch_id`` is greater than 3, i.e. after the fifth batch overall.
    """
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
        batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)

    # Run the startup program once before the main program is executed.
    exe.run(framework.default_startup_program())

    batch_id = 0
    for pass_id in range(2):
        for data in train_data():
            # Each sample is indexed as (source words, target words,
            # next target words); split the batch column by column.
            word_data = to_lodtensor([sample[0] for sample in data], place)
            trg_word = to_lodtensor([sample[1] for sample in data], place)
            trg_word_next = to_lodtensor([sample[2] for sample in data], place)
            outs = exe.run(framework.default_main_program(),
                           feed={
                               'src_word_id': word_data,
                               'target_language_word': trg_word,
                               'target_language_next_word': trg_word_next
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            # Stop early: this script only needs a handful of batches.
            if batch_id > 3:
                sys.exit(0)
            batch_id += 1


# Run the training loop only when this file is executed as a script.
if __name__ == '__main__':
    main()