train.py 3.8 KB
Newer Older
1 2
import math
import os
H
Helin Wang 已提交
3

4
import numpy
H
Helin Wang 已提交
5 6
import paddle.v2 as paddle

D
dzhwinter 已提交
7 8
# GPU training is switched on by setting the WITH_GPU environment variable to
# any value other than '0'; when it is unset the script runs on CPU.
with_gpu = os.environ.get('WITH_GPU', '0') != '0'

H
Helin Wang 已提交
9 10 11 12 13 14 15 16 17 18
# Width of each word embedding produced by wordemb().
embsize = 32
# Number of units in the hidden fully connected layer built in main().
hiddensize = 256
# Window length handed to the imikolov train/test readers; main() declares
# four context-word inputs plus one next-word label, i.e. five data layers.
N = 5


def wordemb(inlayer):
    """Project a word-id layer through the embedding table named "_proj".

    Args:
        inlayer: The Paddle layer to project (main() passes integer-valued
            data layers).

    Returns:
        The ``paddle.layer.table_projection`` layer of width ``embsize``.
    """
    # The parameter name is fixed on purpose: main() later fetches the trained
    # table with parameters.get("_proj") in order to export it.
    table_attr = paddle.attr.Param(
        name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0)
    return paddle.layer.table_projection(
        input=inlayer, size=embsize, param_attr=table_attr)


23 24 25 26 27 28
# save and load word dict and embedding table
def save_dict_and_embedding(word_dict, embeddings):
    """Write the vocabulary and the embedding matrix to the working directory.

    Args:
        word_dict: Mapping from word (string) to its value; written to the
            file ``word_dict`` as one ``"<key> <value>"`` line per entry.
        embeddings: Array accepted by ``numpy.savetxt``; written to the file
            ``embedding_table`` as comma-separated text, one row per line.
    """
    with open("word_dict", "w") as f:
        for key, value in word_dict.items():
            f.write(key + " " + str(value) + "\n")
    # Hand numpy the path rather than a text-mode handle so that it opens the
    # file in whatever mode it needs; the resulting file content is the same.
    numpy.savetxt("embedding_table", embeddings, delimiter=',', newline='\n')
30 31 32 33 34 35 36


def load_dict_and_embedding():
    """Read back the files written by ``save_dict_and_embedding``.

    Returns:
        A ``(word_dict, embeddings)`` tuple: ``word_dict`` maps each key read
        from the file ``word_dict`` to its integer value, and ``embeddings``
        is the array ``numpy.loadtxt`` parses from ``embedding_table``.

    Raises:
        ValueError: If a non-empty line has no space separator or its value
            is not an integer.
    """
    word_dict = {}
    with open("word_dict", "r") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # Split on the last space only, so a key that itself contains
            # spaces is still separated correctly from its integer value.
            key, value = entry.rsplit(" ", 1)
            word_dict[key] = int(value)

    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
    return word_dict, embeddings


H
Helin Wang 已提交
43
def main():
    """Train the N-gram word model on the imikolov dataset and export it.

    Builds a network that embeds four context words with ``wordemb``,
    concatenates them, feeds them through one sigmoid hidden layer with
    dropout and a softmax output over the vocabulary, and trains it with
    AdaGrad for 100 passes in batches of 32.

    Side effects:
        * Prints the cost and metrics every 100 batches and the test metrics
          after every pass.
        * Writes ``model_<pass_id>.tar`` after every pass.
        * Writes ``word_dict`` and ``embedding_table`` once training ends.
    """
    paddle.init(use_gpu=with_gpu, trainer_count=3)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    def word_input(name):
        # Every layer takes integer value of range [0, dict_size)
        return paddle.layer.data(
            name=name, type=paddle.data_type.integer_value(dict_size))

    # Data layers are declared in the same order as before: the four context
    # words first, then the word to predict.
    firstword = word_input("firstw")
    secondword = word_input("secondw")
    thirdword = word_input("thirdw")
    fourthword = word_input("fourthw")
    nextword = word_input("fifthw")

    context_embeddings = [
        wordemb(word)
        for word in (firstword, secondword, thirdword, fourthword)
    ]
    contextemb = paddle.layer.concat(input=context_embeddings)

    hidden1 = paddle.layer.fc(
        input=contextemb,
        size=hiddensize,
        act=paddle.activation.Sigmoid(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5),
        bias_attr=paddle.attr.Param(learning_rate=2),
        param_attr=paddle.attr.Param(
            initial_std=1. / math.sqrt(embsize * 8), learning_rate=1))
    predictword = paddle.layer.fc(
        input=hidden1,
        size=dict_size,
        bias_attr=paddle.attr.Param(learning_rate=2),
        act=paddle.activation.Softmax())

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adagrad)

    def event_handler(event):
        """Log training progress and checkpoint at the end of each pass."""
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                # Single-argument call form: prints the same text whether
                # `print` is a statement (Python 2) or a function.
                print("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                paddle.batch(paddle.dataset.imikolov.test(word_dict, N), 32))
            print("Pass %d, Testing metrics %s" % (event.pass_id,
                                                   result.metrics))
            # A tar archive is binary data, so open the file in binary mode.
            with open("model_%d.tar" % event.pass_id, 'wb') as f:
                trainer.save_parameter_to_tar(f)

    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=100,
        event_handler=event_handler)

    # save word dict and embedding table
    embeddings = parameters.get("_proj").reshape(len(word_dict), embsize)
    save_dict_and_embedding(word_dict, embeddings)

H
Helin Wang 已提交
109 110 111

# Start training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()