api_train_v2.py 3.4 KB
Newer Older
H
Helin Wang 已提交
1 2 3 4 5 6 7 8 9 10
import math

import paddle.v2 as paddle

embsize = 32
hiddensize = 256
N = 5


def wordemb(inlayer):
11
    wordemb = paddle.layer.embedding(
H
Helin Wang 已提交
12 13 14 15 16 17
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj",
            initial_std=0.001,
            learning_rate=1,
18 19
            l2_rate=0,
            sparse_update=True))
H
Helin Wang 已提交
20 21 22 23
    return wordemb


def main():
24 25 26 27 28 29 30 31 32 33 34 35 36
    # for local training
    cluster_train = False

    if not cluster_train:
        paddle.init(use_gpu=False, trainer_count=1)
    else:
        paddle.init(
            use_gpu=False,
            trainer_count=1,
            port=7164,
            ports_num=1,
            ports_num_for_sparse=1,
            num_gradient_servers=1)
H
Helin Wang 已提交
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(input=contextemb,
                              size=hiddensize,
                              act=paddle.activation.Sigmoid(),
                              layer_attr=paddle.attr.Extra(drop_rate=0.5),
                              bias_attr=paddle.attr.Param(learning_rate=2),
                              param_attr=paddle.attr.Param(
                                  initial_std=1. / math.sqrt(embsize * 8),
                                  learning_rate=1))
    predictword = paddle.layer.fc(input=hidden1,
                                  size=dict_size,
                                  bias_attr=paddle.attr.Param(learning_rate=2),
                                  act=paddle.activation.Softmax())

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
Q
qiaolongfei 已提交
72
                trainer.save_parameter("output", "batch-" + str(event.batch_id))
H
Helin Wang 已提交
73
                result = trainer.test(
74 75
                    paddle.batch(
                        paddle.dataset.imikolov.test(word_dict, N), 32))
H
Helin Wang 已提交
76 77 78 79 80
                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics,
                    result.metrics)

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)
81

H
Helin Wang 已提交
82
    parameters = paddle.parameters.create(cost)
83
    adagrad = paddle.optimizer.AdaGrad(
H
Helin Wang 已提交
84 85
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
86 87 88 89
    trainer = paddle.trainer.SGD(cost,
                                 parameters,
                                 adagrad,
                                 is_local=not cluster_train)
H
Helin Wang 已提交
90
    trainer.train(
91
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
H
Helin Wang 已提交
92
        num_passes=30,
93
        event_handler=event_handler)
H
Helin Wang 已提交
94 95 96 97


if __name__ == '__main__':
    main()