#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import paddle.fluid as fluid
import numpy
import sys

# RecordIO files opened by build_program() for the train / test readers.
TRAIN_FILES = ['train.recordio']
TEST_FILES = ['test.recordio']

# first dimension of the embedding table (see build_program)
DICT_DIM = 5147

# embedding dim
emb_dim = 128

# hidden dim (number of filters of each sequence_conv_pool)
hid_dim = 128

# class num
class_dim = 2

# epoch num
epoch_num = 10


def build_program(is_train):
    """Build the reader and the classification network in the current program.

    Must be called inside a ``fluid.program_guard`` so that the created
    layers are appended to the intended main/startup programs.

    Args:
        is_train: When true, read from ``TRAIN_FILES`` and append an Adagrad
            optimizer that minimizes the mean loss; otherwise read from
            ``TEST_FILES`` and build the forward pass only.

    Returns:
        A dict with three entries:
          * ``'loss'``: the mean cross-entropy loss variable.
          * ``'log'``: ``[avg_cost, acc]``, the variables to fetch for logging.
          * ``'file'``: the reader handle returned by ``open_files``; the
            caller resets it after each pass over the data.
    """
    file_obj_handle = fluid.layers.io.open_files(
        filenames=TRAIN_FILES if is_train else TEST_FILES,
        shapes=[[-1, 1], [-1, 1]],
        lod_levels=[1, 0],
        dtypes=['int64', 'int64'])

    # The double-buffered reader is created outside the unique_name guard;
    # only the network layers below are built inside it.
    file_obj = fluid.layers.io.double_buffer(file_obj_handle)

    # NOTE(review): the guard presumably makes the train and test programs
    # generate identical parameter names so they can share weights -- confirm.
    with fluid.unique_name.guard():

        data, label = fluid.layers.read_file(file_obj)

        emb = fluid.layers.embedding(input=data, size=[DICT_DIM, emb_dim])

        # Two parallel sequence conv + pool branches over the same embedding,
        # differing only in filter size (3 and 4).
        conv_3 = fluid.nets.sequence_conv_pool(
            input=emb,
            num_filters=hid_dim,
            filter_size=3,
            act="tanh",
            pool_type="sqrt")

        conv_4 = fluid.nets.sequence_conv_pool(
            input=emb,
            num_filters=hid_dim,
            filter_size=4,
            act="tanh",
            pool_type="sqrt")

        prediction = fluid.layers.fc(input=[conv_3, conv_4],
                                     size=class_dim,
                                     act="softmax")

        # cross entropy loss
        cost = fluid.layers.cross_entropy(input=prediction, label=label)

        # mean loss
        avg_cost = fluid.layers.mean(x=cost)
        acc = fluid.layers.accuracy(input=prediction, label=label)

        if is_train:
            # Adagrad optimizer
            optimizer = fluid.optimizer.Adagrad(learning_rate=0.001)
            optimizer.minimize(avg_cost)

    return {'loss': avg_cost, 'log': [avg_cost, acc], 'file': file_obj_handle}


def main():
    """Train the network for ``epoch_num`` epochs, testing after each epoch.

    Builds separate train and test programs that share one startup program,
    runs the startup program once, then drives both programs with
    ``ParallelExecutor``. The test executor shares its variables with the
    train executor. Each pass ends when the reader raises
    ``fluid.core.EOFException``; the reader is then reset for the next pass.
    """
    train = fluid.Program()
    startup = fluid.Program()
    test = fluid.Program()

    with fluid.program_guard(train, startup):
        train_args = build_program(is_train=True)

    with fluid.program_guard(test, startup):
        test_args = build_program(is_train=False)

    use_cuda = fluid.core.is_compiled_with_cuda()
    # startup
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place=place)
    exe.run(startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda,
        loss_name=train_args['loss'].name,
        main_program=train)
    test_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda, main_program=test, share_vars_from=train_exe)

    # The same fetch names are used for the test executor as well.
    fetch_var_list = [var.name for var in train_args['log']]
    for epoch_id in range(epoch_num):
        # train
        batch_id = 0
        try:
            while True:
                loss, acc = map(numpy.array,
                                train_exe.run(fetch_list=fetch_var_list))
                print('Train epoch', epoch_id, 'batch', batch_id, 'loss:',
                      loss, 'acc:', acc)
                batch_id += 1
        except fluid.core.EOFException:
            print('End of epoch', epoch_id)
            train_args['file'].reset()

        # test
        test_losses = []
        test_accs = []
        try:
            while True:
                loss_np, acc_np = map(numpy.array,
                                      test_exe.run(fetch_list=fetch_var_list))
                test_losses.append(loss_np[0])
                test_accs.append(acc_np[0])
        except fluid.core.EOFException:
            # Only the end-of-data signal finishes the test pass; any other
            # error propagates instead of being reported as a test result.
            test_args['file'].reset()
            print('Test loss:', numpy.mean(test_losses), 'acc:',
                  numpy.mean(test_accs))


# Run training only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()