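"""Train a GRU4Rec model with PaddlePaddle Fluid.

Minimal usage sketch (the data file names are illustrative; pass whatever
plain-text train/test files utils.prepare_data expects):

    python train.py train_data.txt test_data.txt --use_cuda 1
"""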
import os
import sys
import time
import six
import numpy as np
import math
import argparse
import paddle.fluid as fluid
import paddle
import utils

SEED = 102


def parse_args():
    parser = argparse.ArgumentParser("gru4rec benchmark.")
    parser.add_argument('train_file', help='path to the training data file')
    parser.add_argument('test_file', help='path to the test data file')
    parser.add_argument(
        '--use_cuda', type=int, default=0, help='whether to use GPU')
    parser.add_argument(
        '--parallel', type=int, default=0, help='whether to run in parallel')
    parser.add_argument(
        '--enable_ce',
        action='store_true',
        help='If set, run the task with continuous evaluation logs.')
    parser.add_argument(
        '--num_devices', type=int, default=1, help='Number of GPU devices')
    args = parser.parse_args()
    return args


def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound):
    """ network definition """
    emb_lr_x = 10.0
    gru_lr_x = 1.0
    fc_lr_x = 1.0
    emb = fluid.layers.embedding(
        input=src,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=init_low_bound, high=init_high_bound),
            learning_rate=emb_lr_x),
        is_sparse=True)

    fc0 = fluid.layers.fc(input=emb,
                          size=hid_size * 3,
                          param_attr=fluid.ParamAttr(
                              initializer=fluid.initializer.Uniform(
                                  low=init_low_bound, high=init_high_bound),
                              learning_rate=gru_lr_x))
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(
                low=init_low_bound, high=init_high_bound),
            learning_rate=gru_lr_x))

    fc = fluid.layers.fc(input=gru_h0,
                         size=vocab_size,
                         act='softmax',
                         param_attr=fluid.ParamAttr(
                             initializer=fluid.initializer.Uniform(
                                 low=init_low_bound, high=init_high_bound),
                             learning_rate=fc_lr_x))

    cost = fluid.layers.cross_entropy(input=fc, label=dst)
    acc = fluid.layers.accuracy(input=fc, label=dst, k=20)
    return cost, acc


def train(train_reader,
          vocab,
          network,
          hid_size,
          base_lr,
          batch_size,
          pass_num,
          use_cuda,
          parallel,
          model_dir,
          init_low_bound=-0.04,
          init_high_bound=0.04):
    """ train network """

    args = parse_args()
    if args.enable_ce:
        # The random seed must be set before the network is configured.
        fluid.default_startup_program().random_seed = SEED

    vocab_size = len(vocab)

    # Input data
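    # Both inputs are variable-length id sequences (lod_level=1): src_wordseq
    # holds the input items and dst_wordseq the corresponding next-item targets.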
    src_wordseq = fluid.layers.data(
        name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
    dst_wordseq = fluid.layers.data(
        name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)

    # Train program
    cost, acc = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
                        init_low_bound, init_high_bound)
    avg_cost = fluid.layers.mean(x=cost)

    # Optimizer to minimize the loss
    optimizer = fluid.optimizer.Adagrad(learning_rate=base_lr)
    optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=avg_cost.name)
    else:
        train_exe = exe
    total_time = 0.0
    fetch_list = [avg_cost.name]
    for pass_idx in six.moves.xrange(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)

        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data],
                                                 place)
            lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data],
                                                 place)
            ret_avg_cost = train_exe.run(feed={
                "src_wordseq": lod_src_wordseq,
                "dst_wordseq": lod_dst_wordseq
            },
                                         fetch_list=fetch_list)
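            # Perplexity is exp(average cross-entropy); np.mean reduces the
            # per-device results when running with ParallelExecutor.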
            avg_ppl = np.exp(ret_avg_cost[0])
            newest_ppl = np.mean(avg_ppl)
            if i % 10 == 0:
                print("step:%d ppl:%.3f" % (i, newest_ppl))

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))

        if pass_idx == pass_num - 1 and args.enable_ce:
            # Note: the following logs are used for CE (continuous evaluation)
            # monitoring only and can be ignored otherwise.
            gpu_num = get_cards(args)
            if gpu_num == 1:
                print("kpis    rsc15_pass_duration    %s" %
                      (total_time / epoch_idx))
                print("kpis    rsc15_avg_ppl    %s" % newest_ppl)
            else:
                print("kpis    rsc15_pass_duration_card%s    %s" % \
                      (gpu_num, total_time / epoch_idx))
                print("kpis    rsc15_avg_ppl_card%s    %s" %
                      (gpu_num, newest_ppl))
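        # Save an inference program after every epoch; it feeds the two word
        # sequences and fetches the average cost and the recall@20 accuracy.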
        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["src_wordseq", "dst_wordseq"]
        fetch_vars = [avg_cost, acc]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
        print("model saved in %s" % save_dir)

    print("finish training")


def get_cards(args):
    if args.enable_ce:
        cards = os.environ.get('CUDA_VISIBLE_DEVICES')
        num = len(cards.split(","))
        return num
    else:
        return args.num_devices


def train_net():
    """ do training """
    args = parse_args()
    train_file = args.train_file
    test_file = args.test_file
    use_cuda = True if args.use_cuda else False
    parallel = True if args.parallel else False
    print("use_cuda:", use_cuda, "parallel:", parallel)
    batch_size = 50
    vocab, train_reader, test_reader = utils.prepare_data(
        train_file,
        test_file,
        batch_size=batch_size * get_cards(args),
        buffer_size=1000,
        word_freq_threshold=0)
    train(
        train_reader=train_reader,
        vocab=vocab,
        network=network,
        hid_size=100,
        base_lr=0.01,
        batch_size=batch_size,
        pass_num=10,
        use_cuda=use_cuda,
        parallel=parallel,
        model_dir="model_recall20",
        init_low_bound=-0.1,
        init_high_bound=0.1)


if __name__ == "__main__":
    train_net()
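
# A minimal sketch for loading one of the saved models later (the epoch
# directory name is illustrative):
#
#   exe = fluid.Executor(fluid.CPUPlace())
#   program, feed_names, fetch_targets = fluid.io.load_inference_model(
#       "model_recall20/epoch_1", exe)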