#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Emotion Detection Task
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import multiprocessing
import sys
sys.path.append("../")

import paddle
import paddle.fluid as fluid
import numpy as np

from models.classification import nets
from models.model_check import check_cuda
from models.model_check import check_version
from config import PDConfig
import reader
import utils


def create_model(args,
                 num_labels,
                 is_prediction=False):
    """
    Create Model for Emotion Detection

    Declares the input variables, attaches a non-iterable DataLoader to them
    and builds the network selected by ``args.model_type``.

    Args:
        args: configuration object; ``max_seq_len``, ``model_type`` and
            ``vocab_size`` are read from it.
        num_labels: number of output classes, forwarded to the network as
            ``class_dim``.
        is_prediction: when True the loader is fed without labels and only
            the network's prediction output is returned.

    Returns:
        ``(loader, probs, [data.name, seq_len.name])`` when ``is_prediction``
        is True, otherwise ``(loader, avg_loss, accuracy, num_seqs)``.

    Raises:
        ValueError: if ``args.model_type`` is not one of the supported nets.
    """
    data = fluid.data(name="words", shape=[-1, args.max_seq_len], dtype="int64")
    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
    seq_len = fluid.data(name="seq_len", shape=[-1], dtype="int64")

    # Labels are only part of the feed when a loss has to be computed.
    if is_prediction:
        feed_list = [data, seq_len]
    else:
        feed_list = [data, label, seq_len]
    loader = fluid.io.DataLoader.from_generator(
        feed_list=feed_list,
        capacity=16,
        iterable=False,
        return_list=False)

    # Each supported model type is the name of a builder function in `nets`.
    supported_nets = ("cnn_net", "bow_net", "lstm_net", "bilstm_net",
                      "gru_net", "textcnn_net")
    if args.model_type not in supported_nets:
        raise ValueError("Unknown network type!")
    network = getattr(nets, args.model_type)

    if is_prediction:
        probs = network(data, seq_len, None, args.vocab_size,
                        class_dim=num_labels, is_prediction=True)
        return loader, probs, [data.name, seq_len.name]

    avg_loss, probs = network(data, seq_len, label, args.vocab_size,
                              class_dim=num_labels)
    # `num_seqs` receives the number of samples counted by the accuracy op.
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=label, total=num_seqs)
    return loader, avg_loss, accuracy, num_seqs


def evaluate(exe, test_program, test_loader, fetch_list, eval_phase):
    """
    Evaluation Function

    Runs ``test_program`` until ``test_loader`` is exhausted and prints the
    sample-weighted average loss and accuracy together with the elapsed time.

    Args:
        exe: executor used to run the program.
        test_program: program to evaluate.
        test_loader: non-iterable loader feeding ``test_program``; it is
            started here and reset once it signals end of data.
        fetch_list: names of the loss, accuracy and sample-count variables,
            in that order.
        eval_phase: label used in the printed report (e.g. "dev", "test").
    """
    test_loader.start()
    total_cost, total_acc, total_num_seqs = [], [], []
    time_begin = time.time()
    while True:
        try:
            np_loss, np_acc, np_num_seqs = exe.run(program=test_program,
                                                   fetch_list=fetch_list,
                                                   return_numpy=False)
        except fluid.core.EOFException:
            # The loader is exhausted: rewind it so it can be started again.
            test_loader.reset()
            break
        np_loss = np.array(np_loss)
        np_acc = np.array(np_acc)
        np_num_seqs = np.array(np_num_seqs)
        # Weight per-batch averages by batch size so the final figures are
        # averages over samples rather than over batches.
        total_cost.extend(np_loss * np_num_seqs)
        total_acc.extend(np_acc * np_num_seqs)
        total_num_seqs.extend(np_num_seqs)
    time_end = time.time()

    num_seqs = np.sum(total_num_seqs)
    if num_seqs == 0:
        # Nothing was evaluated; avoid reporting the result of 0 / 0.
        print("[%s evaluation] no samples were evaluated, elapsed time: %f s" %
              (eval_phase, time_end - time_begin))
        return
    print("[%s evaluation] avg loss: %f, avg acc: %f, elapsed time: %f s" %
          (eval_phase, np.sum(total_cost) / num_seqs,
           np.sum(total_acc) / num_seqs, time_end - time_begin))


def infer(exe, infer_program, infer_loader, fetch_list, infer_phase):
    """
    Inference Function

    Runs ``infer_program`` until ``infer_loader`` is exhausted. For every
    sample one line is printed: the index of the most probable class followed
    by the probability of each class, all separated by tabs.

    Args:
        exe: executor used to run the program.
        infer_program: program to run.
        infer_loader: non-iterable loader feeding ``infer_program``; it is
            started here and reset once it signals end of data.
        fetch_list: list whose first entry names the probability output.
        infer_phase: label used in the elapsed-time report.
    """
    infer_loader.start()
    time_begin = time.time()
    while True:
        try:
            batch_probs = exe.run(program=infer_program,
                                  fetch_list=fetch_list,
                                  return_numpy=True)
        except fluid.core.EOFException:
            # The loader is exhausted: rewind it so it can be started again.
            infer_loader.reset()
            break
        for probs in batch_probs[0]:
            # Print every class probability instead of assuming exactly three
            # classes; for three classes the line is unchanged.
            print("%d\t%s" %
                  (np.argmax(probs), "\t".join("%f" % p for p in probs)))
    time_end = time.time()
    print("[%s] elapsed time: %f s" % (infer_phase, time_end - time_begin))


def main(args):
    """
    Main Function

    Builds the programs requested by ``args.do_train``, ``args.do_val`` and
    ``args.do_infer``, optionally restores a checkpoint, then trains,
    evaluates and/or runs inference.

    Args:
        args: configuration object. Fields read here: ``use_cuda``,
            ``task_name``, ``data_dir``, ``vocab_path``, ``random_seed``,
            ``num_labels``, ``do_train``, ``do_val``, ``do_infer``,
            ``batch_size``, ``epoch``, ``lr``, ``verbose``,
            ``init_checkpoint``, ``skip_steps``, ``save_steps``,
            ``validation_steps``, ``save_checkpoint_dir`` and ``enable_ce``.

    Raises:
        ValueError: if none of ``do_train``/``do_val``/``do_infer`` is set,
            or if only validation/inference is requested without
            ``init_checkpoint``.
    """
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    task_name = args.task_name.lower()
    processor = reader.EmoTectProcessor(data_dir=args.data_dir,
                                        vocab_path=args.vocab_path,
                                        random_seed=args.random_seed)
    # The number of classes is taken from the configuration.
    num_labels = args.num_labels

    if not (args.do_train or args.do_val or args.do_infer):
        raise ValueError("For args `do_train`, `do_val` and `do_infer`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size, phase='train', epoch=args.epoch)

        num_train_examples = processor.get_num_examples(phase="train")
        max_train_steps = args.epoch * num_train_examples // args.batch_size + 1

        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)

        train_program = fluid.Program()
        if args.random_seed is not None:
            train_program.random_seed = args.random_seed

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_loader, train_loss, train_acc, train_num_seqs = \
                    create_model(
                        args,
                        num_labels=num_labels,
                        is_prediction=False)

                optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
                optimizer.minimize(train_loss)
        train_fetch_list = [
            train_loss.name, train_acc.name, train_num_seqs.name
        ]

        if args.verbose:
            lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val:
        # Validate on the dev split while training, on the test split when
        # only evaluating.
        eval_split = 'dev' if args.do_train else 'test'
        test_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase=eval_split,
            epoch=1)

        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_loader, test_loss, test_acc, test_num_seqs = \
                    create_model(
                        args,
                        num_labels=num_labels,
                        is_prediction=False)
        test_prog = test_prog.clone(for_test=True)
        eval_fetch_list = [test_loss.name, test_acc.name, test_num_seqs.name]

    if args.do_infer:
        infer_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='infer',
            epoch=1)

        # Kept in its own variable: reusing `test_prog` here would replace the
        # evaluation program whenever `do_val` and `do_infer` are both set.
        infer_prog = fluid.Program()
        with fluid.program_guard(infer_prog, startup_prog):
            with fluid.unique_name.guard():
                infer_loader, probs, _ = create_model(
                    args,
                    num_labels=num_labels,
                    is_prediction=True)
        infer_prog = infer_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint:
            utils.init_checkpoint(
                exe, args.init_checkpoint, main_program=startup_prog)
    elif args.do_val or args.do_infer:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if "
                             "only doing validation or infer!")
        # NOTE(review): assumes the evaluation and inference programs hold the
        # same parameters, so restoring through either one is enough.
        checkpoint_prog = infer_prog if args.do_infer else test_prog
        utils.init_checkpoint(
            exe, args.init_checkpoint, main_program=checkpoint_prog)

    if args.do_train:
        train_loader.set_sample_list_generator(train_data_generator)
    if args.do_val:
        test_loader.set_sample_list_generator(test_data_generator)
    if args.do_infer:
        infer_loader.set_sample_list_generator(infer_data_generator)

    if args.do_train:
        train_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        ce_info = []
        while True:
            try:
                steps += 1
                # Metrics are only fetched on every `skip_steps`-th step.
                log_this_step = steps % args.skip_steps == 0
                fetch_list = train_fetch_list if log_this_step else []

                outputs = exe.run(program=train_program,
                                  fetch_list=fetch_list,
                                  return_numpy=False)
                if log_this_step:
                    np_loss, np_acc, np_num_seqs = outputs
                    np_loss = np.array(np_loss)
                    np_acc = np.array(np_acc)
                    np_num_seqs = np.array(np_num_seqs)
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)

                    if args.verbose:
                        verbose = "train loader queue size: %d, " % \
                            train_loader.queue.size()
                        print(verbose)

                    time_end = time.time()
                    used_time = time_end - time_begin
                    avg_loss = np.sum(total_cost) / np.sum(total_num_seqs)
                    avg_acc = np.sum(total_acc) / np.sum(total_num_seqs)
                    print("step: %d, avg loss: %f, "
                          "avg acc: %f, speed: %f steps/s" %
                          (steps, avg_loss, avg_acc,
                           args.skip_steps / used_time))
                    ce_info.append([avg_loss, avg_acc, used_time])
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.save_checkpoint_dir,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate on dev set
                    if args.do_val:
                        evaluate(exe, test_prog, test_loader,
                                 eval_fetch_list, "dev")

            except fluid.core.EOFException:
                # The training data is exhausted: run a last evaluation and
                # save a final checkpoint before leaving the loop.
                print("final step: %d " % steps)
                if args.do_val:
                    evaluate(exe, test_prog, test_loader,
                             eval_fetch_list, "dev")

                save_path = os.path.join(args.save_checkpoint_dir,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_loader.reset()
                break

    if args.do_train and args.enable_ce:
        card_num = get_cards()
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        try:
            # KPIs are taken from the second-to-last logged record.
            ce_loss, ce_acc, ce_time = ce_info[-2]
        except IndexError:
            # Fewer than two records were logged during training.
            print("ce info error")
        print("kpis\teach_step_duration_%s_card%s\t%s" %
              (task_name, card_num, ce_time))
        print("kpis\ttrain_loss_%s_card%s\t%f" % (task_name, card_num, ce_loss))
        print("kpis\ttrain_acc_%s_card%s\t%f" % (task_name, card_num, ce_acc))

    # evaluate on test set
    if not args.do_train and args.do_val:
        print("Final test result:")
        evaluate(exe, test_prog, test_loader, eval_fetch_list, "test")

    # infer
    if args.do_infer:
        print("Final infer result:")
        infer(exe, infer_prog, infer_loader, [probs.name], "infer")


def get_cards():
    """
    Count the entries listed in the ``CUDA_VISIBLE_DEVICES`` variable.

    Returns:
        0 when the variable is unset or empty, otherwise the number of
        comma-separated fields it contains.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    if visible == '':
        return 0
    # A string with k commas splits into k + 1 fields.
    return visible.count(",") + 1


if __name__ == "__main__":
    # Build the run configuration from 'config.json' and echo it.
    args = PDConfig('config.json')
    args.build()
    args.print_arguments()
    # Environment checks from models.model_check; presumably they verify that
    # the CUDA setting and the installed Paddle version are usable -- confirm
    # against that module.
    check_cuda(args.use_cuda)
    check_version()
    main(args)