run_classifier.py 17.1 KB
Newer Older
王肖 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SimNet Task
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import argparse
import multiprocessing
import sys

# Python 2 compatibility shim: force the interpreter-wide default string
# encoding to UTF-8. `reload` is a builtin only on Python 2; on Python 3
# sys.getdefaultencoding() already returns 'utf-8', so the branch below is
# never entered there.
defaultencoding = 'utf-8'
if sys.getdefaultencoding() != defaultencoding:
    # reload(sys) restores sys.setdefaultencoding, which site.py removes
    # at interpreter start-up.
    reload(sys)
    sys.setdefaultencoding(defaultencoding)

# Add ".." (resolved against the current working directory) to the module
# search path. NOTE(review): presumably needed so the project-local imports
# that follow can be resolved -- confirm before reordering.
sys.path.append("..")

import paddle
import paddle.fluid as fluid
import numpy as np
import config
import utils
import reader
import nets.paddle_layers as layers
import io
import logging

from utils import ArgConfig
46
from utils import load_dygraph
王肖 已提交
47 48 49
from model_check import check_version
from model_check import check_cuda

50

王肖 已提交
51 52 53 54
def _make_loader(sample_reader, batch_size, place):
    """
    Build an iterable DataLoader that batches `sample_reader`.

    Args:
        sample_reader: sample-level reader passed to `paddle.batch`.
        batch_size: number of samples per batch.
        place: device (CPUPlace / CUDAPlace) the batches are fed to.

    Returns:
        The configured `fluid.io.DataLoader`.
    """
    loader = fluid.io.DataLoader.from_generator(
        capacity=16,
        return_list=True,
        iterable=True,
        use_double_buffer=True)
    loader.set_sample_list_generator(
        paddle.batch(
            sample_reader, batch_size=batch_size), place)
    return loader


def _collect_predictions(net, loader):
    """
    Run `net` in eval mode over every (left, right) batch of `loader`.

    Returns:
        A freshly created list with one prediction row per sample, so
        successive calls never share or accumulate results.
    """
    net.eval()
    preds = []
    for left, right in loader():
        _, pred = net(left, right)
        preds += list(pred.numpy())
    return preds


def _save_checkpoint(net, conf_dict, args, global_step):
    """
    Save `net.state_dict()` under <output_dir>/<model_path>/<global_step>.
    """
    model_save_dir = os.path.join(args.output_dir, conf_dict["model_path"])
    model_path = os.path.join(model_save_dir, str(global_step))

    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    fluid.dygraph.save_dygraph(net.state_dict(), model_path)
    logging.info("saving infer model in %s" % model_path)


def train(conf_dict, args):
    """
    train process

    Builds the network, loss and optimizer named in `conf_dict`, trains on
    the "train" reader, optionally validates every `args.validation_steps`
    steps, saves a checkpoint every `args.save_steps` steps and once more at
    the end, and optionally evaluates on the "test" reader afterwards.

    Args:
        conf_dict: configuration mapping. Reads the "net", "loss",
            "optimizer" and "model_path" entries; "dict_size" and "seq_len"
            are written into it before the network is constructed.
        args: parsed arguments. Reads use_cuda, enable_ce, vocab_path,
            seq_len, init_checkpoint, task_mode, epoch, batch_size,
            do_valid, validation_steps, save_steps, output_dir,
            compute_accuracy, lamda, task_name and do_test.

    Raises:
        ValueError: if the configured optimizer class name is neither
            'SGDOptimizer' nor 'AdamOptimizer'.
    """
    # Get device
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    # run train
    logging.info("start train process ...")

    is_pairwise = args.task_mode == "pairwise"

    def valid_and_test(pred_list, process, mode):
        """
        return auc and acc

        Returns the AUC alone, or an (auc, acc) tuple when
        args.compute_accuracy is set. `mode` selects which labels are
        fetched from `process` and must be "test" or "valid".
        """
        pred_list = np.vstack(pred_list)
        if mode == "test":
            label_list = process.get_test_label()
        elif mode == "valid":
            label_list = process.get_valid_label()
        else:
            raise ValueError("mode must be 'test' or 'valid', got %r" %
                             (mode, ))
        if is_pairwise:
            # Map scores from [-1, 1] to [0, 1] and expand them to the
            # two-column (1 - p, p) layout passed to the AUC metric.
            pred_list = (pred_list + 1) / 2
            pred_list = np.hstack(
                (np.ones_like(pred_list) - pred_list, pred_list))
        metric.reset()
        metric.update(pred_list, label_list)
        auc = metric.eval()
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, label_list, args.task_mode,
                                     args.lamda)
            return auc, acc
        else:
            return auc

    with fluid.dygraph.guard(place):
        # used for continuous evaluation
        if args.enable_ce:
            SEED = 102
            fluid.default_startup_program().random_seed = SEED
            fluid.default_main_program().random_seed = SEED

        # loading vocabulary
        vocab = utils.load_vocab(args.vocab_path)
        # get vocab size
        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        # Load network structure dynamically
        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)
        # Truthiness instead of `is not ""`: identity comparison with a
        # string literal is unreliable, and this also skips loading when
        # no checkpoint (None or empty string) was given.
        if args.init_checkpoint:
            model, _ = load_dygraph(args.init_checkpoint)
            net.set_dict(model)
        # Load loss function dynamically
        loss = utils.import_class("./nets/losses",
                                  conf_dict["loss"]["module_name"],
                                  conf_dict["loss"]["class_name"])(conf_dict)
        # Load Optimization method
        learning_rate = conf_dict["optimizer"]["learning_rate"]
        optimizer_name = conf_dict["optimizer"]["class_name"]
        if optimizer_name == 'SGDOptimizer':
            optimizer = fluid.optimizer.SGDOptimizer(
                learning_rate, parameter_list=net.parameters())
        elif optimizer_name == 'AdamOptimizer':
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate,
                beta1=conf_dict["optimizer"]["beta1"],
                beta2=conf_dict["optimizer"]["beta2"],
                epsilon=conf_dict["optimizer"]["epsilon"],
                parameter_list=net.parameters())
        else:
            # Fail fast instead of hitting an unbound `optimizer` on the
            # first training step.
            raise ValueError("unsupported optimizer class_name: %r" %
                             (optimizer_name, ))

        # load auc method
        metric = fluid.metrics.Auc(name="auc")
        simnet_process = reader.SimNetProcessor(args, vocab)

        # set global step
        global_step = 0
        losses = []
        start_time = time.time()

        train_loader = _make_loader(
            simnet_process.get_reader(
                "train", epoch=args.epoch), args.batch_size, place)
        if args.do_valid:
            valid_loader = _make_loader(
                simnet_process.get_reader("valid"), args.batch_size, place)

        # Each training batch has three fields: (left, pos_right, neg_right)
        # in pairwise mode, (left, right, label) otherwise.
        for left, second, third in train_loader():
            net.train()
            global_step += 1
            if is_pairwise:
                _, pos_score = net(left, second)
                _, neg_score = net(left, third)
                avg_cost = loss.compute(pos_score, neg_score)
            else:
                _, pred = net(left, second)
                avg_cost = loss.compute(pred, third)
            losses.append(np.mean(avg_cost.numpy()))
            avg_cost.backward()
            optimizer.minimize(avg_cost)
            net.clear_gradients()

            if args.do_valid and global_step % args.validation_steps == 0:
                # Predictions are collected from scratch on every round so
                # their count always matches the validation labels.
                valid_preds = _collect_predictions(net, valid_loader)
                valid_result = valid_and_test(valid_preds, simnet_process,
                                              "valid")
                # NOTE: the value logged as "valid_loss" is the mean of the
                # training losses recorded so far.
                if args.compute_accuracy:
                    valid_auc, valid_acc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
                        % (global_step, valid_auc, valid_acc,
                           np.mean(losses)))
                else:
                    valid_auc = valid_result
                    logging.info(
                        "global_steps: %d, valid_auc: %f, valid_loss: %f" %
                        (global_step, valid_auc, np.mean(losses)))

            if global_step % args.save_steps == 0:
                _save_checkpoint(net, conf_dict, args, global_step)

        elapsed = time.time() - start_time
        # final save
        logging.info("the final step is %s" % global_step)
        _save_checkpoint(net, conf_dict, args, global_step)
        # used for continuous evaluation
        if args.enable_ce:
            card_num = get_cards()
            ce_loss = np.mean(losses)
            print("kpis\teach_step_duration_%s_card%s\t%s" %
                  (args.task_name, card_num, elapsed))
            print("kpis\ttrain_loss_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_loss))

        if args.do_test:
            # Get Feeder and Reader
            test_loader = _make_loader(
                simnet_process.get_reader("test"), args.batch_size, place)
            test_preds = _collect_predictions(net, test_loader)
            test_result = valid_and_test(test_preds, simnet_process, "test")
            if args.compute_accuracy:
                test_auc, test_acc = test_result
                logging.info("AUC of test is %f, Accuracy of test is %f" %
                             (test_auc, test_acc))
            else:
                test_auc = test_result
                logging.info("AUC of test is %f" % test_auc)
299

王肖 已提交
300 301 302 303 304 305 306 307 308 309 310

def test(conf_dict, args):
    """
    Evaluation Function

    Loads the checkpoint at `args.init_checkpoint`, runs the network over
    the "test" reader, writes one prediction per line to "predictions.txt"
    in the current working directory, and logs AUC (plus accuracy when
    `args.compute_accuracy` is set).

    Args:
        conf_dict: configuration mapping. Reads the "net" entry;
            "dict_size" and "seq_len" are written into it before the
            network is constructed.
        args: parsed arguments. Reads use_cuda, vocab_path, batch_size,
            seq_len, init_checkpoint, task_mode, compute_accuracy, lamda,
            verbose_result and test_result_path.
    """
    logging.info("start test process ...")
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    is_pairwise = args.task_mode == "pairwise"
    with fluid.dygraph.guard(place):

        vocab = utils.load_vocab(args.vocab_path)
        simnet_process = reader.SimNetProcessor(args, vocab)
        test_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        get_test_examples = simnet_process.get_reader("test")
        test_loader.set_sample_list_generator(
            paddle.batch(
                get_test_examples, batch_size=args.batch_size), place)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)

        model, _ = load_dygraph(args.init_checkpoint)
        net.set_dict(model)
        # Switch to evaluation mode, matching the test pass run by train().
        net.eval()
        metric = fluid.metrics.Auc(name="auc")
        pred_list = []
        with io.open(
                "predictions.txt", "w", encoding="utf8") as predictions_file:
            for left, right in test_loader():
                _, pred = net(left, right)
                batch_pred = pred.numpy()
                if is_pairwise:
                    # Keep the raw score; it is rescaled for the metric
                    # below and rescaled per line for the output file.
                    pred_list.extend(float(item[0]) for item in batch_pred)
                    lines = [str((item[0] + 1) / 2) for item in batch_pred]
                else:
                    # Keep the full prediction row for each sample: the
                    # metric and accuracy helpers receive full rows in
                    # train() as well.
                    pred_list.extend(batch_pred)
                    lines = [str(np.argmax(item)) for item in batch_pred]
                predictions_file.write(u"\n".join(lines) + u"\n")

            if is_pairwise:
                # Map scores from [-1, 1] to [0, 1] and expand them to the
                # two-column (1 - p, p) layout.
                pred_list = np.array(pred_list).reshape((-1, 1))
                pred_list = (pred_list + 1) / 2
                pred_list = np.hstack(
                    (np.ones_like(pred_list) - pred_list, pred_list))
            else:
                pred_list = np.array(pred_list)
            labels = simnet_process.get_test_label()

            metric.update(pred_list, labels)
            if args.compute_accuracy:
                acc = utils.get_accuracy(pred_list, labels, args.task_mode,
                                         args.lamda)
                logging.info("AUC of test is %f, Accuracy of test is %f" %
                             (metric.eval(), acc))
            else:
                logging.info("AUC of test is %f" % metric.eval())

        if args.verbose_result:
            utils.get_result_file(args)
            logging.info("test result saved in %s" %
                         os.path.join(os.getcwd(), args.test_result_path))
王肖 已提交
379 380 381 382 383 384 385 386 387 388 389 390


def infer(conf_dict, args):
    """
    run predict

    Loads the checkpoint at `args.init_checkpoint`, runs the network over
    the inference reader and writes "<input>\\t<prediction>" lines to
    `args.infer_result_path`.

    Args:
        conf_dict: configuration mapping. Reads the "net" entry;
            "dict_size" and "seq_len" are written into it before the
            network is constructed.
        args: parsed arguments. Reads use_cuda, vocab_path, batch_size,
            seq_len, init_checkpoint, task_mode and infer_result_path.
    """
    # The original message said "start test process ...", copied from test().
    logging.info("start infer process ...")
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    is_pairwise = args.task_mode == "pairwise"

    with fluid.dygraph.guard(place):
        vocab = utils.load_vocab(args.vocab_path)
        simnet_process = reader.SimNetProcessor(args, vocab)
        get_infer_examples = simnet_process.get_infer_reader
        infer_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        infer_loader.set_sample_list_generator(
            paddle.batch(
                get_infer_examples, batch_size=args.batch_size), place)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)
        model, _ = load_dygraph(args.init_checkpoint)
        net.set_dict(model)
        # Switch to evaluation mode, matching the test pass run by train().
        net.eval()

        pred_list = []
        for left, right in infer_loader():
            _, pred = net(left, right)
            batch_pred = pred.numpy()
            if is_pairwise:
                # Rescale the score from [-1, 1] to [0, 1].
                pred_list.extend(
                    str((item[0] + 1) / 2) for item in batch_pred)
            else:
                # Emit the index of the highest-scoring class.
                pred_list.extend(str(np.argmax(item)) for item in batch_pred)

        with io.open(
                args.infer_result_path, "w", encoding="utf8") as infer_file:
            for _data, _pred in zip(simnet_process.get_infer_data(), pred_list):
                infer_file.write(_data + "\t" + _pred + "\n")
        logging.info("infer result saved in %s" %
                     os.path.join(os.getcwd(), args.infer_result_path))
王肖 已提交
433 434 435 436 437 438 439 440 441


def get_cards():
    """
    Count the comma-separated entries in the CUDA_VISIBLE_DEVICES
    environment variable; returns 0 when it is unset or empty.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    if visible == '':
        return 0
    return len(visible.split(","))

442

王肖 已提交
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
if __name__ == "__main__":

    # Parse the command line into the argument namespace.
    args = ArgConfig().build_conf()

    # Report the arguments, check the environment, then set up logging.
    utils.print_arguments(args)
    check_cuda(args.use_cuda)
    check_version()
    utils.init_log("./log/TextSimilarityNet")
    conf_dict = config.SimNetConfig(args)

    # Run the first enabled mode, in train > test > infer priority order;
    # flags are read lazily so later ones are untouched once a mode matches.
    for _flag, _runner in (("do_train", train), ("do_test", test),
                           ("do_infer", infer)):
        if getattr(args, _flag):
            _runner(conf_dict, args)
            break
    else:
        raise ValueError(
            "one of do_train and do_test and do_infer must be True")