run_classifier.py 18.6 KB
Newer Older
王肖 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SimNet Task
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import argparse
import multiprocessing
import sys

# Target interpreter-wide default encoding for implicit string conversions.
defaultencoding = 'utf-8'
# Only touch the interpreter state when its default differs from the target.
if sys.getdefaultencoding() != defaultencoding:
    # NOTE(review): `reload` is a builtin only on Python 2, so this branch
    # presumably targets Python 2 -- confirm it is never reached on Python 3,
    # where the bare name would raise NameError.
    reload(sys)
    sys.setdefaultencoding(defaultencoding)

# Add ".." to the module search path so the project-local imports below can
# be resolved from there (a relative entry, so it depends on the current
# working directory rather than on this file's location).
sys.path.append("..")

import paddle
import paddle.fluid as fluid
import numpy as np
import config
import utils
import reader
import nets.paddle_layers as layers
import io
import logging

from utils import ArgConfig
46
from utils import load_dygraph
王肖 已提交
47 48 49
from model_check import check_version
from model_check import check_cuda

50

王肖 已提交
51 52 53 54
def train(conf_dict, args):
    """
    Train a SimNet model in dygraph mode, with optional validation and test.

    Args:
        conf_dict: model configuration mapping. Reads conf_dict["net"],
            conf_dict["loss"], conf_dict["optimizer"] and
            conf_dict["model_path"]; writes conf_dict["dict_size"] and
            conf_dict["seq_len"] before the network is built.
        args: parsed run arguments. Reads use_cuda, enable_ce, vocab_path,
            seq_len, init_checkpoint, task_mode, epoch, batch_size, do_valid,
            validation_steps, save_steps, output_dir, compute_accuracy,
            lamda, do_test and task_name.

    Raises:
        ValueError: if the configured optimizer class name is neither
            'SGDOptimizer' nor 'AdamOptimizer', or if valid_and_test is
            asked for a mode other than "valid" / "test".
    """
    # Get device
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    # run train
    logging.info("start train process ...")

    def build_loader(sample_reader):
        """
        wrap a sample reader into a batched, iterable DataLoader on `place`
        """
        loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        loader.set_sample_list_generator(
            paddle.batch(
                sample_reader, batch_size=args.batch_size), place)
        return loader

    def predict(loader):
        """
        run the network in eval mode over a (left, right) loader and
        return a fresh list of per-sample predictions
        """
        # A new list on every call: predictions from an earlier evaluation
        # pass must not leak into this one, otherwise they no longer line
        # up with the label list used by valid_and_test.
        preds = []
        net.eval()
        for left, right in loader():
            left = fluid.layers.reshape(left, shape=[-1, 1])
            right = fluid.layers.reshape(right, shape=[-1, 1])
            _, pred = net(left, right)
            preds += list(pred.numpy())
        return preds

    def valid_and_test(pred_list, process, mode):
        """
        return auc, or (auc, acc) when args.compute_accuracy is set
        """
        pred_list = np.vstack(pred_list)
        if mode == "test":
            label_list = process.get_test_label()
        elif mode == "valid":
            label_list = process.get_valid_label()
        else:
            raise ValueError("unknown evaluation mode: %s" % mode)
        if args.task_mode == "pairwise":
            # Rescale the score with (s + 1) / 2 and expand it into two
            # columns (1 - p, p) before feeding the AUC metric.
            # NOTE(review): presumably scores lie in [-1, 1] -- confirm.
            pred_list = (pred_list + 1) / 2
            pred_list = np.hstack(
                (np.ones_like(pred_list) - pred_list, pred_list))
        metric.reset()
        metric.update(pred_list, label_list)
        auc = metric.eval()
        if args.compute_accuracy:
            acc = utils.get_accuracy(pred_list, label_list, args.task_mode,
                                     args.lamda)
            return auc, acc
        else:
            return auc

    def save_model(step):
        """
        save the network parameters under <output_dir>/<model_path>/<step>
        """
        model_save_dir = os.path.join(args.output_dir,
                                      conf_dict["model_path"])
        model_path = os.path.join(model_save_dir, str(step))

        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        fluid.dygraph.save_dygraph(net.state_dict(), model_path)
        logging.info("saving infer model in %s" % model_path)

    def run_validation(step):
        """
        evaluate on the validation set and log the metrics for `step`
        """
        valid_result = valid_and_test(
            predict(valid_loader), simnet_process, "valid")
        # The value logged as valid_loss is the mean of the training losses
        # collected so far, not a loss computed on the validation set.
        if args.compute_accuracy:
            valid_auc, valid_acc = valid_result
            logging.info(
                "global_steps: %d, valid_auc: %f, valid_acc: %f, valid_loss: %f"
                % (step, valid_auc, valid_acc, np.mean(losses)))
        else:
            valid_auc = valid_result
            logging.info(
                "global_steps: %d, valid_auc: %f, valid_loss: %f" %
                (step, valid_auc, np.mean(losses)))

    def after_step(step):
        """
        periodic validation and checkpointing shared by both task modes
        """
        if args.do_valid and step % args.validation_steps == 0:
            run_validation(step)
        if step % args.save_steps == 0:
            save_model(step)

    with fluid.dygraph.guard(place):
        # used for continuous evaluation
        if args.enable_ce:
            SEED = 102
            fluid.default_startup_program().random_seed = SEED
            fluid.default_main_program().random_seed = SEED

        # loading vocabulary
        vocab = utils.load_vocab(args.vocab_path)
        # get vocab size
        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        # Load network structure dynamically
        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)
        # Truthiness instead of `is not ""`: identity comparison against a
        # string literal is unreliable, and this also skips a None value.
        if args.init_checkpoint:
            model, _ = load_dygraph(args.init_checkpoint)
            net.set_dict(model)
        # Load loss function dynamically
        loss = utils.import_class("./nets/losses",
                                  conf_dict["loss"]["module_name"],
                                  conf_dict["loss"]["class_name"])(conf_dict)
        # Load Optimization method
        learning_rate = conf_dict["optimizer"]["learning_rate"]
        optimizer_name = conf_dict["optimizer"]["class_name"]
        if optimizer_name == 'SGDOptimizer':
            optimizer = fluid.optimizer.SGDOptimizer(
                learning_rate, parameter_list=net.parameters())
        elif optimizer_name == 'AdamOptimizer':
            beta1 = conf_dict["optimizer"]["beta1"]
            beta2 = conf_dict["optimizer"]["beta2"]
            epsilon = conf_dict["optimizer"]["epsilon"]
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate,
                beta1=beta1,
                beta2=beta2,
                epsilon=epsilon,
                parameter_list=net.parameters())
        else:
            # Fail early instead of hitting an unbound `optimizer` later.
            raise ValueError("unsupported optimizer: %s" % optimizer_name)

        # load auc method
        metric = fluid.metrics.Auc(name="auc")
        simnet_process = reader.SimNetProcessor(args, vocab)

        # set global step
        global_step = 0
        losses = []
        start_time = time.time()

        train_loader = build_loader(
            simnet_process.get_reader(
                "train", epoch=args.epoch))
        if args.do_valid:
            valid_loader = build_loader(simnet_process.get_reader("valid"))

        if args.task_mode == "pairwise":
            for left, pos_right, neg_right in train_loader():
                left = fluid.layers.reshape(left, shape=[-1, 1])
                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
                neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
                # Validation switches the net to eval mode, so re-enter
                # train mode at the start of every step.
                net.train()
                global_step += 1
                _, pos_score = net(left, pos_right)
                _, neg_score = net(left, neg_right)
                avg_cost = loss.compute(pos_score, neg_score)
                losses.append(np.mean(avg_cost.numpy()))
                avg_cost.backward()
                optimizer.minimize(avg_cost)
                net.clear_gradients()

                after_step(global_step)
        else:
            for left, right, label in train_loader():
                left = fluid.layers.reshape(left, shape=[-1, 1])
                right = fluid.layers.reshape(right, shape=[-1, 1])
                label = fluid.layers.reshape(label, shape=[-1, 1])
                net.train()
                global_step += 1
                _, pred = net(left, right)
                avg_cost = loss.compute(pred, label)
                losses.append(np.mean(avg_cost.numpy()))
                avg_cost.backward()
                optimizer.minimize(avg_cost)
                net.clear_gradients()

                after_step(global_step)

        end_time = time.time()
        # final save
        logging.info("the final step is %s" % global_step)
        save_model(global_step)
        # used for continuous evaluation
        if args.enable_ce:
            card_num = get_cards()
            ce_loss = np.mean(losses)
            ce_time = end_time - start_time
            print("kpis\teach_step_duration_%s_card%s\t%s" %
                  (args.task_name, card_num, ce_time))
            print("kpis\ttrain_loss_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_loss))

        if args.do_test:
            # Get Feeder and Reader
            test_loader = build_loader(simnet_process.get_reader("test"))
            test_result = valid_and_test(
                predict(test_loader), simnet_process, "test")
            if args.compute_accuracy:
                test_auc, test_acc = test_result
                logging.info("AUC of test is %f, Accuracy of test is %f" %
                             (test_auc, test_acc))
            else:
                test_auc = test_result
                logging.info("AUC of test is %f" % test_auc)
315

王肖 已提交
316 317 318 319 320 321 322 323 324 325 326

def test(conf_dict, args):
    """
    Evaluate a saved checkpoint on the test set.

    Loads the network named in conf_dict, restores the parameters from
    args.init_checkpoint, writes one prediction per line to
    "predictions.txt" in the current working directory, and logs AUC (and
    accuracy when args.compute_accuracy is set).

    Args:
        conf_dict: model configuration mapping. Reads conf_dict["net"];
            writes conf_dict["dict_size"] and conf_dict["seq_len"].
        args: parsed run arguments. Reads use_cuda, vocab_path, batch_size,
            seq_len, init_checkpoint, task_mode, compute_accuracy, lamda,
            verbose_result and test_result_path.
    """
    logging.info("start test process ...")
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):

        vocab = utils.load_vocab(args.vocab_path)
        simnet_process = reader.SimNetProcessor(args, vocab)
        test_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        get_test_examples = simnet_process.get_reader("test")
        test_loader.set_sample_list_generator(
            paddle.batch(
                get_test_examples, batch_size=args.batch_size), place)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)

        model, _ = load_dygraph(args.init_checkpoint)
        net.set_dict(model)
        # Score in eval mode, matching the evaluation loops in train(),
        # which call net.eval() before running the network.
        net.eval()
        metric = fluid.metrics.Auc(name="auc")
        pred_list = []
        with io.open(
                "predictions.txt", "w", encoding="utf8") as predictions_file:
            if args.task_mode == "pairwise":
                for left, pos_right in test_loader():
                    left = fluid.layers.reshape(left, shape=[-1, 1])
                    pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])

                    _, pos_score = net(left, pos_right)
                    scores = pos_score.numpy()

                    pred_list += [float(item[0]) for item in scores]
                    # The file gets the rescaled score (s + 1) / 2, while
                    # pred_list keeps the raw score and is rescaled below.
                    predictions_file.write(u"\n".join(
                        [str((item[0] + 1) / 2) for item in scores]) + "\n")

            else:
                for left, right in test_loader():
                    left = fluid.layers.reshape(left, shape=[-1, 1])
                    right = fluid.layers.reshape(right, shape=[-1, 1])
                    _, pred = net(left, right)
                    scores = pred.numpy()

                    # NOTE(review): only the first column of each row is
                    # kept for the metrics, while the file gets the argmax
                    # class -- confirm this is what Auc/get_accuracy expect.
                    pred_list += [float(item[0]) for item in scores]
                    predictions_file.write(u"\n".join(
                        [str(np.argmax(item)) for item in scores]) + "\n")

            if args.task_mode == "pairwise":
                pred_list = np.array(pred_list).reshape((-1, 1))
                pred_list = (pred_list + 1) / 2
                pred_list = np.hstack(
                    (np.ones_like(pred_list) - pred_list, pred_list))
            else:
                pred_list = np.array(pred_list)
            labels = simnet_process.get_test_label()

            metric.update(pred_list, labels)
            if args.compute_accuracy:
                acc = utils.get_accuracy(pred_list, labels, args.task_mode,
                                         args.lamda)
                logging.info("AUC of test is %f, Accuracy of test is %f" %
                             (metric.eval(), acc))
            else:
                logging.info("AUC of test is %f" % metric.eval())

        if args.verbose_result:
            utils.get_result_file(args)
            logging.info("test result saved in %s" %
                         os.path.join(os.getcwd(), args.test_result_path))
王肖 已提交
400 401 402 403 404 405 406 407 408 409 410 411


def infer(conf_dict, args):
    """
    Run prediction on the inference data and write the results to a file.

    Loads the network named in conf_dict, restores the parameters from
    args.init_checkpoint, scores every batch from the inference reader and
    writes "<input line>\\t<prediction>" rows to args.infer_result_path.

    Args:
        conf_dict: model configuration mapping. Reads conf_dict["net"];
            writes conf_dict["dict_size"] and conf_dict["seq_len"].
        args: parsed run arguments. Reads use_cuda, vocab_path, batch_size,
            seq_len, init_checkpoint, task_mode and infer_result_path.
    """
    logging.info("start infer process ...")
    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        vocab = utils.load_vocab(args.vocab_path)
        simnet_process = reader.SimNetProcessor(args, vocab)
        get_infer_examples = simnet_process.get_infer_reader
        infer_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        infer_loader.set_sample_list_generator(
            paddle.batch(
                get_infer_examples, batch_size=args.batch_size), place)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = utils.import_class("./nets", conf_dict["net"]["module_name"],
                                 conf_dict["net"]["class_name"])(conf_dict)
        model, _ = load_dygraph(args.init_checkpoint)
        net.set_dict(model)
        # Score in eval mode, matching the evaluation loops in train(),
        # which call net.eval() before running the network.
        net.eval()

        pred_list = []
        if args.task_mode == "pairwise":
            for left, pos_right in infer_loader():
                left = fluid.layers.reshape(left, shape=[-1, 1])
                pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])

                _, pos_score = net(left, pos_right)
                # Emit the rescaled score (s + 1) / 2 as text.
                pred_list += [
                    str((item[0] + 1) / 2) for item in pos_score.numpy()
                ]

        else:
            for left, right in infer_loader():
                left = fluid.layers.reshape(left, shape=[-1, 1])
                # Reshape `right` itself so the network receives the
                # reshaped tensor, the same way `left` is handled.
                right = fluid.layers.reshape(right, shape=[-1, 1])
                _, pred = net(left, right)
                # Emit the index of the highest-scoring column as text.
                pred_list += [str(np.argmax(item)) for item in pred.numpy()]

        with io.open(
                args.infer_result_path, "w", encoding="utf8") as infer_file:
            for _data, _pred in zip(simnet_process.get_infer_data(), pred_list):
                infer_file.write(_data + "\t" + _pred + "\n")
        logging.info("infer result saved in %s" %
                     os.path.join(os.getcwd(), args.infer_result_path))
王肖 已提交
458 459 460 461 462 463 464 465 466


def get_cards():
    """
    Return the number of comma-separated entries in the
    CUDA_VISIBLE_DEVICES environment variable, or 0 when it is unset
    or empty.
    """
    visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    return len(visible.split(",")) if visible != '' else 0

467

王肖 已提交
468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
if __name__ == "__main__":
    # Parse the run arguments and echo them.
    args = ArgConfig().build_conf()
    utils.print_arguments(args)

    # Environment checks, then logging and model configuration.
    check_cuda(args.use_cuda)
    check_version()
    utils.init_log("./log/TextSimilarityNet")
    conf_dict = config.SimNetConfig(args)

    # Pick exactly one entry point; train takes precedence over test,
    # and test over infer.
    if args.do_train:
        runner = train
    elif args.do_test:
        runner = test
    elif args.do_infer:
        runner = infer
    else:
        raise ValueError(
            "one of do_train and do_test and do_infer must be True")
    runner(conf_dict, args)