# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
#   
# Licensed under the Apache License, Version 2.0 (the "License");   
# you may not use this file except in compliance with the License.  
# You may obtain a copy of the License at   
#   
#     http://www.apache.org/licenses/LICENSE-2.0    
#   
# Unless required by applicable law or agreed to in writing, software   
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
# See the License for the specific language governing permissions and   
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
20
import sys
K
Kaipeng Deng 已提交
21
import datetime
22
import six
23 24
import copy
import json
K
Kaipeng Deng 已提交
25

26
import paddle
W
wangguanzhong 已提交
27
import paddle.distributed as dist
K
Kaipeng Deng 已提交
28

29
from ppdet.utils.checkpoint import save_model, save_semi_model
30
from ppdet.metrics import get_infer_results
K
Kaipeng Deng 已提交
31 32

from ppdet.utils.logger import setup_logger
33
logger = setup_logger('ppdet.engine')
K
Kaipeng Deng 已提交
34

S
shangliang Xu 已提交
35 36 37 38
__all__ = [
    'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer',
    'VisualDLWriter', 'SniperProposalsGenerator'
]


class Callback(object):
    """Base class for hooks invoked around steps, epochs and training.

    Every hook is a no-op here; subclasses override the ones they need.
    Each hook receives ``status``, which the callbacks in this file read
    like a dict.
    """

    def __init__(self, model):
        # Object the hooks operate on; stored as given.
        self.model = model

    def on_step_begin(self, status):
        """Hook called before a step. Does nothing by default."""
        pass

    def on_step_end(self, status):
        """Hook called after a step. Does nothing by default."""
        pass

    def on_epoch_begin(self, status):
        """Hook called before an epoch. Does nothing by default."""
        pass

    def on_epoch_end(self, status):
        """Hook called after an epoch. Does nothing by default."""
        pass

    def on_train_begin(self, status):
        """Hook called before training. Does nothing by default."""
        pass

    def on_train_end(self, status):
        """Hook called after training. Does nothing by default."""
        pass


class ComposeCallback(object):
    """Fan a hook call out to a list of callbacks, in the order given."""

    def __init__(self, callbacks):
        """Store the non-``None`` entries of ``callbacks``.

        Args:
            callbacks (iterable): Callback instances; ``None`` entries are
                dropped.

        Raises:
            AssertionError: if a remaining entry is not a ``Callback``.
        """
        callbacks = [c for c in list(callbacks) if c is not None]
        for c in callbacks:
            assert isinstance(
                c, Callback), "callback should be subclass of Callback"
        self._callbacks = callbacks

    def on_step_begin(self, status):
        """Forward ``on_step_begin`` to every callback."""
        for c in self._callbacks:
            c.on_step_begin(status)

    def on_step_end(self, status):
        """Forward ``on_step_end`` to every callback."""
        for c in self._callbacks:
            c.on_step_end(status)

    def on_epoch_begin(self, status):
        """Forward ``on_epoch_begin`` to every callback."""
        for c in self._callbacks:
            c.on_epoch_begin(status)

    def on_epoch_end(self, status):
        """Forward ``on_epoch_end`` to every callback."""
        for c in self._callbacks:
            c.on_epoch_end(status)

    def on_train_begin(self, status):
        """Forward ``on_train_begin`` to every callback."""
        for c in self._callbacks:
            c.on_train_begin(status)

    def on_train_end(self, status):
        """Forward ``on_train_end`` to every callback."""
        for c in self._callbacks:
            c.on_train_end(status)


class LogPrinter(Callback):
    """Log training progress and evaluation throughput through ``logger``.

    Output is restricted to the main process: it is emitted only when the
    world size is below 2 or the current rank is 0.
    """

    def __init__(self, model):
        super(LogPrinter, self).__init__(model)

    def on_step_end(self, status):
        """Log a progress line after a step.

        In ``'train'`` mode a line is logged whenever ``step_id`` is a
        multiple of ``cfg.log_iter``; in ``'eval'`` mode a short message is
        logged every 100 steps.

        Args:
            status (dict): must contain ``'mode'``. For training it must
                also contain ``'epoch_id'``, ``'step_id'``,
                ``'steps_per_epoch'``, ``'training_staus'``,
                ``'batch_time'``, ``'data_time'`` and ``'learning_rate'``;
                for evaluation ``'step_id'``.
        """
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'train':
                epoch_id = status['epoch_id']
                step_id = status['step_id']
                steps_per_epoch = status['steps_per_epoch']
                # 'training_staus' (sic) is the key used by the callers.
                training_staus = status['training_staus']
                batch_time = status['batch_time']
                data_time = status['data_time']

                epoches = self.model.cfg.epoch
                # mode is 'train' here, so this reads cfg['TrainReader'].
                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
                ))]['batch_size']

                logs = training_staus.log()
                # Pad the step counter to the width of steps_per_epoch.
                space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
                if step_id % self.model.cfg.log_iter == 0:
                    # Remaining steps over all remaining epochs.
                    eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
                    eta_sec = eta_steps * batch_time.global_avg
                    eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                    ips = float(batch_size) / batch_time.avg
                    fmt = ' '.join([
                        'Epoch: [{}]',
                        '[{' + space_fmt + '}/{}]',
                        'learning_rate: {lr:.6f}',
                        '{meters}',
                        'eta: {eta}',
                        'batch_cost: {btime}',
                        'data_cost: {dtime}',
                        'ips: {ips:.4f} images/s',
                    ])
                    fmt = fmt.format(
                        epoch_id,
                        step_id,
                        steps_per_epoch,
                        lr=status['learning_rate'],
                        meters=logs,
                        eta=eta_str,
                        btime=str(batch_time),
                        dtime=str(data_time),
                        ips=ips)
                    logger.info(fmt)
            if mode == 'eval':
                step_id = status['step_id']
                if step_id % 100 == 0:
                    logger.info("Eval iter: {}".format(step_id))

    def on_epoch_end(self, status):
        """Log sample count and average FPS after an evaluation epoch.

        Args:
            status (dict): must contain ``'mode'``; in ``'eval'`` mode also
                ``'sample_num'`` and ``'cost_time'``.
        """
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            mode = status['mode']
            if mode == 'eval':
                sample_num = status['sample_num']
                cost_time = status['cost_time']
                logger.info('Total sample number: {}, average FPS: {}'.format(
                    sample_num, sample_num / cost_time))


class Checkpointer(Callback):
    """Save model weights at snapshot epochs and when evaluation improves.

    Saving happens only on the main process (world size below 2 or rank 0).
    """

    def __init__(self, model):
        super(Checkpointer, self).__init__(model)
        # Best score seen so far; starts below any expected metric value.
        self.best_ap = -1000.
        self.save_dir = os.path.join(self.model.cfg.save_dir,
                                     self.model.cfg.filename)
        # Prefer the wrapped student network when the model exposes one.
        if hasattr(self.model.model, 'student_model'):
            self.weight = self.model.model.student_model
        else:
            self.weight = self.model.model

    def on_epoch_end(self, status):
        """Decide whether to save a checkpoint and, if so, save it.

        In ``'train'`` mode a checkpoint is written every
        ``cfg.snapshot_epoch`` epochs and at the final epoch (named
        ``"model_final"``). In ``'eval'`` mode, when
        ``status['save_best_model']`` is truthy, ``"best_model"`` is written
        whenever a metric's first result value is at least ``best_ap``.

        Args:
            status (dict): must contain ``'mode'`` and ``'epoch_id'``;
                ``'weight'`` is read when ``self.model.use_ema`` is truthy,
                and ``'save_best_model'`` / ``'exchange_save_model'`` are
                optional.
        """
        mode = status['mode']
        epoch_id = status['epoch_id']
        weight = None
        save_name = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                end_epoch = self.model.cfg.epoch
                is_last_epoch = epoch_id == end_epoch - 1
                if (epoch_id + 1
                    ) % self.model.cfg.snapshot_epoch == 0 or is_last_epoch:
                    save_name = "model_final" if is_last_epoch else str(
                        epoch_id)
                    weight = self.weight.state_dict()
            elif mode == 'eval':
                if 'save_best_model' in status and status['save_best_model']:
                    for metric in self.model._metrics:
                        map_res = metric.get_results()
                        eval_func = "ap"
                        # Pick the result key to track, in priority order.
                        if 'pose3d' in map_res:
                            key = 'pose3d'
                            eval_func = "mpjpe"
                        elif 'bbox' in map_res:
                            key = 'bbox'
                        elif 'keypoint' in map_res:
                            key = 'keypoint'
                        else:
                            key = 'mask'
                        if key not in map_res:
                            logger.warning("Evaluation results empty, this may be due to " \
                                        "training iterations being too few or not " \
                                        "loading the correct weights.")
                            return
                        if map_res[key][0] >= self.best_ap:
                            self.best_ap = map_res[key][0]
                            save_name = 'best_model'
                            weight = self.weight.state_dict()
                        logger.info("Best test {} {} is {:0.3f}.".format(
                            key, eval_func, abs(self.best_ap)))
            if weight:
                if self.model.use_ema:
                    exchange_save_model = status.get('exchange_save_model',
                                                     False)
                    if not exchange_save_model:
                        # save model and ema_model
                        save_model(
                            status['weight'],
                            self.model.optimizer,
                            self.save_dir,
                            save_name,
                            epoch_id + 1,
                            ema_model=weight)
                    else:
                        # save model(student model) and ema_model(teacher model)
                        # in DenseTeacher SSOD, the teacher model will be higher,
                        # so exchange when saving pdparams
                        student_model = status['weight']  # model
                        teacher_model = weight  # ema_model
                        save_model(
                            teacher_model,
                            self.model.optimizer,
                            self.save_dir,
                            save_name,
                            epoch_id + 1,
                            ema_model=student_model)
                else:
                    save_model(weight, self.model.optimizer, self.save_dir,
                               save_name, epoch_id + 1)


class WiferFaceEval(Callback):
    """Evaluation-only callback that runs the metrics and then exits.

    Construction is inherited unchanged from ``Callback``.
    """

    def on_epoch_begin(self, status):
        """Update every metric with the model, then terminate the process.

        Raises:
            AssertionError: if the owning object is not in ``'eval'`` mode.
            SystemExit: always, once all metrics have been updated.
        """
        trainer = self.model
        assert trainer.mode == 'eval', \
            "WiferFaceEval can only be set during evaluation"
        for face_metric in trainer._metrics:
            face_metric.update(trainer.model)
        # Evaluation is complete at this point; stop the program here.
        sys.exit()


class VisualDLWriter(Callback):
    """
    Use VisualDL to log data or image
    """

    def __init__(self, model):
        """Create the VisualDL writer and reset the step counters.

        Raises:
            AssertionError: when not running under Python 3.
            Exception: re-raised unchanged if ``visualdl`` cannot be imported.
        """
        super(VisualDLWriter, self).__init__(model)

        assert six.PY3, "VisualDL requires Python >= 3.5"
        try:
            from visualdl import LogWriter
        except Exception:
            logger.error('visualdl not found, please install visualdl. '
                         'for example: `pip install visualdl`.')
            # Bare raise keeps the original traceback.
            raise
        self.vdl_writer = LogWriter(
            model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
        # Independent step counters for losses, mAP values and images.
        self.vdl_loss_step = 0
        self.vdl_mAP_step = 0
        self.vdl_image_step = 0
        self.vdl_image_frame = 0

    def on_step_end(self, status):
        """Write training scalars or test images for the finished step.

        Args:
            status (dict): must contain ``'mode'``; ``'training_staus'`` in
                ``'train'`` mode; ``'original_image'`` and
                ``'result_image'`` in ``'test'`` mode.
        """
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                training_staus = status['training_staus']
                for loss_name, loss_value in training_staus.get().items():
                    self.vdl_writer.add_scalar(loss_name, loss_value,
                                               self.vdl_loss_step)
                self.vdl_loss_step += 1
            elif mode == 'test':
                ori_image = status['original_image']
                result_image = status['result_image']
                self.vdl_writer.add_image(
                    "original/frame_{}".format(self.vdl_image_frame), ori_image,
                    self.vdl_image_step)
                self.vdl_writer.add_image(
                    "result/frame_{}".format(self.vdl_image_frame),
                    result_image, self.vdl_image_step)
                self.vdl_image_step += 1
                # each frame can display ten pictures at most.
                if self.vdl_image_step % 10 == 0:
                    self.vdl_image_step = 0
                    self.vdl_image_frame += 1

    def on_epoch_end(self, status):
        """Write one ``"<key>-mAP"`` scalar per metric result after eval.

        Args:
            status (dict): must contain ``'mode'``.
        """
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'eval':
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        self.vdl_writer.add_scalar("{}-mAP".format(key),
                                                   map_value[0],
                                                   self.vdl_mAP_step)
                self.vdl_mAP_step += 1

313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331
class WandbCallback(Callback):
    """Log metrics and saved checkpoints to Weights & Biases (wandb).

    All wandb calls are restricted to the main process (world size below 2
    or rank 0).
    """

    def __init__(self, model):
        """Collect the wandb settings and start the run on the main process.

        Settings come from ``cfg['wandb']`` (if present) plus every config
        entry whose key starts with ``"wandb_"``, with that prefix removed.

        Raises:
            Exception: re-raised unchanged if ``wandb`` cannot be imported.
        """
        super(WandbCallback, self).__init__(model)

        try:
            import wandb
            self.wandb = wandb
        except Exception:
            logger.error('wandb not found, please install wandb. '
                         'Use: `pip install wandb`.')
            # Bare raise keeps the original traceback.
            raise

        self.wandb_params = model.cfg.get('wandb', None)
        self.save_dir = os.path.join(self.model.cfg.save_dir,
                                     self.model.cfg.filename)
        if self.wandb_params is None:
            self.wandb_params = {}
        prefix = "wandb_"
        for k, v in model.cfg.items():
            if k.startswith(prefix):
                # Slice the prefix off. str.lstrip() would strip any of the
                # characters "w", "a", "n", "d", "b", "_" and mangle keys
                # such as "wandb_name".
                self.wandb_params.update({k[len(prefix):]: v})

        self._run = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            run = self.run  # creates or adopts the wandb run
            run.config.update(self.model.cfg)
            run.define_metric("epoch")
            run.define_metric("eval/*", step_metric="epoch")

        self.best_ap = -1000.
        # Per-step images/s values collected during the current epoch.
        self.fps = []

    @property
    def run(self):
        """Return the wandb run, creating it on first access.

        An already active ``wandb.run`` is reused; otherwise a new run is
        started with ``wandb.init(**self.wandb_params)``.
        """
        if self._run is None:
            if self.wandb.run is not None:
                logger.info(
                    "There is an ongoing wandb run which will be used "
                    "for logging. Please use `wandb.finish()` to end that "
                    "if the behaviour is not intended")
                self._run = self.wandb.run
            else:
                self._run = self.wandb.init(**self.wandb_params)
        return self._run

    def save_model(self,
                   optimizer,
                   save_dir,
                   save_name,
                   last_epoch,
                   ema_model=None,
                   ap=None,
                   fps=None,
                   tags=None):
        """Upload an already saved checkpoint as wandb model artifacts.

        Reads ``<save_dir>/<save_name>.pdparams`` and, when ``ema_model`` is
        truthy, also ``<save_dir>/<save_name>.pdema``.

        Args:
            optimizer: unused; kept for interface compatibility.
            save_dir (str): directory holding the checkpoint files.
            save_name (str): checkpoint file name without extension.
            last_epoch (int): stored in the artifact metadata.
            ema_model: truthy when an EMA checkpoint file should be
                uploaded as well.
            ap: optional score stored in the metadata.
            fps: optional throughput stored in the metadata.
            tags (list): aliases attached to the logged artifacts.
        """
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            model_path = os.path.join(save_dir, save_name)
            metadata = {"last_epoch": last_epoch}
            if ap is not None:
                metadata["ap"] = ap
            if fps is not None:
                metadata["fps"] = fps

            # The ".pdema" file is only expected when EMA is in use, so the
            # EMA artifact is uploaded only for a truthy ema_model.
            if ema_model:
                ema_artifact = self.wandb.Artifact(
                    name="ema_model-{}".format(self.run.id),
                    type="model",
                    metadata=metadata)
                ema_artifact.add_file(model_path + ".pdema", name="model_ema")
                self.run.log_artifact(ema_artifact, aliases=tags)

            model_artifact = self.wandb.Artifact(
                name="model-{}".format(self.run.id),
                type="model",
                metadata=metadata)
            model_artifact.add_file(model_path + ".pdparams", name="model")
            self.run.log_artifact(model_artifact, aliases=tags)

    def on_step_end(self, status):
        """Log training statistics and throughput for the finished step.

        Args:
            status (dict): must contain ``'mode'``; in ``'train'`` mode also
                ``'training_staus'``, ``'batch_time'`` and ``'data_time'``.
        """
        mode = status['mode']
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                # Build a fresh dict so the one returned by get() is not
                # modified.
                metrics = {
                    "train/" + k: float(v)
                    for k, v in status['training_staus'].get().items()
                }

                # calculate ips, data_cost, batch_cost
                batch_time = status['batch_time']
                data_time = status['data_time']
                batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
                ))]['batch_size']

                ips = float(batch_size) / float(batch_time.avg)
                metrics["train/ips"] = ips
                metrics["train/data_cost"] = float(data_time.avg)
                metrics["train/batch_cost"] = float(batch_time.avg)

                self.fps.append(ips)
                self.run.log(metrics)

    def on_epoch_end(self, status):
        """Upload checkpoints after training epochs and log eval metrics.

        Args:
            status (dict): must contain ``'mode'`` and ``'epoch_id'``; in
                ``'eval'`` mode also ``'sample_num'`` and ``'cost_time'``;
                ``'save_best_model'`` is optional.
        """
        mode = status['mode']
        epoch_id = status['epoch_id']
        save_name = None
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            if mode == 'train':
                # No recorded step means no throughput to report.
                fps = sum(self.fps) / len(self.fps) if self.fps else None
                self.fps = []

                end_epoch = self.model.cfg.epoch
                is_last_epoch = epoch_id == end_epoch - 1
                if (epoch_id + 1
                    ) % self.model.cfg.snapshot_epoch == 0 or is_last_epoch:
                    save_name = "model_final" if is_last_epoch else str(
                        epoch_id)
                    tags = ["latest", "epoch_{}".format(epoch_id)]
                    self.save_model(
                        self.model.optimizer,
                        self.save_dir,
                        save_name,
                        epoch_id + 1,
                        self.model.use_ema,
                        fps=fps,
                        tags=tags)
            if mode == 'eval':
                sample_num = status['sample_num']
                cost_time = status['cost_time']

                fps = sample_num / cost_time

                merged_dict = {}
                for metric in self.model._metrics:
                    for key, map_value in metric.get_results().items():
                        merged_dict["eval/{}-mAP".format(key)] = map_value[0]
                merged_dict["epoch"] = status["epoch_id"]
                merged_dict["eval/fps"] = fps

                self.run.log(merged_dict)

                if 'save_best_model' in status and status['save_best_model']:
                    for metric in self.model._metrics:
                        map_res = metric.get_results()
                        # Pick the result key to track, in priority order.
                        if 'pose3d' in map_res:
                            key = 'pose3d'
                        elif 'bbox' in map_res:
                            key = 'bbox'
                        elif 'keypoint' in map_res:
                            key = 'keypoint'
                        else:
                            key = 'mask'
                        if key not in map_res:
                            logger.warning("Evaluation results empty, this may be due to " \
                                        "training iterations being too few or not " \
                                        "loading the correct weights.")
                            return
                        if map_res[key][0] >= self.best_ap:
                            self.best_ap = map_res[key][0]
                            save_name = 'best_model'
                            tags = ["best", "epoch_{}".format(epoch_id)]

                            self.save_model(
                                self.model.optimizer,
                                self.save_dir,
                                save_name,
                                last_epoch=epoch_id + 1,
                                ema_model=self.model.use_ema,
                                ap=abs(self.best_ap),
                                fps=fps,
                                tags=tags)

    def on_train_end(self, status):
        """Finish the wandb run on the main process."""
        # Guarded like every other hook: reading self.run on another rank
        # would lazily start a new wandb run just to finish it.
        if dist.get_world_size() < 2 or dist.get_rank() == 0:
            self.run.finish()


class SniperProposalsGenerator(Callback):
    """Run inference on a cropped copy of the dataset after training and
    dump the resulting bbox proposals to ``cfg.proposals_path`` as JSON.
    """

    def __init__(self, model):
        super(SniperProposalsGenerator, self).__init__(model)
        ori_dataset = self.model.dataset
        self.dataset = self._create_new_dataset(ori_dataset)
        self.loader = self.model.loader
        self.cfg = self.model.cfg
        self.infer_model = self.model.model

    def _create_new_dataset(self, ori_dataset):
        """Return a deep copy of ``ori_dataset`` whose roidbs are replaced
        by the records produced by its annotation cropper."""
        dataset = copy.deepcopy(ori_dataset)
        # init anno_cropper
        dataset.init_anno_cropper()
        # generate infer roidbs
        ori_roidbs = dataset.get_ori_roidbs()
        roidbs = dataset.anno_cropper.crop_infer_anno_records(ori_roidbs)
        # set new roidbs
        dataset.set_roidbs(roidbs)

        return dataset

    def _eval_with_loader(self, loader):
        """Run the model over ``loader`` without gradients.

        Returns:
            list: one output dict per batch, extended with the batch's
            ``'im_shape'``, ``'scale_factor'`` and ``'im_id'``; every value
            that has a ``numpy()`` method is converted with it.
        """
        results = []
        with paddle.no_grad():
            self.infer_model.eval()
            for data in loader:
                outs = self.infer_model(data)
                for key in ['im_shape', 'scale_factor', 'im_id']:
                    outs[key] = data[key]
                for key, value in outs.items():
                    if hasattr(value, 'numpy'):
                        outs[key] = value.numpy()

                results.append(outs)

        return results

    def on_train_end(self, status):
        """Generate proposals with the trained model and write them to disk.

        The flat ``'bbox'`` list of each result is split per image using
        ``outs['bbox_num']``; all non-empty slices are concatenated and
        written to ``cfg.proposals_path``.
        """
        self.loader.dataset = self.dataset
        results = self._eval_with_loader(self.loader)
        results = self.dataset.anno_cropper.aggregate_chips_detections(results)
        # sniper
        proposals = []
        clsid2catid = {v: k for k, v in self.dataset.catid2clsid.items()}
        for outs in results:
            batch_res = get_infer_results(outs, clsid2catid)
            if 'bbox' not in batch_res:
                continue
            bbox_num = outs['bbox_num']
            start = 0
            for i, _ in enumerate(outs['im_id']):
                end = start + bbox_num[i]
                bbox_res = batch_res['bbox'][start:end]
                if bbox_res:
                    proposals += bbox_res
                # Move the window on so the next image gets its own boxes
                # rather than the first image's again.
                start = end
        logger.info("save proposals in {}".format(self.cfg.proposals_path))
        with open(self.cfg.proposals_path, 'w') as f:
            json.dump(proposals, f)


class SemiLogPrinter(LogPrinter):
    """Progress logger that also reports the global iteration counter.

    Construction is inherited unchanged from ``LogPrinter``.
    """

    def on_step_end(self, status):
        """Log a progress line after a step, on the main process only.

        In ``'train'`` mode a line is logged whenever ``step_id`` is a
        multiple of ``cfg.log_iter``; it starts with ``iter_id`` over the
        total iteration count (``cfg.epoch * steps_per_epoch``). In
        ``'eval'`` mode a short message is logged every 100 steps.
        """
        # Ranks other than the main one stay silent.
        if dist.get_world_size() >= 2 and dist.get_rank() != 0:
            return

        mode = status['mode']
        if mode == 'train':
            cur_epoch = status['epoch_id']
            cur_step = status['step_id']
            cur_iter = status['iter_id']
            epoch_len = status['steps_per_epoch']
            stats = status['training_staus']
            batch_timer = status['batch_time']
            data_timer = status['data_time']

            total_epochs = self.model.cfg.epoch
            reader_key = '{}Reader'.format(mode.capitalize())
            batch_size = self.model.cfg[reader_key]['batch_size']
            total_iters = total_epochs * epoch_len
            meters = stats.log()
            if cur_step % self.model.cfg.log_iter != 0:
                return

            # Both counters are padded to the width of the total iterations.
            counter = '{:' + str(len(str(total_iters))) + 'd}'
            remaining_steps = (total_epochs - cur_epoch) * epoch_len - cur_step
            remaining_sec = remaining_steps * batch_timer.global_avg
            eta_text = str(datetime.timedelta(seconds=int(remaining_sec)))
            images_per_sec = float(batch_size) / batch_timer.avg
            template = ' '.join((
                counter + '/{} iters',
                'Epoch: [{}]',
                '[' + counter + '/{}]',
                'learning_rate: {lr:.6f}',
                '{meters}',
                'eta: {eta}',
                'batch_cost: {btime}',
                'data_cost: {dtime}',
                'ips: {ips:.4f} images/s', ))
            logger.info(
                template.format(
                    cur_iter,
                    total_iters,
                    cur_epoch,
                    cur_step,
                    epoch_len,
                    lr=status['learning_rate'],
                    meters=meters,
                    eta=eta_text,
                    btime=str(batch_timer),
                    dtime=str(data_timer),
                    ips=images_per_sec))
        elif mode == 'eval':
            eval_step = status['step_id']
            if eval_step % 100 == 0:
                logger.info("Eval iter: {}".format(eval_step))


class SemiCheckpointer(Checkpointer):
    """Checkpointer for models exposing both ``teacher`` and ``student``.

    Saves the pair every ``save_interval`` iterations during training and
    as ``best_model`` when an evaluation metric improves.
    """

    def __init__(self, model):
        super(SemiCheckpointer, self).__init__(model)
        cfg = self.model.cfg
        self.best_ap = 0.
        self.save_dir = os.path.join(cfg.save_dir, cfg.filename)

        has_student = hasattr(self.model.model, 'student')
        has_teacher = hasattr(self.model.model, 'teacher')
        if not (has_student or has_teacher):
            raise AttributeError(
                "model has no attribute 'student' and 'teacher'")
        if not (has_student and has_teacher):
            raise AttributeError(
                "model has no attribute 'student' or 'teacher'")
        # Stored as (teacher, student).
        self.weight = (self.model.model.teacher, self.model.model.student)

    def every_n_iters(self, iter_id, n):
        """Return whether ``iter_id + 1`` is a multiple of ``n``.

        Always ``False`` when ``n`` is not positive.
        """
        if n > 0:
            return (iter_id + 1) % n == 0
        return False

    def on_step_end(self, status):
        """Save the teacher/student pair as ``"last_epoch"`` every
        ``save_interval`` training iterations (main process only)."""
        # Every key is read up front, so a missing one raises KeyError even
        # when nothing ends up being saved.
        mode, _, save_interval, iter_id, epoch_id = (
            status[name]
            for name in ('mode', 'eval_interval', 'save_interval', 'iter_id',
                         'epoch_id'))
        if dist.get_world_size() >= 2 and dist.get_rank() != 0:
            return
        if not (self.every_n_iters(iter_id, save_interval) and
                mode == 'train'):
            return

        teacher_state = self.weight[0].state_dict()
        student_state = self.weight[1].state_dict()
        save_semi_model(teacher_state, student_state, self.model.optimizer,
                        self.save_dir, "last_epoch", epoch_id + 1,
                        iter_id + 1)

    def on_epoch_end(self, status):
        """Save the pair as ``"best_model"`` when an evaluation result
        exceeds ``best_ap`` (main process only)."""
        # Every key is read up front, so a missing one raises KeyError even
        # when nothing ends up being saved.
        mode, eval_interval, _, iter_id, epoch_id = (
            status[name]
            for name in ('mode', 'eval_interval', 'save_interval', 'iter_id',
                         'epoch_id'))
        if dist.get_world_size() >= 2 and dist.get_rank() != 0:
            return
        if not (self.every_n_iters(iter_id, eval_interval) and
                mode == 'eval'):
            return
        if not ('save_best_model' in status and status['save_best_model']):
            return

        teacher_state = None
        student_state = None
        best_name = None
        for metric in self.model._metrics:
            results = metric.get_results()
            # First available of 'bbox', 'keypoint'; otherwise 'mask'.
            key = next((name for name in ('bbox', 'keypoint')
                        if name in results), 'mask')
            if key not in results:
                logger.warning("Evaluation results empty, this may be due to " \
                            "training iterations being too few or not " \
                            "loading the correct weights.")
                return
            score = results[key][0]
            if score > self.best_ap:
                self.best_ap = score
                best_name = 'best_model'
                teacher_state = self.weight[0].state_dict()
                student_state = self.weight[1].state_dict()
            logger.info("Best teacher test {} ap is {:0.3f}.".format(
                key, self.best_ap))
        if teacher_state and student_state:
            save_semi_model(teacher_state, student_state,
                            self.model.optimizer, self.save_dir, best_name,
                            epoch_id + 1, iter_id + 1)