# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
Y
Yang Zhang 已提交
16
from __future__ import print_function
Y
Yang Zhang 已提交
17 18 19 20 21 22 23

import argparse
import contextlib
import os

import numpy as np

D
dengkaipeng 已提交
24 25 26
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader
Y
Yang Zhang 已提交
27

D
dengkaipeng 已提交
28 29
from model import Model, Input, set_device
from distributed import DistributedBatchSampler
D
dengkaipeng 已提交
30 31
from models import yolov3_darknet53, YoloLoss

D
dengkaipeng 已提交
32 33 34
from coco_metric import COCOMetric
from coco import COCODataset
from transforms import *
D
dengkaipeng 已提交
35

D
dengkaipeng 已提交
36
# Size of the second dimension of the gt_bbox, gt_label and gt_score label
# inputs declared in main() (presumably the maximum number of ground-truth
# boxes kept per image -- confirm against the PadBox transform).
NUM_MAX_BOXES = 50
Y
Yang Zhang 已提交
37 38


D
dengkaipeng 已提交
39
def make_optimizer(step_per_epoch, parameter_list=None):
    """Create the Momentum optimizer and its learning-rate schedule.

    The schedule is a piecewise decay whose boundaries sit at epochs 200 and
    250 (converted to steps) and whose values are ``FLAGS.lr`` multiplied by
    1, 0.1 and 0.01, wrapped in a linear warm-up from 0.0 to ``FLAGS.lr``
    over the first 1000 steps.

    Args:
        step_per_epoch: number of steps in one epoch; used to turn the epoch
            milestones into step boundaries.
        parameter_list: parameters to optimize, forwarded unchanged to
            ``fluid.optimizer.Momentum``.

    Returns:
        The configured ``fluid.optimizer.Momentum`` instance.
    """
    initial_lr = FLAGS.lr

    # Epoch milestones expressed in optimizer steps.
    milestones = [step_per_epoch * epoch for epoch in (200, 250)]
    # One value per interval: initial_lr, initial_lr * 0.1, initial_lr * 0.01.
    lr_values = [
        initial_lr * (0.1 ** power) for power in range(len(milestones) + 1)
    ]

    schedule = fluid.layers.piecewise_decay(
        boundaries=milestones, values=lr_values)
    schedule = fluid.layers.linear_lr_warmup(
        learning_rate=schedule,
        warmup_steps=1000,
        start_lr=0.0,
        end_lr=initial_lr)

    return fluid.optimizer.Momentum(
        learning_rate=schedule,
        regularization=fluid.regularizer.L2Decay(5e-4),
        momentum=0.9,
        parameter_list=parameter_list)


def main():
    """Train YOLOv3 on COCO, or evaluate it when ``FLAGS.eval_only`` is set.

    All settings are read from the module-level ``FLAGS`` namespace.

    Evaluation mode runs ``model.predict`` over ``val2017`` with batch size 1,
    feeds the predicted boxes to ``COCOMetric`` and returns.

    Training mode runs two ``fit`` phases on ``train2017``: first
    ``FLAGS.epoch - FLAGS.no_mixup_epoch`` epochs with image mixup enabled
    (checkpoints under ``yolo_checkpoint/mixup``), then
    ``FLAGS.no_mixup_epoch`` epochs with mixup disabled (checkpoints under
    ``yolo_checkpoint/no_mixup``).
    """
    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    inputs = [Input([None, 3], 'int32', name='img_info'),
              Input([None, 3, None, None], 'float32', name='image')]
    labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
              Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
              Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]

    eval_anno_path = 'annotations/instances_val2017.json'
    is_training = not FLAGS.eval_only

    if is_training:
        transform = Compose([ColorDistort(),
                             RandomExpand(),
                             RandomCrop(),
                             RandomFlip(),
                             NormalizeBox(),
                             PadBox(),
                             BboxXYXY2XYWH()])
        collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=transform)
        batch_size = FLAGS.batch_size
    else:
        transform = Compose([ResizeImage(target_size=608),
                             NormalizeBox(),
                             PadBox(),
                             BboxXYXY2XYWH()])
        collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path=eval_anno_path,
                              image_dir='val2017',
                              with_background=False,
                              transform=transform)
        # batch_size can only be 1 in evaluation for YOLOv3 because the
        # prediction bbox is a LoDTensor
        batch_size = 1

    # Training shuffles and drops the last incomplete batch; evaluation
    # keeps every sample in its original order.
    batch_sampler = DistributedBatchSampler(dataset,
                                            batch_size=batch_size,
                                            shuffle=is_training,
                                            drop_last=is_training)
    loader = DataLoader(dataset,
                        batch_sampler=batch_sampler,
                        places=device,
                        num_workers=FLAGS.num_workers,
                        return_list=True,
                        collate_fn=collate_fn)

    # Request built-in pretrained weights only when evaluating without an
    # explicit --weights file.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(num_classes=dataset.num_classes,
                             model_mode='eval' if FLAGS.eval_only else 'train',
                             pretrained=pretrained)

    if FLAGS.pretrain_weights is not None:
        model.load(FLAGS.pretrain_weights,
                   skip_mismatch=True,
                   reset_optimizer=True)

    optim = make_optimizer(len(batch_sampler),
                           parameter_list=model.parameters())

    model.prepare(optim,
                  YoloLoss(num_classes=dataset.num_classes),
                  inputs=inputs, labels=labels,
                  device=FLAGS.device)

    # NOTE: we implement the COCO metric of the YOLOv3 model here, separately
    # from the 'prepare' and 'fit' framework, for the following reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    #    mode: in 'eval' mode the output prediction bbox is not the feature
    #    map used for the YoloLoss calculation.
    # 2. The COCO metric behaves differently from a regular Metric: it should
    #    not accumulate in each iteration but only at the end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data, eval_anno_path)
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/mixup",
              save_freq=10)

    # do not use the image mixup transform in the last FLAGS.no_mixup_epoch
    # epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/no_mixup",
              save_freq=5)
Y
Yang Zhang 已提交
172 173 174


def parse_args(argv=None):
    """Parse and validate the command-line options of this script.

    Args:
        argv: list of argument strings to parse. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.

    Raises:
        SystemExit: raised by argparse (exit status 2) when the arguments are
            invalid or the ``--data`` path is empty.
    """
    # NOTE(review): the title and the default --data path mention VOC, while
    # main() loads COCO annotation files -- confirm the intended defaults.
    parser = argparse.ArgumentParser("Yolov3 Training on VOC")
    parser.add_argument(
        "--data", type=str, default='dataset/voc',
        help="path to dataset directory")
    parser.add_argument(
        "--device", type=str, default='gpu', help="device to use, gpu or cpu")
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
        "--eval_only", action='store_true', help="run evaluation only")
    parser.add_argument(
        "-e", "--epoch", default=300, type=int, help="number of epoch")
    parser.add_argument(
        "--no_mixup_epoch", default=30, type=int,
        help="number of the last N epoch without image mixup")
    parser.add_argument(
        '--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
        help='initial learning rate')
    parser.add_argument(
        "-b", "--batch_size", default=8, type=int, help="batch size")
    parser.add_argument(
        "-j", "--num_workers", default=4, type=int, help="reader worker number")
    parser.add_argument(
        "-p", "--pretrain_weights", default=None, type=str,
        help="path to pretrained weights")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,
        help="path to model weights")
    parser.add_argument(
        "-w", "--weights", default=None, type=str,
        help="path to weights for evaluation")

    flags = parser.parse_args(argv)
    # Validate explicitly instead of using ``assert``, which is stripped
    # when Python runs with -O.
    if not flags.data:
        parser.error("must provide data path")
    return flags


if __name__ == '__main__':
    # main() and make_optimizer() read this module-level namespace.
    FLAGS = parse_args()
    main()