# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
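
# Train or evaluate a YOLOv3 + DarkNet-53 detector on COCO using the
# high-level Model API. Illustrative invocations (paths and checkpoint
# names below are examples, not prescribed by this script):
#
#   python main.py --data dataset/coco --batch_size 8
#   python main.py --data dataset/coco --eval_only --weights <checkpoint>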

from __future__ import division
from __future__ import print_function

import argparse
import os

from paddle import fluid
from paddle.fluid.io import DataLoader

from model import Model, Input, set_device
from distributed import DistributedBatchSampler
from models import yolov3_darknet53, YoloLoss

from coco_metric import COCOMetric
from coco import COCODataset
from transforms import *

NUM_MAX_BOXES = 50


def make_optimizer(step_per_epoch, parameter_list=None):
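    """Build a Momentum optimizer with piecewise LR decay and linear warmup.

    The base learning rate FLAGS.lr decays by 10x at epochs 200 and 250
    (scaled by step_per_epoch) and is linearly warmed up from 0 over the
    first 1000 iterations; an L2 weight decay of 5e-4 is applied.
    """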
    base_lr = FLAGS.lr
    warm_up_iter = 1000
    momentum = 0.9
    weight_decay = 5e-4
    boundaries = [step_per_epoch * e for e in [200, 250]]
    values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
    learning_rate = fluid.layers.piecewise_decay(
        boundaries=boundaries,
        values=values)
    learning_rate = fluid.layers.linear_lr_warmup(
        learning_rate=learning_rate,
        warmup_steps=warm_up_iter,
        start_lr=0.0,
        end_lr=base_lr)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        regularization=fluid.regularizer.L2Decay(weight_decay),
        momentum=momentum,
        parameter_list=parameter_list)
    return optimizer


def main():
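    """Train YOLOv3-DarkNet53 on COCO, or evaluate it when FLAGS.eval_only is set."""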
    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)
    
    inputs = [Input([None, 1], 'int64', name='img_id'),
              Input([None, 2], 'int32', name='img_shape'),
              Input([None, 3, None, None], 'float32', name='image')]
    labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
              Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
              Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]

    if not FLAGS.eval_only: # training mode
        train_transform = Compose([ColorDistort(),
                                   RandomExpand(),
                                   RandomCrop(),
                                   RandomFlip(),
                                   NormalizeBox(),
                                   PadBox(),
                                   BboxXYXY2XYWH()])
        train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=train_transform)
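        # Distributed batch sampler: shuffle each epoch and drop the last
        # incomplete batch so every replica runs the same number of steps.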
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=FLAGS.batch_size,
                                                shuffle=True,
                                                drop_last=True)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=train_collate_fn)
    else: # evaluation mode
        eval_transform = Compose([ResizeImage(target_size=608),
                                  NormalizeBox(),
                                  PadBox(),
                                  BboxXYXY2XYWH()])
        eval_collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_val2017.json',
                              image_dir='val2017',
                              with_background=False,
                              transform=eval_transform)
        # batch_size must be 1 in evaluation for YOLOv3, since the
        # predicted bbox is returned as a LoDTensor
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=1,
                                                shuffle=False,
                                                drop_last=False)
        loader = DataLoader(dataset,
                            batch_sampler=batch_sampler,
                            places=device,
                            num_workers=FLAGS.num_workers,
                            return_list=True,
                            collate_fn=eval_collate_fn)

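    # Use the model zoo's pretrained weights only when evaluating without
    # user-supplied weights (assumes yolov3_darknet53 fetches its released
    # weights when pretrained=True).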
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(num_classes=dataset.num_classes,
                             model_mode='eval' if FLAGS.eval_only else 'train',
                             pretrained=pretrained)

    if FLAGS.pretrain_weights is not None:
        model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)

    optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())

    model.prepare(optim,
                  YoloLoss(num_classes=dataset.num_classes),
                  inputs=inputs, labels=labels,
                  device=FLAGS.device)

    # NOTE: the COCO metric for the YOLOv3 model is computed here, outside
    # the 'prepare'/'fit' framework, for the following reasons:
    # 1. The YOLOv3 network structure differs between 'train' and 'eval'
    #    mode: in 'eval' mode the output is the predicted bboxes, not the
    #    feature maps used to compute YoloLoss.
    # 2. The COCO metric also behaves differently from the framework's
    #    Metric: it should not accumulate on every iteration, only once
    #    at the end of an epoch.
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

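        # Compute COCO mAP once over the predictions collected for the
        # whole validation set.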
        anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json')
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

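    # Phase 1: train with image mixup enabled for the first
    # (FLAGS.epoch - FLAGS.no_mixup_epoch) epochs.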
    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/mixup",
              save_freq=10)

    # Phase 2: disable image mixup for the last FLAGS.no_mixup_epoch epochs.
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/no_mixup",
              save_freq=5)


if __name__ == '__main__':
    parser = argparse.ArgumentParser("YOLOv3 Training on COCO")
    parser.add_argument(
        "--data", type=str, default='dataset/coco',
        help="path to COCO dataset directory")
    parser.add_argument(
        "--device", type=str, default='gpu', help="device to use, gpu or cpu")
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
        "--eval_only", action='store_true', help="run evaluation only")
    parser.add_argument(
        "-e", "--epoch", default=300, type=int, help="number of epoch")
    parser.add_argument(
        "--no_mixup_epoch", default=30, type=int,
        help="number of the last N epoch without image mixup")
    parser.add_argument(
        '--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
        help='initial learning rate')
    parser.add_argument(
        "-b", "--batch_size", default=8, type=int, help="batch size")
    parser.add_argument(
        "-j", "--num_workers", default=4, type=int, help="reader worker number")
    parser.add_argument(
        "-p", "--pretrain_weights", default=None, type=str,
        help="path to pretrained weights")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,
        help="path to model weights")
    parser.add_argument(
        "-w", "--weights", default=None, type=str,
        help="path to weights for evaluation")
    FLAGS = parser.parse_args()
    assert FLAGS.data, "error: must provide data path"
    main()