main.py 8.7 KB
Newer Older
D
dengkaipeng 已提交
1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Y
Yang Zhang 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
Y
Yang Zhang 已提交
16
from __future__ import print_function
Y
Yang Zhang 已提交
17 18 19 20 21 22 23

import argparse
import contextlib
import os

import numpy as np

D
dengkaipeng 已提交
24 25 26
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.io import DataLoader
Y
Yang Zhang 已提交
27

D
dengkaipeng 已提交
28 29
from model import Model, Input, set_device
from distributed import DistributedBatchSampler
D
dengkaipeng 已提交
30 31 32 33
from modeling import yolov3_darknet53, YoloLoss
from coco_metric import COCOMetric
from coco import COCODataset
from transforms import *
D
dengkaipeng 已提交
34

D
dengkaipeng 已提交
35
NUM_MAX_BOXES = 50
Y
Yang Zhang 已提交
36 37


D
dengkaipeng 已提交
38
def make_optimizer(step_per_epoch, parameter_list=None):
    """Build the Momentum optimizer used for YOLOv3 training.

    The peak learning rate is read from the module-level ``FLAGS.lr``. The
    schedule is a piecewise decay that multiplies the rate by 0.1 at epochs
    200 and 250 (converted to step counts with ``step_per_epoch``), wrapped
    in a linear warmup from 0.0 to the peak rate over the first 1000 steps.

    Args:
        step_per_epoch: number of optimizer steps in one epoch; used to turn
            the decay epochs into step boundaries.
        parameter_list: forwarded unchanged to ``fluid.optimizer.Momentum``.

    Returns:
        A ``fluid.optimizer.Momentum`` instance with momentum 0.9 and an
        L2 regularizer of coefficient 5e-4.
    """
    peak_lr = FLAGS.lr

    # Step indices at which the learning rate drops, and the rate used in
    # each of the resulting stages (one more stage than boundaries).
    decay_steps = [step_per_epoch * epoch for epoch in (200, 250)]
    stage_rates = [
        peak_lr * (0.1 ** stage) for stage in range(len(decay_steps) + 1)
    ]

    schedule = fluid.layers.linear_lr_warmup(
        learning_rate=fluid.layers.piecewise_decay(
            boundaries=decay_steps, values=stage_rates),
        warmup_steps=1000,
        start_lr=0.0,
        end_lr=peak_lr)

    return fluid.optimizer.Momentum(
        learning_rate=schedule,
        regularization=fluid.regularizer.L2Decay(5e-4),
        momentum=0.9,
        parameter_list=parameter_list)


def main():
    """Train YOLOv3, or only evaluate it when ``FLAGS.eval_only`` is set.

    All options are read from the module-level ``FLAGS`` namespace.

    Evaluation mode: builds the val2017 dataset and loader (batch size 1),
    loads ``FLAGS.weights`` when given (otherwise the model is created with
    ``pretrained=True``), runs prediction, and feeds the predicted boxes to
    ``COCOMetric``.

    Training mode: builds the train2017 dataset and loader with mixup
    enabled, optionally resumes from ``FLAGS.resume``, trains for
    ``FLAGS.epoch - FLAGS.no_mixup_epoch`` epochs, then disables mixup on
    the dataset and trains for a further ``FLAGS.no_mixup_epoch`` epochs.
    Checkpoints go to ``yolo_checkpoint/mixup`` and
    ``yolo_checkpoint/no_mixup`` respectively.
    """
    val_anno_file = 'annotations/instances_val2017.json'

    device = set_device(FLAGS.device)
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    inputs = [
        Input([None, 3], 'int32', name='img_info'),
        Input([None, 3, None, None], 'float32', name='image'),
    ]
    labels = [
        Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
        Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
        Input([None, NUM_MAX_BOXES], 'float32', name='gt_score'),
    ]

    if not FLAGS.eval_only:  # training mode
        train_transform = Compose([ColorDistort(),
                                   RandomExpand(),
                                   RandomCrop(),
                                   RandomFlip(),
                                   NormalizeBox(),
                                   PadBox(),
                                   BboxXYXY2XYWH()])
        collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path='annotations/instances_train2017.json',
                              image_dir='train2017',
                              with_background=False,
                              mixup=True,
                              transform=train_transform)
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=FLAGS.batch_size,
                                                shuffle=True,
                                                drop_last=True)
    else:  # evaluation mode
        eval_transform = Compose([ResizeImage(target_size=608),
                                  NormalizeBox(),
                                  PadBox(),
                                  BboxXYXY2XYWH()])
        collate_fn = BatchCompose([NormalizeImage()])
        dataset = COCODataset(dataset_dir=FLAGS.data,
                              anno_path=val_anno_file,
                              image_dir='val2017',
                              with_background=False,
                              transform=eval_transform)
        # batch_size can only be 1 in evaluation for YOLOv3
        # prediction bbox is a LoDTensor
        batch_sampler = DistributedBatchSampler(dataset,
                                                batch_size=1,
                                                shuffle=False,
                                                drop_last=False)

    loader = _make_loader(dataset, batch_sampler, collate_fn, device,
                          inputs + labels)

    # Only fall back to the built-in pretrained weights when evaluating
    # without an explicit weights file.
    pretrained = FLAGS.eval_only and FLAGS.weights is None
    model = yolov3_darknet53(
        num_classes=dataset.num_classes,
        model_mode='eval' if FLAGS.eval_only else 'train',
        pretrained=pretrained)

    if FLAGS.pretrain_weights is not None:
        model.load(FLAGS.pretrain_weights,
                   skip_mismatch=True,
                   reset_optimizer=True)

    optim = make_optimizer(len(batch_sampler),
                           parameter_list=model.parameters())

    model.prepare(optim,
                  YoloLoss(num_classes=dataset.num_classes),
                  inputs=inputs, labels=labels,
                  device=FLAGS.device)

    # NOTE: we implement COCO metric of YOLOv3 model here, separately
    # from 'prepare' and 'fit' framework for the following reasons:
    # 1. YOLOv3 network structure is different between 'train' and
    # 'eval' mode; in 'eval' mode, the output prediction bbox is not the
    # feature map used for YoloLoss calculation
    # 2. COCO metric behavior is also different from a defined Metric,
    # for COCO metric should not accumulate in each iteration
    # but only accumulate at the end of an epoch
    if FLAGS.eval_only:
        if FLAGS.weights is not None:
            model.load(FLAGS.weights, reset_optimizer=True)
        preds = model.predict(loader, stack_outputs=False)
        _, _, _, img_ids, bboxes = preds

        anno_path = os.path.join(FLAGS.data, val_anno_file)
        coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
        for img_id, bbox in zip(img_ids, bboxes):
            coco_metric.update(img_id, bbox)
        coco_metric.accumulate()
        coco_metric.reset()
        return

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    model.fit(train_data=loader,
              epochs=FLAGS.epoch - FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/mixup",
              save_freq=10)

    # do not use the image mixup transform in the last
    # FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
              save_dir="yolo_checkpoint/no_mixup",
              save_freq=5)


def _make_loader(dataset, batch_sampler, collate_fn, device, feed_inputs):
    """Create the DataLoader shared by the training and evaluation paths.

    Args:
        dataset: dataset object passed straight to ``DataLoader``.
        batch_sampler: sampler that yields the batches.
        collate_fn: batch-level transform applied by the loader.
        device: place returned by ``set_device``.
        feed_inputs: ``Input`` descriptions (images followed by labels);
            their ``forward()`` results become ``feed_list``.

    Returns:
        The configured ``DataLoader``.
    """
    # Feed variables are only built when dygraph mode is off; with
    # FLAGS.dynamic set, feed_list is passed as None.
    if FLAGS.dynamic:
        feed_list = None
    else:
        feed_list = [item.forward() for item in feed_inputs]

    return DataLoader(dataset,
                      batch_sampler=batch_sampler,
                      places=device,
                      feed_list=feed_list,
                      num_workers=FLAGS.num_workers,
                      return_list=True,
                      collate_fn=collate_fn)
Y
Yang Zhang 已提交
175 176 177


if __name__ == '__main__':
    # Script entry point: parse the command line into the module-level FLAGS
    # namespace (read by make_optimizer and main), validate it, then run.
    parser = argparse.ArgumentParser("Yolov3 Training on VOC")
    parser.add_argument(
        "--data", type=str, default='dataset/voc',
        help="path to dataset directory")
    parser.add_argument(
        "--device", type=str, default='gpu', help="device to use, gpu or cpu")
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
        "--eval_only", action='store_true', help="run evaluation only")
    parser.add_argument(
        "-e", "--epoch", default=300, type=int, help="number of epoch")
    parser.add_argument(
        "--no_mixup_epoch", default=30, type=int,
        help="number of the last N epoch without image mixup")
    parser.add_argument(
        '--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
        help='initial learning rate')
    parser.add_argument(
        "-b", "--batch_size", default=8, type=int, help="batch size")
    parser.add_argument(
        "-j", "--num_workers", default=4, type=int, help="reader worker number")
    parser.add_argument(
        "-p", "--pretrain_weights", default=None, type=str,
        help="path to pretrained weights")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,
        help="path to model weights")
    parser.add_argument(
        "-w", "--weights", default=None, type=str,
        help="path to weights for evaluation")
    FLAGS = parser.parse_args()

    # Validate explicitly rather than with `assert`, which is removed when
    # Python runs with -O and would let an empty --data through.
    if not FLAGS.data:
        parser.error("must provide data path")

    main()