提交 83737851 编写于 作者: L LielinJiang

adapt yolo

上级 353c3260
......@@ -27,7 +27,7 @@ from paddle.io import DataLoader
from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.vision.transforms import Compose, BatchCompose
from hapi.vision.transforms import BatchCompose
from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset
......@@ -43,10 +43,9 @@ def make_optimizer(step_per_epoch, parameter_list=None):
momentum = 0.9
weight_decay = 5e-4
boundaries = [step_per_epoch * e for e in [200, 250]]
values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay(
boundaries=boundaries,
values=values)
boundaries=boundaries, values=values)
learning_rate = fluid.layers.linear_lr_warmup(
learning_rate=learning_rate,
warmup_steps=warm_up_iter,
......@@ -63,77 +62,85 @@ def make_optimizer(step_per_epoch, parameter_list=None):
def main():
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')]
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]
if not FLAGS.eval_only: # training mode
train_transform = Compose([ColorDistort(),
RandomExpand(),
RandomCrop(),
RandomFlip(),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
inputs = [
Input(
[None, 1], 'int64', name='img_id'), Input(
[None, 2], 'int32', name='img_shape'), Input(
[None, 3, None, None], 'float32', name='image')
]
labels = [
Input(
[None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), Input(
[None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input(
[None, NUM_MAX_BOXES], 'float32', name='gt_score')
]
if not FLAGS.eval_only: # training mode
train_transform = Compose([
ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
NormalizeBox(), PadBox(), BboxXYXY2XYWH()
])
train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
anno_path='annotations/instances_train2017.json',
image_dir='train2017',
with_background=False,
mixup=True,
transform=train_transform)
batch_sampler = DistributedBatchSampler(dataset,
batch_size=FLAGS.batch_size,
shuffle=True,
drop_last=True)
loader = DataLoader(dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
else: # evaluation mode
eval_transform = Compose([ResizeImage(target_size=608),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_train2017.json',
image_dir='train2017',
with_background=False,
mixup=True,
transform=train_transform)
batch_sampler = DistributedBatchSampler(
dataset, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
else: # evaluation mode
eval_transform = Compose([
ResizeImage(target_size=608), NormalizeBox(), PadBox(),
BboxXYXY2XYWH()
])
eval_collate_fn = BatchCompose([NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
anno_path='annotations/instances_val2017.json',
image_dir='val2017',
with_background=False,
transform=eval_transform)
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_val2017.json',
image_dir='val2017',
with_background=False,
transform=eval_transform)
# batch_size can only be 1 in evaluation for YOLOv3
# prediction bbox is a LoDTensor
batch_sampler = DistributedBatchSampler(dataset,
batch_size=1,
shuffle=False,
drop_last=False)
loader = DataLoader(dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=eval_collate_fn)
batch_sampler = DistributedBatchSampler(
dataset, batch_size=1, shuffle=False, drop_last=False)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=eval_collate_fn)
pretrained = FLAGS.eval_only and FLAGS.weights is None
model = yolov3_darknet53(num_classes=dataset.num_classes,
model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
model = yolov3_darknet53(
num_classes=dataset.num_classes,
model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
if FLAGS.pretrain_weights and not FLAGS.eval_only:
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
model.load(
FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
optim = make_optimizer(
len(batch_sampler), parameter_list=model.parameters())
model.prepare(optim,
YoloLoss(num_classes=dataset.num_classes),
inputs=inputs, labels=labels,
device=FLAGS.device)
model.prepare(
optim,
YoloLoss(num_classes=dataset.num_classes),
inputs=inputs,
labels=labels,
device=FLAGS.device)
# NOTE: we implement COCO metric of YOLOv3 model here, separately
# from 'prepare' and 'fit' framework for follwing reason:
......@@ -149,7 +156,8 @@ def main():
preds = model.predict(loader, stack_outputs=False)
_, _, _, img_ids, bboxes = preds
anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json')
anno_path = os.path.join(FLAGS.data,
'annotations/instances_val2017.json')
coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
for img_id, bbox in zip(img_ids, bboxes):
coco_metric.update(img_id, bbox)
......@@ -176,7 +184,9 @@ def main():
if __name__ == '__main__':
parser = argparse.ArgumentParser("Yolov3 Training on VOC")
parser.add_argument(
"--data", type=str, default='dataset/voc',
"--data",
type=str,
default='dataset/voc',
help="path to dataset directory")
parser.add_argument(
"--device", type=str, default='gpu', help="device to use, gpu or cpu")
......@@ -187,23 +197,38 @@ if __name__ == '__main__':
parser.add_argument(
"-e", "--epoch", default=300, type=int, help="number of epoch")
parser.add_argument(
"--no_mixup_epoch", default=30, type=int,
"--no_mixup_epoch",
default=30,
type=int,
help="number of the last N epoch without image mixup")
parser.add_argument(
'--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
'--lr',
'--learning-rate',
default=0.001,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=8, type=int, help="batch size")
parser.add_argument(
"-j", "--num_workers", default=4, type=int, help="reader worker number")
"-j",
"--num_workers",
default=4,
type=int,
help="reader worker number")
parser.add_argument(
"-p", "--pretrain_weights", default=None, type=str,
"-p",
"--pretrain_weights",
default=None,
type=str,
help="path to pretrained weights")
parser.add_argument(
"-r", "--resume", default=None, type=str,
help="path to model weights")
"-r", "--resume", default=None, type=str, help="path to model weights")
parser.add_argument(
"-w", "--weights", default=None, type=str,
"-w",
"--weights",
default=None,
type=str,
help="path to weights for evaluation")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
......
......@@ -73,6 +73,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__()
......@@ -81,38 +82,34 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
"channel {} cannot be divided by 2".format(channel)
self.conv0 = ConvBNLayer(
ch_in=ch_in,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0)
self.conv1 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.conv2 = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.conv3 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.route = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.tip = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
......@@ -149,8 +146,10 @@ class YOLOv3(Model):
"model_mode should be 'train' 'eval' or 'test', but got " \
"{}".format(model_mode)
self.model_mode = str.lower(model_mode)
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
self.valid_thresh = 0.005
self.nms_thresh = 0.45
......@@ -158,7 +157,10 @@ class YOLOv3(Model):
self.nms_posk = 100
self.draw_thresh = 0.5
self.backbone = darknet53(pretrained=(model_mode=='train'))
self.backbone = darknet53(
pretrained=(model_mode == 'train'),
with_pool=False,
num_classes=-1)
self.block_outputs = []
self.yolo_blocks = []
self.route_blocks = []
......@@ -173,32 +175,46 @@ class YOLOv3(Model):
block_out = self.add_sublayer(
"block_out_{}".format(idx),
Conv2D(num_channels=1024 // (2**idx),
num_filters=num_filters,
filter_size=1,
act=None,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.))))
Conv2D(
num_channels=1024 // (2**idx),
num_filters=num_filters,
filter_size=1,
act=None,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.))))
self.block_outputs.append(block_out)
if idx < 2:
route = self.add_sublayer(
"route2_{}".format(idx),
ConvBNLayer(ch_in=512 // (2**idx),
ch_out=256 // (2**idx),
filter_size=1,
act='leaky_relu'))
ConvBNLayer(
ch_in=512 // (2**idx),
ch_out=256 // (2**idx),
filter_size=1,
act='leaky_relu'))
self.route_blocks.append(route)
def extract_feats(self, inputs):
out = self.backbone.conv0(inputs)
out = self.backbone.downsample0(out)
blocks = []
for i, conv_block_i in enumerate(
self.backbone.darknet53_conv_block_list):
out = conv_block_i(out)
blocks.append(out)
if i < len(self.backbone.stages) - 1:
out = self.backbone.downsample_list[i](out)
return blocks[-1:-4:-1]
def forward(self, img_id, img_shape, inputs):
outputs = []
boxes = []
scores = []
downsample = 32
feats = self.backbone(inputs)
feats = self.extract_feats(inputs)
route = None
for idx, feat in enumerate(feats):
if idx > 0:
......@@ -233,15 +249,18 @@ class YOLOv3(Model):
if self.model_mode == 'train':
return outputs
preds = [img_id,
fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(boxes, axis=1),
scores=fluid.layers.concat(scores, axis=2),
score_threshold=self.valid_thresh,
nms_top_k=self.nms_topk,
keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)]
preds = [
img_id, fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(
boxes, axis=1),
scores=fluid.layers.concat(
scores, axis=2),
score_threshold=self.valid_thresh,
nms_top_k=self.nms_topk,
keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)
]
if self.model_mode == 'test':
return preds
......@@ -249,14 +268,17 @@ class YOLOv3(Model):
# model_mode == "eval"
return outputs + preds
class YoloLoss(Loss):
def __init__(self, num_classes=80, num_max_boxes=50):
super(YoloLoss, self).__init__()
self.num_classes = num_classes
self.num_max_boxes = num_max_boxes
self.ignore_thresh = 0.7
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
def forward(self, outputs, labels):
......@@ -265,7 +287,7 @@ class YoloLoss(Loss):
losses = []
for idx, out in enumerate(outputs):
if idx == 3: break # debug
if idx == 3: break # debug
anchor_mask = self.anchor_masks[idx]
loss = fluid.layers.yolov3_loss(
x=out,
......@@ -284,8 +306,10 @@ class YoloLoss(Loss):
return losses
def _yolov3_darknet(num_layers=53, num_classes=80,
model_mode='train', pretrained=True):
def _yolov3_darknet(num_layers=53,
num_classes=80,
model_mode='train',
pretrained=True):
model = YOLOv3(num_classes, model_mode)
if pretrained:
assert num_layers in pretrain_infos.keys(), \
......
......@@ -20,6 +20,7 @@ import traceback
import numpy as np
__all__ = [
"Compose",
'ColorDistort',
'RandomExpand',
'RandomCrop',
......@@ -33,6 +34,37 @@ __all__ = [
]
class Compose(object):
"""Composes several transforms together.
Args:
transforms (list of ``Transform`` objects): list of transforms to compose.
"""
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, *data):
for f in self.transforms:
try:
data = f(*data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
"{} and stack:\n{}".format(f, e, str(stack_info)))
raise e
return data
def __repr__(self):
format_string = self.__class__.__name__ + '('
for t in self.transforms:
format_string += '\n'
format_string += ' {0}'.format(t)
format_string += '\n)'
return format_string
class ColorDistort(object):
"""Random color distortion.
......@@ -147,7 +179,10 @@ class RandomExpand(object):
fill_value (list): color value used to fill the canvas. in RGB order.
"""
def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]):
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
......@@ -493,8 +528,7 @@ def _crop_box_with_center_constraint(box, crop):
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(
crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
......@@ -517,8 +551,8 @@ def random_crop(inputs):
for i in range(50):
scale = np.random.uniform(*scaling)
min_ar, max_ar = aspect_ratios
ar = np.random.uniform(max(min_ar, scale**2),
min(max_ar, scale**-2))
ar = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(ar))
crop_w = int(w * scale * np.sqrt(ar))
crop_y = np.random.randint(0, h - crop_h)
......@@ -529,7 +563,8 @@ def random_crop(inputs):
continue
cropped_box, valid_ids = _crop_box_with_center_constraint(
gt_box, np.array(crop_box, dtype=np.float32))
gt_box, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
......@@ -545,9 +580,7 @@ def random_crop(inputs):
class ResizeImage(object):
def __init__(self,
target_size=0,
interp=cv2.INTER_CUBIC):
def __init__(self, target_size=0, interp=cv2.INTER_CUBIC):
"""
Rescale image to the specified target size.
If target_size is list, selected a scale randomly as the specified
......@@ -574,8 +607,8 @@ class ResizeImage(object):
raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_scale_x = float(self.target_size) / float(im.shape[1])
im_scale_y = float(self.target_size) / float(im.shape[0])
resize_w = self.target_size
resize_h = self.target_size
resize_w = self.target_size
resize_h = self.target_size
im = cv2.resize(
im,
......@@ -586,4 +619,3 @@ class ResizeImage(object):
interpolation=self.interp)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
......@@ -1184,9 +1184,9 @@ class Model(fluid.dygraph.Layer):
outputs = []
count = 0
for i, data in tqdm.tqdm(enumerate(loader)):
for data in tqdm.tqdm(loader):
data = flatten(data)
out = to_list(self.test(data[:len(self._inputs)]))
out = to_list(self.test_batch(data[:len(self._inputs)]))
outputs.append(out)
count += out[0].shape[0]
......@@ -1199,15 +1199,10 @@ class Model(fluid.dygraph.Layer):
# NOTE: for lod tensor output, we should not stack outputs
# for stacking may loss its detail info
outputs = list(zip(*outputs))
if stack_outputs:
stack_outs = []
for i in range(len(outputs[0])):
split_outs = []
for out in outputs:
split_outs.append(out[i])
stack_outs.append(np.vstack(split_outs))
outputs = stack_outs
outputs = [np.vstack(outs) for outs in outputs]
self._test_dataloader = None
......
......@@ -156,7 +156,7 @@ class DarkNet(Model):
.format(DarkNet_cfg.keys())
self.stages = DarkNet_cfg[num_layers]
self.stages = self.stages[0:5]
self.num_classes = 1000
self.num_classes = num_classes
self.with_pool = True
ch_in = 3
self.conv0 = ConvBNLayer(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册