未验证 提交 2458c1fb 编写于 作者: L LielinJiang 提交者: GitHub

Merge pull request #49 from LielinJiang/lenet

Add lenet
...@@ -85,8 +85,9 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch ...@@ -85,8 +85,9 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 | | [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 | | [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 | | [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
| [darknet53](https://paddle-hapi.bj.bcebos.com/models/darknet53.pdparams) | 78.43 | 94.24 |
上述模型的复现参数请参考scripts下的脚本。 上述部分模型的复现参数请参考scripts下的脚本。需要注意的是darknet要使用image size为256的输入来预测, 即```--image-size 256```
## 参考文献 ## 参考文献
......
...@@ -24,7 +24,11 @@ from paddle import fluid ...@@ -24,7 +24,11 @@ from paddle import fluid
class ImageNetDataset(DatasetFolder): class ImageNetDataset(DatasetFolder):
def __init__(self, path, mode='train'): def __init__(self,
path,
mode='train',
image_size=224,
resize_short_size=256):
super(ImageNetDataset, self).__init__(path) super(ImageNetDataset, self).__init__(path)
self.mode = mode self.mode = mode
...@@ -32,13 +36,14 @@ class ImageNetDataset(DatasetFolder): ...@@ -32,13 +36,14 @@ class ImageNetDataset(DatasetFolder):
mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375]) mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
if self.mode == 'train': if self.mode == 'train':
self.transform = transforms.Compose([ self.transform = transforms.Compose([
transforms.RandomResizedCrop(224), transforms.RandomResizedCrop(image_size),
transforms.RandomHorizontalFlip(), transforms.RandomHorizontalFlip(),
transforms.Permute(mode='CHW'), normalize transforms.Permute(mode='CHW'), normalize
]) ])
else: else:
self.transform = transforms.Compose([ self.transform = transforms.Compose([
transforms.Resize(256), transforms.CenterCrop(224), transforms.Resize(resize_short_size),
transforms.CenterCrop(image_size),
transforms.Permute(mode='CHW'), normalize transforms.Permute(mode='CHW'), normalize
]) ])
...@@ -46,7 +51,7 @@ class ImageNetDataset(DatasetFolder): ...@@ -46,7 +51,7 @@ class ImageNetDataset(DatasetFolder):
img_path, label = self.samples[idx] img_path, label = self.samples[idx]
img = cv2.imread(img_path).astype(np.float32) img = cv2.imread(img_path).astype(np.float32)
label = np.array([label]) label = np.array([label])
return self.transform(img, label) return self.transform(img), label
def __len__(self): def __len__(self):
return len(self.samples) return len(self.samples)
...@@ -18,8 +18,6 @@ from __future__ import print_function ...@@ -18,8 +18,6 @@ from __future__ import print_function
import argparse import argparse
import contextlib import contextlib
import os import os
import sys
sys.path.append('../')
import time import time
import math import math
...@@ -89,8 +87,16 @@ def main(): ...@@ -89,8 +87,16 @@ def main():
labels = [Input([None, 1], 'int64', name='label')] labels = [Input([None, 1], 'int64', name='label')]
train_dataset = ImageNetDataset( train_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'train'), mode='train') os.path.join(FLAGS.data, 'train'),
val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val') mode='train',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
val_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'val'),
mode='val',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
optim = make_optimizer( optim = make_optimizer(
np.ceil( np.ceil(
...@@ -176,6 +182,13 @@ if __name__ == '__main__': ...@@ -176,6 +182,13 @@ if __name__ == '__main__':
parser.add_argument( parser.add_argument(
"--weight-decay", default=1e-4, type=float, help="weight decay") "--weight-decay", default=1e-4, type=float, help="weight decay")
parser.add_argument("--momentum", default=0.9, type=float, help="momentum") parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
parser.add_argument(
"--image-size", default=224, type=int, help="intput image size")
parser.add_argument(
"--resize-short-size",
default=256,
type=int,
help="short size of keeping ratio resize")
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path" assert FLAGS.data, "error: must provide data path"
main() main()
...@@ -27,7 +27,7 @@ from paddle.io import DataLoader ...@@ -27,7 +27,7 @@ from paddle.io import DataLoader
from hapi.model import Model, Input, set_device from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler from hapi.distributed import DistributedBatchSampler
from hapi.vision.transforms import Compose, BatchCompose from hapi.vision.transforms import BatchCompose
from modeling import yolov3_darknet53, YoloLoss from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset from coco import COCODataset
...@@ -43,10 +43,9 @@ def make_optimizer(step_per_epoch, parameter_list=None): ...@@ -43,10 +43,9 @@ def make_optimizer(step_per_epoch, parameter_list=None):
momentum = 0.9 momentum = 0.9
weight_decay = 5e-4 weight_decay = 5e-4
boundaries = [step_per_epoch * e for e in [200, 250]] boundaries = [step_per_epoch * e for e in [200, 250]]
values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)] values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay( learning_rate = fluid.layers.piecewise_decay(
boundaries=boundaries, boundaries=boundaries, values=values)
values=values)
learning_rate = fluid.layers.linear_lr_warmup( learning_rate = fluid.layers.linear_lr_warmup(
learning_rate=learning_rate, learning_rate=learning_rate,
warmup_steps=warm_up_iter, warmup_steps=warm_up_iter,
...@@ -63,77 +62,88 @@ def make_optimizer(step_per_epoch, parameter_list=None): ...@@ -63,77 +62,88 @@ def make_optimizer(step_per_epoch, parameter_list=None):
def main(): def main():
device = set_device(FLAGS.device) device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 1], 'int64', name='img_id'), inputs = [
Input([None, 2], 'int32', name='img_shape'), Input(
Input([None, 3, None, None], 'float32', name='image')] [None, 1], 'int64', name='img_id'), Input(
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), [None, 2], 'int32', name='img_shape'), Input(
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'), [None, 3, None, None], 'float32', name='image')
Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')] ]
if not FLAGS.eval_only: # training mode labels = [
train_transform = Compose([ColorDistort(), Input(
RandomExpand(), [None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), Input(
RandomCrop(), [None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input(
RandomFlip(), [None, NUM_MAX_BOXES], 'float32', name='gt_score')
NormalizeBox(), ]
PadBox(),
BboxXYXY2XYWH()]) if not FLAGS.eval_only: # training mode
train_transform = Compose([
ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
NormalizeBox(), PadBox(), BboxXYXY2XYWH()
])
train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()]) train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data, dataset = COCODataset(
anno_path='annotations/instances_train2017.json', dataset_dir=FLAGS.data,
image_dir='train2017', anno_path='annotations/instances_train2017.json',
with_background=False, image_dir='train2017',
mixup=True, with_background=False,
transform=train_transform) mixup=True,
batch_sampler = DistributedBatchSampler(dataset, transform=train_transform)
batch_size=FLAGS.batch_size, batch_sampler = DistributedBatchSampler(
shuffle=True, dataset, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
drop_last=True) loader = DataLoader(
loader = DataLoader(dataset, dataset,
batch_sampler=batch_sampler, batch_sampler=batch_sampler,
places=device, places=device,
num_workers=FLAGS.num_workers, num_workers=FLAGS.num_workers,
return_list=True, return_list=True,
collate_fn=train_collate_fn) collate_fn=train_collate_fn)
else: # evaluation mode else: # evaluation mode
eval_transform = Compose([ResizeImage(target_size=608), eval_transform = Compose([
NormalizeBox(), ResizeImage(target_size=608), NormalizeBox(), PadBox(),
PadBox(), BboxXYXY2XYWH()
BboxXYXY2XYWH()]) ])
eval_collate_fn = BatchCompose([NormalizeImage()]) eval_collate_fn = BatchCompose([NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data, dataset = COCODataset(
anno_path='annotations/instances_val2017.json', dataset_dir=FLAGS.data,
image_dir='val2017', anno_path='annotations/instances_val2017.json',
with_background=False, image_dir='val2017',
transform=eval_transform) with_background=False,
transform=eval_transform)
# batch_size can only be 1 in evaluation for YOLOv3 # batch_size can only be 1 in evaluation for YOLOv3
# prediction bbox is a LoDTensor # prediction bbox is a LoDTensor
batch_sampler = DistributedBatchSampler(dataset, batch_sampler = DistributedBatchSampler(
batch_size=1, dataset, batch_size=1, shuffle=False, drop_last=False)
shuffle=False, loader = DataLoader(
drop_last=False) dataset,
loader = DataLoader(dataset, batch_sampler=batch_sampler,
batch_sampler=batch_sampler, places=device,
places=device, num_workers=FLAGS.num_workers,
num_workers=FLAGS.num_workers, return_list=True,
return_list=True, collate_fn=eval_collate_fn)
collate_fn=eval_collate_fn)
pretrained = FLAGS.eval_only and FLAGS.weights is None pretrained = FLAGS.eval_only and FLAGS.weights is None
model = yolov3_darknet53(num_classes=dataset.num_classes, model = yolov3_darknet53(
model_mode='eval' if FLAGS.eval_only else 'train', num_classes=dataset.num_classes,
pretrained=pretrained) model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
if FLAGS.pretrain_weights and not FLAGS.eval_only: if FLAGS.pretrain_weights and not FLAGS.eval_only:
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True) model.load(
FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters()) optim = make_optimizer(
len(batch_sampler), parameter_list=model.parameters())
model.prepare(optim, model.prepare(
YoloLoss(num_classes=dataset.num_classes), optim,
inputs=inputs, labels=labels, YoloLoss(num_classes=dataset.num_classes),
device=FLAGS.device) inputs=inputs,
labels=labels,
device=FLAGS.device)
# NOTE: we implement COCO metric of YOLOv3 model here, separately # NOTE: we implement COCO metric of YOLOv3 model here, separately
# from 'prepare' and 'fit' framework for following reason: # from 'prepare' and 'fit' framework for following reason:
...@@ -149,7 +159,8 @@ def main(): ...@@ -149,7 +159,8 @@ def main():
preds = model.predict(loader, stack_outputs=False) preds = model.predict(loader, stack_outputs=False)
_, _, _, img_ids, bboxes = preds _, _, _, img_ids, bboxes = preds
anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json') anno_path = os.path.join(FLAGS.data,
'annotations/instances_val2017.json')
coco_metric = COCOMetric(anno_path=anno_path, with_background=False) coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
for img_id, bbox in zip(img_ids, bboxes): for img_id, bbox in zip(img_ids, bboxes):
coco_metric.update(img_id, bbox) coco_metric.update(img_id, bbox)
...@@ -176,7 +187,9 @@ def main(): ...@@ -176,7 +187,9 @@ def main():
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser("Yolov3 Training on VOC") parser = argparse.ArgumentParser("Yolov3 Training on VOC")
parser.add_argument( parser.add_argument(
"--data", type=str, default='dataset/voc', "--data",
type=str,
default='dataset/voc',
help="path to dataset directory") help="path to dataset directory")
parser.add_argument( parser.add_argument(
"--device", type=str, default='gpu', help="device to use, gpu or cpu") "--device", type=str, default='gpu', help="device to use, gpu or cpu")
...@@ -187,23 +200,38 @@ if __name__ == '__main__': ...@@ -187,23 +200,38 @@ if __name__ == '__main__':
parser.add_argument( parser.add_argument(
"-e", "--epoch", default=300, type=int, help="number of epoch") "-e", "--epoch", default=300, type=int, help="number of epoch")
parser.add_argument( parser.add_argument(
"--no_mixup_epoch", default=30, type=int, "--no_mixup_epoch",
default=30,
type=int,
help="number of the last N epoch without image mixup") help="number of the last N epoch without image mixup")
parser.add_argument( parser.add_argument(
'--lr', '--learning-rate', default=0.001, type=float, metavar='LR', '--lr',
'--learning-rate',
default=0.001,
type=float,
metavar='LR',
help='initial learning rate') help='initial learning rate')
parser.add_argument( parser.add_argument(
"-b", "--batch_size", default=8, type=int, help="batch size") "-b", "--batch_size", default=8, type=int, help="batch size")
parser.add_argument( parser.add_argument(
"-j", "--num_workers", default=4, type=int, help="reader worker number") "-j",
"--num_workers",
default=4,
type=int,
help="reader worker number")
parser.add_argument( parser.add_argument(
"-p", "--pretrain_weights", default=None, type=str, "-p",
"--pretrain_weights",
default=None,
type=str,
help="path to pretrained weights") help="path to pretrained weights")
parser.add_argument( parser.add_argument(
"-r", "--resume", default=None, type=str, "-r", "--resume", default=None, type=str, help="path to model weights")
help="path to model weights")
parser.add_argument( parser.add_argument(
"-w", "--weights", default=None, type=str, "-w",
"--weights",
default=None,
type=str,
help="path to weights for evaluation") help="path to weights for evaluation")
FLAGS = parser.parse_args() FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path" assert FLAGS.data, "error: must provide data path"
......
...@@ -73,6 +73,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -73,6 +73,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
out = fluid.layers.leaky_relu(x=out, alpha=0.1) out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out return out
class YoloDetectionBlock(fluid.dygraph.Layer): class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel): def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__() super(YoloDetectionBlock, self).__init__()
...@@ -81,38 +82,34 @@ class YoloDetectionBlock(fluid.dygraph.Layer): ...@@ -81,38 +82,34 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
"channel {} cannot be divided by 2".format(channel) "channel {} cannot be divided by 2".format(channel)
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
ch_in=ch_in, ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0)
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
ch_in=channel, ch_in=channel,
ch_out=channel*2, ch_out=channel * 2,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1) padding=1)
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
ch_in=channel*2, ch_in=channel * 2,
ch_out=channel, ch_out=channel,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.conv3 = ConvBNLayer( self.conv3 = ConvBNLayer(
ch_in=channel, ch_in=channel,
ch_out=channel*2, ch_out=channel * 2,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1) padding=1)
self.route = ConvBNLayer( self.route = ConvBNLayer(
ch_in=channel*2, ch_in=channel * 2,
ch_out=channel, ch_out=channel,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0) padding=0)
self.tip = ConvBNLayer( self.tip = ConvBNLayer(
ch_in=channel, ch_in=channel,
ch_out=channel*2, ch_out=channel * 2,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1) padding=1)
...@@ -149,8 +146,10 @@ class YOLOv3(Model): ...@@ -149,8 +146,10 @@ class YOLOv3(Model):
"model_mode should be 'train' 'eval' or 'test', but got " \ "model_mode should be 'train' 'eval' or 'test', but got " \
"{}".format(model_mode) "{}".format(model_mode)
self.model_mode = str.lower(model_mode) self.model_mode = str.lower(model_mode)
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, self.anchors = [
59, 119, 116, 90, 156, 198, 373, 326] 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
self.valid_thresh = 0.005 self.valid_thresh = 0.005
self.nms_thresh = 0.45 self.nms_thresh = 0.45
...@@ -158,7 +157,10 @@ class YOLOv3(Model): ...@@ -158,7 +157,10 @@ class YOLOv3(Model):
self.nms_posk = 100 self.nms_posk = 100
self.draw_thresh = 0.5 self.draw_thresh = 0.5
self.backbone = darknet53(pretrained=(model_mode=='train')) self.backbone = darknet53(
pretrained=(model_mode == 'train'),
with_pool=False,
num_classes=-1)
self.block_outputs = [] self.block_outputs = []
self.yolo_blocks = [] self.yolo_blocks = []
self.route_blocks = [] self.route_blocks = []
...@@ -173,32 +175,46 @@ class YOLOv3(Model): ...@@ -173,32 +175,46 @@ class YOLOv3(Model):
block_out = self.add_sublayer( block_out = self.add_sublayer(
"block_out_{}".format(idx), "block_out_{}".format(idx),
Conv2D(num_channels=1024 // (2**idx), Conv2D(
num_filters=num_filters, num_channels=1024 // (2**idx),
filter_size=1, num_filters=num_filters,
act=None, filter_size=1,
param_attr=ParamAttr( act=None,
initializer=fluid.initializer.Normal(0., 0.02)), param_attr=ParamAttr(
bias_attr=ParamAttr( initializer=fluid.initializer.Normal(0., 0.02)),
initializer=fluid.initializer.Constant(0.0), bias_attr=ParamAttr(
regularizer=L2Decay(0.)))) initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.))))
self.block_outputs.append(block_out) self.block_outputs.append(block_out)
if idx < 2: if idx < 2:
route = self.add_sublayer( route = self.add_sublayer(
"route2_{}".format(idx), "route2_{}".format(idx),
ConvBNLayer(ch_in=512 // (2**idx), ConvBNLayer(
ch_out=256 // (2**idx), ch_in=512 // (2**idx),
filter_size=1, ch_out=256 // (2**idx),
act='leaky_relu')) filter_size=1,
act='leaky_relu'))
self.route_blocks.append(route) self.route_blocks.append(route)
def extract_feats(self, inputs):
    """Run the backbone stage by stage and collect per-stage outputs.

    The input goes through ``backbone.conv0`` and ``backbone.downsample0``,
    then through each entry of ``backbone.darknet53_conv_block_list``.
    After every stage except the last one (as counted by
    ``len(backbone.stages)``), the matching entry of
    ``backbone.downsample_list`` is applied before the next stage.

    Args:
        inputs: value fed to ``backbone.conv0``
            (presumably an image batch -- confirm against the caller).

    Returns:
        list: the outputs of the last three stages, last stage first.
    """
    backbone = self.backbone
    feat = backbone.downsample0(backbone.conv0(inputs))

    stage_outputs = []
    for stage_idx, stage in enumerate(backbone.darknet53_conv_block_list):
        feat = stage(feat)
        stage_outputs.append(feat)
        # The final stage is not followed by a downsample layer.
        if stage_idx >= len(backbone.stages) - 1:
            continue
        feat = backbone.downsample_list[stage_idx](feat)

    # Reverse slice: [last, second-to-last, third-to-last].
    return stage_outputs[-1:-4:-1]
def forward(self, img_id, img_shape, inputs): def forward(self, img_id, img_shape, inputs):
outputs = [] outputs = []
boxes = [] boxes = []
scores = [] scores = []
downsample = 32 downsample = 32
feats = self.backbone(inputs) feats = self.extract_feats(inputs)
route = None route = None
for idx, feat in enumerate(feats): for idx, feat in enumerate(feats):
if idx > 0: if idx > 0:
...@@ -233,15 +249,18 @@ class YOLOv3(Model): ...@@ -233,15 +249,18 @@ class YOLOv3(Model):
if self.model_mode == 'train': if self.model_mode == 'train':
return outputs return outputs
preds = [img_id, preds = [
fluid.layers.multiclass_nms( img_id, fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(boxes, axis=1), bboxes=fluid.layers.concat(
scores=fluid.layers.concat(scores, axis=2), boxes, axis=1),
score_threshold=self.valid_thresh, scores=fluid.layers.concat(
nms_top_k=self.nms_topk, scores, axis=2),
keep_top_k=self.nms_posk, score_threshold=self.valid_thresh,
nms_threshold=self.nms_thresh, nms_top_k=self.nms_topk,
background_label=-1)] keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)
]
if self.model_mode == 'test': if self.model_mode == 'test':
return preds return preds
...@@ -249,14 +268,17 @@ class YOLOv3(Model): ...@@ -249,14 +268,17 @@ class YOLOv3(Model):
# model_mode == "eval" # model_mode == "eval"
return outputs + preds return outputs + preds
class YoloLoss(Loss): class YoloLoss(Loss):
def __init__(self, num_classes=80, num_max_boxes=50): def __init__(self, num_classes=80, num_max_boxes=50):
super(YoloLoss, self).__init__() super(YoloLoss, self).__init__()
self.num_classes = num_classes self.num_classes = num_classes
self.num_max_boxes = num_max_boxes self.num_max_boxes = num_max_boxes
self.ignore_thresh = 0.7 self.ignore_thresh = 0.7
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, self.anchors = [
59, 119, 116, 90, 156, 198, 373, 326] 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
def forward(self, outputs, labels): def forward(self, outputs, labels):
...@@ -265,7 +287,7 @@ class YoloLoss(Loss): ...@@ -265,7 +287,7 @@ class YoloLoss(Loss):
losses = [] losses = []
for idx, out in enumerate(outputs): for idx, out in enumerate(outputs):
if idx == 3: break # debug if idx == 3: break # debug
anchor_mask = self.anchor_masks[idx] anchor_mask = self.anchor_masks[idx]
loss = fluid.layers.yolov3_loss( loss = fluid.layers.yolov3_loss(
x=out, x=out,
...@@ -284,8 +306,10 @@ class YoloLoss(Loss): ...@@ -284,8 +306,10 @@ class YoloLoss(Loss):
return losses return losses
def _yolov3_darknet(num_layers=53, num_classes=80, def _yolov3_darknet(num_layers=53,
model_mode='train', pretrained=True): num_classes=80,
model_mode='train',
pretrained=True):
model = YOLOv3(num_classes, model_mode) model = YOLOv3(num_classes, model_mode)
if pretrained: if pretrained:
assert num_layers in pretrain_infos.keys(), \ assert num_layers in pretrain_infos.keys(), \
......
...@@ -20,6 +20,7 @@ import traceback ...@@ -20,6 +20,7 @@ import traceback
import numpy as np import numpy as np
__all__ = [ __all__ = [
"Compose",
'ColorDistort', 'ColorDistort',
'RandomExpand', 'RandomExpand',
'RandomCrop', 'RandomCrop',
...@@ -33,6 +34,37 @@ __all__ = [ ...@@ -33,6 +34,37 @@ __all__ = [
] ]
class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, *data):
        """Apply every transform in order and return the final result.

        The positional arguments are unpacked into the first transform; the
        value each transform returns is unpacked into the next one, so every
        transform must return an iterable matching the next transform's
        positional parameters.

        Raises:
            Exception: whatever a transform raises. The failure is printed
                with its stack trace and then re-raised unchanged.
        """
        for f in self.transforms:
            try:
                data = f(*data)
            except Exception as e:
                stack_info = traceback.format_exc()
                print("fail to perform transform [{}] with error: "
                      "{} and stack:\n{}".format(f, e, str(stack_info)))
                # Bare ``raise`` re-raises the active exception as-is,
                # keeping its original traceback.
                raise
        return data

    def __repr__(self):
        """Return the class name followed by one transform per line."""
        parts = [self.__class__.__name__ + '(']
        parts.extend(' {0}'.format(t) for t in self.transforms)
        parts.append(')')
        return '\n'.join(parts)
class ColorDistort(object): class ColorDistort(object):
"""Random color distortion. """Random color distortion.
...@@ -147,7 +179,10 @@ class RandomExpand(object): ...@@ -147,7 +179,10 @@ class RandomExpand(object):
fill_value (list): color value used to fill the canvas. in RGB order. fill_value (list): color value used to fill the canvas. in RGB order.
""" """
def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]): def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
assert ratio > 1.01, "expand ratio must be larger than 1.01" assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio self.ratio = ratio
self.prob = prob self.prob = prob
...@@ -493,8 +528,7 @@ def _crop_box_with_center_constraint(box, crop): ...@@ -493,8 +528,7 @@ def _crop_box_with_center_constraint(box, crop):
cropped_box[:, :2] -= crop[:2] cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2] cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2 centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and( valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and( valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)) valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0] return cropped_box, np.where(valid)[0]
...@@ -517,8 +551,8 @@ def random_crop(inputs): ...@@ -517,8 +551,8 @@ def random_crop(inputs):
for i in range(50): for i in range(50):
scale = np.random.uniform(*scaling) scale = np.random.uniform(*scaling)
min_ar, max_ar = aspect_ratios min_ar, max_ar = aspect_ratios
ar = np.random.uniform(max(min_ar, scale**2), ar = np.random.uniform(
min(max_ar, scale**-2)) max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(ar)) crop_h = int(h * scale / np.sqrt(ar))
crop_w = int(w * scale * np.sqrt(ar)) crop_w = int(w * scale * np.sqrt(ar))
crop_y = np.random.randint(0, h - crop_h) crop_y = np.random.randint(0, h - crop_h)
...@@ -529,7 +563,8 @@ def random_crop(inputs): ...@@ -529,7 +563,8 @@ def random_crop(inputs):
continue continue
cropped_box, valid_ids = _crop_box_with_center_constraint( cropped_box, valid_ids = _crop_box_with_center_constraint(
gt_box, np.array(crop_box, dtype=np.float32)) gt_box, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0: if valid_ids.size > 0:
found = True found = True
break break
...@@ -545,9 +580,7 @@ def random_crop(inputs): ...@@ -545,9 +580,7 @@ def random_crop(inputs):
class ResizeImage(object): class ResizeImage(object):
def __init__(self, def __init__(self, target_size=0, interp=cv2.INTER_CUBIC):
target_size=0,
interp=cv2.INTER_CUBIC):
""" """
Rescale image to the specified target size. Rescale image to the specified target size.
If target_size is list, selected a scale randomly as the specified If target_size is list, selected a scale randomly as the specified
...@@ -574,8 +607,8 @@ class ResizeImage(object): ...@@ -574,8 +607,8 @@ class ResizeImage(object):
raise ImageError('{}: image is not 3-dimensional.'.format(self)) raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_scale_x = float(self.target_size) / float(im.shape[1]) im_scale_x = float(self.target_size) / float(im.shape[1])
im_scale_y = float(self.target_size) / float(im.shape[0]) im_scale_y = float(self.target_size) / float(im.shape[0])
resize_w = self.target_size resize_w = self.target_size
resize_h = self.target_size resize_h = self.target_size
im = cv2.resize( im = cv2.resize(
im, im,
...@@ -586,4 +619,3 @@ class ResizeImage(object): ...@@ -586,4 +619,3 @@ class ResizeImage(object):
interpolation=self.interp) interpolation=self.interp)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score] return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
...@@ -150,7 +150,7 @@ class DatasetFolder(Dataset): ...@@ -150,7 +150,7 @@ class DatasetFolder(Dataset):
path, target = self.samples[index] path, target = self.samples[index]
sample = self.loader(path) sample = self.loader(path)
if self.transform is not None: if self.transform is not None:
sample, target = self.transform(sample, target) sample, target = self.transform(sample)
return sample, target return sample, target
......
...@@ -1135,7 +1135,7 @@ class Model(fluid.dygraph.Layer): ...@@ -1135,7 +1135,7 @@ class Model(fluid.dygraph.Layer):
test_data, test_data,
batch_size=1, batch_size=1,
num_workers=0, num_workers=0,
stack_outputs=True): stack_outputs=False):
""" """
FIXME: add more comments and usage FIXME: add more comments and usage
Args: Args:
...@@ -1183,20 +1183,29 @@ class Model(fluid.dygraph.Layer): ...@@ -1183,20 +1183,29 @@ class Model(fluid.dygraph.Layer):
loader = test_loader() loader = test_loader()
outputs = [] outputs = []
count = 0
for data in tqdm.tqdm(loader): for data in tqdm.tqdm(loader):
data = flatten(data) data = flatten(data)
outputs.append(self.test_batch(data[:len(self._inputs)])) out = to_list(self.test_batch(data[:len(self._inputs)]))
outputs.append(out)
count += out[0].shape[0]
if test_loader is not None and self._adapter._nranks > 1 \
and isinstance(test_loader, DataLoader) \
and count > len(test_loader.dataset):
size = outputs[-1][0].shape[0] - (count - len(test_loader.dataset))
outputs[-1] = [o[:size] for o in outputs[-1]]
# NOTE: for lod tensor output, we should not stack outputs # NOTE: for lod tensor output, we should not stack outputs
# for stacking may lose its detail info # for stacking may lose its detail info
outputs = list(zip(*outputs)) outputs = list(zip(*outputs))
if stack_outputs: if stack_outputs:
outputs = [np.stack(outs, axis=0) for outs in outputs] outputs = [np.vstack(outs) for outs in outputs]
self._test_dataloader = None self._test_dataloader = None
if test_loader is not None and self._adapter._nranks > 1 \
and isinstance(test_loader, DataLoader):
outputs = [o[:len(test_loader.dataset)] for o in outputs]
return outputs return outputs
def _run_one_epoch(self, def _run_one_epoch(self,
......
...@@ -12,11 +12,12 @@ ...@@ -12,11 +12,12 @@
#See the License for the specific language governing permissions and #See the License for the specific language governing permissions and
#limitations under the License. #limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
from hapi.model import Model from hapi.model import Model
from hapi.download import get_weights_path from hapi.download import get_weights_path
...@@ -25,8 +26,8 @@ __all__ = ['DarkNet', 'darknet53'] ...@@ -25,8 +26,8 @@ __all__ = ['DarkNet', 'darknet53']
# {num_layers: (url, md5)} # {num_layers: (url, md5)}
pretrain_infos = { pretrain_infos = {
53: ('https://paddlemodels.bj.bcebos.com/hapi/darknet53.pdparams', 53: ('https://paddle-hapi.bj.bcebos.com/models/darknet53.pdparams',
'2506357a5c31e865785112fc614a487d') 'ca506a90e2efecb9a2093f8ada808708')
} }
...@@ -66,17 +67,14 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -66,17 +67,14 @@ class ConvBNLayer(fluid.dygraph.Layer):
def forward(self, inputs): def forward(self, inputs):
out = self.conv(inputs) out = self.conv(inputs)
out = self.batch_norm(out) out = self.batch_norm(out)
# out = fluid.layers.relu(out)
if self.act == 'leaky': if self.act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1) out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out return out
class DownSample(fluid.dygraph.Layer): class DownSample(fluid.dygraph.Layer):
def __init__(self, def __init__(self, ch_in, ch_out, filter_size=3, stride=2, padding=1):
ch_in,
ch_out,
filter_size=3,
stride=2,
padding=1):
super(DownSample, self).__init__() super(DownSample, self).__init__()
...@@ -87,46 +85,45 @@ class DownSample(fluid.dygraph.Layer): ...@@ -87,46 +85,45 @@ class DownSample(fluid.dygraph.Layer):
stride=stride, stride=stride,
padding=padding) padding=padding)
self.ch_out = ch_out self.ch_out = ch_out
def forward(self, inputs): def forward(self, inputs):
out = self.conv_bn_layer(inputs) out = self.conv_bn_layer(inputs)
return out return out
class BasicBlock(fluid.dygraph.Layer):
    """Residual unit: a 1x1 ConvBNLayer, a 3x3 ConvBNLayer and a skip add."""

    def __init__(self, ch_in, ch_out):
        """Create the two convolution layers of the block.

        Args:
            ch_in (int): channels of the block input.
            ch_out (int): channels produced by the 1x1 layer; the 3x3
                layer widens this to ``ch_out * 2``.
        """
        super(BasicBlock, self).__init__()
        # 1x1 convolution, stride 1, no padding.
        self.conv1 = ConvBNLayer(
            ch_in=ch_in,
            ch_out=ch_out,
            filter_size=1,
            stride=1,
            padding=0)
        # 3x3 convolution, stride 1, padding 1; doubles the channel count.
        self.conv2 = ConvBNLayer(
            ch_in=ch_out,
            ch_out=ch_out * 2,
            filter_size=3,
            stride=1,
            padding=1)

    def forward(self, inputs):
        """Return ``inputs`` plus the output of the two stacked convolutions.

        NOTE(review): the element-wise add presumably needs ``inputs`` to
        carry ``ch_out * 2`` channels -- confirm against the callers.
        """
        residual = self.conv2(self.conv1(inputs))
        return fluid.layers.elementwise_add(x=inputs, y=residual, act=None)
class LayerWarp(fluid.dygraph.Layer): class LayerWarp(fluid.dygraph.Layer):
def __init__(self, ch_in, ch_out, count):
    """Create a chain of ``BasicBlock`` sublayers.

    Args:
        ch_in (int): input channels of the first block.
        ch_out (int): ``ch_out`` handed to every block; the follow-up
            blocks are built with ``ch_out * 2`` input channels.
        count (int): upper bound of the block index range; blocks
            ``1 .. count - 1`` are created in addition to ``basicblock0``.
    """
    super(LayerWarp, self).__init__()
    self.basicblock0 = BasicBlock(ch_in, ch_out)
    # Follow-up blocks are registered under the names "basic_block_<n>".
    self.res_out_list = [
        self.add_sublayer("basic_block_%d" % (n),
                          BasicBlock(ch_out * 2, ch_out))
        for n in range(1, count)
    ]
    self.ch_out = ch_out
def forward(self,inputs):
def forward(self, inputs):
y = self.basicblock0(inputs) y = self.basicblock0(inputs)
for basic_block_i in self.res_out_list: for basic_block_i in self.res_out_list:
y = basic_block_i(y) y = basic_block_i(y)
...@@ -142,61 +139,82 @@ class DarkNet(Model): ...@@ -142,61 +139,82 @@ class DarkNet(Model):
Args: Args:
num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53. num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53.
ch_in (int): channel number of input data, default 3. num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
""" """
def __init__(self,
             num_layers=53,
             num_classes=1000,
             with_pool=True,
             classifier_activation='softmax'):
    """Build the DarkNet stages and the optional pooling / classifier head.

    Args:
        num_layers (int): key into ``DarkNet_cfg``; must be one of its
            keys. Default: 53.
        num_classes (int): output dim of the last fc layer. The fc layer
            is only created when ``num_classes > 0``. Default: 1000.
        with_pool (bool): whether to create the global average pool that
            ``forward`` applies before the fc layer. Default: True.
        classifier_activation (str): activation of the last fc layer.
            Default: 'softmax'.

    Raises:
        AssertionError: if ``num_layers`` is not a key of ``DarkNet_cfg``.
    """
    super(DarkNet, self).__init__()
    assert num_layers in DarkNet_cfg.keys(), \
        "only support num_layers in {} currently" \
        .format(DarkNet_cfg.keys())
    # Only the first five entries of the configuration are used.
    self.stages = DarkNet_cfg[num_layers][0:5]
    self.num_classes = num_classes
    # Honour the caller's choice (this used to be hard-coded to True).
    self.with_pool = with_pool

    self.conv0 = ConvBNLayer(
        ch_in=3, ch_out=32, filter_size=3, stride=1, padding=1)
    self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2)

    self.darknet53_conv_block_list = []
    self.downsample_list = []
    stage_ch_in = [64, 128, 256, 512, 1024]
    for i, stage in enumerate(self.stages):
        conv_block = self.add_sublayer(
            "stage_%d" % (i),
            LayerWarp(int(stage_ch_in[i]), 32 * (2**i), stage))
        self.darknet53_conv_block_list.append(conv_block)
    # One down-sampling layer between each pair of consecutive stages.
    for i in range(len(self.stages) - 1):
        downsample = self.add_sublayer(
            "stage_%d_downsample" % i,
            DownSample(
                ch_in=32 * (2**(i + 1)), ch_out=32 * (2**(i + 2))))
        self.downsample_list.append(downsample)

    if self.with_pool:
        self.global_pool = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

    if self.num_classes > 0:
        # Same value the leaked loop index used to yield
        # (32 * 2**(i + 2) with i == len(stages) - 2), now stated directly.
        self.fc_input_dim = 32 * (2**len(self.stages))
        stdv = 1.0 / math.sqrt(self.fc_input_dim)
        # NOTE(review): forward() reshapes to [-1, fc_input_dim] before fc,
        # which presumably relies on the pooled output -- confirm that
        # with_pool=False is only combined with num_classes <= 0.
        self.fc = Linear(
            self.fc_input_dim,
            num_classes,
            act=classifier_activation,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
    """Run the stem, every stage, and the optional pooling / fc head.

    Args:
        inputs: tensor fed to ``self.conv0``.

    Returns:
        The output of the last stage, passed through ``global_pool``
        when ``self.with_pool`` is set and through the reshape + ``fc``
        head when ``self.num_classes > 0``.
    """
    feat = self.downsample0(self.conv0(inputs))
    last_stage = len(self.stages) - 1
    for idx, stage_block in enumerate(self.darknet53_conv_block_list):
        feat = stage_block(feat)
        # Every stage except the last one is followed by a down-sample.
        if idx < last_stage:
            feat = self.downsample_list[idx](feat)

    if self.with_pool:
        feat = self.global_pool(feat)

    if self.num_classes > 0:
        feat = fluid.layers.reshape(feat, shape=[-1, self.fc_input_dim])
        feat = self.fc(feat)
    return feat
def _darknet(num_layers=53, input_channels=3, pretrained=True): def _darknet(num_layers=53, pretrained=False, **kwargs):
model = DarkNet(num_layers, input_channels) model = DarkNet(num_layers, **kwargs)
if pretrained: if pretrained:
assert num_layers in pretrain_infos.keys(), \ assert num_layers in pretrain_infos.keys(), \
"DarkNet{} do not have pretrained weights now, " \ "DarkNet{} do not have pretrained weights now, " \
...@@ -208,7 +226,7 @@ def _darknet(num_layers=53, input_channels=3, pretrained=True): ...@@ -208,7 +226,7 @@ def _darknet(num_layers=53, input_channels=3, pretrained=True):
return model return model
def darknet53(input_channels=3, pretrained=True): def darknet53(pretrained=False, **kwargs):
"""DarkNet 53-layer model """DarkNet 53-layer model
Args: Args:
...@@ -216,4 +234,4 @@ def darknet53(input_channels=3, pretrained=True): ...@@ -216,4 +234,4 @@ def darknet53(input_channels=3, pretrained=True):
pretrained (bool): If True, returns a model pre-trained on ImageNet, pretrained (bool): If True, returns a model pre-trained on ImageNet,
default True. default False.
""" """
return _darknet(53, input_channels, pretrained) return _darknet(53, pretrained, **kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
from paddle.fluid.dygraph.container import Sequential
from hapi.model import Model
__all__ = ['LeNet']
class LeNet(Model):
    """LeNet model from
    LeCun Y, Bottou L, Bengio Y, et al. "Gradient-based learning applied to
    document recognition". Proceedings of the IEEE, 1998, 86(11): 2278-2324.

    Args:
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
                            will not be defined. Default: 10.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNet, self).__init__()
        self.num_classes = num_classes
        # Feature extractor: 3x3 conv (1 -> 6 channels, padding 1), 2x2 max
        # pool with stride 2, 5x5 conv (6 -> 16 channels, no padding),
        # 2x2 max pool with stride 2.
        self.features = Sequential(
            Conv2D(
                1, 6, 3, stride=1, padding=1),
            Pool2D(2, 'max', 2),
            Conv2D(
                6, 16, 5, stride=1, padding=0),
            Pool2D(2, 'max', 2))

        if num_classes > 0:
            # 400 = 16 * 5 * 5, i.e. the flattened feature size obtained
            # from a 1 x 28 x 28 input with the layers above.
            # The last layer now emits `num_classes` values; it used to be
            # hard-coded to 10 regardless of the argument.
            self.fc = Sequential(
                Linear(400, 120),
                Linear(120, 84),
                Linear(
                    84, num_classes, act=classifier_activation))

    def forward(self, inputs):
        """Return the class scores, or the feature map if there is no fc head.

        Args:
            inputs: tensor fed to ``self.features``.

        Returns:
            The flattened features passed through ``self.fc`` when
            ``self.num_classes > 0``; otherwise the raw output of
            ``self.features``.
        """
        x = self.features(inputs)

        if self.num_classes > 0:
            x = fluid.layers.flatten(x, 1)
            x = self.fc(x)
        return x
...@@ -64,10 +64,10 @@ class Compose(object): ...@@ -64,10 +64,10 @@ class Compose(object):
def __init__(self, transforms): def __init__(self, transforms):
self.transforms = transforms self.transforms = transforms
def __call__(self, *data): def __call__(self, data):
for f in self.transforms: for f in self.transforms:
try: try:
data = f(*data) data = f(data)
except Exception as e: except Exception as e:
stack_info = traceback.format_exc() stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: " print("fail to perform transform [{}] with error: "
...@@ -130,8 +130,8 @@ class Resize(object): ...@@ -130,8 +130,8 @@ class Resize(object):
self.size = size self.size = size
self.interpolation = interpolation self.interpolation = interpolation
def __call__(self, img):
    """Return ``img`` resized to ``self.size`` with ``self.interpolation``."""
    resized = F.resize(img, self.size, self.interpolation)
    return resized
class RandomResizedCrop(object): class RandomResizedCrop(object):
...@@ -193,10 +193,10 @@ class RandomResizedCrop(object): ...@@ -193,10 +193,10 @@ class RandomResizedCrop(object):
y = (height - h) // 2 y = (height - h) // 2
return x, y, w, h return x, y, w, h
def __call__(self, img):
    """Crop the window chosen by ``_get_params`` and resize it.

    Args:
        img: image array; the first two axes are sliced as rows/columns.

    Returns:
        The cropped patch resized to ``self.output_size``.
    """
    left, top, crop_w, crop_h = self._get_params(img)
    patch = img[top:top + crop_h, left:left + crop_w]
    return F.resize(patch, self.output_size, self.interpolation)
class CenterCropResize(object): class CenterCropResize(object):
...@@ -224,10 +224,10 @@ class CenterCropResize(object): ...@@ -224,10 +224,10 @@ class CenterCropResize(object):
y = (w + 1 - c) // 2 y = (w + 1 - c) // 2
return c, x, y return c, x, y
def __call__(self, img):
    """Cut the square window given by ``_get_params`` and resize it.

    Args:
        img: image array with three axes; the last one is kept whole.

    Returns:
        The square crop resized to ``self.size``.
    """
    side, row0, col0 = self._get_params(img)
    square = img[row0:row0 + side, col0:col0 + side, :]
    return F.resize(square, self.size, self.interpolation)
class CenterCrop(object): class CenterCrop(object):
...@@ -251,10 +251,10 @@ class CenterCrop(object): ...@@ -251,10 +251,10 @@ class CenterCrop(object):
y = int(round((h - th) / 2.0)) y = int(round((h - th) / 2.0))
return x, y return x, y
def __call__(self, img, lbl): def __call__(self, img):
x, y = self._get_params(img) x, y = self._get_params(img)
th, tw = self.output_size th, tw = self.output_size
return img[y:y + th, x:x + tw], lbl return img[y:y + th, x:x + tw]
class RandomHorizontalFlip(object): class RandomHorizontalFlip(object):
...@@ -267,10 +267,10 @@ class RandomHorizontalFlip(object): ...@@ -267,10 +267,10 @@ class RandomHorizontalFlip(object):
def __init__(self, prob=0.5): def __init__(self, prob=0.5):
self.prob = prob self.prob = prob
def __call__(self, img, lbl): def __call__(self, img):
if np.random.random() < self.prob: if np.random.random() < self.prob:
return F.flip(img, code=1), lbl return F.flip(img, code=1)
return img, lbl return img
class RandomVerticalFlip(object): class RandomVerticalFlip(object):
...@@ -283,10 +283,10 @@ class RandomVerticalFlip(object): ...@@ -283,10 +283,10 @@ class RandomVerticalFlip(object):
def __init__(self, prob=0.5): def __init__(self, prob=0.5):
self.prob = prob self.prob = prob
def __call__(self, img, lbl): def __call__(self, img):
if np.random.random() < self.prob: if np.random.random() < self.prob:
return F.flip(img, code=0), lbl return F.flip(img, code=0)
return img, lbl return img
class Normalize(object): class Normalize(object):
...@@ -311,8 +311,8 @@ class Normalize(object): ...@@ -311,8 +311,8 @@ class Normalize(object):
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1) self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1) self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def __call__(self, img, lbl): def __call__(self, img):
return (img - self.mean) / self.std, lbl return (img - self.mean) / self.std
class Permute(object): class Permute(object):
...@@ -333,12 +333,12 @@ class Permute(object): ...@@ -333,12 +333,12 @@ class Permute(object):
self.mode = mode self.mode = mode
self.to_rgb = to_rgb self.to_rgb = to_rgb
def __call__(self, img, lbl): def __call__(self, img):
if self.to_rgb: if self.to_rgb:
img = img[..., ::-1] img = img[..., ::-1]
if self.mode == "CHW": if self.mode == "CHW":
return img.transpose((2, 0, 1)), lbl return img.transpose((2, 0, 1))
return img, lbl return img
class GaussianNoise(object): class GaussianNoise(object):
...@@ -354,11 +354,11 @@ class GaussianNoise(object): ...@@ -354,11 +354,11 @@ class GaussianNoise(object):
self.mean = np.array(mean, dtype=np.float32) self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32) self.std = np.array(std, dtype=np.float32)
def __call__(self, img, lbl): def __call__(self, img):
dtype = img.dtype dtype = img.dtype
noise = np.random.normal(self.mean, self.std, img.shape) * 255 noise = np.random.normal(self.mean, self.std, img.shape) * 255
img = img + noise.astype(np.float32) img = img + noise.astype(np.float32)
return np.clip(img, 0, 255).astype(dtype), lbl return np.clip(img, 0, 255).astype(dtype)
class BrightnessTransform(object): class BrightnessTransform(object):
...@@ -374,15 +374,15 @@ class BrightnessTransform(object): ...@@ -374,15 +374,15 @@ class BrightnessTransform(object):
raise ValueError("brightness value should be non-negative") raise ValueError("brightness value should be non-negative")
self.value = value self.value = value
def __call__(self, img, lbl): def __call__(self, img):
if self.value == 0: if self.value == 0:
return img, lbl return img
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha img = img * alpha
return img.clip(0, 255).astype(dtype), lbl return img.clip(0, 255).astype(dtype)
class ContrastTransform(object): class ContrastTransform(object):
...@@ -398,16 +398,16 @@ class ContrastTransform(object): ...@@ -398,16 +398,16 @@ class ContrastTransform(object):
raise ValueError("contrast value should be non-negative") raise ValueError("contrast value should be non-negative")
self.value = value self.value = value
def __call__(self, img, lbl): def __call__(self, img):
if self.value == 0: if self.value == 0:
return img, lbl return img
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value) alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * ( img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha) 1 - alpha)
return img.clip(0, 255).astype(dtype), lbl return img.clip(0, 255).astype(dtype)
class SaturationTransform(object): class SaturationTransform(object):
...@@ -423,9 +423,9 @@ class SaturationTransform(object): ...@@ -423,9 +423,9 @@ class SaturationTransform(object):
raise ValueError("saturation value should be non-negative") raise ValueError("saturation value should be non-negative")
self.value = value self.value = value
def __call__(self, img, lbl): def __call__(self, img):
if self.value == 0: if self.value == 0:
return img, lbl return img
dtype = img.dtype dtype = img.dtype
img = img.astype(np.float32) img = img.astype(np.float32)
...@@ -433,7 +433,7 @@ class SaturationTransform(object): ...@@ -433,7 +433,7 @@ class SaturationTransform(object):
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis] gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha) img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype), lbl return img.clip(0, 255).astype(dtype)
class HueTransform(object): class HueTransform(object):
...@@ -449,9 +449,9 @@ class HueTransform(object): ...@@ -449,9 +449,9 @@ class HueTransform(object):
raise ValueError("hue value should be in [0.0, 0.5]") raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value self.value = value
def __call__(self, img, lbl): def __call__(self, img):
if self.value == 0: if self.value == 0:
return img, lbl return img
dtype = img.dtype dtype = img.dtype
img = img.astype(np.uint8) img = img.astype(np.uint8)
...@@ -464,7 +464,7 @@ class HueTransform(object): ...@@ -464,7 +464,7 @@ class HueTransform(object):
with np.errstate(over="ignore"): with np.errstate(over="ignore"):
h += np.uint8(alpha * 255) h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v]) hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype)
class ColorJitter(object): class ColorJitter(object):
...@@ -499,5 +499,5 @@ class ColorJitter(object): ...@@ -499,5 +499,5 @@ class ColorJitter(object):
random.shuffle(transforms) random.shuffle(transforms)
self.transforms = Compose(transforms) self.transforms = Compose(transforms)
def __call__(self, img, lbl): def __call__(self, img):
return self.transforms(img, lbl) return self.transforms(img)
...@@ -24,10 +24,10 @@ import numpy as np ...@@ -24,10 +24,10 @@ import numpy as np
from paddle import fluid from paddle import fluid
from paddle.fluid.optimizer import Momentum from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from vision.datasets import MNIST as MnistDataset from hapi.datasets.mnist import MNIST as MnistDataset
from model import Model, CrossEntropy, Input, set_device from hapi.model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy from hapi.metrics import Accuracy
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
......
...@@ -190,7 +190,8 @@ class TestModel(unittest.TestCase): ...@@ -190,7 +190,8 @@ class TestModel(unittest.TestCase):
eval_result = model.evaluate(val_dataset, batch_size=batch_size) eval_result = model.evaluate(val_dataset, batch_size=batch_size)
output = model.predict(test_dataset, batch_size=batch_size) output = model.predict(
test_dataset, batch_size=batch_size, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(test_dataset)) np.testing.assert_equal(output[0].shape[0], len(test_dataset))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册