未验证 提交 2458c1fb 编写于 作者: L LielinJiang 提交者: GitHub

Merge pull request #49 from LielinJiang/lenet

Add lenet
......@@ -85,8 +85,9 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
| [darknet53](https://paddle-hapi.bj.bcebos.com/models/darknet53.pdparams) | 78.43 | 94.24 |
上述模型的复现参数请参考scripts下的脚本。
上述部分模型的复现参数请参考scripts下的脚本。需要注意的是darknet要使用image size为256的输入来预测, 即```--image-size 256```
## 参考文献
......
......@@ -24,7 +24,11 @@ from paddle import fluid
class ImageNetDataset(DatasetFolder):
def __init__(self, path, mode='train'):
def __init__(self,
path,
mode='train',
image_size=224,
resize_short_size=256):
super(ImageNetDataset, self).__init__(path)
self.mode = mode
......@@ -32,13 +36,14 @@ class ImageNetDataset(DatasetFolder):
mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
if self.mode == 'train':
self.transform = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomResizedCrop(image_size),
transforms.RandomHorizontalFlip(),
transforms.Permute(mode='CHW'), normalize
])
else:
self.transform = transforms.Compose([
transforms.Resize(256), transforms.CenterCrop(224),
transforms.Resize(resize_short_size),
transforms.CenterCrop(image_size),
transforms.Permute(mode='CHW'), normalize
])
......@@ -46,7 +51,7 @@ class ImageNetDataset(DatasetFolder):
img_path, label = self.samples[idx]
img = cv2.imread(img_path).astype(np.float32)
label = np.array([label])
return self.transform(img, label)
return self.transform(img), label
def __len__(self):
return len(self.samples)
......@@ -18,8 +18,6 @@ from __future__ import print_function
import argparse
import contextlib
import os
import sys
sys.path.append('../')
import time
import math
......@@ -89,8 +87,16 @@ def main():
labels = [Input([None, 1], 'int64', name='label')]
train_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'train'), mode='train')
val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val')
os.path.join(FLAGS.data, 'train'),
mode='train',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
val_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'val'),
mode='val',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
optim = make_optimizer(
np.ceil(
......@@ -176,6 +182,13 @@ if __name__ == '__main__':
parser.add_argument(
"--weight-decay", default=1e-4, type=float, help="weight decay")
parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
parser.add_argument(
"--image-size", default=224, type=int, help="intput image size")
parser.add_argument(
"--resize-short-size",
default=256,
type=int,
help="short size of keeping ratio resize")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
main()
......@@ -27,7 +27,7 @@ from paddle.io import DataLoader
from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.vision.transforms import Compose, BatchCompose
from hapi.vision.transforms import BatchCompose
from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset
......@@ -43,10 +43,9 @@ def make_optimizer(step_per_epoch, parameter_list=None):
momentum = 0.9
weight_decay = 5e-4
boundaries = [step_per_epoch * e for e in [200, 250]]
values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay(
boundaries=boundaries,
values=values)
boundaries=boundaries, values=values)
learning_rate = fluid.layers.linear_lr_warmup(
learning_rate=learning_rate,
warmup_steps=warm_up_iter,
......@@ -64,56 +63,62 @@ def main():
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')]
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]
inputs = [
Input(
[None, 1], 'int64', name='img_id'), Input(
[None, 2], 'int32', name='img_shape'), Input(
[None, 3, None, None], 'float32', name='image')
]
labels = [
Input(
[None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), Input(
[None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input(
[None, NUM_MAX_BOXES], 'float32', name='gt_score')
]
if not FLAGS.eval_only: # training mode
train_transform = Compose([ColorDistort(),
RandomExpand(),
RandomCrop(),
RandomFlip(),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
train_transform = Compose([
ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
NormalizeBox(), PadBox(), BboxXYXY2XYWH()
])
train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_train2017.json',
image_dir='train2017',
with_background=False,
mixup=True,
transform=train_transform)
batch_sampler = DistributedBatchSampler(dataset,
batch_size=FLAGS.batch_size,
shuffle=True,
drop_last=True)
loader = DataLoader(dataset,
batch_sampler = DistributedBatchSampler(
dataset, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
else: # evaluation mode
eval_transform = Compose([ResizeImage(target_size=608),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
eval_transform = Compose([
ResizeImage(target_size=608), NormalizeBox(), PadBox(),
BboxXYXY2XYWH()
])
eval_collate_fn = BatchCompose([NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_val2017.json',
image_dir='val2017',
with_background=False,
transform=eval_transform)
# batch_size can only be 1 in evaluation for YOLOv3
# prediction bbox is a LoDTensor
batch_sampler = DistributedBatchSampler(dataset,
batch_size=1,
shuffle=False,
drop_last=False)
loader = DataLoader(dataset,
batch_sampler = DistributedBatchSampler(
dataset, batch_size=1, shuffle=False, drop_last=False)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
......@@ -121,18 +126,23 @@ def main():
collate_fn=eval_collate_fn)
pretrained = FLAGS.eval_only and FLAGS.weights is None
model = yolov3_darknet53(num_classes=dataset.num_classes,
model = yolov3_darknet53(
num_classes=dataset.num_classes,
model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
if FLAGS.pretrain_weights and not FLAGS.eval_only:
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
model.load(
FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
optim = make_optimizer(
len(batch_sampler), parameter_list=model.parameters())
model.prepare(optim,
model.prepare(
optim,
YoloLoss(num_classes=dataset.num_classes),
inputs=inputs, labels=labels,
inputs=inputs,
labels=labels,
device=FLAGS.device)
# NOTE: we implement COCO metric of YOLOv3 model here, separately
......@@ -149,7 +159,8 @@ def main():
preds = model.predict(loader, stack_outputs=False)
_, _, _, img_ids, bboxes = preds
anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json')
anno_path = os.path.join(FLAGS.data,
'annotations/instances_val2017.json')
coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
for img_id, bbox in zip(img_ids, bboxes):
coco_metric.update(img_id, bbox)
......@@ -176,7 +187,9 @@ def main():
if __name__ == '__main__':
parser = argparse.ArgumentParser("Yolov3 Training on VOC")
parser.add_argument(
"--data", type=str, default='dataset/voc',
"--data",
type=str,
default='dataset/voc',
help="path to dataset directory")
parser.add_argument(
"--device", type=str, default='gpu', help="device to use, gpu or cpu")
......@@ -187,23 +200,38 @@ if __name__ == '__main__':
parser.add_argument(
"-e", "--epoch", default=300, type=int, help="number of epoch")
parser.add_argument(
"--no_mixup_epoch", default=30, type=int,
"--no_mixup_epoch",
default=30,
type=int,
help="number of the last N epoch without image mixup")
parser.add_argument(
'--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
'--lr',
'--learning-rate',
default=0.001,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=8, type=int, help="batch size")
parser.add_argument(
"-j", "--num_workers", default=4, type=int, help="reader worker number")
"-j",
"--num_workers",
default=4,
type=int,
help="reader worker number")
parser.add_argument(
"-p", "--pretrain_weights", default=None, type=str,
"-p",
"--pretrain_weights",
default=None,
type=str,
help="path to pretrained weights")
parser.add_argument(
"-r", "--resume", default=None, type=str,
help="path to model weights")
"-r", "--resume", default=None, type=str, help="path to model weights")
parser.add_argument(
"-w", "--weights", default=None, type=str,
"-w",
"--weights",
default=None,
type=str,
help="path to weights for evaluation")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
......
......@@ -73,6 +73,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__()
......@@ -81,38 +82,34 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
"channel {} cannot be divided by 2".format(channel)
self.conv0 = ConvBNLayer(
ch_in=ch_in,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0)
self.conv1 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.conv2 = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.conv3 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.route = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.tip = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
......@@ -149,8 +146,10 @@ class YOLOv3(Model):
"model_mode should be 'train' 'eval' or 'test', but got " \
"{}".format(model_mode)
self.model_mode = str.lower(model_mode)
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
self.valid_thresh = 0.005
self.nms_thresh = 0.45
......@@ -158,7 +157,10 @@ class YOLOv3(Model):
self.nms_posk = 100
self.draw_thresh = 0.5
self.backbone = darknet53(pretrained=(model_mode=='train'))
self.backbone = darknet53(
pretrained=(model_mode == 'train'),
with_pool=False,
num_classes=-1)
self.block_outputs = []
self.yolo_blocks = []
self.route_blocks = []
......@@ -173,7 +175,8 @@ class YOLOv3(Model):
block_out = self.add_sublayer(
"block_out_{}".format(idx),
Conv2D(num_channels=1024 // (2**idx),
Conv2D(
num_channels=1024 // (2**idx),
num_filters=num_filters,
filter_size=1,
act=None,
......@@ -186,19 +189,32 @@ class YOLOv3(Model):
if idx < 2:
route = self.add_sublayer(
"route2_{}".format(idx),
ConvBNLayer(ch_in=512 // (2**idx),
ConvBNLayer(
ch_in=512 // (2**idx),
ch_out=256 // (2**idx),
filter_size=1,
act='leaky_relu'))
self.route_blocks.append(route)
def extract_feats(self, inputs):
out = self.backbone.conv0(inputs)
out = self.backbone.downsample0(out)
blocks = []
for i, conv_block_i in enumerate(
self.backbone.darknet53_conv_block_list):
out = conv_block_i(out)
blocks.append(out)
if i < len(self.backbone.stages) - 1:
out = self.backbone.downsample_list[i](out)
return blocks[-1:-4:-1]
def forward(self, img_id, img_shape, inputs):
outputs = []
boxes = []
scores = []
downsample = 32
feats = self.backbone(inputs)
feats = self.extract_feats(inputs)
route = None
for idx, feat in enumerate(feats):
if idx > 0:
......@@ -233,15 +249,18 @@ class YOLOv3(Model):
if self.model_mode == 'train':
return outputs
preds = [img_id,
fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(boxes, axis=1),
scores=fluid.layers.concat(scores, axis=2),
preds = [
img_id, fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(
boxes, axis=1),
scores=fluid.layers.concat(
scores, axis=2),
score_threshold=self.valid_thresh,
nms_top_k=self.nms_topk,
keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)]
background_label=-1)
]
if self.model_mode == 'test':
return preds
......@@ -249,14 +268,17 @@ class YOLOv3(Model):
# model_mode == "eval"
return outputs + preds
class YoloLoss(Loss):
def __init__(self, num_classes=80, num_max_boxes=50):
super(YoloLoss, self).__init__()
self.num_classes = num_classes
self.num_max_boxes = num_max_boxes
self.ignore_thresh = 0.7
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
def forward(self, outputs, labels):
......@@ -284,8 +306,10 @@ class YoloLoss(Loss):
return losses
def _yolov3_darknet(num_layers=53, num_classes=80,
model_mode='train', pretrained=True):
def _yolov3_darknet(num_layers=53,
num_classes=80,
model_mode='train',
pretrained=True):
model = YOLOv3(num_classes, model_mode)
if pretrained:
assert num_layers in pretrain_infos.keys(), \
......
......@@ -20,6 +20,7 @@ import traceback
import numpy as np
__all__ = [
"Compose",
'ColorDistort',
'RandomExpand',
'RandomCrop',
......@@ -33,6 +34,37 @@ __all__ = [
]
class Compose(object):
    """Chain several transforms into a single callable.

    Each transform is called with the previous transform's output unpacked
    as positional arguments, so every transform must return an iterable
    matching the next transform's parameters.

    Args:
        transforms (list of ``Transform`` objects): transforms to apply,
            in order.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, *data):
        """Run ``data`` through every transform and return the final result.

        With no transforms the positional arguments come back unchanged,
        as a tuple.  If a transform raises, the failure is printed together
        with its stack trace and the same exception is raised again.
        """
        for transform in self.transforms:
            try:
                data = transform(*data)
            except Exception as err:
                trace = traceback.format_exc()
                report = ("fail to perform transform [{}] with error: "
                          "{} and stack:\n{}").format(transform, err,
                                                      str(trace))
                print(report)
                raise err
        return data

    def __repr__(self):
        # One line per transform, wrapped in "ClassName(" ... ")".
        pieces = [self.__class__.__name__ + '(']
        pieces.extend('\n' + ' {0}'.format(t) for t in self.transforms)
        pieces.append('\n)')
        return ''.join(pieces)
class ColorDistort(object):
"""Random color distortion.
......@@ -147,7 +179,10 @@ class RandomExpand(object):
fill_value (list): color value used to fill the canvas. in RGB order.
"""
def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]):
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
......@@ -493,8 +528,7 @@ def _crop_box_with_center_constraint(box, crop):
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(
crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
......@@ -517,8 +551,8 @@ def random_crop(inputs):
for i in range(50):
scale = np.random.uniform(*scaling)
min_ar, max_ar = aspect_ratios
ar = np.random.uniform(max(min_ar, scale**2),
min(max_ar, scale**-2))
ar = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(ar))
crop_w = int(w * scale * np.sqrt(ar))
crop_y = np.random.randint(0, h - crop_h)
......@@ -529,7 +563,8 @@ def random_crop(inputs):
continue
cropped_box, valid_ids = _crop_box_with_center_constraint(
gt_box, np.array(crop_box, dtype=np.float32))
gt_box, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
......@@ -545,9 +580,7 @@ def random_crop(inputs):
class ResizeImage(object):
def __init__(self,
target_size=0,
interp=cv2.INTER_CUBIC):
def __init__(self, target_size=0, interp=cv2.INTER_CUBIC):
"""
Rescale image to the specified target size.
If target_size is list, selected a scale randomly as the specified
......@@ -586,4 +619,3 @@ class ResizeImage(object):
interpolation=self.interp)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
......@@ -150,7 +150,7 @@ class DatasetFolder(Dataset):
path, target = self.samples[index]
sample = self.loader(path)
if self.transform is not None:
sample, target = self.transform(sample, target)
sample, target = self.transform(sample)
return sample, target
......
......@@ -1135,7 +1135,7 @@ class Model(fluid.dygraph.Layer):
test_data,
batch_size=1,
num_workers=0,
stack_outputs=True):
stack_outputs=False):
"""
FIXME: add more comments and usage
Args:
......@@ -1183,20 +1183,29 @@ class Model(fluid.dygraph.Layer):
loader = test_loader()
outputs = []
count = 0
for data in tqdm.tqdm(loader):
data = flatten(data)
outputs.append(self.test_batch(data[:len(self._inputs)]))
out = to_list(self.test_batch(data[:len(self._inputs)]))
outputs.append(out)
count += out[0].shape[0]
if test_loader is not None and self._adapter._nranks > 1 \
and isinstance(test_loader, DataLoader) \
and count > len(test_loader.dataset):
size = outputs[-1][0].shape[0] - (count - len(test_loader.dataset))
outputs[-1] = [o[:size] for o in outputs[-1]]
# NOTE: for lod tensor output, we should not stack outputs
# for stacking may loss its detail info
outputs = list(zip(*outputs))
if stack_outputs:
outputs = [np.stack(outs, axis=0) for outs in outputs]
outputs = [np.vstack(outs) for outs in outputs]
self._test_dataloader = None
if test_loader is not None and self._adapter._nranks > 1 \
and isinstance(test_loader, DataLoader):
outputs = [o[:len(test_loader.dataset)] for o in outputs]
return outputs
def _run_one_epoch(self,
......
......@@ -12,11 +12,12 @@
#See the License for the specific language governing permissions and
#limitations under the License.
import math
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
from hapi.model import Model
from hapi.download import get_weights_path
......@@ -25,8 +26,8 @@ __all__ = ['DarkNet', 'darknet53']
# {num_layers: (url, md5)}
pretrain_infos = {
53: ('https://paddlemodels.bj.bcebos.com/hapi/darknet53.pdparams',
'2506357a5c31e865785112fc614a487d')
53: ('https://paddle-hapi.bj.bcebos.com/models/darknet53.pdparams',
'ca506a90e2efecb9a2093f8ada808708')
}
......@@ -66,17 +67,14 @@ class ConvBNLayer(fluid.dygraph.Layer):
def forward(self, inputs):
out = self.conv(inputs)
out = self.batch_norm(out)
# out = fluid.layers.relu(out)
if self.act == 'leaky':
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class DownSample(fluid.dygraph.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size=3,
stride=2,
padding=1):
def __init__(self, ch_in, ch_out, filter_size=3, stride=2, padding=1):
super(DownSample, self).__init__()
......@@ -87,46 +85,45 @@ class DownSample(fluid.dygraph.Layer):
stride=stride,
padding=padding)
self.ch_out = ch_out
def forward(self, inputs):
out = self.conv_bn_layer(inputs)
return out
class BasicBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, ch_out):
super(BasicBlock, self).__init__()
self.conv1 = ConvBNLayer(
ch_in=ch_in,
ch_out=ch_out,
filter_size=1,
stride=1,
padding=0)
ch_in=ch_in, ch_out=ch_out, filter_size=1, stride=1, padding=0)
self.conv2 = ConvBNLayer(
ch_in=ch_out,
ch_out=ch_out*2,
ch_out=ch_out * 2,
filter_size=3,
stride=1,
padding=1)
def forward(self, inputs):
conv1 = self.conv1(inputs)
conv2 = self.conv2(conv1)
out = fluid.layers.elementwise_add(x=inputs, y=conv2, act=None)
return out
class LayerWarp(fluid.dygraph.Layer):
def __init__(self, ch_in, ch_out, count):
super(LayerWarp,self).__init__()
super(LayerWarp, self).__init__()
self.basicblock0 = BasicBlock(ch_in, ch_out)
self.res_out_list = []
for i in range(1,count):
for i in range(1, count):
res_out = self.add_sublayer("basic_block_%d" % (i),
BasicBlock(
ch_out*2,
ch_out))
BasicBlock(ch_out * 2, ch_out))
self.res_out_list.append(res_out)
self.ch_out = ch_out
def forward(self,inputs):
def forward(self, inputs):
y = self.basicblock0(inputs)
for basic_block_i in self.res_out_list:
y = basic_block_i(y)
......@@ -142,61 +139,82 @@ class DarkNet(Model):
Args:
num_layers (int): layer number of DarkNet, only 53 supported currently, default: 53.
ch_in (int): channel number of input data, default 3.
num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
will not be defined. Default: 1000.
with_pool (bool): use pool before the last fc layer or not. Default: True.
classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
"""
def __init__(self,
             num_layers=53,
             num_classes=1000,
             with_pool=True,
             classifier_activation='softmax'):
    """Build the DarkNet backbone and, optionally, its classifier head.

    Args:
        num_layers (int): key into ``DarkNet_cfg``; must be one of its
            keys. Default: 53.
        num_classes (int): output dim of the last fc layer. If
            num_classes <= 0 the fc layer is not created. Default: 1000.
        with_pool (bool): create the global average pool that ``forward``
            applies before the fc layer. Default: True.
        classifier_activation (str): activation passed to the last fc
            layer. Default: 'softmax'.
    """
    super(DarkNet, self).__init__()
    assert num_layers in DarkNet_cfg.keys(), \
        "only support num_layers in {} currently" \
        .format(DarkNet_cfg.keys())
    self.stages = DarkNet_cfg[num_layers]
    self.stages = self.stages[0:5]
    self.num_classes = num_classes
    # Honor the caller's choice; this used to be hard-coded to True, so
    # with_pool=False was silently ignored.
    self.with_pool = with_pool

    # Stem: 3-channel input -> 32 channels, then downsample to 64.
    ch_in = 3
    self.conv0 = ConvBNLayer(
        ch_in=ch_in, ch_out=32, filter_size=3, stride=1, padding=1)

    self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2)
    self.darknet53_conv_block_list = []
    self.downsample_list = []
    # Input channel count of each residual stage.
    ch_in = [64, 128, 256, 512, 1024]

    for i, stage in enumerate(self.stages):
        conv_block = self.add_sublayer("stage_%d" % (i),
                                       LayerWarp(
                                           int(ch_in[i]), 32 * (2**i),
                                           stage))
        self.darknet53_conv_block_list.append(conv_block)

    # One downsample between consecutive stages (none after the last).
    for i in range(len(self.stages) - 1):
        downsample = self.add_sublayer(
            "stage_%d_downsample" % i,
            DownSample(
                ch_in=32 * (2**(i + 1)), ch_out=32 * (2**(i + 2))))
        self.downsample_list.append(downsample)

    if self.with_pool:
        self.global_pool = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

    if self.num_classes > 0:
        # `i` is the last index of the downsample loop above, so
        # 32 * 2**(i + 2) equals the ch_out of the final DownSample.
        stdv = 1.0 / math.sqrt(32 * (2**(i + 2)))
        self.fc_input_dim = 32 * (2**(i + 2))
        self.fc = Linear(
            self.fc_input_dim,
            num_classes,
            # Was hard-coded to 'softmax', ignoring the parameter.
            act=classifier_activation,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
out = self.conv0(inputs)
out = self.downsample0(out)
blocks = []
for i, conv_block_i in enumerate(self.darknet53_conv_block_list):
out = conv_block_i(out)
blocks.append(out)
if i < len(self.stages) - 1:
out = self.downsample_list[i](out)
return blocks[-1:-4:-1]
if self.with_pool:
out = self.global_pool(out)
if self.num_classes > 0:
out = fluid.layers.reshape(out, shape=[-1, self.fc_input_dim])
out = self.fc(out)
return out
def _darknet(num_layers=53, input_channels=3, pretrained=True):
model = DarkNet(num_layers, input_channels)
def _darknet(num_layers=53, pretrained=False, **kwargs):
model = DarkNet(num_layers, **kwargs)
if pretrained:
assert num_layers in pretrain_infos.keys(), \
"DarkNet{} do not have pretrained weights now, " \
......@@ -208,7 +226,7 @@ def _darknet(num_layers=53, input_channels=3, pretrained=True):
return model
def darknet53(input_channels=3, pretrained=True):
def darknet53(pretrained=False, **kwargs):
"""DarkNet 53-layer model
Args:
......@@ -216,4 +234,4 @@ def darknet53(input_channels=3, pretrained=True):
pretrained (bool): If True, returns a model pre-trained on ImageNet,
default True.
"""
return _darknet(53, input_channels, pretrained)
return _darknet(53, pretrained, **kwargs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
from paddle.fluid.dygraph.container import Sequential
from hapi.model import Model
__all__ = ['LeNet']
class LeNet(Model):
    """LeNet model from
    `"LeCun Y, Bottou L, Bengio Y, et al. Gradient-based learning applied to
    document recognition[J]. Proceedings of the IEEE, 1998, 86(11): 2278-2324."`_

    Args:
        num_classes (int): output dim of last fc layer. If num_classes <=0, last fc layer
            will not be defined. Default: 10.
        classifier_activation (str): activation for the last fc layer. Default: 'softmax'.
    """

    def __init__(self, num_classes=10, classifier_activation='softmax'):
        super(LeNet, self).__init__()
        self.num_classes = num_classes
        # Two conv + max-pool stages; the first conv takes 1 input channel.
        self.features = Sequential(
            Conv2D(
                1, 6, 3, stride=1, padding=1),
            Pool2D(2, 'max', 2),
            Conv2D(
                6, 16, 5, stride=1, padding=0),
            Pool2D(2, 'max', 2))

        if num_classes > 0:
            # 400 input features: assumes a 1x28x28 input, which the
            # feature extractor above reduces to 16x5x5 -- TODO confirm
            # against the datasets this model is used with.
            self.fc = Sequential(
                Linear(400, 120),
                Linear(120, 84),
                # Output width follows num_classes; it was hard-coded to
                # 10, which silently ignored any other value.
                Linear(
                    84, num_classes, act=classifier_activation))

    def forward(self, inputs):
        """Return class scores, or raw feature maps when num_classes <= 0."""
        x = self.features(inputs)

        if self.num_classes > 0:
            # Flatten everything after the batch axis before the fc stack.
            x = fluid.layers.flatten(x, 1)
            x = self.fc(x)
        return x
......@@ -64,10 +64,10 @@ class Compose(object):
def __init__(self, transforms):
self.transforms = transforms
def __call__(self, *data):
def __call__(self, data):
for f in self.transforms:
try:
data = f(*data)
data = f(data)
except Exception as e:
stack_info = traceback.format_exc()
print("fail to perform transform [{}] with error: "
......@@ -130,8 +130,8 @@ class Resize(object):
self.size = size
self.interpolation = interpolation
def __call__(self, img, lbl):
return F.resize(img, self.size, self.interpolation), lbl
def __call__(self, img):
return F.resize(img, self.size, self.interpolation)
class RandomResizedCrop(object):
......@@ -193,10 +193,10 @@ class RandomResizedCrop(object):
y = (height - h) // 2
return x, y, w, h
def __call__(self, img, lbl):
def __call__(self, img):
x, y, w, h = self._get_params(img)
cropped_img = img[y:y + h, x:x + w]
return F.resize(cropped_img, self.output_size, self.interpolation), lbl
return F.resize(cropped_img, self.output_size, self.interpolation)
class CenterCropResize(object):
......@@ -224,10 +224,10 @@ class CenterCropResize(object):
y = (w + 1 - c) // 2
return c, x, y
def __call__(self, img, lbl):
def __call__(self, img):
c, x, y = self._get_params(img)
cropped_img = img[x:x + c, y:y + c, :]
return F.resize(cropped_img, self.size, self.interpolation), lbl
return F.resize(cropped_img, self.size, self.interpolation)
class CenterCrop(object):
......@@ -251,10 +251,10 @@ class CenterCrop(object):
y = int(round((h - th) / 2.0))
return x, y
def __call__(self, img, lbl):
def __call__(self, img):
x, y = self._get_params(img)
th, tw = self.output_size
return img[y:y + th, x:x + tw], lbl
return img[y:y + th, x:x + tw]
class RandomHorizontalFlip(object):
......@@ -267,10 +267,10 @@ class RandomHorizontalFlip(object):
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img, lbl):
def __call__(self, img):
if np.random.random() < self.prob:
return F.flip(img, code=1), lbl
return img, lbl
return F.flip(img, code=1)
return img
class RandomVerticalFlip(object):
......@@ -283,10 +283,10 @@ class RandomVerticalFlip(object):
def __init__(self, prob=0.5):
self.prob = prob
def __call__(self, img, lbl):
def __call__(self, img):
if np.random.random() < self.prob:
return F.flip(img, code=0), lbl
return img, lbl
return F.flip(img, code=0)
return img
class Normalize(object):
......@@ -311,8 +311,8 @@ class Normalize(object):
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
def __call__(self, img, lbl):
return (img - self.mean) / self.std, lbl
def __call__(self, img):
return (img - self.mean) / self.std
class Permute(object):
......@@ -333,12 +333,12 @@ class Permute(object):
self.mode = mode
self.to_rgb = to_rgb
def __call__(self, img, lbl):
def __call__(self, img):
if self.to_rgb:
img = img[..., ::-1]
if self.mode == "CHW":
return img.transpose((2, 0, 1)), lbl
return img, lbl
return img.transpose((2, 0, 1))
return img
class GaussianNoise(object):
......@@ -354,11 +354,11 @@ class GaussianNoise(object):
self.mean = np.array(mean, dtype=np.float32)
self.std = np.array(std, dtype=np.float32)
def __call__(self, img, lbl):
def __call__(self, img):
dtype = img.dtype
noise = np.random.normal(self.mean, self.std, img.shape) * 255
img = img + noise.astype(np.float32)
return np.clip(img, 0, 255).astype(dtype), lbl
return np.clip(img, 0, 255).astype(dtype)
class BrightnessTransform(object):
......@@ -374,15 +374,15 @@ class BrightnessTransform(object):
raise ValueError("brightness value should be non-negative")
self.value = value
def __call__(self, img, lbl):
def __call__(self, img):
if self.value == 0:
return img, lbl
return img
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha
return img.clip(0, 255).astype(dtype), lbl
return img.clip(0, 255).astype(dtype)
class ContrastTransform(object):
......@@ -398,16 +398,16 @@ class ContrastTransform(object):
raise ValueError("contrast value should be non-negative")
self.value = value
def __call__(self, img, lbl):
def __call__(self, img):
if self.value == 0:
return img, lbl
return img
dtype = img.dtype
img = img.astype(np.float32)
alpha = np.random.uniform(max(0, 1 - self.value), 1 + self.value)
img = img * alpha + cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).mean() * (
1 - alpha)
return img.clip(0, 255).astype(dtype), lbl
return img.clip(0, 255).astype(dtype)
class SaturationTransform(object):
......@@ -423,9 +423,9 @@ class SaturationTransform(object):
raise ValueError("saturation value should be non-negative")
self.value = value
def __call__(self, img, lbl):
def __call__(self, img):
if self.value == 0:
return img, lbl
return img
dtype = img.dtype
img = img.astype(np.float32)
......@@ -433,7 +433,7 @@ class SaturationTransform(object):
gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray_img = gray_img[..., np.newaxis]
img = img * alpha + gray_img * (1 - alpha)
return img.clip(0, 255).astype(dtype), lbl
return img.clip(0, 255).astype(dtype)
class HueTransform(object):
......@@ -449,9 +449,9 @@ class HueTransform(object):
raise ValueError("hue value should be in [0.0, 0.5]")
self.value = value
def __call__(self, img, lbl):
def __call__(self, img):
if self.value == 0:
return img, lbl
return img
dtype = img.dtype
img = img.astype(np.uint8)
......@@ -464,7 +464,7 @@ class HueTransform(object):
with np.errstate(over="ignore"):
h += np.uint8(alpha * 255)
hsv_img = cv2.merge([h, s, v])
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype), lbl
return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR_FULL).astype(dtype)
class ColorJitter(object):
......@@ -499,5 +499,5 @@ class ColorJitter(object):
random.shuffle(transforms)
self.transforms = Compose(transforms)
def __call__(self, img, lbl):
return self.transforms(img, lbl)
def __call__(self, img):
return self.transforms(img)
......@@ -24,10 +24,10 @@ import numpy as np
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from vision.datasets import MNIST as MnistDataset
from hapi.datasets.mnist import MNIST as MnistDataset
from model import Model, CrossEntropy, Input, set_device
from metrics import Accuracy
from hapi.model import Model, CrossEntropy, Input, set_device
from hapi.metrics import Accuracy
class SimpleImgConvPool(fluid.dygraph.Layer):
......
......@@ -190,7 +190,8 @@ class TestModel(unittest.TestCase):
eval_result = model.evaluate(val_dataset, batch_size=batch_size)
output = model.predict(test_dataset, batch_size=batch_size)
output = model.predict(
test_dataset, batch_size=batch_size, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(test_dataset))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册