提交 0935e730 编写于 作者: Y Yang Zhang

Add `resnet` demo

上级 b5ebb2fc
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import argparse
import contextlib
import math
import os
import random
import cv2
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from model import Model
def center_crop_resize(img):
    """Crop the central square of an image and resize it to 224x224.

    The square's side is 224/256 of the shorter image edge.

    Args:
        img: image array indexed as (height, width, channels).

    Returns:
        The cropped region resized to 224x224 with bilinear interpolation.
    """
    h, w = img.shape[:2]
    # Float literal keeps this a true division even without
    # `from __future__ import division`.
    c = int(224.0 / 256 * min(h, w))
    i = (h + 1 - c) // 2
    j = (w + 1 - c) // 2
    crop = img[i: i + c, j: j + c, :]
    # cv2.resize's positional order is (src, dsize, dst, fx, fy, interpolation);
    # pass the interpolation flag by keyword so it cannot land on `fy`.
    return cv2.resize(crop, (224, 224), interpolation=cv2.INTER_LINEAR)
def random_crop_resize(img):
    """Take a random-area, random-aspect-ratio crop and resize it to 224x224.

    Up to 10 attempts are made to sample a crop covering 8%-100% of the image
    area with an aspect ratio in [3/4, 4/3] (sampled log-uniformly). If no
    sampled crop fits inside the image, falls back to `center_crop_resize`.

    Args:
        img: image array indexed as (height, width, channels).

    Returns:
        The selected region resized to 224x224 with bilinear interpolation.
    """
    height, width = img.shape[:2]
    area = height * width
    # Loop-invariant bounds; float literals keep the divisions true divisions.
    log_ratio = (math.log(3.0 / 4), math.log(4.0 / 3))

    for _ in range(10):
        target_area = random.uniform(0.08, 1.) * area
        aspect_ratio = math.exp(random.uniform(*log_ratio))

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        # Reject degenerate (zero-sized) crops as well as oversized ones;
        # an empty slice cannot be resized.
        if 0 < w <= width and 0 < h <= height:
            i = random.randint(0, height - h)
            j = random.randint(0, width - w)
            crop = img[i: i + h, j: j + w, :]
            # Interpolation must be passed by keyword: the third to fifth
            # positional parameters of cv2.resize are dst, fx, fy.
            return cv2.resize(
                crop, (224, 224), interpolation=cv2.INTER_LINEAR)

    return center_crop_resize(img)
def random_flip(img):
    """Horizontally flip an image with probability 0.5.

    Args:
        img: image array indexed as (height, width, channels).

    Returns:
        Either `img` itself or a view of it with the width axis reversed.
    """
    # Flipping unconditionally would be a fixed transform, not augmentation.
    if random.random() < 0.5:
        return img[:, ::-1, :]
    return img
def normalize_permute(img):
    """Convert an HWC image to a normalized float32 CHW array.

    The channel axis is moved to the front and reversed (BGR -> RGB), then
    each channel has its mean subtracted and is scaled by 1/std.

    Args:
        img: array indexed as (height, width, channels), channels in BGR order.

    Returns:
        float32 array indexed as (channels, height, width) in RGB order.
    """
    channel_mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
    channel_std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
    scale = 1. / channel_std

    # astype() copies, so the in-place arithmetic below never touches `img`.
    chw = img.astype(np.float32).transpose((2, 0, 1))
    rgb = chw[::-1, ...]

    # Each `plane` is a view into `rgb`; the augmented assignments
    # modify it in place.
    for plane, offset, factor in zip(rgb, channel_mean, scale):
        plane -= offset
        plane *= factor
    return rgb
def compose(functions):
    """Chain image transforms into a single per-sample mapper.

    Args:
        functions: iterable of callables, applied in order to the image.

    Returns:
        A function taking an `(image, label)` pair and returning
        `(transformed_image, label)`; the label is passed through untouched.
    """
    def process(sample):
        data, target = sample
        for transform in functions:
            data = transform(data)
        return data, target

    return process
def image_folder(path, shuffle=False):
    """Build a reader over an image directory laid out as `path/<class>/...`.

    Each immediate subdirectory of `path` is a class; class indices follow the
    sorted order of the subdirectory names. Files are collected recursively
    and kept only if their extension (case-insensitive) is a known image type.

    Args:
        path: root directory containing one subdirectory per class.
        shuffle: if True, the sample list is shuffled once, when this
            function is called (not on every pass over the reader).

    Returns:
        A zero-argument callable that yields `(file_path, class_index)` pairs.
    """
    valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp')

    class_names = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry)))

    samples = []
    for label, class_name in enumerate(class_names):
        class_dir = os.path.join(path, class_name)
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                full_path = os.path.join(root, fname)
                extension = os.path.splitext(full_path)[1].lower()
                if extension in valid_ext:
                    samples.append((full_path, label))

    if shuffle:
        random.shuffle(samples)

    def reader():
        for sample in samples:
            yield sample

    return reader
class ConvBNLayer(fluid.dygraph.Layer):
    """A bias-free 2-D convolution followed by batch normalization.

    The optional activation is applied by the batch-norm layer, not by the
    convolution.

    Args:
        num_channels: number of input channels.
        num_filters: number of output channels.
        filter_size: convolution kernel size.
        stride: convolution stride.
        groups: number of convolution groups.
        act: activation name forwarded to `BatchNorm`, or None.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        # Half-kernel padding: (k - 1) // 2 on each side.
        half_kernel = (filter_size - 1) // 2
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=half_kernel,
            groups=groups,
            act=None,
            bias_attr=False)
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        """Apply the convolution, then batch norm (with its activation)."""
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(fluid.dygraph.Layer):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convolutions plus a skip path.

    The final 1x1 convolution outputs `num_filters * 4` channels. The stride
    is applied by the 3x3 convolution (and by the projection, when present).

    Args:
        num_channels: number of input channels.
        num_filters: channel width of the first two convolutions.
        stride: stride of the 3x3 convolution and of the projection.
        shortcut: if True the input is added back unchanged; if False it is
            first passed through a 1x1 ConvBNLayer projection.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__()

        expanded_filters = num_filters * 4

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded_filters,
            filter_size=1,
            act=None)

        # A projection is only created when the identity path cannot be used.
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded_filters,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut
        self._num_channels_out = expanded_filters

    def forward(self, inputs):
        """Run the three convolutions, add the skip path, and apply ReLU."""
        branch = self.conv2(self.conv1(self.conv0(inputs)))
        residual = inputs if self.shortcut else self.short(inputs)
        summed = fluid.layers.elementwise_add(x=residual, y=branch)
        # The ReLU is appended via LayerHelper after the addition.
        return LayerHelper(
            self.full_name(), act='relu').append_activation(summed)
class ResNet(Model):
    """Bottleneck ResNet classifier (depth 50, 101 or 152).

    Structure: 7x7 stride-2 conv + BN + ReLU, 3x3 stride-2 max pool, four
    stages of `BottleneckBlock`s, global average pool, and a softmax-activated
    fully connected layer.

    Args:
        depth: network depth; must be 50, 101 or 152.
        num_classes: size of the final fully connected layer's output.
    """

    def __init__(self, depth=50, num_classes=1000):
        super(ResNet, self).__init__()

        # Number of bottleneck blocks in each of the four stages.
        layer_config = {
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        assert depth in layer_config, \
            "supported depth are {} but input layer is {}".format(
                layer_config.keys(), depth)

        blocks_per_stage = layer_config[depth]
        stage_in_channels = [64, 256, 512, 1024]
        stage_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        self.blocks = []
        stages = zip(blocks_per_stage, stage_in_channels, stage_filters)
        for stage_idx, (repeats, in_channels, filters) in enumerate(stages):
            for block_idx in range(repeats):
                is_first = block_idx == 0
                # Only the first block of a stage changes the channel count
                # (so it needs a projection); it also applies the stride-2
                # step in every stage except the first.
                bottleneck = BottleneckBlock(
                    num_channels=in_channels if is_first else filters * 4,
                    num_filters=filters,
                    stride=2 if is_first and stage_idx != 0 else 1,
                    shortcut=not is_first)
                registered = self.add_sublayer(
                    'layer_{}_{}'.format(stage_idx, block_idx), bottleneck)
                self.blocks.append(registered)

        self.global_pool = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        stdv = 1.0 / math.sqrt(2048 * 1.0)
        # Channel count leaving the last stage.
        self.fc_input_dim = stage_filters[-1] * 4
        fc_param_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv))
        self.fc = Linear(self.fc_input_dim,
                         num_classes,
                         act='softmax',
                         param_attr=fc_param_attr)

    def forward(self, inputs):
        """Return the softmax output of the classifier for `inputs`."""
        feat = self.pool(self.conv(inputs))
        for bottleneck in self.blocks:
            feat = bottleneck(feat)
        feat = self.global_pool(feat)
        # Flatten the pooled features for the fully connected layer.
        feat = fluid.layers.reshape(feat, shape=[-1, self.fc_input_dim])
        return self.fc(feat)
def make_optimizer(parameter_list=None):
    """Create a Momentum optimizer with a step-wise learning-rate schedule.

    The learning rate starts at 0.1 and is multiplied by 0.1 at epochs 30, 60
    and 80. Epoch length is derived from a fixed image count and the global
    `FLAGS.batch_size`. L2 weight decay of 1e-4 is applied.

    Args:
        parameter_list: parameters to optimize, forwarded to the optimizer.

    Returns:
        A `fluid.optimizer.Momentum` instance.
    """
    # Presumably the ImageNet-1k training set size; confirm against the data.
    total_images = 1281167
    base_lr = 0.1
    momentum = 0.9
    l2_decay = 1e-4
    decay_epochs = [30, 60, 80]

    step_per_epoch = int(math.floor(float(total_images) / FLAGS.batch_size))
    boundaries = [step_per_epoch * e for e in decay_epochs]
    # One more value than boundaries: the rate used before the first boundary.
    values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]

    schedule = fluid.layers.piecewise_decay(
        boundaries=boundaries, values=values)
    return fluid.optimizer.Momentum(
        learning_rate=schedule,
        momentum=momentum,
        regularization=fluid.regularizer.L2Decay(l2_decay),
        parameter_list=parameter_list)
def accuracy(pred, label, topk=(1, )):
    """Compute top-k accuracy percentages for a batch.

    Args:
        pred: 2-D array of scores, one row per sample.
        label: array of shape (batch, 1) holding the true class indices.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one percentage (0-100) per entry of `topk`.
    """
    maxk = max(topk)
    # Class indices ordered from highest to lowest score, truncated to maxk.
    ranked = np.argsort(pred)[:, ::-1]
    top_classes = ranked[:, :maxk]
    hits = top_classes == np.repeat(label, maxk, 1)

    batch_size = label.shape[0]
    return [100.0 * hits[:, :k].sum() / batch_size for k in topk]
def run(model, loader, mode='train'):
    """Run one pass over `loader`, printing running loss and accuracy.

    Every 10th step prints the mean loss, top-1 and top-5 accuracy over the
    steps processed so far. Reads the global `FLAGS.num_devices`; the device
    type is fixed to 'gpu'.

    Args:
        model: object exposing a method named by `mode` that accepts
            `(inputs, labels, device=..., device_ids=...)` and returns
            `(outputs, losses)`.
        loader: zero-argument callable returning an iterable of
            `(inputs, labels)` batches.
        mode: name of the model method to call for each batch,
            e.g. 'train' or 'eval'.
    """
    total_loss = 0.0
    total_acc1 = 0.0
    total_acc5 = 0.0
    num_steps = 0
    device_ids = list(range(FLAGS.num_devices))
    step_fn = getattr(model, mode)

    for idx, batch in enumerate(loader()):
        outputs, losses = step_fn(
            batch[0], batch[1], device='gpu', device_ids=device_ids)

        top1, top5 = accuracy(outputs[0], batch[1], topk=(1, 5))

        total_loss += np.sum(losses)
        total_acc1 += top1
        total_acc5 += top5
        # Count each batch exactly once so the running means stay correct.
        num_steps += 1

        if idx % 10 == 0:
            print("{:04d}: loss {:0.3f} top1: {:0.3f}% top5: {:0.3f}%".format(
                idx, total_loss / num_steps,
                total_acc1 / num_steps, total_acc5 / num_steps))
def main():
    """Train and evaluate ResNet on `FLAGS.data` for `FLAGS.epoch` epochs.

    Builds the train and validation readers from the 'train' and 'val'
    subdirectories, then for every epoch runs training, saves a checkpoint
    under 'checkpoints/', and runs evaluation. Runs inside a dygraph guard
    when `FLAGS.dynamic` is set.
    """
    @contextlib.contextmanager
    def null_guard():
        # No-op context manager used when dygraph mode is off.
        yield

    def collate(batch):
        # Stack per-sample (image, label) pairs into two arrays; labels are
        # reshaped into a column.
        images = np.array([sample[0] for sample in batch])
        labels = np.array([sample[1] for sample in batch]).reshape(-1, 1)
        return images, labels

    def make_loader(directory, transforms, shuffle, drop_last):
        # Per-sample decoding/augmentation, then batching, then collation.
        sample_reader = fluid.io.xmap_readers(
            compose(transforms),
            image_folder(directory, shuffle=shuffle),
            process_num=8,
            buffer_size=4 * batch_size)
        batch_reader = paddle.batch(
            sample_reader, batch_size=batch_size, drop_last=drop_last)
        return fluid.io.xmap_readers(
            collate, batch_reader, process_num=2, buffer_size=4)

    epoch = FLAGS.epoch
    batch_size = FLAGS.batch_size
    guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard()

    train_loader = make_loader(
        os.path.join(FLAGS.data, 'train'),
        [cv2.imread, random_crop_resize, random_flip, normalize_permute],
        shuffle=True,
        drop_last=True)
    val_loader = make_loader(
        os.path.join(FLAGS.data, 'val'),
        [cv2.imread, center_crop_resize, normalize_permute],
        shuffle=False,
        drop_last=False)

    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    with guard:
        model = ResNet()
        sgd = make_optimizer(parameter_list=model.parameters())
        model.prepare(sgd, 'cross_entropy')

        for e in range(epoch):
            print("======== train epoch {} ========".format(e))
            run(model, train_loader)
            model.save('checkpoints/{:02d}'.format(e))
            print("======== eval epoch {} ========".format(e))
            run(model, val_loader, mode='eval')
if __name__ == '__main__':
    # Parse the command line into the module-level FLAGS that the functions
    # in this file read, then start training.
    parser = argparse.ArgumentParser("Resnet Training on ImageNet")
    parser.add_argument(
        'data', metavar='DIR',
        help='path to dataset '
             '(should have subdirectories named "train" and "val")')
    parser.add_argument(
        "-e", "--epoch", default=90, type=int, help="number of epoch")
    parser.add_argument(
        "-b", "--batch_size", default=512, type=int, help="batch size")
    parser.add_argument(
        "-n", "--num_devices", default=4, type=int, help="number of devices")
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    FLAGS = parser.parse_args()
    main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册