未验证 提交 c4289fd7 编写于 作者: L LielinJiang 提交者: GitHub

Merge pull request #74 from LielinJiang/move-mnist-to-example

Move mnist to examples
# MNIST
当我们学习编程的时候,编写的第一个程序一般是实现打印"Hello World"。而机器学习(或深度学习)的入门教程,一般都是 MNIST 数据库上的手写识别问题。原因是手写识别属于典型的图像分类问题,比较简单,同时MNIST数据集也很完备。
本页将介绍如何使用PaddlePaddle高级API(hapi)实现MNIST,包括[安装](#installation)、[训练](#training-a-model)、[输出](#log)、[参数保存](#save)和[模型评估](#evaluation)。
## 安装
在当前目录下运行样例代码需要PaddlePaddle的v2.0.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据安装文档中的说明来更新PaddlePaddle。
## 训练
可以通过如下的方式启动训练:
```
python mnist.py
```
上面的方式默认使用的是静态图模式,切换到动态图模式训练可以加上```--dynamic```参数:
```
python mnist.py --dynamic
```
多卡进行模型训练,启动训练的方式:
```
python -m paddle.distributed.launch mnist.py
```
## 输出
执行训练开始后,将得到类似如下的输出。
```
Epoch 1/10
step 10/469 - loss: 2.4547 - acc_top1: 0.1273 - acc_top2: 0.2305 - 94ms/step
step 20/469 - loss: 1.2578 - acc_top1: 0.3063 - acc_top2: 0.4316 - 48ms/step
step 30/469 - loss: 0.7918 - acc_top1: 0.4344 - acc_top2: 0.5638 - 33ms/step
step 40/469 - loss: 0.6947 - acc_top1: 0.5148 - acc_top2: 0.6412 - 25ms/step
step 50/469 - loss: 0.5452 - acc_top1: 0.5731 - acc_top2: 0.6959 - 20ms/step
step 60/469 - loss: 0.4184 - acc_top1: 0.6133 - acc_top2: 0.7314 - 17ms/step
step 70/469 - loss: 0.5143 - acc_top1: 0.6423 - acc_top2: 0.7595 - 15ms/step
step 80/469 - loss: 0.5688 - acc_top1: 0.6658 - acc_top2: 0.7808 - 13ms/step
...
```
## 参数保存
训练好的模型默认会保存在```mnist_checkpoint/```文件夹下,可以通过```--output-dir```参数来指定你想要保存的文件夹位置。
## 模型评估
执行如下命令进行评估,```--resume```后面指定训练好的模型路径
```
python mnist.py --resume mnist_checkpoint/final.pdparams --eval-only
```
切换动态图模式评估:
```
python mnist.py --resume mnist_checkpoint/final.pdparams --eval-only --dynamic
```
多卡评估
```
python -m paddle.distributed.launch mnist.py --resume mnist_checkpoint/final.pdparams --eval-only
```
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -16,94 +16,15 @@ from __future__ import division
from __future__ import print_function
import argparse
import contextlib
import os
import numpy as np
from paddle import fluid
from paddle.fluid.optimizer import Momentum
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
from hapi.datasets.mnist import MNIST as MnistDataset
from hapi.model import Model, Input, set_device
from hapi.model import Input, set_device
from hapi.loss import CrossEntropy
from hapi.metrics import Accuracy
class SimpleImgConvPool(fluid.dygraph.Layer):
    """A 2-D convolution immediately followed by a 2-D pooling layer.

    Args:
        num_channels: number of input channels of the convolution.
        num_filters: number of output channels of the convolution.
        filter_size: convolution kernel size.
        pool_size: pooling window size.
        pool_stride: pooling stride.
        pool_padding: pooling padding.
        pool_type: pooling type passed to ``Pool2D`` (e.g. ``'max'``).
        global_pooling: whether ``Pool2D`` pools over the whole feature map.
        conv_stride: convolution stride.
        conv_padding: convolution padding.
        conv_dilation: convolution dilation.
        conv_groups: convolution groups.
        act: activation applied by the convolution (e.g. ``"relu"``).
        use_cudnn: forwarded to both ``Conv2D`` and ``Pool2D``.
        param_attr: weight attribute of the convolution.
        bias_attr: bias attribute of the convolution.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 pool_size,
                 pool_stride,
                 pool_padding=0,
                 pool_type='max',
                 global_pooling=False,
                 conv_stride=1,
                 conv_padding=0,
                 conv_dilation=1,
                 conv_groups=None,
                 act=None,
                 use_cudnn=False,
                 param_attr=None,
                 bias_attr=None):
        super(SimpleImgConvPool, self).__init__('SimpleConv')

        # Forward ``act``, ``param_attr`` and ``bias_attr`` to the
        # convolution; previously they were accepted but silently dropped,
        # so a requested activation was never applied.
        self._conv2d = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=conv_stride,
            padding=conv_padding,
            dilation=conv_dilation,
            groups=conv_groups,
            param_attr=param_attr,
            bias_attr=bias_attr,
            act=act,
            use_cudnn=use_cudnn)

        self._pool2d = Pool2D(
            pool_size=pool_size,
            pool_type=pool_type,
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            global_pooling=global_pooling,
            use_cudnn=use_cudnn)

    def forward(self, inputs):
        """Apply the convolution, then the pooling, and return the result."""
        x = self._conv2d(inputs)
        x = self._pool2d(x)
        return x
class MNIST(Model):
    """Small convolutional classifier: two conv+pool stages and a softmax FC.

    ``forward`` reshapes its input to ``[-1, 1, 28, 28]`` before the first
    convolution and returns the softmax output of the final linear layer.
    """

    def __init__(self):
        super(MNIST, self).__init__()

        # Two conv/pool stages: 1 -> 20 -> 50 channels, 5x5 kernels,
        # 2x2 pooling with stride 2.
        self._simple_img_conv_pool_1 = SimpleImgConvPool(
            1, 20, 5, 2, 2, act="relu")
        self._simple_img_conv_pool_2 = SimpleImgConvPool(
            20, 50, 5, 2, 2, act="relu")

        # Flattened feature size fed to the classifier (50 * 4 * 4 == 800).
        flat_dim = 50 * 4 * 4
        num_classes = 10
        # Standard deviation of the normal initializer for the FC weights.
        init_std = (2.0 / (flat_dim**2 * num_classes))**0.5
        fc_weight_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=init_std))
        self._fc = Linear(
            flat_dim, num_classes, param_attr=fc_weight_attr, act="softmax")

    def forward(self, inputs):
        """Run the network on ``inputs`` and return class probabilities."""
        images = fluid.layers.reshape(inputs, [-1, 1, 28, 28])
        features = self._simple_img_conv_pool_2(
            self._simple_img_conv_pool_1(images))
        flat = fluid.layers.flatten(features, axis=1)
        return self._fc(flat)
from hapi.vision.models import LeNet
def main():
......@@ -113,10 +34,10 @@ def main():
train_dataset = MnistDataset(mode='train')
val_dataset = MnistDataset(mode='test')
inputs = [Input([None, 784], 'float32', name='image')]
inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
labels = [Input([None, 1], 'int64', name='label')]
model = MNIST()
model = LeNet()
optim = Momentum(
learning_rate=FLAGS.lr, momentum=.9, parameter_list=model.parameters())
......@@ -127,14 +48,19 @@ def main():
inputs,
labels,
device=FLAGS.device)
if FLAGS.resume is not None:
model.load(FLAGS.resume)
if FLAGS.eval_only:
model.evaluate(val_dataset, batch_size=FLAGS.batch_size)
return
model.fit(train_dataset,
val_dataset,
epochs=FLAGS.epoch,
batch_size=FLAGS.batch_size,
save_dir='mnist_checkpoint')
save_dir=FLAGS.output_dir)
if __name__ == '__main__':
......@@ -144,7 +70,7 @@ if __name__ == '__main__':
parser.add_argument(
"-d", "--dynamic", action='store_true', help="enable dygraph mode")
parser.add_argument(
"-e", "--epoch", default=2, type=int, help="number of epoch")
"-e", "--epoch", default=10, type=int, help="number of epoch")
parser.add_argument(
'--lr',
'--learning-rate',
......@@ -155,12 +81,17 @@ if __name__ == '__main__':
parser.add_argument(
"-b", "--batch_size", default=128, type=int, help="batch size")
parser.add_argument(
"-n", "--num_devices", default=1, type=int, help="number of devices")
"--output-dir",
type=str,
default='mnist_checkpoint',
help="checkpoint save dir")
parser.add_argument(
"-r",
"--resume",
default=None,
type=str,
help="checkpoint path to resume")
parser.add_argument(
"--eval-only", action='store_true', help="only evaluate the model")
FLAGS = parser.parse_args()
main()
......@@ -311,7 +311,8 @@ class ProgBarLogger(Callback):
def on_eval_end(self, logs=None):
logs = logs or {}
if self._is_print() and (self.steps is not None):
if self._is_print() and (self.eval_steps is not None):
self._updates(logs, 'eval')
print('Eval samples: %d' % (self.evaled_samples))
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import argparse
import contextlib
import math
import os
import random
import time
import cv2
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.container import Sequential
from model import Model, CrossEntropy
class ConvBNLayer(fluid.dygraph.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    The optional activation ``act`` is applied by the batch-norm layer, not
    by the convolution.

    Args:
        num_channels: number of input channels.
        num_filters: number of output channels.
        filter_size: convolution kernel size; padding is derived from it as
            ``(filter_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups.
        act: activation name passed to ``BatchNorm`` (``None`` for none).
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        same_padding = (filter_size - 1) // 2
        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=same_padding,
            groups=groups,
            act=None,
            bias_attr=False)
        self._batch_norm = BatchNorm(num_filters, act=act)

    def forward(self, inputs):
        """Return ``batch_norm(conv(inputs))``."""
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(fluid.dygraph.Layer):
    """Residual bottleneck: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        num_channels: number of input channels.
        num_filters: width of the first two convolutions; the block outputs
            ``num_filters * 4`` channels.
        stride: stride of the 3x3 convolution (and of the projection
            shortcut, when one is created).
        shortcut: when True the input is added unchanged; when False a 1x1
            ``ConvBNLayer`` projects the input before the addition.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True):
        super(BottleneckBlock, self).__init__()

        expanded_filters = num_filters * 4

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=expanded_filters,
            filter_size=1,
            act=None)

        # A projection is only built when the identity shortcut is disabled.
        if not shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=expanded_filters,
                filter_size=1,
                stride=stride)

        self.shortcut = shortcut
        self._num_channels_out = expanded_filters

    def forward(self, inputs):
        """Return ``relu(shortcut(inputs) + conv2(conv1(conv0(inputs))))``."""
        reduced = self.conv0(inputs)
        spatial = self.conv1(reduced)
        expanded = self.conv2(spatial)

        residual = inputs if self.shortcut else self.short(inputs)

        summed = fluid.layers.elementwise_add(x=residual, y=expanded)
        # The final ReLU is appended through a LayerHelper named after this
        # layer.
        helper = LayerHelper(self.full_name(), act='relu')
        return helper.append_activation(summed)
class ResNet(Model):
    """Bottleneck ResNet with a softmax classification head.

    Args:
        depth: network depth; one of 50, 101 or 152.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, depth=50, num_classes=1000):
        super(ResNet, self).__init__()

        # Number of bottleneck blocks in each of the four stages.
        layer_config = {
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
        }
        assert depth in layer_config.keys(), \
            "supported depth are {} but input layer is {}".format(
                layer_config.keys(), depth)

        block_counts = layer_config[depth]
        # Input channels of the first block of each stage, and the
        # bottleneck width of each stage.
        num_in = [64, 256, 512, 1024]
        num_out = [64, 128, 256, 512]

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pool.
        self.conv = ConvBNLayer(
            num_channels=3,
            num_filters=64,
            filter_size=7,
            stride=2,
            act='relu')
        self.pool = Pool2D(
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        self.layers = []
        for idx, num_blocks in enumerate(block_counts):
            # The first block of a stage uses a projection shortcut (and
            # stride 2 for every stage but the first); later blocks use the
            # identity shortcut.
            blocks = [
                BottleneckBlock(
                    num_channels=(num_in[idx]
                                  if b == 0 else num_out[idx] * 4),
                    num_filters=num_out[idx],
                    stride=2 if b == 0 and idx != 0 else 1,
                    shortcut=(b != 0))
                for b in range(num_blocks)
            ]
            stage = self.add_sublayer(
                "layer_{}".format(idx),
                Sequential(*blocks))
            self.layers.append(stage)

        self.global_pool = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        # Bound of the uniform initializer for the classifier weights.
        stdv = 1.0 / math.sqrt(2048 * 1.0)
        self.fc_input_dim = num_out[-1] * 4 * 1 * 1
        fc_weight_attr = fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.Uniform(-stdv, stdv))
        self.fc = Linear(self.fc_input_dim,
                         num_classes,
                         act='softmax',
                         param_attr=fc_weight_attr)

    def forward(self, inputs):
        """Run stem, residual stages, global pooling and the classifier."""
        feat = self.pool(self.conv(inputs))
        for stage in self.layers:
            feat = stage(feat)
        feat = self.global_pool(feat)
        feat = fluid.layers.reshape(feat, shape=[-1, self.fc_input_dim])
        return self.fc(feat)
def make_optimizer(parameter_list=None):
    """Build the Momentum optimizer with warmup + step learning-rate decay.

    Reads ``FLAGS.lr`` and ``FLAGS.batch_size``. The learning rate is
    multiplied by 0.1 at epochs 30, 60 and 80, and is linearly warmed up from
    0 to ``FLAGS.lr`` over the first 5 epochs.

    Args:
        parameter_list: parameters to optimize, forwarded to the optimizer.

    Returns:
        A ``fluid.optimizer.Momentum`` instance (momentum 0.9, L2 decay 1e-4).
    """
    # Dataset size used to convert epochs to steps
    # (presumably the ImageNet-1k training set -- confirm).
    total_images = 1281167
    base_lr = FLAGS.lr
    steps_per_epoch = int(
        math.floor(float(total_images) / FLAGS.batch_size))

    decay_epochs = [30, 60, 80]
    boundaries = [steps_per_epoch * epoch for epoch in decay_epochs]
    values = [base_lr * (0.1**power) for power in range(len(boundaries) + 1)]

    stepped_lr = fluid.layers.piecewise_decay(
        boundaries=boundaries, values=values)
    warmed_lr = fluid.layers.linear_lr_warmup(
        learning_rate=stepped_lr,
        warmup_steps=5 * steps_per_epoch,
        start_lr=0.,
        end_lr=base_lr)

    return fluid.optimizer.Momentum(
        learning_rate=warmed_lr,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4),
        parameter_list=parameter_list)
def accuracy(pred, label, topk=(1, )):
    """Compute top-k accuracy percentages for a batch.

    Args:
        pred: 2-D array of per-class scores, one row per sample.
        label: 2-D array with one column holding the true class index of
            each sample.
        topk: iterable of ``k`` values to evaluate.

    Returns:
        A list with one percentage (0-100) per entry of ``topk``.
    """
    maxk = max(topk)
    # Class indices sorted by descending score, truncated to the largest k.
    ranked = np.argsort(pred)[:, ::-1][:, :maxk]
    hits = (ranked == np.repeat(label, maxk, 1))
    num_samples = label.shape[0]
    return [100.0 * hits[:, :k].sum() / num_samples for k in topk]
def center_crop_resize(img):
    """Crop the central square of ``img`` and resize it to 224x224.

    The square side is ``224 / 256`` of the shorter image side.

    Args:
        img: image array indexed as ``[row, column, channel]``.

    Returns:
        The 224x224 bilinearly-resized crop.
    """
    h, w = img.shape[:2]
    c = int(224 / 256 * min((h, w)))
    i = (h + 1 - c) // 2
    j = (w + 1 - c) // 2
    crop = img[i: i + c, j: j + c, :]
    # Pass the interpolation by keyword: cv2.resize's positional parameters
    # after ``dsize`` are ``dst, fx, fy``, so the old positional
    # ``0, 0, cv2.INTER_LINEAR`` never reached ``interpolation``.
    return cv2.resize(crop, (224, 224), interpolation=cv2.INTER_LINEAR)
def random_crop_resize(img):
    """Take a random-area, random-aspect crop of ``img`` resized to 224x224.

    Up to 10 attempts are made to sample a crop covering 8%-100% of the image
    area with an aspect ratio in [3/4, 4/3] (sampled log-uniformly). If no
    sampled crop fits inside the image, ``center_crop_resize`` is used.

    Args:
        img: image array indexed as ``[row, column, channel]``.

    Returns:
        The 224x224 bilinearly-resized crop.
    """
    height, width = img.shape[:2]
    area = height * width
    # Loop-invariant bounds of the log aspect ratio.
    log_ratio = (math.log(3 / 4), math.log(4 / 3))

    for _ in range(10):
        target_area = random.uniform(0.08, 1.) * area
        aspect_ratio = math.exp(random.uniform(*log_ratio))

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        # Reject crops that do not fit, and degenerate zero-sized crops
        # (possible on very small images), which cv2.resize cannot handle.
        if 0 < w <= width and 0 < h <= height:
            i = random.randint(0, height - h)
            j = random.randint(0, width - w)
            crop = img[i: i + h, j: j + w, :]
            # Interpolation must be passed by keyword; positionally it would
            # bind to ``fy`` rather than ``interpolation``.
            return cv2.resize(
                crop, (224, 224), interpolation=cv2.INTER_LINEAR)

    return center_crop_resize(img)
def random_flip(img):
    """Horizontally flip ``img`` with probability 0.5.

    Previously the image was flipped unconditionally, so the augmentation
    was not random at all.

    Args:
        img: image array indexed as ``[row, column, channel]``.

    Returns:
        Either ``img`` itself or a view of it with the column axis reversed.
    """
    if random.random() < 0.5:
        return img[:, ::-1, :]
    return img
def normalize_permute(img):
    """Convert an HWC image to normalized float32 CHW with reversed channels.

    The image is cast to float32, transposed to channel-first order, its
    channel axis is reversed (BGR -> RGB), and each channel is normalized
    as ``(value - mean) / std`` with fixed per-channel statistics.

    Args:
        img: image array indexed as ``[row, column, channel]``.

    Returns:
        A float32 array indexed as ``[channel, row, column]``.
    """
    # transpose to CHW, then flip the channel axis (BGR -> RGB)
    chw = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...]
    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
    inv_std = 1. / std
    # Normalize each channel plane in place.
    for plane, channel_mean, channel_scale in zip(chw, mean, inv_std):
        plane -= channel_mean
        plane *= channel_scale
    return chw
def compose(functions):
    """Chain image transforms into a single ``(img, label)`` sample mapper.

    Args:
        functions: iterable of callables, each taking and returning an image.

    Returns:
        A function mapping ``(img, label)`` to ``(transformed_img, label)``,
        applying ``functions`` in order to the image only.
    """
    def process(sample):
        transformed, label = sample
        for transform in functions:
            transformed = transform(transformed)
        return transformed, label

    return process
def image_folder(path, shuffle=False):
    """Index an image directory tree laid out as ``path/<class>/...``.

    Every sub-directory of ``path`` is a class; classes are numbered by
    sorted name. Files under a class directory (searched recursively) are
    kept when their lower-cased extension is a recognized image extension.

    Args:
        path: root directory containing one sub-directory per class.
        shuffle: when True, the sample list is shuffled in place every time
            the returned reader is invoked.

    Returns:
        A zero-argument function yielding ``(file_path, class_index)`` tuples.
    """
    valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp')
    class_names = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry)))

    samples = []
    for class_idx, class_name in enumerate(class_names):
        class_dir = os.path.join(path, class_name)
        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                file_path = os.path.join(root, fname)
                extension = os.path.splitext(file_path)[1].lower()
                if extension in valid_ext:
                    samples.append((file_path, class_idx))

    def iterator():
        if shuffle:
            random.shuffle(samples)
        for sample in samples:
            yield sample

    return iterator
def run(model, loader, mode='train'):
    """Run one pass over ``loader`` and print running statistics.

    For each batch the method of ``model`` named by ``mode`` is called on
    ``FLAGS.num_devices`` GPU devices. Running averages of loss, top-1 and
    top-5 accuracy and per-step time are printed every 10 steps.

    Args:
        model: object exposing a method named ``mode`` that returns
            ``(outputs, losses)``.
        loader: zero-argument callable returning an iterable of batches;
            element 0 of a batch is the input, element 1 the labels.
        mode: name of the model method to invoke (e.g. ``'train'``,
            ``'eval'``).
    """
    report_fmt = ("{:04d} loss: {:0.3f} top1: {:0.3f}% top5: {:0.3f}% "
                  "time: {:0.3f}")
    total_loss = 0.
    total_acc1 = 0.
    total_acc5 = 0.
    total_time = 0.
    device_ids = list(range(FLAGS.num_devices))

    start = time.time()
    for idx, batch in enumerate(loader()):
        images, labels = batch[0], batch[1]
        outputs, losses = getattr(model, mode)(
            images, labels, device='gpu', device_ids=device_ids)

        top1, top5 = accuracy(outputs[0], labels, topk=(1, 5))
        total_loss += np.sum(losses)
        total_acc1 += top1
        total_acc5 += top5

        if idx > 1:  # skip first two steps
            total_time += time.time() - start

        if idx % 10 == 0:
            steps_done = idx + 1
            timed_steps = max(1, (idx - 1))
            print(report_fmt.format(
                idx, total_loss / steps_done, total_acc1 / steps_done,
                total_acc5 / steps_done, total_time / timed_steps))

        start = time.time()
def main():
    """Train and evaluate ResNet on the dataset given by ``FLAGS.data``.

    Builds the train/val reader pipelines from ``FLAGS.data/train`` and
    ``FLAGS.data/val``, optionally resumes from ``FLAGS.resume``, then for
    each epoch trains, saves a checkpoint under ``resnet_checkpoints/`` and
    evaluates.
    """
    @contextlib.contextmanager
    def null_guard():
        yield

    def collate(batch):
        # Stack a list of (image, label) samples into batched arrays; labels
        # become a column vector.
        images = np.array([sample[0] for sample in batch])
        labels = np.array([sample[1] for sample in batch]).reshape(-1, 1)
        return images, labels

    epoch = FLAGS.epoch
    batch_size = FLAGS.batch_size
    # Dygraph guard in dynamic mode, a no-op context manager otherwise.
    guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard()

    train_dir = os.path.join(FLAGS.data, 'train')
    val_dir = os.path.join(FLAGS.data, 'val')

    # Training pipeline: decode + augment per sample, batch, then collate.
    train_samples = fluid.io.xmap_readers(
        compose([cv2.imread, random_crop_resize, random_flip,
                 normalize_permute]),
        image_folder(train_dir, shuffle=True),
        process_num=8,
        buffer_size=4 * batch_size)
    train_batches = paddle.batch(
        train_samples, batch_size=batch_size, drop_last=True)
    train_loader = fluid.io.xmap_readers(
        collate, train_batches, process_num=2, buffer_size=4)

    # Validation pipeline: deterministic center crop, no shuffling.
    val_samples = fluid.io.xmap_readers(
        compose([cv2.imread, center_crop_resize, normalize_permute]),
        image_folder(val_dir),
        process_num=8,
        buffer_size=4 * batch_size)
    val_batches = paddle.batch(val_samples, batch_size=batch_size)
    val_loader = fluid.io.xmap_readers(
        collate, val_batches, process_num=2, buffer_size=4)

    if not os.path.exists('resnet_checkpoints'):
        os.mkdir('resnet_checkpoints')

    with guard:
        model = ResNet()
        optim = make_optimizer(parameter_list=model.parameters())
        model.prepare(optim, CrossEntropy())
        if FLAGS.resume is not None:
            model.load(FLAGS.resume)

        for e in range(epoch):
            print("======== train epoch {} ========".format(e))
            run(model, train_loader)
            model.save('resnet_checkpoints/{:02d}'.format(e))
            print("======== eval epoch {} ========".format(e))
            run(model, val_loader, mode='eval')
# Script entry point: parse command-line options into the module-level FLAGS
# read by make_optimizer(), run() and main(), then start training.
if __name__ == '__main__':
    parser = argparse.ArgumentParser("Resnet Training on ImageNet")
    # The help text previously lacked its closing parenthesis.
    parser.add_argument('data', metavar='DIR', help='path to dataset '
                        '(should have subdirectories named "train" and "val")')
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
        "-e", "--epoch", default=90, type=int, help="number of epoch")
    parser.add_argument(
        '--lr', '--learning-rate', default=0.1, type=float, metavar='LR',
        help='initial learning rate')
    parser.add_argument(
        "-b", "--batch_size", default=256, type=int, help="batch size")
    parser.add_argument(
        "-n", "--num_devices", default=4, type=int, help="number of devices")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,
        help="checkpoint path to resume")
    FLAGS = parser.parse_args()
    # Rejects an empty data path (argparse already requires the argument).
    assert FLAGS.data, "error: must provide data path"
    main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册