Move mnist.py to examples/handwritten_number_recognition and remove resnet.py

d61339cc · LielinJiang · 701a823e · d61339cc · 701a823e
隐藏空白更改
内联 | 并排

Showing 2 changed files with 94 additions and 404 deletions

examples/handwritten_number_recognition/mnist.py examples/handwritten_number_recognition/mnist.py +94 -0

resnet.py resnet.py +0 -404

未找到文件。
--- a/mnist.py
+++ b/mnist.py
@@ -16,94 +16,15 @@ from __future__ import division
 from __future__ import print_function

 import argparse
-import contextlib
-import os
-
-import numpy as np

 from paddle import fluid
 from paddle.fluid.optimizer import Momentum
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
 from hapi.datasets.mnist import MNIST as MnistDataset

-from hapi.model import Model, Input, set_device
+from hapi.model import Input, set_device
 from hapi.loss import CrossEntropy
 from hapi.metrics import Accuracy
-
-
-class SimpleImgConvPool(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 pool_size,
-                 pool_stride,
-                 pool_padding=0,
-                 pool_type='max',
-                 global_pooling=False,
-                 conv_stride=1,
-                 conv_padding=0,
-                 conv_dilation=1,
-                 conv_groups=None,
-                 act=None,
-                 use_cudnn=False,
-                 param_attr=None,
-                 bias_attr=None):
-        super(SimpleImgConvPool, self).__init__('SimpleConv')
-
-        self._conv2d = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=conv_stride,
-            padding=conv_padding,
-            dilation=conv_dilation,
-            groups=conv_groups,
-            param_attr=None,
-            bias_attr=None,
-            use_cudnn=use_cudnn)
-
-        self._pool2d = Pool2D(
-            pool_size=pool_size,
-            pool_type=pool_type,
-            pool_stride=pool_stride,
-            pool_padding=pool_padding,
-            global_pooling=global_pooling,
-            use_cudnn=use_cudnn)
-
-    def forward(self, inputs):
-        x = self._conv2d(inputs)
-        x = self._pool2d(x)
-        return x
-
-
-class MNIST(Model):
-    def __init__(self):
-        super(MNIST, self).__init__()
-        self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            1, 20, 5, 2, 2, act="relu")
-
-        self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            20, 50, 5, 2, 2, act="relu")
-
-        pool_2_shape = 50 * 4 * 4
-        SIZE = 10
-        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
-        self._fc = Linear(
-            800,
-            10,
-            param_attr=fluid.param_attr.ParamAttr(
-                initializer=fluid.initializer.NormalInitializer(
-                    loc=0.0, scale=scale)),
-            act="softmax")
-
-    def forward(self, inputs):
-        inputs = fluid.layers.reshape(inputs, [-1, 1, 28, 28])
-        x = self._simple_img_conv_pool_1(inputs)
-        x = self._simple_img_conv_pool_2(x)
-        x = fluid.layers.flatten(x, axis=1)
-        x = self._fc(x)
-        return x
+from hapi.vision.models import LeNet


 def main():
@@ -113,10 +34,10 @@ def main():
    train_dataset = MnistDataset(mode='train')
    val_dataset = MnistDataset(mode='test')

-    inputs = [Input([None, 784], 'float32', name='image')]
+    inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

-    model = MNIST()
+    model = LeNet()
    optim = Momentum(
        learning_rate=FLAGS.lr, momentum=.9, parameter_list=model.parameters())

@@ -127,9 +48,14 @@ def main():
        inputs,
        labels,
        device=FLAGS.device)
+
    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

+    if FLAGS.eval_only:
+        model.evaluate(val_dataset, batch_size=FLAGS.batch_size)
+        return
+
    model.fit(train_dataset,
              val_dataset,
              epochs=FLAGS.epoch,
@@ -144,7 +70,7 @@ if __name__ == '__main__':
    parser.add_argument(
        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
    parser.add_argument(
-        "-e", "--epoch", default=2, type=int, help="number of epoch")
+        "-e", "--epoch", default=10, type=int, help="number of epoch")
    parser.add_argument(
        '--lr',
        '--learning-rate',
@@ -155,12 +81,14 @@ if __name__ == '__main__':
    parser.add_argument(
        "-b", "--batch_size", default=128, type=int, help="batch size")
    parser.add_argument(
-        "-n", "--num_devices", default=1, type=int, help="number of devices")
+        "--output-dir", type=str, default='output', help="checkpoint save dir")
    parser.add_argument(
        "-r",
        "--resume",
        default=None,
        type=str,
        help="checkpoint path to resume")
+    parser.add_argument(
+        "--eval-only", action='store_true', help="only evaluate the model")
    FLAGS = parser.parse_args()
    main()
--- a/resnet.py
+++ b/resnet.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import contextlib
-import math
-import os
-import random
-import time
-
-import cv2
-import numpy as np
-
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
-from paddle.fluid.dygraph.container import Sequential
-
-from model import Model, CrossEntropy
-
-
-class ConvBNLayer(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
-                 stride=1,
-                 groups=1,
-                 act=None):
-        super(ConvBNLayer, self).__init__()
-
-        self._conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
-            stride=stride,
-            padding=(filter_size - 1) // 2,
-            groups=groups,
-            act=None,
-            bias_attr=False)
-
-        self._batch_norm = BatchNorm(num_filters, act=act)
-
-    def forward(self, inputs):
-        x = self._conv(inputs)
-        x = self._batch_norm(x)
-
-        return x
-
-
-class BottleneckBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=1,
-            act='relu')
-        self.conv1 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
-            stride=stride,
-            act='relu')
-        self.conv2 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters * 4,
-            filter_size=1,
-            act=None)
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters * 4,
-                filter_size=1,
-                stride=stride)
-
-        self.shortcut = shortcut
-
-        self._num_channels_out = num_filters * 4
-
-    def forward(self, inputs):
-        x = self.conv0(inputs)
-        conv1 = self.conv1(x)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-
-        x = fluid.layers.elementwise_add(x=short, y=conv2)
-
-        layer_helper = LayerHelper(self.full_name(), act='relu')
-        return layer_helper.append_activation(x)
-
-
-class ResNet(Model):
-    def __init__(self, depth=50, num_classes=1000):
-        super(ResNet, self).__init__()
-
-        layer_config = {
-            50: [3, 4, 6, 3],
-            101: [3, 4, 23, 3],
-            152: [3, 8, 36, 3],
-        }
-        assert depth in layer_config.keys(), \
-            "supported depth are {} but input layer is {}".format(
-                layer_config.keys(), depth)
-
-        layers = layer_config[depth]
-        num_in = [64, 256, 512, 1024]
-        num_out = [64, 128, 256, 512]
-
-        self.conv = ConvBNLayer(
-            num_channels=3,
-            num_filters=64,
-            filter_size=7,
-            stride=2,
-            act='relu')
-        self.pool = Pool2D(
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
-
-        self.layers = []
-        for idx, num_blocks in enumerate(layers):
-            blocks = []
-            shortcut = False
-            for b in range(num_blocks):
-                block = BottleneckBlock(
-                    num_channels=num_in[idx] if b == 0 else num_out[idx] * 4,
-                    num_filters=num_out[idx],
-                    stride=2 if b == 0 and idx != 0 else 1,
-                    shortcut=shortcut)
-                blocks.append(block)
-                shortcut = True
-            layer = self.add_sublayer(
-                "layer_{}".format(idx),
-                Sequential(*blocks))
-            self.layers.append(layer)
-
-        self.global_pool = Pool2D(
-            pool_size=7, pool_type='avg', global_pooling=True)
-
-        stdv = 1.0 / math.sqrt(2048 * 1.0)
-        self.fc_input_dim = num_out[-1] * 4 * 1 * 1
-        self.fc = Linear(self.fc_input_dim,
-                         num_classes,
-                         act='softmax',
-                         param_attr=fluid.param_attr.ParamAttr(
-                             initializer=fluid.initializer.Uniform(
-                                 -stdv, stdv)))
-
-    def forward(self, inputs):
-        x = self.conv(inputs)
-        x = self.pool(x)
-        for layer in self.layers:
-            x = layer(x)
-        x = self.global_pool(x)
-        x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim])
-        x = self.fc(x)
-        return x
-
-
-def make_optimizer(parameter_list=None):
-    total_images = 1281167
-    base_lr = FLAGS.lr
-    momentum = 0.9
-    weight_decay = 1e-4
-    step_per_epoch = int(math.floor(float(total_images) / FLAGS.batch_size))
-    boundaries = [step_per_epoch * e for e in [30, 60, 80]]
-    values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
-    learning_rate = fluid.layers.piecewise_decay(
-        boundaries=boundaries, values=values)
-    learning_rate = fluid.layers.linear_lr_warmup(
-        learning_rate=learning_rate,
-        warmup_steps=5 * step_per_epoch,
-        start_lr=0.,
-        end_lr=base_lr)
-    optimizer = fluid.optimizer.Momentum(
-        learning_rate=learning_rate,
-        momentum=momentum,
-        regularization=fluid.regularizer.L2Decay(weight_decay),
-        parameter_list=parameter_list)
-    return optimizer
-
-
-def accuracy(pred, label, topk=(1, )):
-    maxk = max(topk)
-    pred = np.argsort(pred)[:, ::-1][:, :maxk]
-    correct = (pred == np.repeat(label, maxk, 1))
-
-    batch_size = label.shape[0]
-    res = []
-    for k in topk:
-        correct_k = correct[:, :k].sum()
-        res.append(100.0 * correct_k / batch_size)
-    return res
-
-
-def center_crop_resize(img):
-    h, w = img.shape[:2]
-    c = int(224 / 256 * min((h, w)))
-    i = (h + 1 - c) // 2
-    j = (w + 1 - c) // 2
-    img = img[i: i + c, j: j + c, :]
-    return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR)
-
-
-def random_crop_resize(img):
-    height, width = img.shape[:2]
-    area = height * width
-
-    for attempt in range(10):
-        target_area = random.uniform(0.08, 1.) * area
-        log_ratio = (math.log(3 / 4), math.log(4 / 3))
-        aspect_ratio = math.exp(random.uniform(*log_ratio))
-
-        w = int(round(math.sqrt(target_area * aspect_ratio)))
-        h = int(round(math.sqrt(target_area / aspect_ratio)))
-
-        if w <= width and h <= height:
-            i = random.randint(0, height - h)
-            j = random.randint(0, width - w)
-            img = img[i: i + h, j: j + w, :]
-            return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR)
-
-    return center_crop_resize(img)
-
-
-def random_flip(img):
-    return img[:, ::-1, :]
-
-
-def normalize_permute(img):
-    # transpose and convert to RGB from BGR
-    img = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...]
-    mean = np.array([123.675, 116.28, 103.53], dtype=np.float32)
-    std = np.array([58.395, 57.120, 57.375], dtype=np.float32)
-    invstd = 1. / std
-    for v, m, s in zip(img, mean, invstd):
-        v.__isub__(m).__imul__(s)
-    return img
-
-
-def compose(functions):
-    def process(sample):
-        img, label = sample
-        for fn in functions:
-            img = fn(img)
-        return img, label
-    return process
-
-
-def image_folder(path, shuffle=False):
-    valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp')
-    classes = [d for d in os.listdir(path) if
-               os.path.isdir(os.path.join(path, d))]
-    classes.sort()
-    class_map = {cls: idx for idx, cls in enumerate(classes)}
-    samples = []
-    for dir in sorted(class_map.keys()):
-        d = os.path.join(path, dir)
-        for root, _, fnames in sorted(os.walk(d)):
-            for fname in sorted(fnames):
-                p = os.path.join(root, fname)
-                if os.path.splitext(p)[1].lower() in valid_ext:
-                    samples.append((p, class_map[dir]))
-
-    def iterator():
-        if shuffle:
-            random.shuffle(samples)
-        for s in samples:
-            yield s
-
-    return iterator
-
-
-def run(model, loader, mode='train'):
-    total_loss = 0.
-    total_acc1 = 0.
-    total_acc5 = 0.
-    total_time = 0.
-    start = time.time()
-    device_ids = list(range(FLAGS.num_devices))
-    start = time.time()
-
-    for idx, batch in enumerate(loader()):
-        outputs, losses = getattr(model, mode)(
-            batch[0], batch[1], device='gpu', device_ids=device_ids)
-        top1, top5 = accuracy(outputs[0], batch[1], topk=(1, 5))
-
-        total_loss += np.sum(losses)
-        total_acc1 += top1
-        total_acc5 += top5
-        if idx > 1:  # skip first two steps
-            total_time += time.time() - start
-        if idx % 10 == 0:
-            print(("{:04d} loss: {:0.3f} top1: {:0.3f}% top5: {:0.3f}% "
-                   "time: {:0.3f}").format(
-                       idx, total_loss / (idx + 1), total_acc1 / (idx + 1),
-                       total_acc5 / (idx + 1), total_time / max(1, (idx - 1))))
-        start = time.time()
-
-
-def main():
-    @contextlib.contextmanager
-    def null_guard():
-        yield
-
-    epoch = FLAGS.epoch
-    batch_size = FLAGS.batch_size
-    guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard()
-
-    train_dir = os.path.join(FLAGS.data, 'train')
-    val_dir = os.path.join(FLAGS.data, 'val')
-
-    train_loader = fluid.io.xmap_readers(
-        lambda batch: (np.array([b[0] for b in batch]),
-                       np.array([b[1] for b in batch]).reshape(-1, 1)),
-        paddle.batch(
-            fluid.io.xmap_readers(
-                compose([cv2.imread, random_crop_resize, random_flip,
-                         normalize_permute]),
-                image_folder(train_dir, shuffle=True),
-                process_num=8,
-                buffer_size=4 * batch_size),
-            batch_size=batch_size,
-            drop_last=True),
-        process_num=2, buffer_size=4)
-
-    val_loader = fluid.io.xmap_readers(
-        lambda batch: (np.array([b[0] for b in batch]),
-                       np.array([b[1] for b in batch]).reshape(-1, 1)),
-        paddle.batch(
-            fluid.io.xmap_readers(
-                compose([cv2.imread, center_crop_resize, normalize_permute]),
-                image_folder(val_dir),
-                process_num=8,
-                buffer_size=4 * batch_size),
-            batch_size=batch_size),
-        process_num=2, buffer_size=4)
-
-    if not os.path.exists('resnet_checkpoints'):
-        os.mkdir('resnet_checkpoints')
-
-    with guard:
-        model = ResNet()
-        optim = make_optimizer(parameter_list=model.parameters())
-        model.prepare(optim, CrossEntropy())
-        if FLAGS.resume is not None:
-            model.load(FLAGS.resume)
-
-        for e in range(epoch):
-            print("======== train epoch {} ========".format(e))
-            run(model, train_loader)
-            model.save('resnet_checkpoints/{:02d}'.format(e))
-            print("======== eval epoch {} ========".format(e))
-            run(model, val_loader, mode='eval')
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser("Resnet Training on ImageNet")
-    parser.add_argument('data', metavar='DIR', help='path to dataset '
-                        '(should have subdirectories named "train" and "val"')
-    parser.add_argument(
-        "-d", "--dynamic", action='store_true', help="enable dygraph mode")
-    parser.add_argument(
-        "-e", "--epoch", default=90, type=int, help="number of epoch")
-    parser.add_argument(
-        '--lr', '--learning-rate', default=0.1, type=float, metavar='LR',
-        help='initial learning rate')
-    parser.add_argument(
-        "-b", "--batch_size", default=256, type=int, help="batch size")
-    parser.add_argument(
-        "-n", "--num_devices", default=4, type=int, help="number of devices")
-    parser.add_argument(
-        "-r", "--resume", default=None, type=str,
-        help="checkpoint path to resume")
-    FLAGS = parser.parse_args()
-    assert FLAGS.data, "error: must provide data path"
-    main()