From d61339ccf8ee1f177614a12c93784f632c3fb6f7 Mon Sep 17 00:00:00 2001 From: LielinJiang Date: Thu, 7 May 2020 08:19:36 +0000 Subject: [PATCH] mv mnist to examples --- .../handwritten_number_recognition/mnist.py | 94 ++++ mnist.py | 166 ------- resnet.py | 404 ------------------ 3 files changed, 94 insertions(+), 570 deletions(-) create mode 100644 examples/handwritten_number_recognition/mnist.py delete mode 100644 mnist.py delete mode 100644 resnet.py diff --git a/examples/handwritten_number_recognition/mnist.py b/examples/handwritten_number_recognition/mnist.py new file mode 100644 index 0000000..3407849 --- /dev/null +++ b/examples/handwritten_number_recognition/mnist.py @@ -0,0 +1,94 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import division +from __future__ import print_function + +import argparse + +from paddle import fluid +from paddle.fluid.optimizer import Momentum +from hapi.datasets.mnist import MNIST as MnistDataset + +from hapi.model import Input, set_device +from hapi.loss import CrossEntropy +from hapi.metrics import Accuracy +from hapi.vision.models import LeNet + + +def main(): + device = set_device(FLAGS.device) + fluid.enable_dygraph(device) if FLAGS.dynamic else None + + train_dataset = MnistDataset(mode='train') + val_dataset = MnistDataset(mode='test') + + inputs = [Input([None, 1, 28, 28], 'float32', name='image')] + labels = [Input([None, 1], 'int64', name='label')] + + model = LeNet() + optim = Momentum( + learning_rate=FLAGS.lr, momentum=.9, parameter_list=model.parameters()) + + model.prepare( + optim, + CrossEntropy(), + Accuracy(topk=(1, 2)), + inputs, + labels, + device=FLAGS.device) + + if FLAGS.resume is not None: + model.load(FLAGS.resume) + + if FLAGS.eval_only: + model.evaluate(val_dataset, batch_size=FLAGS.batch_size) + return + + model.fit(train_dataset, + val_dataset, + epochs=FLAGS.epoch, + batch_size=FLAGS.batch_size, + save_dir='mnist_checkpoint') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser("CNN training on MNIST") + parser.add_argument( + "--device", type=str, default='gpu', help="device to use, gpu or cpu") + parser.add_argument( + "-d", "--dynamic", action='store_true', help="enable dygraph mode") + parser.add_argument( + "-e", "--epoch", default=10, type=int, help="number of epoch") + parser.add_argument( + '--lr', + '--learning-rate', + default=1e-3, + type=float, + metavar='LR', + help='initial learning rate') + parser.add_argument( + "-b", "--batch_size", default=128, type=int, help="batch size") + parser.add_argument( + "--output-dir", type=str, default='output', help="checkpoint save dir") + parser.add_argument( + "-r", + "--resume", + default=None, + type=str, + help="checkpoint path to resume") + parser.add_argument( + "--eval-only", action='store_true', help="only evaluate the model") + FLAGS = parser.parse_args() + main() diff --git a/mnist.py b/mnist.py deleted file mode 100644 index 397c51e..0000000 --- a/mnist.py +++ /dev/null @@ -1,166 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function - -import argparse -import contextlib -import os - -import numpy as np - -from paddle import fluid -from paddle.fluid.optimizer import Momentum -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear -from hapi.datasets.mnist import MNIST as MnistDataset - -from hapi.model import Model, Input, set_device -from hapi.loss import CrossEntropy -from hapi.metrics import Accuracy - - -class SimpleImgConvPool(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - pool_size, - pool_stride, - pool_padding=0, - pool_type='max', - global_pooling=False, - conv_stride=1, - conv_padding=0, - conv_dilation=1, - conv_groups=None, - act=None, - use_cudnn=False, - param_attr=None, - bias_attr=None): - super(SimpleImgConvPool, self).__init__('SimpleConv') - - self._conv2d = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=conv_stride, - padding=conv_padding, - dilation=conv_dilation, - groups=conv_groups, - param_attr=None, - bias_attr=None, - use_cudnn=use_cudnn) - - self._pool2d = Pool2D( - pool_size=pool_size, - pool_type=pool_type, - pool_stride=pool_stride, - pool_padding=pool_padding, - global_pooling=global_pooling, - use_cudnn=use_cudnn) - - def forward(self, inputs): - x = self._conv2d(inputs) - x = self._pool2d(x) - return x - - -class MNIST(Model): - def __init__(self): - super(MNIST, self).__init__() - self._simple_img_conv_pool_1 = SimpleImgConvPool( - 1, 20, 5, 2, 2, act="relu") - - self._simple_img_conv_pool_2 = SimpleImgConvPool( - 20, 50, 5, 2, 2, act="relu") - - pool_2_shape = 50 * 4 * 4 - SIZE = 10 - scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5 - self._fc = Linear( - 800, - 10, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale)), - act="softmax") - - def forward(self, inputs): - inputs = fluid.layers.reshape(inputs, [-1, 1, 28, 28]) - x = self._simple_img_conv_pool_1(inputs) - x = self._simple_img_conv_pool_2(x) - x = fluid.layers.flatten(x, axis=1) - x = self._fc(x) - return x - - -def main(): - device = set_device(FLAGS.device) - fluid.enable_dygraph(device) if FLAGS.dynamic else None - - train_dataset = MnistDataset(mode='train') - val_dataset = MnistDataset(mode='test') - - inputs = [Input([None, 784], 'float32', name='image')] - labels = [Input([None, 1], 'int64', name='label')] - - model = MNIST() - optim = Momentum( - learning_rate=FLAGS.lr, momentum=.9, parameter_list=model.parameters()) - - model.prepare( - optim, - CrossEntropy(), - Accuracy(topk=(1, 2)), - inputs, - labels, - device=FLAGS.device) - if FLAGS.resume is not None: - model.load(FLAGS.resume) - - model.fit(train_dataset, - val_dataset, - epochs=FLAGS.epoch, - batch_size=FLAGS.batch_size, - save_dir='mnist_checkpoint') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser("CNN training on MNIST") - parser.add_argument( - "--device", type=str, default='gpu', help="device to use, gpu or cpu") - parser.add_argument( - "-d", "--dynamic", action='store_true', help="enable dygraph mode") - parser.add_argument( - "-e", "--epoch", default=2, type=int, help="number of epoch") - parser.add_argument( - '--lr', - '--learning-rate', - default=1e-3, - type=float, - metavar='LR', - help='initial learning rate') - parser.add_argument( - "-b", "--batch_size", default=128, type=int, help="batch size") - parser.add_argument( - "-n", "--num_devices", default=1, type=int, help="number of devices") - parser.add_argument( - "-r", - "--resume", - default=None, - type=str, - help="checkpoint path to resume") - FLAGS = parser.parse_args() - main() diff --git a/resnet.py b/resnet.py deleted file mode 100644 index 4752d76..0000000 --- a/resnet.py +++ /dev/null @@ -1,404 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import print_function - -import argparse -import contextlib -import math -import os -import random -import time - -import cv2 -import numpy as np - -import paddle -import paddle.fluid as fluid -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear -from paddle.fluid.dygraph.container import Sequential - -from model import Model, CrossEntropy - - -class ConvBNLayer(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - filter_size, - stride=1, - groups=1, - act=None): - super(ConvBNLayer, self).__init__() - - self._conv = Conv2D( - num_channels=num_channels, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) - - self._batch_norm = BatchNorm(num_filters, act=act) - - def forward(self, inputs): - x = self._conv(inputs) - x = self._batch_norm(x) - - return x - - -class BottleneckBlock(fluid.dygraph.Layer): - def __init__(self, - num_channels, - num_filters, - stride, - shortcut=True): - super(BottleneckBlock, self).__init__() - - self.conv0 = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters, - filter_size=1, - act='relu') - self.conv1 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - self.conv2 = ConvBNLayer( - num_channels=num_filters, - num_filters=num_filters * 4, - filter_size=1, - act=None) - - if not shortcut: - self.short = ConvBNLayer( - num_channels=num_channels, - num_filters=num_filters * 4, - filter_size=1, - stride=stride) - - self.shortcut = shortcut - - self._num_channels_out = num_filters * 4 - - def forward(self, inputs): - x = self.conv0(inputs) - conv1 = self.conv1(x) - conv2 = self.conv2(conv1) - - if self.shortcut: - short = inputs - else: - short = self.short(inputs) - - x = fluid.layers.elementwise_add(x=short, y=conv2) - - layer_helper = LayerHelper(self.full_name(), act='relu') - return layer_helper.append_activation(x) - - -class ResNet(Model): - def __init__(self, depth=50, num_classes=1000): - super(ResNet, self).__init__() - - layer_config = { - 50: [3, 4, 6, 3], - 101: [3, 4, 23, 3], - 152: [3, 8, 36, 3], - } - assert depth in layer_config.keys(), \ - "supported depth are {} but input layer is {}".format( - layer_config.keys(), depth) - - layers = layer_config[depth] - num_in = [64, 256, 512, 1024] - num_out = [64, 128, 256, 512] - - self.conv = ConvBNLayer( - num_channels=3, - num_filters=64, - filter_size=7, - stride=2, - act='relu') - self.pool = Pool2D( - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - - self.layers = [] - for idx, num_blocks in enumerate(layers): - blocks = [] - shortcut = False - for b in range(num_blocks): - block = BottleneckBlock( - num_channels=num_in[idx] if b == 0 else num_out[idx] * 4, - num_filters=num_out[idx], - stride=2 if b == 0 and idx != 0 else 1, - shortcut=shortcut) - blocks.append(block) - shortcut = True - layer = self.add_sublayer( - "layer_{}".format(idx), - Sequential(*blocks)) - self.layers.append(layer) - - self.global_pool = Pool2D( - pool_size=7, pool_type='avg', global_pooling=True) - - stdv = 1.0 / math.sqrt(2048 * 1.0) - self.fc_input_dim = num_out[-1] * 4 * 1 * 1 - self.fc = Linear(self.fc_input_dim, - num_classes, - act='softmax', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform( - -stdv, stdv))) - - def forward(self, inputs): - x = self.conv(inputs) - x = self.pool(x) - for layer in self.layers: - x = layer(x) - x = self.global_pool(x) - x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim]) - x = self.fc(x) - return x - - -def make_optimizer(parameter_list=None): - total_images = 1281167 - base_lr = FLAGS.lr - momentum = 0.9 - weight_decay = 1e-4 - step_per_epoch = int(math.floor(float(total_images) / FLAGS.batch_size)) - boundaries = [step_per_epoch * e for e in [30, 60, 80]] - values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)] - learning_rate = fluid.layers.piecewise_decay( - boundaries=boundaries, values=values) - learning_rate = fluid.layers.linear_lr_warmup( - learning_rate=learning_rate, - warmup_steps=5 * step_per_epoch, - start_lr=0., - end_lr=base_lr) - optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, - momentum=momentum, - regularization=fluid.regularizer.L2Decay(weight_decay), - parameter_list=parameter_list) - return optimizer - - -def accuracy(pred, label, topk=(1, )): - maxk = max(topk) - pred = np.argsort(pred)[:, ::-1][:, :maxk] - correct = (pred == np.repeat(label, maxk, 1)) - - batch_size = label.shape[0] - res = [] - for k in topk: - correct_k = correct[:, :k].sum() - res.append(100.0 * correct_k / batch_size) - return res - - -def center_crop_resize(img): - h, w = img.shape[:2] - c = int(224 / 256 * min((h, w))) - i = (h + 1 - c) // 2 - j = (w + 1 - c) // 2 - img = img[i: i + c, j: j + c, :] - return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR) - - -def random_crop_resize(img): - height, width = img.shape[:2] - area = height * width - - for attempt in range(10): - target_area = random.uniform(0.08, 1.) * area - log_ratio = (math.log(3 / 4), math.log(4 / 3)) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= width and h <= height: - i = random.randint(0, height - h) - j = random.randint(0, width - w) - img = img[i: i + h, j: j + w, :] - return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR) - - return center_crop_resize(img) - - -def random_flip(img): - return img[:, ::-1, :] - - -def normalize_permute(img): - # transpose and convert to RGB from BGR - img = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...] - mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) - std = np.array([58.395, 57.120, 57.375], dtype=np.float32) - invstd = 1. / std - for v, m, s in zip(img, mean, invstd): - v.__isub__(m).__imul__(s) - return img - - -def compose(functions): - def process(sample): - img, label = sample - for fn in functions: - img = fn(img) - return img, label - return process - - -def image_folder(path, shuffle=False): - valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp') - classes = [d for d in os.listdir(path) if - os.path.isdir(os.path.join(path, d))] - classes.sort() - class_map = {cls: idx for idx, cls in enumerate(classes)} - samples = [] - for dir in sorted(class_map.keys()): - d = os.path.join(path, dir) - for root, _, fnames in sorted(os.walk(d)): - for fname in sorted(fnames): - p = os.path.join(root, fname) - if os.path.splitext(p)[1].lower() in valid_ext: - samples.append((p, class_map[dir])) - - def iterator(): - if shuffle: - random.shuffle(samples) - for s in samples: - yield s - - return iterator - - -def run(model, loader, mode='train'): - total_loss = 0. - total_acc1 = 0. - total_acc5 = 0. - total_time = 0. - start = time.time() - device_ids = list(range(FLAGS.num_devices)) - start = time.time() - - for idx, batch in enumerate(loader()): - outputs, losses = getattr(model, mode)( - batch[0], batch[1], device='gpu', device_ids=device_ids) - top1, top5 = accuracy(outputs[0], batch[1], topk=(1, 5)) - - total_loss += np.sum(losses) - total_acc1 += top1 - total_acc5 += top5 - if idx > 1: # skip first two steps - total_time += time.time() - start - if idx % 10 == 0: - print(("{:04d} loss: {:0.3f} top1: {:0.3f}% top5: {:0.3f}% " - "time: {:0.3f}").format( - idx, total_loss / (idx + 1), total_acc1 / (idx + 1), - total_acc5 / (idx + 1), total_time / max(1, (idx - 1)))) - start = time.time() - - -def main(): - @contextlib.contextmanager - def null_guard(): - yield - - epoch = FLAGS.epoch - batch_size = FLAGS.batch_size - guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard() - - train_dir = os.path.join(FLAGS.data, 'train') - val_dir = os.path.join(FLAGS.data, 'val') - - train_loader = fluid.io.xmap_readers( - lambda batch: (np.array([b[0] for b in batch]), - np.array([b[1] for b in batch]).reshape(-1, 1)), - paddle.batch( - fluid.io.xmap_readers( - compose([cv2.imread, random_crop_resize, random_flip, - normalize_permute]), - image_folder(train_dir, shuffle=True), - process_num=8, - buffer_size=4 * batch_size), - batch_size=batch_size, - drop_last=True), - process_num=2, buffer_size=4) - - val_loader = fluid.io.xmap_readers( - lambda batch: (np.array([b[0] for b in batch]), - np.array([b[1] for b in batch]).reshape(-1, 1)), - paddle.batch( - fluid.io.xmap_readers( - compose([cv2.imread, center_crop_resize, normalize_permute]), - image_folder(val_dir), - process_num=8, - buffer_size=4 * batch_size), - batch_size=batch_size), - process_num=2, buffer_size=4) - - if not os.path.exists('resnet_checkpoints'): - os.mkdir('resnet_checkpoints') - - with guard: - model = ResNet() - optim = make_optimizer(parameter_list=model.parameters()) - model.prepare(optim, CrossEntropy()) - if FLAGS.resume is not None: - model.load(FLAGS.resume) - - for e in range(epoch): - print("======== train epoch {} ========".format(e)) - run(model, train_loader) - model.save('resnet_checkpoints/{:02d}'.format(e)) - print("======== eval epoch {} ========".format(e)) - run(model, val_loader, mode='eval') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser("Resnet Training on ImageNet") - parser.add_argument('data', metavar='DIR', help='path to dataset ' - '(should have subdirectories named "train" and "val"') - parser.add_argument( - "-d", "--dynamic", action='store_true', help="enable dygraph mode") - parser.add_argument( - "-e", "--epoch", default=90, type=int, help="number of epoch") - parser.add_argument( - '--lr', '--learning-rate', default=0.1, type=float, metavar='LR', - help='initial learning rate') - parser.add_argument( - "-b", "--batch_size", default=256, type=int, help="batch size") - parser.add_argument( - "-n", "--num_devices", default=4, type=int, help="number of devices") - parser.add_argument( - "-r", "--resume", default=None, type=str, - help="checkpoint path to resume") - FLAGS = parser.parse_args() - assert FLAGS.data, "error: must provide data path" - main() -- GitLab