# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from __future__ import division from __future__ import print_function import argparse import contextlib import math import os import random import time import cv2 import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear from paddle.fluid.dygraph.container import Sequential from model import Model, CrossEntropy class ConvBNLayer(fluid.dygraph.Layer): def __init__(self, num_channels, num_filters, filter_size, stride=1, groups=1, act=None): super(ConvBNLayer, self).__init__() self._conv = Conv2D( num_channels=num_channels, num_filters=num_filters, filter_size=filter_size, stride=stride, padding=(filter_size - 1) // 2, groups=groups, act=None, bias_attr=False) self._batch_norm = BatchNorm(num_filters, act=act) def forward(self, inputs): x = self._conv(inputs) x = self._batch_norm(x) return x class BottleneckBlock(fluid.dygraph.Layer): def __init__(self, num_channels, num_filters, stride, shortcut=True): super(BottleneckBlock, self).__init__() self.conv0 = ConvBNLayer( num_channels=num_channels, num_filters=num_filters, filter_size=1, act='relu') self.conv1 = ConvBNLayer( num_channels=num_filters, num_filters=num_filters, filter_size=3, stride=stride, act='relu') self.conv2 = ConvBNLayer( num_channels=num_filters, num_filters=num_filters * 4, filter_size=1, act=None) if not shortcut: self.short = ConvBNLayer( num_channels=num_channels, num_filters=num_filters * 4, filter_size=1, stride=stride) self.shortcut = shortcut self._num_channels_out = num_filters * 4 def forward(self, inputs): x = self.conv0(inputs) conv1 = self.conv1(x) conv2 = self.conv2(conv1) if self.shortcut: short = inputs else: short = self.short(inputs) x = fluid.layers.elementwise_add(x=short, y=conv2) layer_helper = LayerHelper(self.full_name(), act='relu') return layer_helper.append_activation(x) class ResNet(Model): def __init__(self, depth=50, num_classes=1000): super(ResNet, self).__init__() layer_config = { 50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3], } assert depth in layer_config.keys(), \ "supported depth are {} but input layer is {}".format( layer_config.keys(), depth) layers = layer_config[depth] num_in = [64, 256, 512, 1024] num_out = [64, 128, 256, 512] self.conv = ConvBNLayer( num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') self.pool = Pool2D( pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') self.layers = [] for idx, num_blocks in enumerate(layers): blocks = [] shortcut = False for b in range(num_blocks): block = BottleneckBlock( num_channels=num_in[idx] if b == 0 else num_out[idx] * 4, num_filters=num_out[idx], stride=2 if b == 0 and idx != 0 else 1, shortcut=shortcut) blocks.append(block) shortcut = True layer = self.add_sublayer( "layer_{}".format(idx), Sequential(*blocks)) self.layers.append(layer) self.global_pool = Pool2D( pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(2048 * 1.0) self.fc_input_dim = num_out[-1] * 4 * 1 * 1 self.fc = Linear(self.fc_input_dim, num_classes, act='softmax', param_attr=fluid.param_attr.ParamAttr( initializer=fluid.initializer.Uniform( -stdv, stdv))) def forward(self, inputs): x = self.conv(inputs) x = self.pool(x) for layer in self.layers: x = layer(x) x = self.global_pool(x) x = fluid.layers.reshape(x, shape=[-1, self.fc_input_dim]) x = self.fc(x) return x def make_optimizer(parameter_list=None): total_images = 1281167 base_lr = FLAGS.lr momentum = 0.9 weight_decay = 1e-4 step_per_epoch = int(math.floor(float(total_images) / FLAGS.batch_size)) boundaries = [step_per_epoch * e for e in [30, 60, 80]] values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)] learning_rate = fluid.layers.piecewise_decay( boundaries=boundaries, values=values) learning_rate = fluid.layers.linear_lr_warmup( learning_rate=learning_rate, warmup_steps=5 * step_per_epoch, start_lr=0., end_lr=base_lr) optimizer = fluid.optimizer.Momentum( learning_rate=learning_rate, momentum=momentum, regularization=fluid.regularizer.L2Decay(weight_decay), parameter_list=parameter_list) return optimizer def accuracy(pred, label, topk=(1, )): maxk = max(topk) pred = np.argsort(pred)[:, ::-1][:, :maxk] correct = (pred == np.repeat(label, maxk, 1)) batch_size = label.shape[0] res = [] for k in topk: correct_k = correct[:, :k].sum() res.append(100.0 * correct_k / batch_size) return res def center_crop_resize(img): h, w = img.shape[:2] c = int(224 / 256 * min((h, w))) i = (h + 1 - c) // 2 j = (w + 1 - c) // 2 img = img[i: i + c, j: j + c, :] return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR) def random_crop_resize(img): height, width = img.shape[:2] area = height * width for attempt in range(10): target_area = random.uniform(0.08, 1.) * area log_ratio = (math.log(3 / 4), math.log(4 / 3)) aspect_ratio = math.exp(random.uniform(*log_ratio)) w = int(round(math.sqrt(target_area * aspect_ratio))) h = int(round(math.sqrt(target_area / aspect_ratio))) if w <= width and h <= height: i = random.randint(0, height - h) j = random.randint(0, width - w) img = img[i: i + h, j: j + w, :] return cv2.resize(img, (224, 224), 0, 0, cv2.INTER_LINEAR) return center_crop_resize(img) def random_flip(img): return img[:, ::-1, :] def normalize_permute(img): # transpose and convert to RGB from BGR img = img.astype(np.float32).transpose((2, 0, 1))[::-1, ...] mean = np.array([123.675, 116.28, 103.53], dtype=np.float32) std = np.array([58.395, 57.120, 57.375], dtype=np.float32) invstd = 1. / std for v, m, s in zip(img, mean, invstd): v.__isub__(m).__imul__(s) return img def compose(functions): def process(sample): img, label = sample for fn in functions: img = fn(img) return img, label return process def image_folder(path, shuffle=False): valid_ext = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.webp') classes = [d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))] classes.sort() class_map = {cls: idx for idx, cls in enumerate(classes)} samples = [] for dir in sorted(class_map.keys()): d = os.path.join(path, dir) for root, _, fnames in sorted(os.walk(d)): for fname in sorted(fnames): p = os.path.join(root, fname) if os.path.splitext(p)[1].lower() in valid_ext: samples.append((p, class_map[dir])) def iterator(): if shuffle: random.shuffle(samples) for s in samples: yield s return iterator def run(model, loader, mode='train'): total_loss = 0. total_acc1 = 0. total_acc5 = 0. total_time = 0. start = time.time() device_ids = list(range(FLAGS.num_devices)) start = time.time() for idx, batch in enumerate(loader()): outputs, losses = getattr(model, mode)( batch[0], batch[1], device='gpu', device_ids=device_ids) top1, top5 = accuracy(outputs[0], batch[1], topk=(1, 5)) total_loss += np.sum(losses) total_acc1 += top1 total_acc5 += top5 if idx > 1: # skip first two steps total_time += time.time() - start if idx % 10 == 0: print(("{:04d} loss: {:0.3f} top1: {:0.3f}% top5: {:0.3f}% " "time: {:0.3f}").format( idx, total_loss / (idx + 1), total_acc1 / (idx + 1), total_acc5 / (idx + 1), total_time / max(1, (idx - 1)))) start = time.time() def main(): @contextlib.contextmanager def null_guard(): yield epoch = FLAGS.epoch batch_size = FLAGS.batch_size guard = fluid.dygraph.guard() if FLAGS.dynamic else null_guard() train_dir = os.path.join(FLAGS.data, 'train') val_dir = os.path.join(FLAGS.data, 'val') train_loader = fluid.io.xmap_readers( lambda batch: (np.array([b[0] for b in batch]), np.array([b[1] for b in batch]).reshape(-1, 1)), paddle.batch( fluid.io.xmap_readers( compose([cv2.imread, random_crop_resize, random_flip, normalize_permute]), image_folder(train_dir, shuffle=True), process_num=8, buffer_size=4 * batch_size), batch_size=batch_size, drop_last=True), process_num=2, buffer_size=4) val_loader = fluid.io.xmap_readers( lambda batch: (np.array([b[0] for b in batch]), np.array([b[1] for b in batch]).reshape(-1, 1)), paddle.batch( fluid.io.xmap_readers( compose([cv2.imread, center_crop_resize, normalize_permute]), image_folder(val_dir), process_num=8, buffer_size=4 * batch_size), batch_size=batch_size), process_num=2, buffer_size=4) if not os.path.exists('resnet_checkpoints'): os.mkdir('resnet_checkpoints') with guard: model = ResNet() optim = make_optimizer(parameter_list=model.parameters()) model.prepare(optim, CrossEntropy()) if FLAGS.resume is not None: model.load(FLAGS.resume) for e in range(epoch): print("======== train epoch {} ========".format(e)) run(model, train_loader) model.save('resnet_checkpoints/{:02d}'.format(e)) print("======== eval epoch {} ========".format(e)) run(model, val_loader, mode='eval') if __name__ == '__main__': parser = argparse.ArgumentParser("Resnet Training on ImageNet") parser.add_argument('data', metavar='DIR', help='path to dataset ' '(should have subdirectories named "train" and "val"') parser.add_argument( "-d", "--dynamic", action='store_true', help="enable dygraph mode") parser.add_argument( "-e", "--epoch", default=90, type=int, help="number of epoch") parser.add_argument( '--lr', '--learning-rate', default=0.1, type=float, metavar='LR', help='initial learning rate') parser.add_argument( "-b", "--batch_size", default=256, type=int, help="batch size") parser.add_argument( "-n", "--num_devices", default=4, type=int, help="number of devices") parser.add_argument( "-r", "--resume", default=None, type=str, help="checkpoint path to resume") FLAGS = parser.parse_args() assert FLAGS.data, "error: must provide data path" main()