未验证 提交 58b345d1 编写于 作者: Y Yan Xu 提交者: GitHub

Merge pull request #1769 from Yancey1989/fast_resnet

Add fast resnet model
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
*.user *.user
*.pyc *.pyc
*~ *~
*.vscode
# PaddlePaddle Fast ImageNet Training
PaddlePaddle Fast ImageNet can train ImageNet dataset with fewer epochs. We implemented the it according to the blog
[Now anyone can train Imagenet in 18 minutes](https://www.fast.ai/2018/08/10/fastai-diu-imagenet/) which published on the [fast.ai] website.
PaddlePaddle Fast ImageNet using the dynmiac batch size, dynamic image size, rectangular images validation and etc... so that the Fast ImageNet can achieve the baseline
(acc1: 75%, acc5: 93%) by 27 epochs on 8 * V100 GPUs.
## Experiment
1. Prepare the training data, resize the images to 160 and 352 using `resize.py`, the prepared data folder should look like:
``` text
`-ImageNet
|-train
|-validation
|-160
|-train
`-validation
`-352
|-train
`-validation
```
1. Install the requirements by `pip install -r requirement.txt`.
1. Launch the training job: `python train.py --data_dir /data/imagenet`
1. Learning curve, we launch the training job on V100 GPU card:
<p align="center">
<img src="src/acc_curve.png" hspace='10' /> <br />
</p>
from PIL import Image
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from functools import partial
import multiprocessing
cpus = multiprocessing.cpu_count()
cpus = min(36,cpus)
PATH = Path('/data/imagenet2')
DEST = Path('/data/imagenet2/sz')
def mkdir(path):
if not path.exists():
path.mkdir()
mkdir(DEST)
szs = (160, 352)
def resize_img(p, im, fn, sz):
w,h = im.size
ratio = min(h/sz,w/sz)
im = im.resize((int(w/ratio), int(h/ratio)), resample=Image.BICUBIC)
new_fn = DEST/str(sz)/fn.relative_to(PATH)
mkdir(new_fn.parent())
im.save(new_fn)
def resizes(p, fn):
im = Image.open(fn)
for sz in szs: resize_img(p, im, fn, sz)
def resize_imgs(p):
files = p.glob('*/*.jpeg')
with ProcessPoolExecutor(cpus) as e: e.map(partial(resizes, p), files)
for sz in szs:
ssz=str(sz)
mkdir((DEST/ssz))
for ds in ('validation','train'): mkdir((DEST/ssz/ds))
for ds in ('train',): mkdir((DEST/ssz/ds))
for ds in ("validation", "train"):
print(PATH/ds)
resize_imgs(PATH/ds)
\ No newline at end of file
import os
import numpy as np
import math
import random
import torch
import torch.utils.data
from torch.utils.data.distributed import DistributedSampler
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.sampler import Sampler
import torchvision
import pickle
from tqdm import tqdm
import time
import multiprocessing
FINISH_EVENT = "FINISH_EVENT"
class PaddleDataLoader(object):
def __init__(self, torch_dataset, indices=None, concurrent=24, queue_size=3072, shuffle=True, shuffle_seed=0):
self.torch_dataset = torch_dataset
self.data_queue = multiprocessing.Queue(queue_size)
self.indices = indices
self.concurrent = concurrent
self.shuffle = shuffle
self.shuffle_seed=shuffle_seed
def _worker_loop(self, dataset, worker_indices, worker_id):
cnt = 0
for idx in worker_indices:
cnt += 1
img, label = self.torch_dataset[idx]
img = np.array(img).astype('uint8').transpose((2, 0, 1))
self.data_queue.put((img, label))
print("worker: [%d] read [%d] samples. " % (worker_id, cnt))
self.data_queue.put(FINISH_EVENT)
def reader(self):
def _reader_creator():
worker_processes = []
total_img = len(self.torch_dataset)
print("total image: ", total_img)
if self.shuffle:
self.indices = [i for i in xrange(total_img)]
random.seed(self.shuffle_seed)
random.shuffle(self.indices)
print("shuffle indices: %s ..." % self.indices[:10])
imgs_per_worker = int(math.ceil(total_img / self.concurrent))
for i in xrange(self.concurrent):
start = i * imgs_per_worker
end = (i + 1) * imgs_per_worker if i != self.concurrent - 1 else None
sliced_indices = self.indices[start:end]
w = multiprocessing.Process(
target=self._worker_loop,
args=(self.torch_dataset, sliced_indices, i)
)
w.daemon = True
w.start()
worker_processes.append(w)
finish_workers = 0
worker_cnt = len(worker_processes)
while finish_workers < worker_cnt:
sample = self.data_queue.get()
if sample == FINISH_EVENT:
finish_workers += 1
else:
yield sample
return _reader_creator
def train(traindir, sz, min_scale=0.08, shuffle_seed=0):
train_tfms = [
transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
transforms.RandomHorizontalFlip()
]
train_dataset = datasets.ImageFolder(traindir, transforms.Compose(train_tfms))
return PaddleDataLoader(train_dataset, shuffle_seed=shuffle_seed).reader()
def test(valdir, bs, sz, rect_val=False):
if rect_val:
idx_ar_sorted = sort_ar(valdir)
idx_sorted, _ = zip(*idx_ar_sorted)
idx2ar = map_idx2ar(idx_ar_sorted, bs)
ar_tfms = [transforms.Resize(int(sz* 1.14)), CropArTfm(idx2ar, sz)]
val_dataset = ValDataset(valdir, transform=ar_tfms)
return PaddleDataLoader(val_dataset, concurrent=1, indices=idx_sorted, shuffle=False).reader()
val_tfms = [transforms.Resize(int(sz* 1.14)), transforms.CenterCrop(sz)]
val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
return PaddleDataLoader(val_dataset).reader()
class ValDataset(datasets.ImageFolder):
def __init__(self, root, transform=None, target_transform=None):
super(ValDataset, self).__init__(root, transform, target_transform)
def __getitem__(self, index):
path, target = self.imgs[index]
sample = self.loader(path)
if self.transform is not None:
for tfm in self.transform:
if isinstance(tfm, CropArTfm):
sample = tfm(sample, index)
else:
sample = tfm(sample)
if self.target_transform is not None:
target = self.target_transform(target)
return sample, target
class CropArTfm(object):
def __init__(self, idx2ar, target_size):
self.idx2ar, self.target_size = idx2ar, target_size
def __call__(self, img, idx):
target_ar = self.idx2ar[idx]
if target_ar < 1:
w = int(self.target_size / target_ar)
size = (w // 8 * 8, self.target_size)
else:
h = int(self.target_size * target_ar)
size = (self.target_size, h // 8 * 8)
return torchvision.transforms.functional.center_crop(img, size)
def sort_ar(valdir):
idx2ar_file = valdir + '/../sorted_idxar.p'
if os.path.isfile(idx2ar_file):
return pickle.load(open(idx2ar_file, 'rb'))
print('Creating AR indexes. Please be patient this may take a couple minutes...')
val_dataset = datasets.ImageFolder(valdir) # AS: TODO: use Image.open instead of looping through dataset
sizes = [img[0].size for img in tqdm(val_dataset, total=len(val_dataset))]
idx_ar = [(i, round(s[0] * 1.0/ s[1], 5)) for i, s in enumerate(sizes)]
sorted_idxar = sorted(idx_ar, key=lambda x: x[1])
pickle.dump(sorted_idxar, open(idx2ar_file, 'wb'))
print('Done')
return sorted_idxar
def chunks(l, n):
n = max(1, n)
return (l[i:i + n] for i in range(0, len(l), n))
def map_idx2ar(idx_ar_sorted, batch_size):
ar_chunks = list(chunks(idx_ar_sorted, batch_size))
idx2ar = {}
for chunk in ar_chunks:
idxs, ars = list(zip(*chunk))
mean = round(np.mean(ars), 5)
for idx in idxs:
idx2ar[idx] = mean
return idx2ar
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import cProfile
import time
import os
import traceback
import numpy as np
import torchvision_reader
import torch
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler
import sys
sys.path.append("..")
from utility import add_arguments, print_arguments
import functools
from models.fast_imagenet import FastImageNet, lr_decay
import utils
def parse_args():
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('total_images', int, 1281167, "Training image number.")
add_arg('num_epochs', int, 120, "number of epochs.")
add_arg('image_shape', str, "3,224,224", "input image size")
add_arg('model_save_dir', str, "output", "model save directory")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('lr', float, 1.0, "set learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.")
add_arg('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir.")
add_arg('model_category', str, "models", "Whether to use models_name or not, valid value:'models','models_name'" )
add_arg('fp16', bool, False, "Enable half precision training with fp16." )
add_arg('scale_loss', float, 1.0, "Scale loss for fp16." )
# for distributed
add_arg('start_test_pass', int, 0, "Start test after x passes.")
add_arg('num_threads', int, 8, "Use num_threads to run the fluid program.")
add_arg('reduce_strategy', str, "allreduce", "Choose from reduce or allreduce.")
add_arg('log_period', int, 30, "Print period, defualt is 5.")
add_arg('memory_optimize', bool, True, "Whether to enable memory optimize.")
add_arg('best_acc5', float, 0.93, "The best acc5, default is 93%.")
# yapf: enable
args = parser.parse_args()
return args
def get_device_num():
import subprocess
visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
if visible_device:
device_num = len(visible_device.split(','))
else:
device_num = subprocess.check_output(
['nvidia-smi', '-L']).decode().count('\n')
return device_num
DEVICE_NUM = get_device_num()
def test_parallel(exe, test_args, args, test_reader, feeder, bs):
acc_evaluators = []
for i in xrange(len(test_args[2])):
acc_evaluators.append(fluid.metrics.Accuracy())
to_fetch = [v.name for v in test_args[2]]
batch_id = 0
start_ts = time.time()
for batch_id, data in enumerate(test_reader()):
acc_rets = exe.run(fetch_list=to_fetch, feed=feeder.feed(data))
ret_result = [np.mean(np.array(ret)) for ret in acc_rets]
print("Test batch: [%d], acc_rets: [%s]" % (batch_id, ret_result))
for i, e in enumerate(acc_evaluators):
e.update(
value=np.array(acc_rets[i]), weight=bs)
num_samples = batch_id * bs * DEVICE_NUM
print_train_time(start_ts, time.time(), num_samples)
return [e.eval() for e in acc_evaluators]
def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale, rect_val=False):
dshape=[3, sz, sz]
class_dim=1000
pyreader = None
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
if is_train:
with fluid.program_guard(main_prog, py_reader_startup_prog):
with fluid.unique_name.guard():
pyreader = fluid.layers.py_reader(
capacity=bs * DEVICE_NUM,
shapes=([-1] + dshape, (-1, 1)),
dtypes=('uint8', 'int64'),
name="train_reader_" + str(sz) if is_train else "test_reader_" + str(sz),
use_double_buffer=True)
input, label = fluid.layers.read_file(pyreader)
else:
input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8")
label = fluid.layers.data(name="label", shape=[1], dtype="int64")
cast_img_type = "float16" if args.fp16 else "float32"
cast = fluid.layers.cast(input, cast_img_type)
img_mean = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True)
img_std = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_std", persistable=True)
# image = (image - (mean * 255.0)) / (std * 255.0)
t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)
model = FastImageNet(is_train=is_train)
predict = model.net(t2, class_dim=class_dim, img_size=sz)
cost, pred = fluid.layers.softmax_with_cross_entropy(predict, label, return_softmax=True)
if args.scale_loss > 1:
avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
else:
avg_cost = fluid.layers.mean(x=cost)
batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)
# configure optimize
optimizer = None
if is_train:
total_images = args.total_images
lr = args.lr
epochs = [(0,7), (7,13), (13, 22), (22, 25), (25, 28)]
bs_epoch = [bs*DEVICE_NUM for bs in [224, 224, 96, 96, 50]]
bs_scale = [bs*1.0 / bs_epoch[0] for bs in bs_epoch]
lrs = [(lr, lr*2), (lr*2, lr/4), (lr*bs_scale[2], lr/10*bs_scale[2]), (lr/10*bs_scale[2], lr/100*bs_scale[2]), (lr/100*bs_scale[4], lr/1000*bs_scale[4]), lr/1000*bs_scale[4]]
boundaries, values = lr_decay(lrs, epochs, bs_epoch, total_images)
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries, values=values),
momentum=0.9)
if args.fp16:
params_grads = optimizer.backward(avg_cost)
master_params_grads = utils.create_master_params_grads(
params_grads, main_prog, startup_prog, args.scale_loss)
optimizer.apply_gradients(master_params_grads)
utils.master_param_to_train_param(master_params_grads, params_grads, main_prog)
else:
optimizer.minimize(avg_cost)
if args.memory_optimize:
fluid.memory_optimize(main_prog, skip_grads=True)
return avg_cost, optimizer, [batch_acc1, batch_acc5], pyreader
def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog=False, min_scale=0.08, rect_val=False):
print('refresh program: epoch: [%d], image size: [%d], trn_dir: [%s], batch_size:[%d]' % (epoch, sz, trn_dir, bs))
train_prog = fluid.Program()
test_prog = fluid.Program()
startup_prog = fluid.Program()
py_reader_startup_prog = fluid.Program()
train_args = build_program(args, True, train_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale)
test_args = build_program(args, False, test_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, val_bs, min_scale, rect_val=rect_val)
place = core.CUDAPlace(0)
startup_exe = fluid.Executor(place)
startup_exe.run(py_reader_startup_prog)
if need_update_start_prog:
startup_exe.run(startup_prog)
conv2d_w_vars = [var for var in startup_prog.global_block().vars.values() if var.name.startswith('conv2d_')]
for var in conv2d_w_vars:
torch_w = torch.empty(var.shape)
kaiming_np = torch.nn.init.kaiming_normal_(torch_w, mode='fan_out', nonlinearity='relu').numpy()
tensor = fluid.global_scope().find_var(var.name).get_tensor()
if args.fp16:
tensor.set(np.array(kaiming_np, dtype="float16").view(np.uint16), place)
else:
tensor.set(np.array(kaiming_np, dtype="float32"), place)
np_tensors = {}
np_tensors["img_mean"] = np.array([0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]).astype("float16" if args.fp16 else "float32").reshape((3, 1, 1))
np_tensors["img_std"] = np.array([0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0]).astype("float16" if args.fp16 else "float32").reshape((3, 1, 1))
for vname, np_tensor in np_tensors.items():
var = fluid.global_scope().find_var(vname)
if args.fp16:
var.get_tensor().set(np_tensor.view(np.uint16), place)
else:
var.get_tensor().set(np_tensor, place)
strategy = fluid.ExecutionStrategy()
strategy.num_threads = args.num_threads
strategy.allow_op_delay = False
strategy.num_iteration_per_drop_scope = 1
build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy().ReduceStrategy.AllReduce
avg_loss = train_args[0]
train_exe = fluid.ParallelExecutor(
True,
avg_loss.name,
main_program=train_prog,
exec_strategy=strategy,
build_strategy=build_strategy)
test_exe = fluid.ParallelExecutor(
True, main_program=test_prog, share_vars_from=train_exe)
return train_args, test_args, test_prog, train_exe, test_exe
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir, img_dim, min_scale, rect_val):
train_reader = torchvision_reader.train(
traindir="/data/imagenet/%strain" % trn_dir, sz=img_dim, min_scale=min_scale, shuffle_seed=epoch_id+1)
train_py_reader.decorate_paddle_reader(paddle.batch(train_reader, batch_size=train_bs))
test_reader = torchvision_reader.test(
valdir="/data/imagenet/%svalidation" % trn_dir, bs=val_bs*DEVICE_NUM, sz=img_dim, rect_val=rect_val)
test_batched_reader = paddle.batch(test_reader, batch_size=val_bs * DEVICE_NUM)
return test_batched_reader
def train_parallel(args):
over_all_start = time.time()
test_prog = fluid.Program()
exe = None
test_exe = None
train_args = None
test_args = None
## dynamic batch size, image size...
bs = 224
val_bs = 64
trn_dir = "sz/160/"
img_dim=128
min_scale=0.08
rect_val=False
for epoch_id in range(args.num_epochs):
# refresh program
if epoch_id == 0:
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, need_update_start_prog=True)
elif epoch_id == 13: #13
bs = 96
trn_dir="sz/352/"
img_dim=224
min_scale=0.087
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale)
elif epoch_id == 25: #25
bs = 50
val_bs=8
trn_dir=""
img_dim=288
min_scale=0.5
rect_val=True
train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale, rect_val=rect_val)
else:
pass
avg_loss = train_args[0]
num_samples = 0
iters = 0
start_time = time.time()
train_py_reader = train_args[3]
test_reader = prepare_reader(epoch_id, train_py_reader, bs, val_bs, trn_dir, img_dim=img_dim, min_scale=min_scale, rect_val=rect_val)
train_py_reader.start() # start pyreader
batch_start_time = time.time()
while True:
fetch_list = [avg_loss.name]
acc_name_list = [v.name for v in train_args[2]]
fetch_list.extend(acc_name_list)
fetch_list.append("learning_rate")
if iters % args.log_period == 0:
should_print = True
else:
should_print = False
fetch_ret = []
try:
if should_print:
fetch_ret = exe.run(fetch_list)
else:
exe.run([])
except fluid.core.EOFException as eof:
print("Finish current epoch, will reset pyreader...")
train_py_reader.reset()
break
except fluid.core.EnforceNotMet as ex:
traceback.print_exc()
exit(1)
num_samples += bs * DEVICE_NUM
if should_print:
fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
print("Epoch %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.4f secs" %
(epoch_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_py_reader.queue.size(), (time.time() - batch_start_time)*1.0/args.log_period))
batch_start_time = time.time()
iters += 1
print_train_time(start_time, time.time(), num_samples)
feed_list = [test_prog.global_block().var(varname) for varname in ("image", "label")]
test_feeder = fluid.DataFeeder(feed_list=feed_list, place=fluid.CUDAPlace(0))
test_ret = test_parallel(test_exe, test_args, args, test_reader, test_feeder, val_bs)
test_acc1, test_acc5 = [np.mean(np.array(v)) for v in test_ret]
print("Epoch: %d, Test Accuracy: %s, Spend %.2f hours\n" %
(epoch_id, [test_acc1, test_acc5], (time.time() - over_all_start) / 3600))
if np.mean(np.array(test_ret[1])) > args.best_acc5:
print("Achieve the best top-1 acc %f, top-5 acc: %f" % (test_acc1, test_acc5))
break
print("total train time: ", time.time() - over_all_start)
def print_train_time(start_time, end_time, num_samples):
train_elapsed = end_time - start_time
examples_per_sec = num_samples / train_elapsed
print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
(num_samples, train_elapsed, examples_per_sec))
def print_paddle_envs():
print('----------- Configuration envs -----------')
print("DEVICE_NUM: %d" % DEVICE_NUM)
for k in os.environ:
if "PADDLE_" in k:
print "ENV %s:%s" % (k, os.environ[k])
print('------------------------------------------------')
def main():
args = parse_args()
print_arguments(args)
print_paddle_envs()
train_parallel(args)
if __name__ == "__main__":
main()
\ No newline at end of file
...@@ -9,3 +9,4 @@ from .inception_v4 import InceptionV4 ...@@ -9,3 +9,4 @@ from .inception_v4 import InceptionV4
from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0 from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0
from .fast_imagenet import FastImageNet
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import time
import os
import math
import cProfile, pstats, StringIO
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import utils
__all__ = ["FastImageNet"]
class FastImageNet():
def __init__(self, layers=50, is_train=True):
self.layers = layers
self.is_train = is_train
def net(self, input, class_dim=1000, img_size=224, is_train=True):
layers = self.layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1)
pool_size = int(img_size / 32)
pool = fluid.layers.pool2d(
input=conv, pool_size=pool_size, pool_type='avg', global_pooling=True)
out = fluid.layers.fc(input=pool,
size=class_dim,
act=None,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.NormalInitializer(0.0, 0.01),
regularizer=fluid.regularizer.L2Decay(1e-4)),
bias_attr=fluid.ParamAttr(
regularizer=fluid.regularizer.L2Decay(1e-4)))
return out
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
bn_init_value=1.0):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False,
param_attr=fluid.ParamAttr(regularizer=fluid.regularizer.L2Decay(1e-4)))
return fluid.layers.batch_norm(input=conv, act=act, is_test=not self.is_train,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(bn_init_value),
regularizer=None))
def shortcut(self, input, ch_out, stride):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride)
else:
return input
def bottleneck_block(self, input, num_filters, stride):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu')
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
# init bn-weight0
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, bn_init_value=0.0)
short = self.shortcut(input, num_filters * 4, stride)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def lr_decay(lrs, epochs, bs, total_image):
boundaries = []
values = []
for idx, epoch in enumerate(epochs):
step = total_image // bs[idx]
if step * bs[idx] < total_image:
step += 1
ratio = (lrs[idx][1] - lrs[idx][0])*1.0 / (epoch[1] - epoch[0])
lr_base = lrs[idx][0]
for s in xrange(epoch[0], epoch[1]):
if boundaries:
boundaries.append(boundaries[-1] + step + 1)
else:
boundaries = [step]
lr = lr_base + ratio * (s - epoch[0])
values.append(lr)
print("epoch: [%d], steps: [%d], lr: [%f]" % (s, boundaries[-1], values[-1]))
values.append(lrs[-1])
print("epoch: [%d:], steps: [%d:], lr:[%f]" % (epochs[-1][-1], boundaries[-1], values[-1]))
return boundaries, values
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册