提交 f66de96c 编写于 作者: Y Yancey1989

add fast resnet model

上级 4d78bac7
......@@ -3,3 +3,4 @@
*.user
*.pyc
*~
*.vscode
import os
import numpy as np
import math
import random
import torch
import torch.utils.data
from torch.utils.data.distributed import DistributedSampler
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.sampler import Sampler
import torchvision
import pickle
from tqdm import tqdm
import time
import multiprocessing
# Trainer count and this trainer's id, taken from the environment; a missing
# variable falls back to a single trainer with id 0.
TRAINER_NUMS = int(os.environ.get("PADDLE_TRAINER_NUM", "1"))
TRAINER_ID = int(os.environ.get("PADDLE_TRAINER_ID", "0"))

# Module-level epoch counter, initialised to zero.
epoch = 0

# Marker value placed on the data queue to signal "no more samples".
FINISH_EVENT = "FINISH_EVENT"
class PaddleDataLoader(object):
    """Read a torch-style dataset with worker processes and expose a reader.

    ``reader()`` returns a zero-argument callable; each call of that callable
    starts one pass over the data and returns a generator of ``(img, label)``
    tuples, where ``img`` is a uint8 array with its axes reordered by
    ``transpose((2, 0, 1))``.

    Args:
        torch_dataset: indexable dataset returning ``(img, label)`` pairs.
        indices: optional explicit sample order. When ``None`` every pass
            uses a freshly shuffled order over the whole dataset.
        concurrent: number of worker processes.
        queue_size: capacity of the queue between workers and the consumer.
    """

    def __init__(self, torch_dataset, indices=None, concurrent=16, queue_size=3072):
        self.torch_dataset = torch_dataset
        self.queue_size = queue_size
        # Kept for backward compatibility; each pass installs its own queue.
        self.data_queue = multiprocessing.Queue(queue_size)
        self.indices = indices
        self.concurrent = concurrent

    def _worker_loop(self, dataset, worker_indices, worker_id, data_queue=None):
        """Load ``worker_indices`` from ``dataset`` and push them to the queue.

        The finish marker is always sent, even when loading a sample raises,
        so the consumer never waits forever on a dead worker.
        """
        queue = self.data_queue if data_queue is None else data_queue
        cnt = 0
        try:
            for idx in worker_indices:
                img, label = dataset[idx]
                img = np.array(img).astype('uint8').transpose((2, 0, 1))
                queue.put((img, label))
                cnt += 1
        finally:
            print("worker: [%d] read [%d] samples. " % (worker_id, cnt))
            queue.put(FINISH_EVENT)

    def reader(self):
        """Return a callable that starts one pass and yields its samples."""

        def _reader_creator():
            total_img = len(self.torch_dataset)
            print("total image: ", total_img)
            if self.indices is None:
                # Shuffle a local copy so that every pass gets a new order
                # instead of reusing the order of the first pass.
                indices = list(range(total_img))
                random.seed(time.time())
                random.shuffle(indices)
            else:
                indices = list(self.indices)
            # Wrap in a 1-tuple: the slice itself must be the single argument.
            print("shuffle indices: %s ..." % (indices[:10],))

            worker_num = max(1, int(self.concurrent))
            # True ceiling over the number of indices actually served.
            imgs_per_worker = int(math.ceil(len(indices) / float(worker_num)))

            # A private queue per pass: leftovers of an abandoned pass can
            # never leak into the next one.
            data_queue = multiprocessing.Queue(self.queue_size)
            self.data_queue = data_queue

            worker_processes = []
            completed = False
            try:
                for i in range(worker_num):
                    start = i * imgs_per_worker
                    # The last worker takes whatever remains.
                    end = start + imgs_per_worker if i != worker_num - 1 else None
                    w = multiprocessing.Process(
                        target=self._worker_loop,
                        args=(self.torch_dataset, indices[start:end], i, data_queue))
                    w.daemon = True
                    w.start()
                    worker_processes.append(w)

                finish_workers = 0
                worker_cnt = len(worker_processes)
                while finish_workers < worker_cnt:
                    sample = data_queue.get()
                    if isinstance(sample, str) and sample == FINISH_EVENT:
                        finish_workers += 1
                    else:
                        yield sample
                completed = True
            finally:
                for w in worker_processes:
                    # Workers of an abandoned pass may be blocked on a full
                    # queue; stop them before reaping.
                    if not completed and w.is_alive():
                        w.terminate()
                    w.join()

        return _reader_creator
def train(traindir, sz, min_scale=0.08):
    """Return a reader creator over the image folder ``traindir``.

    Images go through a random resized crop to ``sz`` (scale range
    ``(min_scale, 1.0)``) followed by a random horizontal flip.
    """
    augment = transforms.Compose([
        transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
        transforms.RandomHorizontalFlip(),
    ])
    loader = PaddleDataLoader(datasets.ImageFolder(traindir, augment))
    return loader.reader()
def test(valdir, bs, sz, rect_val=False):
    """Return a reader creator over the image folder ``valdir``.

    Without ``rect_val`` images are resized to ``int(sz * 1.14)`` and
    center-cropped to ``sz``. With ``rect_val`` samples are visited in
    aspect-ratio order by a single worker and cropped per batch of ``bs``
    with ``CropArTfm``.
    """
    resize = transforms.Resize(int(sz * 1.14))
    if not rect_val:
        pipeline = transforms.Compose([resize, transforms.CenterCrop(sz)])
        return PaddleDataLoader(datasets.ImageFolder(valdir, pipeline)).reader()

    sorted_pairs = sort_ar(valdir)
    ordered_idx, _ = zip(*sorted_pairs)
    ar_crop = CropArTfm(map_idx2ar(sorted_pairs, bs), sz)
    rect_dataset = ValDataset(valdir, transform=[resize, ar_crop])
    return PaddleDataLoader(rect_dataset, concurrent=1, indices=ordered_idx).reader()
def create_validation_set(valdir, batch_size, target_size, rect_val, distributed):
    """Build the validation dataset and its batch sampler.

    Returns:
        ``(val_dataset, val_sampler)``. With ``rect_val`` the dataset is a
        ``ValDataset`` sampled in aspect-ratio order; otherwise it is a plain
        ``ImageFolder`` with resize + center crop, sampled in index order.
    """
    print("create_validation_set", valdir, batch_size, target_size, rect_val, distributed)
    resize = transforms.Resize(int(target_size * 1.14))

    if not rect_val:
        pipeline = transforms.Compose([resize, transforms.CenterCrop(target_size)])
        val_dataset = datasets.ImageFolder(valdir, pipeline)
        all_indices = list(range(len(val_dataset)))
        val_sampler = DistValSampler(all_indices, batch_size=batch_size, distributed=distributed)
        return val_dataset, val_sampler

    sorted_pairs = sort_ar(valdir)
    ordered_idx, _ = zip(*sorted_pairs)
    ar_crop = CropArTfm(map_idx2ar(sorted_pairs, batch_size), target_size)
    val_dataset = ValDataset(valdir, transform=[resize, ar_crop])
    val_sampler = DistValSampler(ordered_idx, batch_size=batch_size, distributed=distributed)
    return val_dataset, val_sampler
class ValDataset(datasets.ImageFolder):
    """Image folder whose ``transform`` is a list of callables.

    ``CropArTfm`` entries are called with ``(sample, index)``; every other
    entry is called with ``sample`` only.
    """

    def __init__(self, root, transform=None, target_transform=None):
        super(ValDataset, self).__init__(root, transform, target_transform)

    def __getitem__(self, index):
        """Load item ``index`` and run it through the transform list."""
        path, target = self.imgs[index]
        sample = self.loader(path)
        steps = self.transform if self.transform is not None else ()
        for step in steps:
            if isinstance(step, CropArTfm):
                sample = step(sample, index)
            else:
                sample = step(sample)
        if self.target_transform is None:
            return sample, target
        return sample, self.target_transform(target)
class DistValSampler(Sampler):
    """Batch sampler that gives every trainer the same number of batches.

    DistValSampler distributes batches equally (based on batch size) to every
    trainer, even if there aren't enough images.
    WARNING: some batches may be short or empty to signify there aren't
    enough images.
    With ``distributed=False`` the same validation happens on every trainer.

    Args:
        indices: sample indices in the order they should be visited.
        batch_size: number of indices per yielded batch.
        distributed: split ``indices`` across ``TRAINER_NUMS`` trainers and
            serve the slice belonging to ``TRAINER_ID``.
    """

    def __init__(self, indices, batch_size, distributed=True):
        self.indices = indices
        self.batch_size = batch_size
        if distributed:
            self.world_size = TRAINER_NUMS
            self.global_rank = TRAINER_ID
        else:
            self.global_rank = 0
            self.world_size = 1

        # Expected number of batches per trainer, so that every trainer
        # validates on the same number of batches even if there isn't enough
        # data to go around. Integer ceiling division: chained "/" floors on
        # Python 2 and would silently drop the trailing partial batch.
        per_round = self.world_size * self.batch_size
        self.expected_num_batches = (len(self.indices) + per_round - 1) // per_round
        # Number of index slots handed to each trainer.
        self.num_samples = self.expected_num_batches * self.batch_size

    def __iter__(self):
        offset = self.num_samples * self.global_rank
        sampled_indices = self.indices[offset:offset + self.num_samples]
        print("DistValSampler: self.world_size: ", self.world_size, " self.global_rank: ", self.global_rank)
        for i in range(self.expected_num_batches):
            offset = i * self.batch_size
            yield sampled_indices[offset:offset + self.batch_size]

    def __len__(self):
        return self.expected_num_batches

    def set_epoch(self, epoch):
        """No-op; the sampling order does not depend on the epoch."""
        return
class CropArTfm(object):
    """Center-crop an image to a size derived from a per-index aspect ratio.

    ``idx2ar`` maps a sample index to its target aspect ratio; the side not
    fixed to ``target_size`` is rounded down to a multiple of 8.
    """

    def __init__(self, idx2ar, target_size):
        self.idx2ar = idx2ar
        self.target_size = target_size

    def __call__(self, img, idx):
        """Return ``img`` center-cropped for the aspect ratio of ``idx``."""
        ratio = self.idx2ar[idx]
        side = self.target_size
        if ratio < 1:
            crop_size = (int(side / ratio) // 8 * 8, side)
        else:
            crop_size = (side, int(side * ratio) // 8 * 8)
        return torchvision.transforms.functional.center_crop(img, crop_size)
def sort_ar(valdir):
    """Return ``(index, aspect_ratio)`` pairs of ``valdir`` sorted by ratio.

    The result is cached as a pickle in ``<valdir>/../sorted_idxar.p``; when
    that file exists it is loaded instead of scanning the images again.

    Args:
        valdir: root of an ``ImageFolder``-style validation directory.

    Returns:
        List of ``(index, width / height rounded to 5 digits)`` tuples in
        ascending aspect-ratio order.
    """
    idx2ar_file = valdir + '/../sorted_idxar.p'
    if os.path.isfile(idx2ar_file):
        # NOTE(review): pickle executes arbitrary code on load; this is only
        # safe as long as the cache file is produced by this function.
        with open(idx2ar_file, 'rb') as cache:
            return pickle.load(cache)

    print('Creating AR indexes. Please be patient this may take a couple minutes...')
    val_dataset = datasets.ImageFolder(valdir)  # AS: TODO: use Image.open instead of looping through dataset
    sizes = [img[0].size for img in tqdm(val_dataset, total=len(val_dataset))]
    idx_ar = [(i, round(s[0] * 1.0 / s[1], 5)) for i, s in enumerate(sizes)]
    sorted_idxar = sorted(idx_ar, key=lambda x: x[1])
    # Context manager so the cache is flushed and closed before returning.
    with open(idx2ar_file, 'wb') as cache:
        pickle.dump(sorted_idxar, cache)
    print('Done')
    return sorted_idxar
def chunks(l, n):
    """Return a generator over consecutive slices of ``l`` of length ``n``.

    ``n`` is raised to 1 when smaller; the last slice may be shorter.
    """
    step = max(1, n)
    starts = range(0, len(l), step)
    return (l[begin:begin + step] for begin in starts)
def map_idx2ar(idx_ar_sorted, batch_size):
    """Map every index to the mean aspect ratio of its batch.

    ``idx_ar_sorted`` is split into consecutive groups of ``batch_size``
    (at least 1) pairs; each index in a group is mapped to the group's mean
    ratio rounded to 5 digits.
    """
    step = max(1, batch_size)
    idx2ar = {}
    for begin in range(0, len(idx_ar_sorted), step):
        idxs, ars = zip(*idx_ar_sorted[begin:begin + step])
        batch_ar = round(np.mean(ars), 5)
        idx2ar.update((idx, batch_ar) for idx in idxs)
    return idx2ar
if __name__ == "__main__":
    # Manual smoke test: print the first samples of the rectangular
    # validation reader.
    #ds, sampler = create_validation_set("/data/imagenet/validation", 128, 288, True, True)
    #for item in sampler:
    #    for idx in item:
    #        ds[idx]
    import time
    test_reader = test(valdir="/data/imagenet/validation", bs=50, sz=288, rect_val=True)
    start_ts = time.time()
    for idx, data in enumerate(test_reader()):
        print(idx, data[0].shape, data[1])
        if idx == 10:
            break
        if (idx + 1) % 1000 != 0:
            continue
        # Throughput report every 1000 samples.
        cost = (time.time() - start_ts)
        print("%d samples per second" % (1000 / cost))
        start_ts = time.time()
\ No newline at end of file
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import cProfile
import time
import os
import traceback
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler
import torchvision_reader
import sys
sys.path.append("..")
from utility import add_arguments, print_arguments
import functools
import models
import utils
# Dump the generated programs to /tmp when DEBUG_PROG is set to a truthy
# value. bool() of any non-empty string (including "0") is True, so the flag
# is parsed explicitly.
DEBUG_PROG = os.getenv("DEBUG_PROG", "0").strip().lower() in ("1", "true", "yes", "on")
def parse_args():
    """Declare the command-line options and return the parsed arguments."""
    parser = argparse.ArgumentParser(description=__doc__)
    add_arg = functools.partial(add_arguments, argparser=parser)
    # yapf: disable
    # (name, type, default, help) for every option, in registration order.
    option_table = [
        ('batch_size', int, 256, "Minibatch size."),
        ('use_gpu', bool, True, "Whether to use GPU or not."),
        ('total_images', int, 1281167, "Training image number."),
        ('num_epochs', int, 120, "number of epochs."),
        ('class_dim', int, 1000, "Class number."),
        ('image_shape', str, "3,224,224", "input image size"),
        ('model_save_dir', str, "output", "model save directory"),
        ('with_mem_opt', bool, False, "Whether to use memory optimization or not."),
        ('pretrained_model', str, None, "Whether to use pretrained model."),
        ('checkpoint', str, None, "Whether to resume checkpoint."),
        ('lr', float, 0.1, "set learning rate."),
        ('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy."),
        ('model', str, "FastResNet", "Set the network to use."),
        ('data_dir', str, "./data/ILSVRC2012", "The ImageNet dataset root dir."),
        ('model_category', str, "models", "Whether to use models_name or not, valid value:'models','models_name'"),
        ('fp16', bool, False, "Enable half precision training with fp16."),
        ('scale_loss', float, 1.0, "Scale loss for fp16."),
        # for distributed
        ('start_test_pass', int, 0, "Start test after x passes."),
        ('num_threads', int, 8, "Use num_threads to run the fluid program."),
        ('reduce_strategy', str, "allreduce", "Choose from reduce or allreduce."),
        ('log_period', int, 5, "Print period, defualt is 5."),
        ('init_conv2d_kaiming', bool, False, "Whether to initliaze conv2d weight by kaiming."),
        ('memory_optimize', bool, True, "Whether to enable memory optimize."),
    ]
    # yapf: enable
    for opt_name, opt_type, opt_default, opt_help in option_table:
        add_arg(opt_name, opt_type, opt_default, opt_help)
    return parser.parse_args()
def get_device_num():
    """Return the number of GPUs this process should use.

    Counts the comma-separated entries of ``CUDA_VISIBLE_DEVICES`` when it is
    set and non-empty; otherwise counts the lines printed by
    ``nvidia-smi -L``.
    """
    import subprocess
    visible = os.getenv('CUDA_VISIBLE_DEVICES')
    if not visible:
        listing = subprocess.check_output(['nvidia-smi', '-L']).decode()
        return listing.count('\n')
    return len(visible.split(','))
def linear_lr_decay(lr_values, epochs, bs_values, total_images):
    """Build a piecewise-linear learning-rate schedule inside the program.

    Stage ``i`` covers the epochs ``epochs[i] = (start_epoch, end_epoch)``
    and moves the learning rate linearly from ``lr_values[i][0]`` to
    ``lr_values[i][1]``. After the last stage the rate stays at
    ``lr_values[-1]``.

    Args:
        lr_values: one ``(start_lr, end_lr)`` pair per stage, followed by a
            final scalar used once all stages are over.
        epochs: one ``(start_epoch, end_epoch)`` pair per stage.
        bs_values: global batch size of every stage, used to convert epochs
            into steps.
        total_images: number of training images per epoch.

    Returns:
        The persistable global variable named ``"learning_rate"``.
    """
    from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
    import paddle.fluid.layers.tensor as tensor
    import math

    with paddle.fluid.default_main_program()._lr_schedule_guard():
        global_step = _decay_step_counter()
        lr = tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="learning_rate")
        with fluid.layers.control_flow.Switch() as switch:
            last_steps = 0
            for idx, epoch_bound in enumerate(epochs):
                start_epoch, end_epoch = epoch_bound
                linear_epoch = end_epoch - start_epoch
                start_lr, end_lr = lr_values[idx]
                linear_lr = end_lr - start_lr
                # Steps inside this stage, and the cumulative step at which
                # the stage ends.
                stage_steps = math.ceil(total_images * 1.0 / bs_values[idx]) * linear_epoch
                steps = last_steps + stage_steps
                with switch.case(global_step < steps):
                    # Interpolate over the progress inside the current stage,
                    # i.e. divide by the stage length, not the cumulative
                    # step count.
                    decayed_lr = start_lr + linear_lr * (
                        (global_step - last_steps) * 1.0 / stage_steps)
                    fluid.layers.tensor.assign(decayed_lr, lr)
                last_steps = steps
            last_value_var = tensor.fill_constant(
                shape=[1],
                dtype='float32',
                value=float(lr_values[-1]))
            with switch.default():
                fluid.layers.tensor.assign(last_value_var, lr)
        return lr
def test_parallel(exe, test_args, args, test_prog, feeder, bs):
    """Run one pass over the test reader and return the accuracies.

    Args:
        exe: executor used to run the test program.
        test_args: tuple whose item 2 is the list of accuracy variables and
            whose item 3 is the batched test reader creator.
        args: parsed command-line arguments (unused here).
        test_prog: the test program (unused here).
        feeder: data feeder that converts a batch into a feed dict.
        bs: weight given to every batch when accumulating the accuracies.

    Returns:
        One evaluated accuracy per variable in ``test_args[2]``.
    """
    acc_evaluators = [fluid.metrics.Accuracy() for _ in test_args[2]]
    to_fetch = [v.name for v in test_args[2]]
    test_reader = test_args[3]
    num_samples = 0
    start_ts = time.time()
    for batch_id, data in enumerate(test_reader()):
        acc_rets = exe.run(fetch_list=to_fetch, feed=feeder.feed(data))
        ret_result = [np.mean(np.array(ret)) for ret in acc_rets]
        print("Test batch: [%d], acc_rets: [%s]" % (batch_id, ret_result))
        for i, e in enumerate(acc_evaluators):
            e.update(
                value=np.array(acc_rets[i]), weight=bs)
        # Count what was actually fed: the last batch may be smaller, and
        # deriving the total from the last batch index misses one batch.
        # assumes each batch is a sized sequence of samples - TODO confirm
        num_samples += len(data)
    print_train_time(start_ts, time.time(), num_samples)
    return [e.eval() for e in acc_evaluators]
def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_prog, img_size, trn_dir, batch_size, min_scale, rect_val):
    """Build the train or test network for one image size into ``main_prog``.

    Args:
        args: parsed command-line arguments.
        is_train: build the training program (py_reader input, optimizer)
            when true, otherwise the test program (feed-based input).
        main_prog: program receiving the network.
        startup_prog: startup program receiving parameter initialisation.
        py_reader_startup_prog: startup program receiving the py_reader
            initialisation.
        img_size: side length of the input images.
        trn_dir: sub-directory of ``args.data_dir`` holding the data.
        batch_size: per-device batch size.
        min_scale: lower scale bound of the random resized crop (train only).
        rect_val: use rectangular validation images (test only).

    Returns:
        ``(avg_cost, optimizer, [batch_acc1, batch_acc5], batched_reader,
        pyreader, py_reader_startup_prog)``; ``optimizer`` and ``pyreader``
        are ``None`` for the test program, ``batched_reader`` is ``None``
        for the train program.
    """
    # Ask once: get_device_num() may have to launch nvidia-smi.
    device_num = get_device_num()
    if is_train:
        reader = torchvision_reader.train(
            traindir=os.path.join(args.data_dir, trn_dir, "train"),
            sz=img_size, min_scale=min_scale)
    else:
        reader = torchvision_reader.test(
            valdir=os.path.join(args.data_dir, trn_dir, "validation"),
            bs=batch_size * device_num, sz=img_size, rect_val=rect_val)
    dshape = [3, img_size, img_size]
    # Honour --class_dim instead of a hard-coded 1000 (the default is 1000).
    class_dim = getattr(args, "class_dim", 1000)
    pyreader = None
    batched_reader = None
    model_name = args.model
    model_list = [m for m in dir(models) if "__" not in m]
    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
                                                                     model_list)
    model = models.__dict__[model_name]()
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if is_train:
                # The py_reader is initialised by its own startup program.
                with fluid.program_guard(main_prog, py_reader_startup_prog):
                    with fluid.unique_name.guard():
                        pyreader = fluid.layers.py_reader(
                            capacity=batch_size * device_num,
                            shapes=([-1] + dshape, (-1, 1)),
                            dtypes=('uint8', 'int64'),
                            name="train_reader_" + str(img_size) if is_train else "test_reader_" + str(img_size),
                            use_double_buffer=True)
                input, label = fluid.layers.read_file(pyreader)
                pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=batch_size))
            else:
                # NOTE(review): 244 looks like a typo for 224, and the shape
                # does not follow img_size - confirm before changing.
                input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8")
                label = fluid.layers.data(name="label", shape=[1], dtype="int64")
                batched_reader = paddle.batch(reader, batch_size=batch_size * device_num)
            cast_img_type = "float16" if args.fp16 else "float32"
            cast = fluid.layers.cast(input, cast_img_type)
            img_mean = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True)
            img_std = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_std", persistable=True)
            # image = (image - (mean * 255.0)) / (std * 255.0)
            t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
            t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)
            predict = model.net(t2, class_dim=class_dim, img_size=img_size, is_train=is_train)
            cost, pred = fluid.layers.softmax_with_cross_entropy(predict, label, return_softmax=True)
            if args.scale_loss > 1:
                avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
            else:
                avg_cost = fluid.layers.mean(x=cost)
            batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)
            # configure optimize
            optimizer = None
            if is_train:
                #total_images = 1281167 / trainer_count
                epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
                bs_epoch = [x * device_num for x in [224, 224, 96, 96, 50]]
                lrs = [(1.0, 2.0), (2.0, 0.25), (0.42857142857142855, 0.04285714285714286), (0.04285714285714286, 0.004285714285714286), (0.0022321428571428575, 0.00022321428571428573), 0.00022321428571428573]
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=linear_lr_decay(lrs, epochs, bs_epoch, args.total_images),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))
                if args.fp16:
                    params_grads = optimizer.backward(avg_cost)
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, main_prog, startup_prog, args.scale_loss)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads, params_grads, main_prog)
                else:
                    optimizer.minimize(avg_cost)
                if args.memory_optimize:
                    fluid.memory_optimize(main_prog, skip_grads=True)
    return avg_cost, optimizer, [batch_acc1,
                                 batch_acc5], batched_reader, pyreader, py_reader_startup_prog
def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog=False, min_scale=0.08, rect_val=False):
    """Rebuild the train/test programs and executors for a new image size.

    Args:
        args: parsed command-line arguments.
        epoch: current pass id (used for logging and debug dumps).
        sz: image side length.
        trn_dir: data sub-directory below ``args.data_dir``.
        bs: per-device training batch size.
        val_bs: per-device validation batch size.
        need_update_start_prog: also run the parameter startup program and
            initialise the image mean/std tensors (first build only).
        min_scale: lower scale bound of the random resized crop.
        rect_val: use rectangular validation images.

    Returns:
        ``(train_args, test_args, test_prog, train_exe, test_exe)``.

    Raises:
        ValueError: if ``args.reduce_strategy`` is neither ``"reduce"`` nor
            ``"allreduce"``.
    """
    print('program changed: epoch: [%d], image size: [%d], trn_dir: [%s], batch_size:[%d]' % (epoch, sz, trn_dir, bs))
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    py_reader_startup_prog = fluid.Program()
    train_args = build_program(args, True, train_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale, False)
    test_args = build_program(args, False, test_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, val_bs, min_scale, rect_val)
    place = core.CUDAPlace(0)
    startup_exe = fluid.Executor(place)
    print("execute py_reader startup program")
    startup_exe.run(py_reader_startup_prog)
    if need_update_start_prog:
        print("execute startup program")
        startup_exe.run(startup_prog)
        if args.init_conv2d_kaiming:
            import torch
            conv2d_w_vars = [var for var in startup_prog.global_block().vars.values() if var.name.startswith('conv2d_')]
            for var in conv2d_w_vars:
                torch_w = torch.empty(var.shape)
                kaiming_np = torch.nn.init.kaiming_normal_(torch_w, mode='fan_out', nonlinearity='relu').numpy()
                tensor = fluid.global_scope().find_var(var.name).get_tensor()
                if args.fp16:
                    # fp16 tensors are set through their uint16 bit pattern.
                    tensor.set(np.array(kaiming_np, dtype="float16").view(np.uint16), place)
                else:
                    tensor.set(np.array(kaiming_np, dtype="float32"), place)
        # Per-channel mean/std scaled to the 0..255 range of the uint8 input.
        np_dtype = "float16" if args.fp16 else "float32"
        np_tensors = {}
        np_tensors["img_mean"] = np.array([0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]).astype(np_dtype).reshape((3, 1, 1))
        np_tensors["img_std"] = np.array([0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0]).astype(np_dtype).reshape((3, 1, 1))
        for vname, np_tensor in np_tensors.items():
            var = fluid.global_scope().find_var(vname)
            if args.fp16:
                var.get_tensor().set(np_tensor.view(np.uint16), place)
            else:
                var.get_tensor().set(np_tensor, place)
    if DEBUG_PROG:
        with open('/tmp/train_prog_pass%d' % epoch, 'w') as f:
            f.write(train_prog.to_string(True))
        with open('/tmp/test_prog_pass%d' % epoch, 'w') as f:
            f.write(test_prog.to_string(True))
        with open('/tmp/startup_prog_pass%d' % epoch, 'w') as f:
            f.write(startup_prog.to_string(True))
        with open('/tmp/py_reader_startup_prog_pass%d' % epoch, 'w') as f:
            f.write(py_reader_startup_prog.to_string(True))
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = args.num_threads
    strategy.allow_op_delay = False
    build_strategy = fluid.BuildStrategy()
    # Honour --reduce_strategy; its default ("allreduce") keeps the strategy
    # that used to be hard-coded here.
    reduce_name = getattr(args, "reduce_strategy", "allreduce")
    if reduce_name == "allreduce":
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
    elif reduce_name == "reduce":
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
    else:
        raise ValueError(
            "reduce_strategy must be 'reduce' or 'allreduce', got %r" % (reduce_name,))
    avg_loss = train_args[0]
    train_exe = fluid.ParallelExecutor(
        True,
        avg_loss.name,
        main_program=train_prog,
        exec_strategy=strategy,
        build_strategy=build_strategy)
    test_exe = fluid.ParallelExecutor(
        True, main_program=test_prog, share_vars_from=train_exe)
    return train_args, test_args, test_prog, train_exe, test_exe
# NOTE: only need to benchmark using parallelexe
def train_parallel(args):
    """Train for ``args.num_epochs`` passes, testing after every pass.

    The programs are rebuilt at passes 0, 13 and 25 with a larger image size
    and a smaller batch size; other passes reuse the current programs.
    """
    over_all_start = time.time()
    test_prog = fluid.Program()
    exe = None
    test_exe = None
    train_args = None
    test_args = None
    bs = 224
    val_bs = 64
    # Ask once: get_device_num() may have to launch nvidia-smi, which is far
    # too expensive to repeat on every training step.
    device_num = get_device_num()
    for pass_id in range(args.num_epochs):
        # program changed
        if pass_id == 0:
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=128, trn_dir="sz/160/", bs=bs, val_bs=val_bs, need_update_start_prog=True)
        elif pass_id == 13:  #13
            bs = 96
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=224, trn_dir="sz/352/", bs=bs, val_bs=val_bs, min_scale=0.087)
        elif pass_id == 25:  #25
            bs = 50
            val_bs = 4
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=288, trn_dir="", bs=bs, val_bs=val_bs, min_scale=0.5, rect_val=True)

        avg_loss = train_args[0]
        # Loss first, then the accuracies, then the learning rate.
        fetch_list = [avg_loss.name]
        fetch_list.extend(v.name for v in train_args[2])
        fetch_list.append("learning_rate")

        num_samples = 0
        iters = 0
        start_time = time.time()
        train_args[4].start()  # start pyreader
        while True:
            should_print = iters % args.log_period == 0
            fetch_ret = []
            try:
                if should_print:
                    fetch_ret = exe.run(fetch_list)
                else:
                    exe.run([])
            except fluid.core.EOFException:
                print("Finish current epoch, will reset pyreader...")
                train_args[4].reset()
                break
            except fluid.core.EnforceNotMet:
                traceback.print_exc()
                sys.exit(1)
            num_samples += bs * device_num
            if should_print:
                fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
                print("Pass %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d" %
                      (pass_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_args[4].queue.size()))
            iters += 1
        print_train_time(start_time, time.time(), num_samples)

        feed_list = [test_prog.global_block().var(varname) for varname in ("image", "label")]
        test_feeder = fluid.DataFeeder(feed_list=feed_list, place=fluid.CUDAPlace(0))
        # The test reader is batched with val_bs, so that is the batch size
        # the evaluation has to be told about, not the training one.
        test_ret = test_parallel(test_exe, test_args, args, test_prog, test_feeder, val_bs)
        print("Pass: %d, Test Accuracy: %s, Spend %.2f hours\n" %
              (pass_id, [np.mean(np.array(v)) for v in test_ret], (time.time() - over_all_start) / 3600))
    print("total train time: ", time.time() - over_all_start)
def print_train_time(start_time, end_time, num_samples):
    """Print the sample count, the elapsed time and the resulting throughput."""
    train_elapsed = end_time - start_time
    report = '\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' % (
        num_samples, train_elapsed, num_samples / train_elapsed)
    print(report)
def print_paddle_envs():
    """Print every environment variable whose name contains ``PADDLE_``."""
    print('----------- Configuration envs -----------')
    for k in os.environ:
        if "PADDLE_" in k:
            # Function-call form like the surrounding prints; the bare print
            # statement is a syntax error on Python 3.
            print("ENV %s:%s" % (k, os.environ[k]))
    print('------------------------------------------------')
def main():
    """Parse the options, print the configuration and run the training."""
    parsed = parse_args()
    print_arguments(parsed)
    print_paddle_envs()
    train_parallel(parsed)


if __name__ == "__main__":
    main()
\ No newline at end of file
......@@ -9,3 +9,4 @@ from .inception_v4 import InceptionV4
from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0
from .fast_resnet import FastResNet
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import time
import os
import math
import cProfile, pstats, StringIO
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import utils
## visreader for imagenet
import torchvision_reader
__all__ = ["FastResNet"]
class FastResNet(object):
    """Bottleneck ResNet (50/101/152 layers) built from fluid layers."""

    def __init__(self, layers=50):
        self.layers = layers

    def net(self, input, class_dim=1000, img_size=224, is_train=True):
        """Build the network on ``input`` and return the class logits.

        Args:
            input: image variable fed to the first convolution.
            class_dim: number of output classes.
            img_size: input image side length.
            is_train: build the training graph; when false every batch norm
                is created with ``is_test=True``.
        """
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        conv = self.conv_bn_layer(
            input=input, num_filters=64, filter_size=7, stride=2, act='relu', is_train=is_train)
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

        for block in range(len(depth)):
            for i in range(depth[block]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    # Downsample at the first unit of every stage but the first.
                    stride=2 if i == 0 and block != 0 else 1,
                    # Without this the blocks always ran batch norm in
                    # training mode, even in the test program.
                    is_train=is_train)

        pool_size = int(img_size / 32)
        pool = fluid.layers.pool2d(
            input=conv, pool_size=pool_size, pool_type='avg', global_pooling=True)
        out = fluid.layers.fc(input=pool,
                              size=class_dim,
                              act=None,
                              param_attr=fluid.param_attr.ParamAttr(
                                  initializer=fluid.initializer.NormalInitializer(0.0, 0.01),
                                  regularizer=fluid.regularizer.L2Decay(1e-4)),
                              bias_attr=fluid.ParamAttr(
                                  regularizer=fluid.regularizer.L2Decay(1e-4)))
        return out

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      bn_init_value=1.0,
                      is_train=True):
        """Bias-free convolution followed by batch norm with activation ``act``.

        ``bn_init_value`` is the constant the batch-norm scale starts from.
        """
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.MSRAInitializer(),
                regularizer=fluid.regularizer.L2Decay(1e-4)))
        return fluid.layers.batch_norm(input=conv, act=act, is_test=not is_train,
                                       param_attr=fluid.param_attr.ParamAttr(
                                           initializer=fluid.initializer.Constant(bn_init_value),
                                           regularizer=None))

    def shortcut(self, input, ch_out, stride, is_train=True):
        """Return ``input``, projected by a 1x1 conv when its shape must change."""
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            return self.conv_bn_layer(input, ch_out, 1, stride, is_train=is_train)
        return input

    def bottleneck_block(self, input, num_filters, stride, is_train=True):
        """1x1 -> 3x3 -> 1x1 bottleneck with a residual connection."""
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu',
            is_train=is_train)
        conv1 = self.conv_bn_layer(
            input=conv0,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu',
            is_train=is_train)
        # The scale of the last batch norm starts at zero.
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None,
            bn_init_value=0.0, is_train=is_train)
        short = self.shortcut(input, num_filters * 4, stride, is_train=is_train)
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def _model_reader_dshape_classdim(args, is_train, val_bs=None, sz=224, trn_dir="", min_scale=0.08, rect_val=False):
    """Return ``(None, reader, dshape, class_dim)`` for the ImageNet dataset.

    ``dshape`` is ``[3, sz, sz]`` for ``args.data_format == 'NCHW'`` and
    ``[sz, sz, 3]`` otherwise.

    Raises:
        ValueError: if ``args.data_set`` is not ``"imagenet"``.
    """
    if args.data_set != "imagenet":
        raise ValueError("only support imagenet dataset.")

    class_dim = 1000
    dshape = [3, sz, sz] if args.data_format == 'NCHW' else [sz, sz, 3]
    if is_train:
        reader = torchvision_reader.train(
            traindir="/data/imagenet/%strain" % trn_dir, sz=sz, min_scale=min_scale)
    else:
        reader = torchvision_reader.test(
            valdir="/data/imagenet/%svalidation" % trn_dir, bs=val_bs, sz=sz, rect_val=rect_val)
    return None, reader, dshape, class_dim
def lr_decay(lrs, epochs, bs, total_image, device_num=8):
    """Compute per-epoch boundaries and values of a piecewise LR schedule.

    Args:
        lrs: one ``(start_lr, end_lr)`` pair per stage followed by the final
            learning rate.
        epochs: one ``(start_epoch, end_epoch)`` pair per stage.
        bs: per-device batch size of every stage.
        total_image: number of training images per epoch.
        device_num: number of devices sharing a global batch (default 8).

    Returns:
        ``(boundaries, values)``: one cumulative step boundary per epoch and
        the learning rate used before each boundary, plus the final rate.
    """
    boundaries = []
    values = []
    for idx, epoch in enumerate(epochs):
        # Steps per epoch for the global batch size of this stage.
        step = math.ceil(total_image * 1.0 / (bs[idx] * device_num))
        # Learning-rate change per epoch within the stage.
        ratio = (lrs[idx][1] - lrs[idx][0]) / (epoch[1] - epoch[0])
        lr_base = lrs[idx][0]
        for s in range(epoch[0], epoch[1]):
            if boundaries:
                boundaries.append(boundaries[-1] + step)
            else:
                boundaries = [step]
            values.append(lr_base + ratio * (s - epoch[0]))
    values.append(lrs[-1])
    return boundaries, values
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册