From 3aa8cb2f3b636a3828cdc37da0d7e1c79a79d563 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 12 Mar 2019 20:44:05 +0800 Subject: [PATCH] clean up code --- .../fast_resnet/torchvision_reader.py | 24 ++------ .../image_classification/fast_resnet/train.py | 59 ++++++++++++------- .../models/fast_resnet.py | 45 +------------- 3 files changed, 43 insertions(+), 85 deletions(-) diff --git a/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py b/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py index ac8a8396..c1b0fb95 100644 --- a/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py +++ b/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py @@ -18,12 +18,13 @@ import multiprocessing FINISH_EVENT = "FINISH_EVENT" class PaddleDataLoader(object): - def __init__(self, torch_dataset, indices=None, concurrent=24, queue_size=3072, shuffle=True): + def __init__(self, torch_dataset, indices=None, concurrent=24, queue_size=3072, shuffle=True, shuffle_seed=0): self.torch_dataset = torch_dataset self.data_queue = multiprocessing.Queue(queue_size) self.indices = indices self.concurrent = concurrent self.shuffle = shuffle + self.shuffle_seed=shuffle_seed def _worker_loop(self, dataset, worker_indices, worker_id): cnt = 0 @@ -40,10 +41,9 @@ class PaddleDataLoader(object): worker_processes = [] total_img = len(self.torch_dataset) print("total image: ", total_img) - #if self.indices is None: if self.shuffle: self.indices = [i for i in xrange(total_img)] - random.seed(time.time()) + random.seed(self.shuffle_seed) random.shuffle(self.indices) print("shuffle indices: %s ..." % self.indices[:10]) @@ -70,13 +70,13 @@ class PaddleDataLoader(object): return _reader_creator -def train(traindir, sz, min_scale=0.08): +def train(traindir, sz, min_scale=0.08, shuffle_seed=0): train_tfms = [ transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)), transforms.RandomHorizontalFlip() ] train_dataset = datasets.ImageFolder(traindir, transforms.Compose(train_tfms)) - return PaddleDataLoader(train_dataset).reader() + return PaddleDataLoader(train_dataset, shuffle_seed=shuffle_seed).reader() def test(valdir, bs, sz, rect_val=False): if rect_val: @@ -155,17 +155,3 @@ def map_idx2ar(idx_ar_sorted, batch_size): for idx in idxs: idx2ar[idx] = mean return idx2ar - -if __name__ == "__main__": - #ds, sampler = create_validation_set("/data/imagenet/validation", 128, 288, True, True) - #for item in sampler: - # for idx in item: - # ds[idx] - - import time - test_reader = test(valdir="/data/imagenet/validation", bs=64, sz=288, rect_val=True) - start_ts = time.time() - for idx, data in enumerate(test_reader()): - print(idx, data[0], data[0].shape, data[1]) - if idx == 2: - break \ No newline at end of file diff --git a/fluid/PaddleCV/image_classification/fast_resnet/train.py b/fluid/PaddleCV/image_classification/fast_resnet/train.py index 2da9390c..22d224f9 100644 --- a/fluid/PaddleCV/image_classification/fast_resnet/train.py +++ b/fluid/PaddleCV/image_classification/fast_resnet/train.py @@ -75,13 +75,12 @@ def get_device_num(): DEVICE_NUM = get_device_num() -def test_parallel(exe, test_args, args, test_prog, feeder, bs): +def test_parallel(exe, test_args, args, test_reader, feeder, bs): acc_evaluators = [] for i in xrange(len(test_args[2])): acc_evaluators.append(fluid.metrics.Accuracy()) to_fetch = [v.name for v in test_args[2]] - test_reader = test_args[3] batch_id = 0 start_ts = time.time() for batch_id, data in enumerate(test_reader()): @@ -100,15 +99,8 @@ def test_parallel(exe, test_args, args, test_prog, feeder, bs): def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_prog, sz, trn_dir, bs, min_scale, rect_val=False): dshape=[3, sz, sz] - class_dim=1000 - if is_train: - reader = torchvision_reader.train( - traindir="/data/imagenet/%strain" % trn_dir, sz=sz, min_scale=min_scale) - else: - reader = torchvision_reader.test( - valdir="/data/imagenet/%svalidation" % trn_dir, bs=bs*DEVICE_NUM, sz=sz, rect_val=rect_val) + class_dim=1000 pyreader = None - batched_reader = None with fluid.program_guard(main_prog, startup_prog): with fluid.unique_name.guard(): if is_train: @@ -121,11 +113,9 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro name="train_reader_" + str(sz) if is_train else "test_reader_" + str(sz), use_double_buffer=True) input, label = fluid.layers.read_file(pyreader) - pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=bs)) else: input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8") label = fluid.layers.data(name="label", shape=[1], dtype="int64") - batched_reader = paddle.batch(reader, batch_size=bs * DEVICE_NUM) cast_img_type = "float16" if args.fp16 else "float32" cast = fluid.layers.cast(input, cast_img_type) img_mean = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True) @@ -173,8 +163,7 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro if args.memory_optimize: fluid.memory_optimize(main_prog, skip_grads=True) - return avg_cost, optimizer, [batch_acc1, - batch_acc5], batched_reader, pyreader, py_reader_startup_prog + return avg_cost, optimizer, [batch_acc1, batch_acc5], pyreader def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog=False, min_scale=0.08, rect_val=False): @@ -233,6 +222,18 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog return train_args, test_args, test_prog, train_exe, test_exe +def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir, img_dim, min_scale, rect_val): + train_reader = torchvision_reader.train( + traindir="/data/imagenet/%strain" % trn_dir, sz=img_dim, min_scale=min_scale, shuffle_seed=epoch_id+1) + train_py_reader.decorate_paddle_reader(paddle.batch(train_reader, batch_size=train_bs)) + + test_reader = torchvision_reader.test( + valdir="/data/imagenet/%svalidation" % trn_dir, bs=val_bs*DEVICE_NUM, sz=img_dim, rect_val=rect_val) + test_batched_reader = paddle.batch(test_reader, batch_size=val_bs * DEVICE_NUM) + + return test_batched_reader + + # NOTE: only need to benchmark using parallelexe def train_parallel(args): over_all_start = time.time() @@ -242,19 +243,31 @@ def train_parallel(args): test_exe = None train_args = None test_args = None + ## dynamic batch size, image size... bs = 224 val_bs = 64 + trn_dir = "sz/160/" + img_dim=128 + min_scale=0.08 + rect_val=False for epoch_id in range(args.num_epochs): - # program changed + # refresh program if epoch_id == 0: - train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=128, trn_dir="sz/160/", bs=bs, val_bs=val_bs, need_update_start_prog=True) + train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, need_update_start_prog=True) elif epoch_id == 13: #13 bs = 96 - train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=224, trn_dir="sz/352/", bs=bs, val_bs=val_bs, min_scale=0.087) + trn_dir="sz/352/" + img_dim=224 + min_scale=0.087 + train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale) elif epoch_id == 25: #25 bs = 50 val_bs=8 - train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=288, trn_dir="", bs=bs, val_bs=val_bs, min_scale=0.5, rect_val=True) + trn_dir="" + img_dim=288 + min_scale=0.5 + rect_val=True + train_args, test_args, test_prog, exe, test_exe = refresh_program(args, epoch_id, sz=img_dim, trn_dir=trn_dir, bs=bs, val_bs=val_bs, min_scale=min_scale, rect_val=rect_val) else: pass @@ -262,7 +275,9 @@ def train_parallel(args): num_samples = 0 iters = 0 start_time = time.time() - train_args[4].start() # start pyreader + train_py_reader = train_args[3] + test_reader = prepare_reader(epoch_id, train_py_reader, bs, val_bs, trn_dir, img_dim=img_dim, min_scale=min_scale, rect_val=rect_val) + train_py_reader.start() # start pyreader batch_start_time = time.time() while True: fetch_list = [avg_loss.name] @@ -282,7 +297,7 @@ def train_parallel(args): exe.run([]) except fluid.core.EOFException as eof: print("Finish current epoch, will reset pyreader...") - train_args[4].reset() + train_py_reader.reset() break except fluid.core.EnforceNotMet as ex: traceback.print_exc() @@ -293,14 +308,14 @@ def train_parallel(args): if should_print: fetched_data = [np.mean(np.array(d)) for d in fetch_ret] print("Epoch %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.4f secs" % - (epoch_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_args[4].queue.size(), (time.time() - batch_start_time)*1.0/args.log_period)) + (epoch_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_py_reader.queue.size(), (time.time() - batch_start_time)*1.0/args.log_period)) batch_start_time = time.time() iters += 1 print_train_time(start_time, time.time(), num_samples) feed_list = [test_prog.global_block().var(varname) for varname in ("image", "label")] test_feeder = fluid.DataFeeder(feed_list=feed_list, place=fluid.CUDAPlace(0)) - test_ret = test_parallel(test_exe, test_args, args, test_prog, test_feeder, val_bs) + test_ret = test_parallel(test_exe, test_args, args, test_reader, test_feeder, val_bs) test_acc1, test_acc5 = [np.mean(np.array(v)) for v in test_ret] print("Epoch: %d, Test Accuracy: %s, Spend %.2f hours\n" % (epoch_id, [test_acc1, test_acc5], (time.time() - over_all_start) / 3600)) diff --git a/fluid/PaddleCV/image_classification/models/fast_resnet.py b/fluid/PaddleCV/image_classification/models/fast_resnet.py index 65a9df92..0cfb8e3c 100644 --- a/fluid/PaddleCV/image_classification/models/fast_resnet.py +++ b/fluid/PaddleCV/image_classification/models/fast_resnet.py @@ -137,7 +137,7 @@ def lr_decay(lrs, epochs, bs, total_image): lr_base = lrs[idx][0] for s in xrange(epoch[0], epoch[1]): if boundaries: - boundaries.append(boundaries[-1] + step) + boundaries.append(boundaries[-1] + step + 1) else: boundaries = [step] lr = lr_base + ratio * (s - epoch[0]) @@ -146,46 +146,3 @@ def lr_decay(lrs, epochs, bs, total_image): values.append(lrs[-1]) print("epoch: [%d:], steps: [%d:], lr:[%f]" % (epochs[-1][-1], boundaries[-1], values[-1])) return boundaries, values - -def linear_lr_decay_by_epoch(lr_values, epochs, bs_values, total_images): - from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter - import paddle.fluid.layers.tensor as tensor - import math - - with paddle.fluid.default_main_program()._lr_schedule_guard(): - global_step = _decay_step_counter() - - lr = tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - with fluid.layers.control_flow.Switch() as switch: - last_steps = 0 - for idx, epoch_bound in enumerate(epochs): - start_epoch, end_epoch = epoch_bound - linear_epoch = end_epoch - start_epoch - start_lr, end_lr = lr_values[idx] - linear_lr = end_lr - start_lr - for epoch_step in xrange(linear_epoch): - steps = last_steps + (1 + epoch_step) * total_images / bs_values[idx] + 1 - boundary_val = tensor.fill_constant( - shape=[1], - dtype='float32', - value=float(steps), - force_cpu=True) - decayed_lr = start_lr + epoch_step * linear_lr * 1.0 / linear_epoch - with switch.case(global_step < boundary_val): - value_var = tensor.fill_constant(shape=[1], dtype='float32', value=float(decayed_lr)) - print("steps: [%d], epoch : [%d], decayed_lr: [%f]" % (steps, start_epoch + epoch_step, decayed_lr)) - fluid.layers.tensor.assign(value_var, lr) - last_steps = steps - last_value_var = tensor.fill_constant( - shape=[1], - dtype='float32', - value=float(lr_values[-1])) - with switch.default(): - fluid.layers.tensor.assign(last_value_var, lr) - - return lr \ No newline at end of file -- GitLab