diff --git a/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py b/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py index ce6ee1706e3f5790894950e9cd00b863f5c0d5cf..1ec19e034ee0da7861f26ecde6b07fef761e4a9f 100644 --- a/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py +++ b/fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py @@ -17,7 +17,7 @@ TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", "0")) FINISH_EVENT = "FINISH_EVENT" class PaddleDataLoader(object): - def __init__(self, torch_dataset, indices=None, concurrent=16, queue_size=1024, shuffle=True, batch_size=224, is_distributed=True): + def __init__(self, torch_dataset, indices=None, concurrent=16, queue_size=3072, shuffle=True, batch_size=224, is_distributed=True): self.torch_dataset = torch_dataset self.data_queue = multiprocessing.Queue(queue_size) self.indices = indices @@ -54,7 +54,7 @@ class PaddleDataLoader(object): offset = TRAINER_ID * cnt_per_node worker_indices = self.indices[offset: (offset + cnt_per_node)] if len(worker_indices) % self.batch_size != 0: - worker_indices += worker_indices[:(self.batch_size - (len(worker_indices) % self.batch_size))] + worker_indices += worker_indices[-(self.batch_size - (len(worker_indices) % self.batch_size)):] print("shuffle: [%d], shuffle seed: [%d], worker indices len: [%d], %s" % (self.shuffle, self.shuffle_seed, len(worker_indices), worker_indices[:10])) cnt_per_thread = int(math.ceil(len(worker_indices) / self.concurrent)) diff --git a/fluid/PaddleCV/image_classification/fast_resnet/train.py b/fluid/PaddleCV/image_classification/fast_resnet/train.py index 6bc82cf8e5452568ffd315cd3c9bce372e2bd149..eb09245ff59e13c67c43412e7f4831e86f54d40e 100644 --- a/fluid/PaddleCV/image_classification/fast_resnet/train.py +++ b/fluid/PaddleCV/image_classification/fast_resnet/train.py @@ -111,7 +111,7 @@ def linear_lr_decay(lr_values, epochs, bs_values, total_images): linear_epoch = end_epoch - start_epoch start_lr, end_lr = lr_values[idx] linear_lr = end_lr - start_lr - steps = last_steps + linear_epoch * total_images / bs_values[idx] + steps = last_steps + linear_epoch * total_images / bs_values[idx] + 1 with switch.case(global_step < steps): decayed_lr = start_lr + linear_lr * ((global_step - last_steps)* 1.0/(steps - last_steps)) last_steps = steps @@ -167,6 +167,7 @@ def linear_lr_decay_by_epoch(lr_values, epochs, bs_values, total_images): fluid.layers.tensor.assign(last_value_var, lr) return lr + def test_parallel(exe, test_args, args, test_prog, feeder, bs): acc_evaluators = [] for i in xrange(len(test_args[2])): @@ -234,12 +235,9 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro name="train_reader_" + str(img_size) if is_train else "test_reader_" + str(img_size), use_double_buffer=True) input, label = fluid.layers.read_file(pyreader) - #pyreader.decorate_paddle_reader(paddle.batch(imagenet_reader.train(os.path.join(args.data_dir, trn_dir, "train")), batch_size=batch_size)) - #pyreader.decorate_paddle_reader(paddle.batch(dataloader.reader(), batch_size=batch_size)) else: input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8") label = fluid.layers.data(name="label", shape=[1], dtype="int64") - #batched_reader = paddle.batch(dataloader.reader(), batch_size=batch_size) cast_img_type = "float16" if args.fp16 else "float32" cast = fluid.layers.cast(input, cast_img_type) img_mean = fluid.layers.create_global_var([3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True) @@ -265,6 +263,7 @@ def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_pro bs_epoch = [x if is_mp_mode() else x * get_device_num() for x in [224, 224, 96, 96, 50]] lrs = [(1.0, 2.0), (2.0, 0.25), (0.42857142857142855, 0.04285714285714286), (0.04285714285714286, 0.004285714285714286), (0.0022321428571428575, 0.00022321428571428573), 0.00022321428571428573] images_per_worker = args.total_images / get_device_num() if is_mp_mode() else args.total_images + optimizer = fluid.optimizer.Momentum( learning_rate=linear_lr_decay_by_epoch(lrs, epochs, bs_epoch, images_per_worker), momentum=0.9, @@ -330,11 +329,10 @@ def refresh_program(args, epoch, sz, trn_dir, bs, val_bs, need_update_start_prog else: var.get_tensor().set(np_tensor, place) - strategy = fluid.ExecutionStrategy() strategy.num_threads = args.num_threads strategy.allow_op_delay = False - strategy.num_iteration_per_drop_scope = 30 + strategy.num_iteration_per_drop_scope = 1 build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy().ReduceStrategy.AllReduce @@ -389,6 +387,7 @@ def train_parallel(args): train_dataloader.shuffle_seed = pass_id + 1 train_args[4].start() # start pyreader batch_time_start = time.time() + samples_per_step = bs if is_mp_mode() else bs * get_device_num() while True: fetch_list = [avg_loss.name] acc_name_list = [v.name for v in train_args[2]] @@ -412,13 +411,12 @@ def train_parallel(args): except fluid.core.EnforceNotMet as ex: traceback.print_exc() exit(1) - - num_samples += bs if is_mp_mode() else bs * get_device_num() + num_samples += samples_per_step if should_print: fetched_data = [np.mean(np.array(d)) for d in fetch_ret] - print("Pass %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.2f " % - (pass_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_args[4].queue.size(), (time.time() - batch_time_start) * 1.0 / bs )) + print("Pass %d, batch %d, loss %s, accucacys: %s, learning_rate %s, py_reader queue_size: %d, avg batch time: %0.4f secs" % + (pass_id, iters, fetched_data[0], fetched_data[1:-1], fetched_data[-1], train_args[4].queue.size(), (time.time() - batch_time_start) * 1.0 / args.log_period )) batch_time_start = time.time() iters += 1