From 56cfb006be5c3b8048336d6bd25662630d2e0537 Mon Sep 17 00:00:00 2001
From: suytingwan
Date: Mon, 11 May 2020 17:03:41 +0800
Subject: [PATCH] metric learning one_hot api usage update according to paddle 1.8 (#4596)

* test=develop update one_hot usage according to 1.8

* test=develop update dataloader api by 1.8

* test=develop fix image shape
---
 PaddleCV/metric_learning/eval.py             |  24 ++-
 PaddleCV/metric_learning/infer.py            |  19 +-
 .../metric_learning/losses/arcmarginloss.py  |   3 +-
 PaddleCV/metric_learning/reader.py           |   3 +-
 PaddleCV/metric_learning/train_elem.py       | 173 ++++++++++--------
 PaddleCV/metric_learning/train_pair.py       | 149 ++++++++-------
 6 files changed, 207 insertions(+), 164 deletions(-)

diff --git a/PaddleCV/metric_learning/eval.py b/PaddleCV/metric_learning/eval.py
index 79572ff9..a11e6bc1 100644
--- a/PaddleCV/metric_learning/eval.py
+++ b/PaddleCV/metric_learning/eval.py
@@ -52,8 +52,13 @@ def eval(args):
     assert model_name in model_list, "{} is not in lists: {}".format(args.model,
                                                                      model_list)
 
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    image = fluid.layers.data(name='image', shape=[None] + image_shape, dtype='float32')
+    label = fluid.layers.data(name='label', shape=[None, 1], dtype='int64')
+    test_loader = fluid.io.DataLoader.from_generator(
+        feed_list=[image, label],
+        capacity=64,
+        use_double_buffer=True,
+        iterable=True)
 
     # model definition
     model = models.__dict__[model_name]()
@@ -72,16 +77,21 @@ def eval(args):
         fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
 
-    test_reader = paddle.batch(reader.test(args), batch_size=args.batch_size, drop_last=False)
-    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
+    test_loader.set_sample_generator(
+        reader.test(args),
+        batch_size=args.batch_size,
+        drop_last=False,
+        places=place)
 
     fetch_list = [out.name]
 
     f, l = [], []
-    for batch_id, data in enumerate(test_reader()):
+    for batch_id, data in enumerate(test_loader()):
         t1 = time.time()
-        [feas] = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data))
-        label = np.asarray([x[1] for x in data])
+        [feas] = exe.run(test_program, fetch_list=fetch_list, feed=data)
+        label = np.asarray(data[0]['label'])
+        label = np.squeeze(label)
+
         f.append(feas)
         l.append(label)
 
diff --git a/PaddleCV/metric_learning/infer.py b/PaddleCV/metric_learning/infer.py
index e36382a9..4710cfc8 100644
--- a/PaddleCV/metric_learning/infer.py
+++ b/PaddleCV/metric_learning/infer.py
@@ -51,7 +51,13 @@ def infer(args):
     assert model_name in model_list, "{} is not in lists: {}".format(args.model,
                                                                      model_list)
 
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+    image = fluid.layers.data(name='image', shape=[None] + image_shape, dtype='float32')
+
+    infer_loader = fluid.io.DataLoader.from_generator(
+        feed_list=[image],
+        capacity=64,
+        use_double_buffer=True,
+        iterable=True)
 
     # model definition
     model = models.__dict__[model_name]()
@@ -70,13 +76,16 @@ def infer(args):
         fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
 
-    infer_reader = paddle.batch(reader.infer(args), batch_size=args.batch_size, drop_last=False)
-    feeder = fluid.DataFeeder(place=place, feed_list=[image])
+    infer_loader.set_sample_generator(
+        reader.test(args),
+        batch_size=args.batch_size,
+        drop_last=False,
+        places=place)
 
     fetch_list = [out.name]
 
-    for batch_id, data in enumerate(infer_reader()):
-        result = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data))
+    for batch_id, data in enumerate(infer_loader()):
+        result = exe.run(test_program, fetch_list=fetch_list, feed=data)
         result = result[0][0].reshape(-1)
         print("Test-{0}-feature: {1}".format(batch_id, result[:5]))
         sys.stdout.flush()
diff --git a/PaddleCV/metric_learning/losses/arcmarginloss.py b/PaddleCV/metric_learning/losses/arcmarginloss.py
index c765c039..a62d3d76 100644
--- a/PaddleCV/metric_learning/losses/arcmarginloss.py
+++ b/PaddleCV/metric_learning/losses/arcmarginloss.py
@@ -61,7 +61,8 @@ class ArcMarginLoss():
         else:
             phi = self.paddle_where_more_than(cosine, th, phi, cosine-mm)
 
-        one_hot = fluid.layers.one_hot(input=label, depth=out_dim)
+        one_hot = fluid.one_hot(input=label, depth=out_dim)
+        one_hot = fluid.layers.squeeze(input=one_hot, axes=[1])
         output = fluid.layers.elementwise_mul(one_hot, phi) + fluid.layers.elementwise_mul((1.0 - one_hot), cosine)
         output = output * s
         return output
diff --git a/PaddleCV/metric_learning/reader.py b/PaddleCV/metric_learning/reader.py
index 3f1f839d..81778ea6 100644
--- a/PaddleCV/metric_learning/reader.py
+++ b/PaddleCV/metric_learning/reader.py
@@ -22,6 +22,7 @@ import functools
 import numpy as np
 import paddle
 from imgtool import process_image
+import paddle.fluid as fluid
 
 random.seed(0)
 
@@ -187,7 +188,7 @@ def createreader(settings, mode):
     keep_order = False if mode != 'train' or settings.loss_name in ['softmax', 'arcmargin'] else True
     image_mapper = functools.partial(process_image,
         mode=mode, color_jitter=False, rotate=False, crop_size=image_size)
-    reader = paddle.reader.xmap_readers(
+    reader = fluid.io.xmap_readers(
         image_mapper, metric_reader, 8, 1000, order=keep_order)
     return reader
 
diff --git a/PaddleCV/metric_learning/train_elem.py b/PaddleCV/metric_learning/train_elem.py
index 83d93e3e..a06a3490 100644
--- a/PaddleCV/metric_learning/train_elem.py
+++ b/PaddleCV/metric_learning/train_elem.py
@@ -107,19 +107,16 @@ def build_program(is_train, main_prog, startup_prog, args):
     image_shape = [int(m) for m in args.image_shape.split(",")]
     model = models.__dict__[args.model]()
     with fluid.program_guard(main_prog, startup_prog):
-        if is_train:
-            queue_capacity = 64
-            py_reader = fluid.layers.py_reader(
+        queue_capacity = 64
+        image = fluid.layers.data(
+            name='image', shape=[None] + image_shape, dtype='float32')
+        label = fluid.layers.data(
+            name='label', shape=[None, 1], dtype='int64')
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=[image, label],
             capacity=queue_capacity,
-                shapes=[[-1] + image_shape, [-1, 1]],
-                lod_levels=[0, 0],
-                dtypes=["float32", "int64"],
-                use_double_buffer=True)
-            image, label = fluid.layers.read_file(py_reader)
-        else:
-            image = fluid.layers.data(
-                name='image', shape=image_shape, dtype='float32')
-            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            use_double_buffer=True,
+            iterable=True)
 
         with fluid.unique_name.guard():
             avg_cost, acc_top1, acc_top5, out = net_config(image, label, model,
@@ -137,9 +134,9 @@ def build_program(is_train, main_prog, startup_prog, args):
         main_prog = main_prog.clone(for_test=True)
     """
     if is_train:
-        return py_reader, avg_cost, acc_top1, acc_top5, global_lr
+        return loader, avg_cost, acc_top1, acc_top5, global_lr
     else:
-        return out, image, label
+        return loader, out
 
 
 def train_async(args):
@@ -163,12 +160,12 @@ def train_async(args):
     train_prog.random_seed = 1000
     tmp_prog.random_seed = 1000
 
-    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
+    train_loader, train_cost, train_acc1, train_acc5, global_lr = build_program(
         is_train=True,
         main_prog=train_prog,
         startup_prog=startup_prog,
         args=args)
-    test_feas, image, label = build_program(
+    test_loader, test_feas = build_program(
         is_train=False,
         main_prog=tmp_prog,
         startup_prog=startup_prog,
@@ -182,6 +179,11 @@ def train_async(args):
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
+    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
+    if num_trainers <= 1 and args.use_gpu:
+        places = fluid.framework.cuda_places()
+    else:
+        places = place
 
     exe.run(startup_prog)
 
@@ -206,12 +208,17 @@ def train_async(args):
     train_batch_size = args.train_batch_size // devicenum
     test_batch_size = args.test_batch_size
 
-    train_reader = paddle.batch(
-        reader.train(args), batch_size=train_batch_size, drop_last=True)
-    test_reader = paddle.batch(
-        reader.test(args), batch_size=test_batch_size, drop_last=False)
-    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
-    train_py_reader.decorate_paddle_reader(train_reader)
+    train_loader.set_sample_generator(
+        reader.train(args),
+        batch_size=train_batch_size,
+        drop_last=True,
+        places=places)
+
+    test_loader.set_sample_generator(
+        reader.test(args),
+        batch_size=test_batch_size,
+        drop_last=False,
+        places=place)
 
     train_exe = fluid.ParallelExecutor(
         main_program=train_prog,
@@ -219,72 +226,76 @@ def train_async(args):
         loss_name=train_cost.name)
 
     totalruntime = 0
-    train_py_reader.start()
     iter_no = 0
     train_info = [0, 0, 0, 0]
     while iter_no <= args.total_iter_num:
-        t1 = time.time()
-        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
-        t2 = time.time()
-        period = t2 - t1
-        lr = np.mean(np.array(lr))
-        train_info[0] += np.mean(np.array(loss))
-        train_info[1] += np.mean(np.array(acc1))
-        train_info[2] += np.mean(np.array(acc5))
-        train_info[3] += 1
-        if iter_no % args.display_iter_step == 0:
-            avgruntime = totalruntime / args.display_iter_step
-            avg_loss = train_info[0] / train_info[3]
-            avg_acc1 = train_info[1] / train_info[3]
-            avg_acc5 = train_info[2] / train_info[3]
-            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
+        for train_batch in train_loader():
+            t1 = time.time()
+            lr, loss, acc1, acc5 = train_exe.run(
+                feed=train_batch,
+                fetch_list=train_fetch_list)
+            t2 = time.time()
+            period = t2 - t1
+            lr = np.mean(np.array(lr))
+            train_info[0] += np.mean(np.array(loss))
+            train_info[1] += np.mean(np.array(acc1))
+            train_info[2] += np.mean(np.array(acc5))
+            train_info[3] += 1
+            if iter_no % args.display_iter_step == 0:
+                avgruntime = totalruntime / args.display_iter_step
+                avg_loss = train_info[0] / train_info[3]
+                avg_acc1 = train_info[1] / train_info[3]
+                avg_acc5 = train_info[2] / train_info[3]
+                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                 "acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
                 (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5, avgruntime))
-            sys.stdout.flush()
-            totalruntime = 0
-        if iter_no % 1000 == 0:
-            train_info = [0, 0, 0, 0]
-
-        totalruntime += period
-
-        if iter_no % args.test_iter_step == 0 and iter_no != 0:
-            f, l = [], []
-            for batch_id, data in enumerate(test_reader()):
-                t1 = time.time()
-                [feas] = exe.run(test_prog,
-                                 fetch_list=test_fetch_list,
-                                 feed=test_feeder.feed(data))
-                label = np.asarray([x[1] for x in data])
-                f.append(feas)
-                l.append(label)
-
-                t2 = time.time()
-                period = t2 - t1
-                if batch_id % 20 == 0:
-                    print("[%s] testbatch %d, time %2.2f sec" % \
+                sys.stdout.flush()
+                totalruntime = 0
+            if iter_no % 1000 == 0:
+                train_info = [0, 0, 0, 0]
+
+            totalruntime += period
+
+            if iter_no % args.test_iter_step == 0 and iter_no != 0:
+                f, l = [], []
+                for batch_id, test_batch in enumerate(test_loader()):
+                    t1 = time.time()
+                    [feas] = exe.run(test_prog,
+                                     feed=test_batch,
+                                     fetch_list=test_fetch_list)
+
+                    label = np.asarray(test_batch[0]['label'])
+                    label = np.squeeze(label)
+                    f.append(feas)
+                    l.append(label)
+
+                    t2 = time.time()
+                    period = t2 - t1
+                    if batch_id % 20 == 0:
+                        print("[%s] testbatch %d, time %2.2f sec" % \
                         (fmt_time(), batch_id, period))
-            f = np.vstack(f)
-            l = np.hstack(l)
-            recall = recall_topk(f, l, k=1)
-            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
-                  (fmt_time(), len(f), iter_no, recall))
-            sys.stdout.flush()
-
-        if iter_no % args.save_iter_step == 0 and iter_no != 0:
-            model_path = os.path.join(model_save_dir + '/' + model_name,
-                                      str(iter_no))
-            if not os.path.isdir(model_path):
-                os.makedirs(model_path)
-            fluid.io.save_persistables(exe, model_path, main_program=train_prog)
-
-        iter_no += 1
-
-    # This is for continuous evaluation only
-    if args.enable_ce:
-        # Use the mean cost/acc for training
-        print("kpis\ttrain_cost\t{}".format(avg_loss))
-        print("kpis\ttest_recall\t{}".format(recall))
+                f = np.vstack(f)
+                l = np.hstack(l)
+                recall = recall_topk(f, l, k=1)
+                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
+                      (fmt_time(), len(f), iter_no, recall))
+                sys.stdout.flush()
+
+            if iter_no % args.save_iter_step == 0 and iter_no != 0:
+                model_path = os.path.join(model_save_dir + '/' + model_name,
+                                          str(iter_no))
+                if not os.path.isdir(model_path):
+                    os.makedirs(model_path)
+                fluid.io.save_persistables(exe, model_path, main_program=train_prog)
+
+            iter_no += 1
+
+            # This is for continuous evaluation only
+            if args.enable_ce:
+                # Use the mean cost/acc for training
+                print("kpis\ttrain_cost\t{}".format(avg_loss))
+                print("kpis\ttest_recall\t{}".format(recall))
 
 
 def initlogging():
diff --git a/PaddleCV/metric_learning/train_pair.py b/PaddleCV/metric_learning/train_pair.py
index 599f0322..9bc2b5be 100644
--- a/PaddleCV/metric_learning/train_pair.py
+++ b/PaddleCV/metric_learning/train_pair.py
@@ -114,19 +114,16 @@ def build_program(is_train, main_prog, startup_prog, args):
     image_shape = [int(m) for m in args.image_shape.split(",")]
     model = models.__dict__[args.model]()
     with fluid.program_guard(main_prog, startup_prog):
-        if is_train:
-            queue_capacity = 64
-            py_reader = fluid.layers.py_reader(
+        queue_capacity = 64
+        image = fluid.layers.data(
+            name='image', shape=[None] + image_shape, dtype='float32')
+        label = fluid.layers.data(
+            name='label', shape=[None, 1], dtype='int64')
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=[image, label],
             capacity=queue_capacity,
-                shapes=[[-1] + image_shape, [-1, 1]],
-                lod_levels=[0, 0],
-                dtypes=["float32", "int64"],
-                use_double_buffer=True)
-            image, label = fluid.layers.read_file(py_reader)
-        else:
-            image = fluid.layers.data(
-                name='image', shape=image_shape, dtype='float32')
-            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            use_double_buffer=True,
+            iterable=True)
 
         with fluid.unique_name.guard():
             avg_cost, out = net_config(image, label, model, args, is_train)
@@ -143,9 +140,9 @@ def build_program(is_train, main_prog, startup_prog, args):
         main_prog = main_prog.clone(for_test=True)
     """
    if is_train:
-        return py_reader, avg_cost, global_lr, out, label
+        return loader, avg_cost, global_lr, out, label
     else:
-        return out, image, label
+        return loader, out
 
 
 def train_async(args):
@@ -161,12 +158,12 @@ def train_async(args):
     train_prog = fluid.Program()
     tmp_prog = fluid.Program()
 
-    train_py_reader, train_cost, global_lr, train_feas, train_label = build_program(
+    train_loader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
         main_prog=train_prog,
         startup_prog=startup_prog,
         args=args)
-    test_feas, image, label = build_program(
+    test_loader, test_feas = build_program(
         is_train=False,
         main_prog=tmp_prog,
         startup_prog=startup_prog,
@@ -180,6 +177,11 @@ def train_async(args):
 
     place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
+    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
+    if num_trainers <= 1 and args.use_gpu:
+        places = fluid.framework.cuda_places()
+    else:
+        places = place
 
     exe.run(startup_prog)
 
@@ -204,12 +206,17 @@ def train_async(args):
     train_batch_size = args.train_batch_size / devicenum
     test_batch_size = args.test_batch_size
 
-    train_reader = paddle.batch(
-        reader.train(args), batch_size=train_batch_size, drop_last=True)
-    test_reader = paddle.batch(
-        reader.test(args), batch_size=test_batch_size, drop_last=False)
-    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
-    train_py_reader.decorate_paddle_reader(train_reader)
+    train_loader.set_sample_generator(
+        reader.train(args),
+        batch_size=train_batch_size,
+        drop_last=True,
+        places=places)
+
+    test_loader.set_sample_generator(
+        reader.test(args),
+        batch_size=test_batch_size,
+        drop_last=False,
+        places=place)
 
     train_exe = fluid.ParallelExecutor(
         main_program=train_prog,
@@ -217,64 +224,68 @@ def train_async(args):
         loss_name=train_cost.name)
 
     totalruntime = 0
-    train_py_reader.start()
     iter_no = 0
     train_info = [0, 0, 0]
     while iter_no <= args.total_iter_num:
-        t1 = time.time()
-        lr, loss, feas, label = train_exe.run(fetch_list=train_fetch_list)
-        t2 = time.time()
-        period = t2 - t1
-        lr = np.mean(np.array(lr))
-        train_info[0] += np.mean(np.array(loss))
-        train_info[1] += recall_topk(feas, label, k=1)
-        train_info[2] += 1
-        if iter_no % args.display_iter_step == 0:
-            avgruntime = totalruntime / args.display_iter_step
-            avg_loss = train_info[0] / train_info[2]
-            avg_recall = train_info[1] / train_info[2]
-            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
+        for train_batch in train_loader():
+            t1 = time.time()
+            lr, loss, feas, label = train_exe.run(
+                feed=train_batch,
+                fetch_list=train_fetch_list)
+            t2 = time.time()
+            period = t2 - t1
+            lr = np.mean(np.array(lr))
+            train_info[0] += np.mean(np.array(loss))
+            train_info[1] += recall_topk(feas, label, k=1)
+            train_info[2] += 1
+            if iter_no % args.display_iter_step == 0:
+                avgruntime = totalruntime / args.display_iter_step
+                avg_loss = train_info[0] / train_info[2]
+                avg_recall = train_info[1] / train_info[2]
+                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                 "recall %.4f, time %2.2f sec" % \
                 (fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
-            sys.stdout.flush()
-            totalruntime = 0
-        if iter_no % 1000 == 0:
-            train_info = [0, 0, 0]
-
-        totalruntime += period
-
-        if iter_no % args.test_iter_step == 0 and iter_no != 0:
-            f, l = [], []
-            for batch_id, data in enumerate(test_reader()):
-                t1 = time.time()
-                [feas] = exe.run(test_prog,
-                                 fetch_list=test_fetch_list,
-                                 feed=test_feeder.feed(data))
-                label = np.asarray([x[1] for x in data])
-                f.append(feas)
-                l.append(label)
-
-                t2 = time.time()
-                period = t2 - t1
-                if batch_id % 20 == 0:
-                    print("[%s] testbatch %d, time %2.2f sec" % \
+                sys.stdout.flush()
+                totalruntime = 0
+            if iter_no % 1000 == 0:
+                train_info = [0, 0, 0]
+
+            totalruntime += period
+
+            if iter_no % args.test_iter_step == 0 and iter_no != 0:
+                f, l = [], []
+                for batch_id, test_batch in enumerate(test_loader()):
+                    t1 = time.time()
+                    [feas] = exe.run(test_prog,
+                                     feed=test_batch,
+                                     fetch_list=test_fetch_list)
+
+                    label = np.asarray(test_batch[0]['label'])
+                    label = np.squeeze(label)
+                    f.append(feas)
+                    l.append(label)
+
+                    t2 = time.time()
+                    period = t2 - t1
+                    if batch_id % 20 == 0:
+                        print("[%s] testbatch %d, time %2.2f sec" % \
                         (fmt_time(), batch_id, period))
-            f = np.vstack(f)
-            l = np.hstack(l)
-            recall = recall_topk(f, l, k=1)
-            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
+                f = np.vstack(f)
+                l = np.hstack(l)
+                recall = recall_topk(f, l, k=1)
+                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                   (fmt_time(), len(f), iter_no, recall))
-            sys.stdout.flush()
+                sys.stdout.flush()
 
-        if iter_no % args.save_iter_step == 0 and iter_no != 0:
-            model_path = os.path.join(model_save_dir + '/' + model_name,
+            if iter_no % args.save_iter_step == 0 and iter_no != 0:
+                model_path = os.path.join(model_save_dir + '/' + model_name,
                                       str(iter_no))
-            if not os.path.isdir(model_path):
-                os.makedirs(model_path)
-            fluid.io.save_persistables(exe, model_path, main_program=train_prog)
+                if not os.path.isdir(model_path):
+                    os.makedirs(model_path)
+                fluid.io.save_persistables(exe, model_path, main_program=train_prog)
 
-        iter_no += 1
+            iter_no += 1
 
 
 def initlogging():
-- 
GitLab
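For readers applying the same migration elsewhere, the following minimal sketch pulls the two API changes above into one self-contained script. It assumes PaddlePaddle 1.8.x; IMAGE_SHAPE, CLASS_DIM and sample_generator are illustrative stand-ins (not names from this repository) for args.image_shape, the dataset's class count, and reader.train(args)/reader.test(args).

    # Minimal sketch of the Paddle 1.8 APIs adopted by the patch above.
    import numpy as np
    import paddle.fluid as fluid

    IMAGE_SHAPE = [3, 224, 224]   # assumed c,h,w input shape
    CLASS_DIM = 10                # assumed number of classes

    def sample_generator():
        # Yields one (image, label) pair per call, as set_sample_generator expects.
        for _ in range(32):
            img = np.random.rand(*IMAGE_SHAPE).astype('float32')
            lbl = np.random.randint(0, CLASS_DIM, size=[1]).astype('int64')
            yield img, lbl

    image = fluid.layers.data(name='image', shape=[None] + IMAGE_SHAPE, dtype='float32')
    label = fluid.layers.data(name='label', shape=[None, 1], dtype='int64')

    # DataLoader.from_generator replaces py_reader and paddle.batch + DataFeeder.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label], capacity=64, use_double_buffer=True, iterable=True)

    place = fluid.CPUPlace()
    loader.set_sample_generator(
        sample_generator, batch_size=8, drop_last=True, places=place)

    # fluid.one_hot keeps the label's trailing dimension ([N, 1] -> [N, 1, CLASS_DIM]),
    # so squeeze axis 1 before element-wise ops, as arcmarginloss.py now does.
    one_hot = fluid.one_hot(input=label, depth=CLASS_DIM)
    one_hot = fluid.layers.squeeze(input=one_hot, axes=[1])

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    for batch in loader():
        [oh] = exe.run(fluid.default_main_program(), feed=batch, fetch_list=[one_hot])
        print(oh.shape)  # (8, CLASS_DIM)

Because the loader is created with iterable=True, a plain Python for loop drives it and each yielded batch feeds the executor directly; that is what lets the patch drop DataFeeder, py_reader.start() and decorate_paddle_reader.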