From 8d14b3953f50f1a311d9b02d8e24e616f485e085 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 6 Jun 2018 13:02:29 +0800 Subject: [PATCH] follow comments --- benchmark/fluid/fluid_benchmark.py | 33 ++++++++++++++----- benchmark/fluid/models/machine_translation.py | 2 +- benchmark/fluid/models/mnist.py | 2 +- benchmark/fluid/models/resnet.py | 2 +- .../fluid/models/stacked_dynamic_lstm.py | 2 +- benchmark/fluid/models/vgg.py | 2 +- 6 files changed, 30 insertions(+), 13 deletions(-) diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index 399f5fb49c1..c2771ba5db1 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -38,7 +38,10 @@ def parse_args(): default='resnet', help='The model to run benchmark with.') parser.add_argument( - '--batch_size', type=int, default=32, help='The minibatch size.') + '--batch_size', + type=int, + default=32, + help='The batch size on each gpu.') parser.add_argument( '--learning_rate', type=float, default=0.001, help='The learning rate.') parser.add_argument( @@ -229,27 +232,35 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, iters, num_samples, start_time = 0, 0, time.time() for pass_id in range(args.pass_num): train_losses = [] - reader_generator = train_reader() + if not args.use_reader_op: + reader_generator = train_reader() batch_id = 0 data = None while True: if not args.use_reader_op: data = next(reader_generator, None) - if iters == args.iterations or data == None: + if data == None: + break + if iters == args.iterations: break if iters == args.skip_batch_num: start_time = time.time() num_samples = 0 if args.use_reader_op: - loss = exe.run(train_prog, fetch_list=[avg_loss]) + try: + loss = exe.run(train_prog, fetch_list=[avg_loss]) + except fluid.core.EnforceNotMet as ex: + break else: loss = exe.run(train_prog, feed=feeder.feed(data), fetch_list=[avg_loss]) iters += 1 batch_id += 1 - # FIXME(wuyi): last batch size maybe different + # FIXME(wuyi): For use_reader_op, if the current + # pass is not the last, the last batch of this pass + # is also equal to args.batch_size. num_samples += len(args.batch_size) train_losses.append(loss) print("Pass: %d, Iter: %d, Loss: %f\n" % @@ -315,13 +326,16 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, num_samples = 0 iters = 0 start_time = time.time() - reader_generator = train_reader() + if not args.use_reader_op: + reader_generator = train_reader() batch_id = 0 data = None while True: if not args.use_reader_op: data = next(reader_generator, None) - if iters == args.iterations or data == None: + if data == None: + break + if iters == args.iterations: break if args.profile and pass_id == 0 and batch_id == 5: profiler.start_profiler("All") @@ -335,7 +349,10 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, if args.use_reader_op and iters >= args.iterations / args.gpus: break if args.use_fake_data or args.use_reader_op: - loss, = exe.run([avg_loss.name]) + try: + loss, = exe.run([avg_loss.name]) + except fluid.core.EnforceNotMet as ex: + break else: loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) if args.update_method == "pserver": diff --git a/benchmark/fluid/models/machine_translation.py b/benchmark/fluid/models/machine_translation.py index 3024882725d..69541adf6b7 100644 --- a/benchmark/fluid/models/machine_translation.py +++ b/benchmark/fluid/models/machine_translation.py @@ -223,7 +223,7 @@ def get_model(args): train_batch_generator = paddle.batch( paddle.reader.shuffle( paddle.dataset.wmt14.train(dict_size), buf_size=1000), - batch_size=args.batch_size) + batch_size=args.batch_size * args.gpus) test_batch_generator = paddle.batch( paddle.reader.shuffle( diff --git a/benchmark/fluid/models/mnist.py b/benchmark/fluid/models/mnist.py index 5d3da68dafa..54206c252ca 100644 --- a/benchmark/fluid/models/mnist.py +++ b/benchmark/fluid/models/mnist.py @@ -103,7 +103,7 @@ def get_model(args): # Reader train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=args.batch_size) + paddle.dataset.mnist.train(), batch_size=args.batch_size * args.gpus) test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=args.batch_size) return avg_cost, inference_program, opt, train_reader, test_reader, batch_acc diff --git a/benchmark/fluid/models/resnet.py b/benchmark/fluid/models/resnet.py index 47d8d026edc..3c87076724b 100644 --- a/benchmark/fluid/models/resnet.py +++ b/benchmark/fluid/models/resnet.py @@ -184,7 +184,7 @@ def get_model(args): batched_train_reader = paddle.batch( paddle.reader.shuffle( train_reader, buf_size=5120), - batch_size=args.batch_size) + batch_size=args.batch_size * args.gpus) batched_test_reader = paddle.batch(train_reader, batch_size=args.batch_size) return avg_cost, inference_program, optimizer, batched_train_reader, batched_test_reader, batch_acc diff --git a/benchmark/fluid/models/stacked_dynamic_lstm.py b/benchmark/fluid/models/stacked_dynamic_lstm.py index e2a8cf45ac3..211869af4e8 100644 --- a/benchmark/fluid/models/stacked_dynamic_lstm.py +++ b/benchmark/fluid/models/stacked_dynamic_lstm.py @@ -118,7 +118,7 @@ def get_model(args): train_reader = batch( paddle.reader.shuffle( crop_sentence(imdb.train(word_dict), crop_size), buf_size=25000), - batch_size=args.batch_size) + batch_size=args.batch_size * args.gpus) test_reader = batch( paddle.reader.shuffle( crop_sentence(imdb.test(word_dict), crop_size), buf_size=25000), diff --git a/benchmark/fluid/models/vgg.py b/benchmark/fluid/models/vgg.py index b84e118a88e..cb0dc977634 100644 --- a/benchmark/fluid/models/vgg.py +++ b/benchmark/fluid/models/vgg.py @@ -110,7 +110,7 @@ def get_model(args): paddle.dataset.cifar.train10() if args.data_set == 'cifar10' else paddle.dataset.flowers.train(), buf_size=5120), - batch_size=args.batch_size) + batch_size=args.batch_size * args.gpus) test_reader = paddle.batch( paddle.dataset.cifar.test10() if args.data_set == 'cifar10' else paddle.dataset.flowers.test(), -- GitLab