From 5e363043f6bc2000cf470401f881ca3cbe8a9f44 Mon Sep 17 00:00:00 2001 From: zhouzj <41366441+zzjjay@users.noreply.github.com> Date: Tue, 22 Jun 2021 17:59:00 +0800 Subject: [PATCH] adjust batch size (#812) --- demo/distillation/distill.py | 23 ++++++++--------------- demo/dygraph/quant/train.py | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/demo/distillation/distill.py b/demo/distillation/distill.py index d7417470..5b362927 100644 --- a/demo/distillation/distill.py +++ b/demo/distillation/distill.py @@ -22,7 +22,7 @@ _logger.setLevel(logging.INFO) parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('batch_size', int, 64, "Minibatch size.") +add_arg('batch_size', int, 256, "Minibatch size.") add_arg('use_gpu', bool, True, "Whether to use GPU or not.") add_arg('save_inference', bool, False, "Whether to save inference model.") add_arg('total_images', int, 1281167, "Training image number.") @@ -45,12 +45,7 @@ model_list = [m for m in dir(models) if "__" not in m] def piecewise_decay(args): - if args.use_gpu: - devices_num = paddle.fluid.core.get_cuda_device_count() - else: - devices_num = int(os.environ.get('CPU_NUM', 1)) - step = int( - math.ceil(float(args.total_images) / args.batch_size) / devices_num) + step = int(math.ceil(float(args.total_images) / args.batch_size)) bd = [step * e for e in args.step_epochs] lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)] learning_rate = paddle.optimizer.lr.PiecewiseDecay( @@ -63,12 +58,7 @@ def piecewise_decay(args): def cosine_decay(args): - if args.use_gpu: - devices_num = paddle.fluid.core.get_cuda_device_count() - else: - devices_num = int(os.environ.get('CPU_NUM', 1)) - step = int( - math.ceil(float(args.total_images) / args.batch_size) / devices_num) + step = int(math.ceil(float(args.total_images) / args.batch_size)) learning_rate = paddle.optimizer.lr.CosineAnnealingDecay( learning_rate=args.lr, T_max=step * args.num_epochs, verbose=False) optimizer = paddle.optimizer.Momentum( @@ -108,7 +98,10 @@ def compress(args): places = paddle.static.cuda_places( ) if args.use_gpu else paddle.static.cpu_places() place = places[0] - + if args.use_gpu: + devices_num = paddle.fluid.core.get_cuda_device_count() + else: + devices_num = int(os.environ.get('CPU_NUM', 1)) with paddle.static.program_guard(student_program, s_startup): with paddle.fluid.unique_name.guard(): image = paddle.static.data( @@ -120,7 +113,7 @@ def compress(args): places=places, feed_list=[image, label], drop_last=True, - batch_size=args.batch_size, + batch_size=int(args.batch_size / devices_num), return_list=False, shuffle=True, use_shared_memory=True, diff --git a/demo/dygraph/quant/train.py b/demo/dygraph/quant/train.py index a9853653..55188e04 100644 --- a/demo/dygraph/quant/train.py +++ b/demo/dygraph/quant/train.py @@ -44,7 +44,7 @@ _logger = get_logger(__name__, level=logging.INFO) parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('batch_size', int, 256, "Single Card Minibatch size.") +add_arg('batch_size', int, 64, "Single Card Minibatch size.") add_arg('use_gpu', bool, True, "Whether to use GPU or not.") add_arg('model', str, "mobilenet_v3", "The target model.") add_arg('pretrained_model', str, "MobileNetV3_large_x1_0_ssld_pretrained", "Whether to use pretrained model.") -- GitLab