From 40950addd809c420ac2a07d9d0fcac80841f515c Mon Sep 17 00:00:00 2001 From: chenguowei01 Date: Fri, 19 Jun 2020 16:47:51 +0800 Subject: [PATCH] update train.py --- dygraph/train.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dygraph/train.py b/dygraph/train.py index 88b1ccb6..24ddbcc3 100644 --- a/dygraph/train.py +++ b/dygraph/train.py @@ -144,8 +144,6 @@ def train(model, return_list=True, ) - num_steps_each_epoch = len(train_dataset) // batch_size - for epoch in range(num_epochs): for step, data in enumerate(loader): images = data[0] @@ -165,8 +163,7 @@ def train(model, loss.numpy())) if ((epoch + 1) % save_interval_epochs == 0 - or num_steps_each_epoch == num_epochs - 1 - ) and ParallelEnv().local_rank == 0: + or epoch == num_epochs - 1) and ParallelEnv().local_rank == 0: current_save_dir = os.path.join(save_dir, "epoch_{}".format(epoch + 1)) if not os.path.isdir(current_save_dir): @@ -223,7 +220,10 @@ def main(args): num_classes=train_dataset.num_classes, ignore_index=255) # Creat optimizer - num_steps_each_epoch = len(train_dataset) // args.batch_size + # TODO: may be one less than len(loader) + num_steps_each_epoch = len(train_dataset) // ( args.batch_size * ParallelEnv().nranks) + print(num_steps_each_epoch, 'num_steps_each_epoch') decay_step = args.num_epochs * num_steps_each_epoch lr_decay = fluid.layers.polynomial_decay( args.learning_rate, decay_step, end_learning_rate=0, power=0.9) -- GitLab