Commit 4e55315e authored by chenguowei01

add time computation

Parent dbad6f4c
@@ -14,6 +14,7 @@
import argparse
import os
import time
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
@@ -27,6 +28,7 @@ import utils.logging as logging
from utils import get_environ_info
from utils import load_pretrained_model
from utils import resume
from utils import Timer, calculate_eta
from val import evaluate
@@ -111,6 +113,12 @@ def parse_args():
        dest='do_eval',
        help='Eval while training',
        action='store_true')
    parser.add_argument(
        '--log_steps',
        dest='log_steps',
        help='Display logging information at every log_steps',
        default=10,
        type=int)

    return parser.parse_args()
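The new --log_steps flag is consumed further down, where main passes log_steps=args.log_steps into train (see the call near the end of this diff). A hypothetical invocation; the script path and the remaining arguments are placeholders, not taken from this commit:

python train.py [other arguments] --log_steps 20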
@@ -126,6 +134,7 @@ def train(model,
          pretrained_model=None,
          resume_model=None,
          save_interval_epochs=1,
          log_steps=10,
          num_classes=None,
          num_workers=8):
    ignore_index = model.ignore_index
@@ -156,6 +165,10 @@
        return_list=True,
    )

    timer = Timer()
    timer.start()
    steps_per_epoch = len(batch_sampler)
    avg_loss = 0.0
    for epoch in range(start_epoch, num_epochs):
        for step, data in enumerate(loader):
            images = data[0]
@@ -170,11 +183,19 @@
            loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()
            lr = optimizer.current_step_lr()
            logging.info(
                "[TRAIN] Epoch={}/{}, Step={}/{}, loss={}, lr={}".format(
                    epoch + 1, num_epochs, step + 1, len(batch_sampler),
                    loss.numpy(), lr))
            if step % log_steps == 0:
                avg_loss /= log_steps
                time_step = timer.elapsed_time() / log_steps
                remain_step = (num_epochs - epoch) * steps_per_epoch - step + 1
                logging.info(
                    "[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, sec/step={:.4f} | ETA {}"
                    .format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
                            avg_loss, lr, time_step,
                            calculate_eta(remain_step, time_step)))
                avg_loss = 0.0
                timer.restart()

        if ((epoch + 1) % save_interval_epochs == 0
                or epoch == num_epochs - 1) and ParallelEnv().local_rank == 0:
@@ -260,6 +281,7 @@ def main(args):
        pretrained_model=args.pretrained_model,
        resume_model=args.resume_model,
        save_interval_epochs=args.save_interval_epochs,
        log_steps=args.log_steps,
        num_classes=train_dataset.num_classes,
        num_workers=args.num_workers)
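Summarizing the training-loop change above: every log_steps iterations the loop reports the mean loss over that window, the average seconds per step (the window's elapsed time divided by log_steps), and an ETA obtained by multiplying the estimated number of remaining steps by the per-step time. A minimal standalone sketch of the same pattern, using plain time.time() instead of the Timer class introduced below and placeholder work inside the loop:

import time

log_steps = 10
num_epochs, steps_per_epoch = 2, 50
avg_loss, window_start = 0.0, time.time()

for epoch in range(num_epochs):
    for step in range(steps_per_epoch):
        time.sleep(0.01)  # placeholder for the forward/backward/optimizer step
        avg_loss += 0.5   # placeholder for loss.numpy()
        if (step + 1) % log_steps == 0:
            sec_per_step = (time.time() - window_start) / log_steps
            remain = (num_epochs - epoch) * steps_per_epoch - step - 1
            eta = int(remain * sec_per_step)
            print("Epoch={}/{}, Step={}/{}, loss={:.4f}, sec/step={:.4f}, ETA={:02d}:{:02d}:{:02d}".format(
                epoch + 1, num_epochs, step + 1, steps_per_epoch,
                avg_loss / log_steps, sec_per_step,
                eta // 3600, eta % 3600 // 60, eta % 60))
            avg_loss, window_start = 0.0, time.time()

The sketch triggers on (step + 1) % log_steps so that each reported window covers exactly log_steps iterations.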
@@ -16,3 +16,4 @@ from . import logging
from . import download
from .metrics import ConfusionMatrix
from .utils import *
from .timer import Timer, calculate_eta
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time


class Timer(object):
    """Simple timer for measuring elapsed time."""

    def __init__(self):
        self._start_time = 0.0
        self._end_time = 0.0
        self._elapsed_time = 0.0
        self._is_running = False

    def start(self):
        self._is_running = True
        self._start_time = time.time()

    def restart(self):
        self.start()

    def stop(self):
        self._is_running = False
        self._end_time = time.time()

    def elapsed_time(self):
        self._end_time = time.time()
        self._elapsed_time = self._end_time - self._start_time
        if not self.is_running:
            return 0.0
        return self._elapsed_time

    @property
    def is_running(self):
        return self._is_running


def calculate_eta(remaining_step, speed):
    if remaining_step < 0:
        remaining_step = 0
    remaining_time = int(remaining_step * speed)
    result = "{:0>2}:{:0>2}:{:0>2}"
    arr = []
    for i in range(2, -1, -1):
        arr.append(int(remaining_time / 60**i))
        remaining_time %= 60**i
    return result.format(*arr)
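Putting the two pieces together: elapsed_time() returns the seconds since the last start()/restart(), and calculate_eta turns remaining_step * speed (a duration in seconds) into an HH:MM:SS string. For example, 1000 remaining steps at 0.5 s/step is 500 s, which formats as 00:08:20. A small usage sketch, assuming it runs from the repository root so that the utils package shown above (which re-exports the new timer module via from .timer import Timer, calculate_eta) is importable:

import time

from utils import Timer, calculate_eta

timer = Timer()
timer.start()
time.sleep(0.2)                           # stand-in for a window of training steps
sec_per_step = timer.elapsed_time() / 10  # average over a 10-step window
timer.restart()                           # begin timing the next window

print(calculate_eta(1000, 0.5))   # 500 s remaining -> 00:08:20
print(calculate_eta(-5, 0.5))     # negative input is clamped to zero -> 00:00:00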