提交 d3183617 编写于 作者: C chenguowei01

Add per-step timing (sec/step) and ETA computation to the training and evaluation logs

上级 dbad6f4c
......@@ -14,6 +14,7 @@
import argparse
import os
import time
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
......@@ -27,6 +28,7 @@ import utils.logging as logging
from utils import get_environ_info
from utils import load_pretrained_model
from utils import resume
from utils import Timer, calculate_eta
from val import evaluate
......@@ -111,6 +113,12 @@ def parse_args():
dest='do_eval',
help='Eval while training',
action='store_true')
parser.add_argument(
'--log_steps',
dest='log_steps',
help='Display logging information at every log_steps',
default=10,
type=int)
return parser.parse_args()
......@@ -126,6 +134,7 @@ def train(model,
pretrained_model=None,
resume_model=None,
save_interval_epochs=1,
log_steps=10,
num_classes=None,
num_workers=8):
ignore_index = model.ignore_index
......@@ -156,6 +165,10 @@ def train(model,
return_list=True,
)
timer = Timer()
timer.start()
steps_per_epoch = len(batch_sampler)
avg_loss = 0.0
for epoch in range(start_epoch, num_epochs):
for step, data in enumerate(loader):
images = data[0]
......@@ -170,11 +183,19 @@ def train(model,
loss.backward()
optimizer.minimize(loss)
model.clear_gradients()
avg_loss += loss.numpy()[0]
lr = optimizer.current_step_lr()
logging.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, loss={}, lr={}".format(
epoch + 1, num_epochs, step + 1, len(batch_sampler),
loss.numpy(), lr))
if step % log_steps == 0:
avg_loss /= log_steps
time_step = timer.elapsed_time() / log_steps
remain_step = (num_epochs - epoch) * steps_per_epoch - step - 1
logging.info(
"[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, sec/step={:.4f} | ETA {}"
.format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
avg_loss, lr, time_step,
calculate_eta(remain_step, time_step)))
avg_loss = 0.0
timer.restart()
if ((epoch + 1) % save_interval_epochs == 0
or epoch == num_epochs - 1) and ParallelEnv().local_rank == 0:
......@@ -260,6 +281,7 @@ def main(args):
pretrained_model=args.pretrained_model,
resume_model=args.resume_model,
save_interval_epochs=args.save_interval_epochs,
log_steps=args.log_steps,
num_classes=train_dataset.num_classes,
num_workers=args.num_workers)
......
......@@ -16,3 +16,4 @@ from . import logging
from . import download
from .metrics import ConfusionMatrix
from .utils import *
from .timer import Timer, calculate_eta
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
class Timer(object):
    """Simple stopwatch that measures wall-clock intervals in seconds.

    Typical use is ``start()`` followed by repeated ``elapsed_time()`` /
    ``restart()`` calls. After ``stop()`` the measured interval is frozen
    and can still be read through ``elapsed_time()``.
    """

    def __init__(self):
        self._start_time = 0.0
        self._end_time = 0.0
        self._elapsed_time = 0.0
        self._is_running = False

    def start(self):
        """Start timing from the current moment."""
        self._is_running = True
        self._start_time = time.time()

    def restart(self):
        """Reset the start point to now; equivalent to ``start()``."""
        self.start()

    def stop(self):
        """Stop timing and freeze the measured interval.

        Stopping a timer that is not running is a no-op, so an interval
        that has already been frozen is never overwritten.
        """
        if self._is_running:
            self._end_time = time.time()
        self._is_running = False

    def elapsed_time(self):
        """Return the seconds measured so far.

        Returns:
            float: Time since the last ``start()``/``restart()`` while the
            timer is running; the interval frozen by ``stop()`` once it has
            been stopped; ``0.0`` if the timer was never started.
        """
        # Only advance the end point while running, so that the value
        # captured by stop() is preserved instead of being clobbered.
        if self._is_running:
            self._end_time = time.time()
        self._elapsed_time = self._end_time - self._start_time
        return self._elapsed_time

    @property
    def is_running(self):
        """bool: Whether the timer is currently measuring."""
        return self._is_running
def calculate_eta(remaining_step, speed):
    """Format the estimated time remaining as ``HH:MM:SS``.

    Args:
        remaining_step (int): Number of steps still to run. Negative
            values are treated as 0.
        speed (float): Seconds spent per step.

    Returns:
        str: Zero-padded ``HH:MM:SS`` string. The hours field grows beyond
        two digits when the estimate is 100 hours or more. A negative
        estimate (e.g. from a negative ``speed``) is reported as
        ``00:00:00``.
    """
    if remaining_step < 0:
        remaining_step = 0
    # Clamp the product as well: a negative speed would otherwise yield a
    # meaningless ETA after the modulo arithmetic below.
    remaining_time = max(int(remaining_step * speed), 0)
    # Integer divmod avoids the float round-trip of int(x / 60**i).
    hours, remaining_time = divmod(remaining_time, 3600)
    minutes, seconds = divmod(remaining_time, 60)
    return "{:0>2}:{:0>2}:{:0>2}".format(hours, minutes, seconds)
......@@ -29,6 +29,7 @@ import models
import utils.logging as logging
from utils import get_environ_info
from utils import ConfusionMatrix
from utils import Timer, calculate_eta
def parse_args():
......@@ -96,12 +97,14 @@ def evaluate(model,
places=places,
return_list=True,
)
total_steps = math.ceil(len(eval_dataset) * 1.0 / batch_size)
total_steps = len(batch_sampler)
conf_mat = ConfusionMatrix(num_classes, streaming=True)
logging.info(
"Start to evaluating(total_samples={}, total_steps={})...".format(
len(eval_dataset), total_steps))
timer = Timer()
timer.start()
for step, data in enumerate(loader):
images = data[0]
labels = data[1].astype('int64')
......@@ -113,8 +116,13 @@ def evaluate(model,
conf_mat.calculate(pred=pred, label=labels, ignore=mask)
_, iou = conf_mat.mean_iou()
logging.info("[EVAL] Epoch={}, Step={}/{}, iou={}".format(
epoch_id, step + 1, total_steps, iou))
time_step = timer.elapsed_time()
remain_step = total_steps - step - 1
logging.info(
"[EVAL] Epoch={}, Step={}/{}, iou={}, sec/step={:.4f} | ETA {}".
format(epoch_id, step + 1, total_steps, iou, time_step,
calculate_eta(remain_step, time_step)))
timer.restart()
category_iou, miou = conf_mat.mean_iou()
category_acc, macc = conf_mat.accuracy()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册