diff --git a/core/trainers/framework/runner.py b/core/trainers/framework/runner.py index 79d7be66e58d0c4244980cf4bf871f42984d186e..4375b7267359e50e8cf9d739ba9dc1f58529e36a 100644 --- a/core/trainers/framework/runner.py +++ b/core/trainers/framework/runner.py @@ -18,11 +18,15 @@ import os import time import warnings import numpy as np +import logging import paddle.fluid as fluid from paddlerec.core.utils import envs from paddlerec.core.metric import Metric +logging.basicConfig( + format='%(asctime)s - %(levelname)s: %(message)s', level=logging.INFO) + __all__ = [ "RunnerBase", "SingleRunner", "PSRunner", "CollectiveRunner", "PslibRunner" ] @@ -140,8 +144,16 @@ class RunnerBase(object): metrics_varnames = [] metrics_format = [] + + if context["is_infer"]: + metrics_format.append("\t[Infer]\t{}: {{}}".format("batch")) + else: + metrics_format.append("\t[Train]\t{}: {{}}".format("batch")) + + metrics_format.append("{}: {{:.2f}}s".format("time_each_interval")) + metrics_names = ["total_batch"] - metrics_format.append("{}: {{}}".format("batch")) + for name, var in metrics.items(): metrics_names.append(name) metrics_varnames.append(var.name) @@ -151,6 +163,7 @@ class RunnerBase(object): reader = context["model"][model_dict["name"]]["model"]._data_loader reader.start() batch_id = 0 + begin_time = time.time() scope = context["model"][model_name]["scope"] result = None with fluid.scope_guard(scope): @@ -160,8 +173,8 @@ class RunnerBase(object): program=program, fetch_list=metrics_varnames, return_numpy=False) - metrics = [batch_id] + metrics = [batch_id] metrics_rets = [ as_numpy(metrics_tensor) for metrics_tensor in metrics_tensors @@ -169,7 +182,13 @@ class RunnerBase(object): metrics.extend(metrics_rets) if batch_id % fetch_period == 0 and batch_id != 0: - print(metrics_format.format(*metrics)) + end_time = time.time() + seconds = end_time - begin_time + metrics_logging = metrics[:] + metrics_logging = metrics.insert(1, seconds) + begin_time = end_time + + logging.info(metrics_format.format(*metrics)) batch_id += 1 except fluid.core.EOFException: reader.reset() diff --git a/models/multitask/mmoe/config.yaml b/models/multitask/mmoe/config.yaml index d22b78e4481be78787df5aa828961af411cbc17b..354bd218a9e63eeaa7657b2d77c94d9507a3e8bc 100644 --- a/models/multitask/mmoe/config.yaml +++ b/models/multitask/mmoe/config.yaml @@ -17,12 +17,12 @@ workspace: "models/multitask/mmoe" dataset: - name: dataset_train batch_size: 5 - type: QueueDataset + type: DataLoader # or QueueDataset data_path: "{workspace}/data/train" data_converter: "{workspace}/census_reader.py" - name: dataset_infer batch_size: 5 - type: QueueDataset + type: DataLoader # or QueueDataset data_path: "{workspace}/data/train" data_converter: "{workspace}/census_reader.py" @@ -37,7 +37,6 @@ hyper_parameters: learning_rate: 0.001 strategy: async -#use infer_runner mode and modify 'phase' below if infer mode: [train_runner, infer_runner] runner: @@ -49,10 +48,10 @@ runner: save_inference_interval: 4 save_checkpoint_path: "increment" save_inference_path: "inference" - print_interval: 10 + print_interval: 1 - name: infer_runner class: infer - init_model_path: "increment/0" + init_model_path: "increment/1" device: cpu phase: diff --git a/models/rank/fibinet/config.yaml b/models/rank/fibinet/config.yaml index 4f0951682e4e96c2fb7c4c373a56de4e6d6bc951..d9ae592f3321d46aa221648d309b8d2e2944f53a 100644 --- a/models/rank/fibinet/config.yaml +++ b/models/rank/fibinet/config.yaml @@ -102,9 +102,9 @@ phase: - name: phase1 model: "{workspace}/model.py" # user-defined model dataset_name: dataloader_train # select dataset by name - thread_num: 8 + thread_num: 1 - name: phase2 model: "{workspace}/model.py" # user-defined model dataset_name: dataset_infer # select dataset by name - thread_num: 8 + thread_num: 1