From 457f693c4de4369d5da49b999a4d6f42165cedea Mon Sep 17 00:00:00 2001
From: liuyuhui
Date: Mon, 31 Aug 2020 13:49:58 +0800
Subject: [PATCH] fix a bug: switch from print() to logging

---
 core/engine/cluster/cluster.py      |  8 +++++-
 core/engine/local_cluster.py        | 15 +++++++----
 core/engine/local_mpi.py            | 10 +++++---
 core/factory.py                     |  7 +++++-
 core/trainer.py                     | 28 ++++++++++++--------
 core/trainers/finetuning_trainer.py |  2 +-
 core/trainers/framework/dataset.py  | 12 ++++++---
 core/trainers/framework/instance.py | 13 ++++++----
 core/trainers/framework/network.py  | 15 ++++++++----
 core/trainers/framework/runner.py   | 37 +++++++++++++-------------
 core/trainers/framework/startup.py  | 27 +++++++++++--------
 core/trainers/framework/terminal.py |  9 ++++---
 core/trainers/general_trainer.py    |  7 +++++-
 core/utils/dataloader_instance.py   | 13 ++++++----
 core/utils/dataset_holder.py        |  7 +++++-
 core/utils/envs.py                  | 11 ++++++---
 core/utils/fs.py                    |  7 ++++--
 core/utils/util.py                  |  9 ++++---
 core/utils/validation.py            | 54 ++++++++++++++++++-------------------
 19 files changed, 193 insertions(+), 98 deletions(-)

diff --git a/core/engine/cluster/cluster.py b/core/engine/cluster/cluster.py
index a64e99e3..4fb0f009 100644
--- a/core/engine/cluster/cluster.py
+++ b/core/engine/cluster/cluster.py
@@ -19,11 +19,16 @@ import copy
 import os
 import subprocess
 import warnings
+import logging
 
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.factory import TrainerFactory
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class ClusterEngine(Engine):
     def __init_impl__(self):
@@ -220,7 +225,8 @@ class ClusterEnvBase(object):
     def env_set(self):
         envs.set_runtime_environs(self.cluster_env)
         flattens = envs.flatten_environs(self.cluster_env)
-        print(envs.pretty_print_envs(flattens, ("Cluster Envs", "Value")))
+        logger.info(
+            envs.pretty_print_envs(flattens, ("Cluster Envs", "Value")))
 
 
 class PaddleCloudMpiEnv(ClusterEnvBase):
diff --git a/core/engine/local_cluster.py b/core/engine/local_cluster.py
index cf9b6032..6b8eb215 100755
--- a/core/engine/local_cluster.py
+++ b/core/engine/local_cluster.py
@@ -19,10 +19,15 @@ import copy
 import os
 import sys
 import subprocess
+import logging
 
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class LocalClusterEngine(Engine):
     def start_procs(self):
@@ -57,7 +62,8 @@ class LocalClusterEngine(Engine):
         factory = "paddlerec.core.factory"
-        cmd = [sys.executable, "-u", "-m", factory, self.trainer]
+        cmd = [sys.executable, "-u", "-m", factory,
+               self.trainer]
 
         for i in range(server_num):
             current_env.update({
@@ -145,7 +151,6 @@ class LocalClusterEngine(Engine):
                 if len(log_fns) > 0:
                     log_fns[i].close()
                 procs[i].terminate()
-            print(
-                "all workers already completed, you can view logs under the `{}` directory".
-                format(logs_dir),
-                file=sys.stderr)
+            logger.info(
+                "all workers already completed, you can view logs under the `{}` directory".
+                format(logs_dir))
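A note on the setup block this patch repeats at the top of each module: logging.basicConfig() only configures the root logger on its first call and is a no-op afterwards, so the duplication is harmless but could live in one shared helper. Below is a minimal sketch of such a helper; the module path paddlerec/core/utils/log.py and the name get_logger are hypothetical, not part of this patch:

    # paddlerec/core/utils/log.py -- hypothetical shared helper, not in this patch
    import logging

    _FORMAT = "%(asctime)s - %(levelname)s - %(message)s"


    def get_logger(name=None, level=logging.INFO):
        """Return a logger using the project-wide format.

        basicConfig() is a no-op once the root logger has handlers, so
        calling this from many modules configures logging only once.
        """
        logging.basicConfig(format=_FORMAT)
        logger = logging.getLogger(name)
        logger.setLevel(level)
        return logger

Each module could then open with `logger = get_logger(__name__)` instead of repeating the basicConfig/getLogger/setLevel triple. With the format used here, an emitted record renders along the lines of `2020-08-31 13:49:58,000 - INFO - PaddleRec: Runner single_cpu_train Begin` (timestamp and runner name invented for illustration).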
diff --git a/core/engine/local_mpi.py b/core/engine/local_mpi.py
index 830bf28c..c45d3627 100755
--- a/core/engine/local_mpi.py
+++ b/core/engine/local_mpi.py
@@ -19,9 +19,14 @@ import copy
 import os
 import sys
 import subprocess
+import logging
 
 from paddlerec.core.engine.engine import Engine
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class LocalMPIEngine(Engine):
     def start_procs(self):
@@ -51,7 +56,6 @@ class LocalMPIEngine(Engine):
             if len(log_fns) > 0:
                 log_fns[i].close()
             procs[i].wait()
-        print(
-            "all workers and parameter servers already completed",
-            file=sys.stderr)
+        logger.info(
+            "all workers and parameter servers already completed")
 
diff --git a/core/factory.py b/core/factory.py
index 95e0e777..97967ce4 100755
--- a/core/factory.py
+++ b/core/factory.py
@@ -14,8 +14,13 @@
 
 import os
 import sys
+import logging
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 trainer_abs = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "trainers")
 trainers = {}
@@ -53,7 +58,7 @@ class TrainerFactory(object):
     @staticmethod
     def _build_trainer(yaml_path):
-        print(envs.pretty_print_envs(envs.get_global_envs()))
+        logger.info(envs.pretty_print_envs(envs.get_global_envs()))
         train_mode = envs.get_trainer()
         trainer_abs = trainers.get(train_mode, None)
diff --git a/core/trainer.py b/core/trainer.py
index bbba6250..4ee633c7 100755
--- a/core/trainer.py
+++ b/core/trainer.py
@@ -17,11 +17,16 @@ import os
 import time
 import sys
 import traceback
+import logging
 
 from paddle import fluid
 
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class EngineMode:
     """
@@ -88,7 +93,7 @@ class Trainer(object):
             phases.append(phase)
         self._context["phases"] = phases
-        print("PaddleRec: Runner {} Begin".format(self._runner_name))
+        logger.info("PaddleRec: Runner {} Begin".format(self._runner_name))
         self.which_engine()
         self.which_device()
         self.which_fleet_mode()
@@ -107,7 +112,7 @@ class Trainer(object):
             self.device = Device.GPU
             gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
             self._place = fluid.CUDAPlace(gpu_id)
-            print("PaddleRec run on device GPU: {}".format(gpu_id))
+            logger.info("PaddleRec run on device GPU: {}".format(gpu_id))
             self._exe = fluid.Executor(self._place)
         elif device == "CPU":
             self.device = Device.CPU
@@ -169,7 +174,7 @@ class Trainer(object):
     def which_cluster_type(self):
         cluster_type = os.getenv("PADDLEREC_CLUSTER_TYPE", "MPI")
-        print("PADDLEREC_CLUSTER_TYPE: {}".format(cluster_type))
+        logger.info("PADDLEREC_CLUSTER_TYPE: {}".format(cluster_type))
         if cluster_type and cluster_type.upper() == "K8S":
             self._context["cluster_type"] = "K8S"
         else:
@@ -184,7 +189,7 @@ class Trainer(object):
             self.is_infer = False
         else:
             self.is_infer = True
-        print("Executor Mode: {}".format(executor_mode))
+        logger.info("Executor Mode: {}".format(executor_mode))
         self._context["is_infer"] = self.is_infer
 
     def legality_check(self):
@@ -224,7 +229,7 @@ class Trainer(object):
         Return:
             None, just sleep in base
         """
-        print('unknow context_status:%s, do nothing' % context['status'])
+        logger.info('unknown context_status:%s, do nothing' % context['status'])
         time.sleep(60)
 
     def handle_processor_exception(self, context, exception):
@@ -233,9 +238,10 @@ class Trainer(object):
         Return:
             bool exit_app or not
         """
-        print("\n--------------------------------\nPaddleRec Error Message "
-              "Summary:\n--------------------------------\n")
-        print(
-            'Exit PaddleRec. catch exception in precoss status: [%s], except: %s'
-            % (context['status'], str(exception)))
+        logger.info(
+            "\n--------------------------------\nPaddleRec Error Message "
+            "Summary:\n--------------------------------\n")
+        logger.info(
+            'Exit PaddleRec. catch exception in process status: [%s], except: %s'
+            % (context['status'], str(exception)))
         return True
@@ -258,7 +264,7 @@ class Trainer(object):
                 break
             except Exception as err:
                 traceback.print_exc()
-                print('Catch Exception:%s' % str(err))
+                logger.info('Catch Exception:%s' % str(err))
                 sys.stdout.flush()
                 self.handle_processor_exception(self._context, err)
                 sys.exit(type(err).__name__)
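The two exception paths above keep traceback.print_exc() and log the failure at INFO level. The logging module can record the active traceback itself via Logger.exception, which always logs at ERROR level. A sketch of what the except block above could become, as a possible follow-up rather than something this patch does:

    except Exception as err:
        # logger.exception appends the current traceback to the record,
        # making the separate traceback.print_exc() call unnecessary.
        logger.exception('Catch Exception: %s', str(err))
        sys.stdout.flush()
        self.handle_processor_exception(self._context, err)
        sys.exit(type(err).__name__)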
print("\n--------------------------------\nPaddleRec Error Message " - "Summary:\n--------------------------------\n") - print( + logger.info( + "\n--------------------------------\nPaddleRec Error Message " + "Summary:\n--------------------------------\n") + logger.info( 'Exit PaddleRec. catch exception in precoss status: [%s], except: %s' % (context['status'], str(exception))) return True @@ -258,7 +264,7 @@ class Trainer(object): break except Exception as err: traceback.print_exc() - print('Catch Exception:%s' % str(err)) + logger.info('Catch Exception:%s' % str(err)) sys.stdout.flush() self.handle_processor_exception(self._context, err) sys.exit(type(err).__name__) diff --git a/core/trainers/finetuning_trainer.py b/core/trainers/finetuning_trainer.py index 4525a188..e4434a47 100644 --- a/core/trainers/finetuning_trainer.py +++ b/core/trainers/finetuning_trainer.py @@ -34,7 +34,7 @@ class FineTuningTrainer(Trainer): self.runner_env_name = "runner." + self._context["runner_name"] def processor_register(self): - print("processor_register begin") + logger.info("processor_register begin") self.regist_context_processor('uninit', self.instance) self.regist_context_processor('network_pass', self.network) self.regist_context_processor('startup_pass', self.startup) diff --git a/core/trainers/framework/dataset.py b/core/trainers/framework/dataset.py index 239b568b..4bdae7b1 100644 --- a/core/trainers/framework/dataset.py +++ b/core/trainers/framework/dataset.py @@ -15,6 +15,7 @@ from __future__ import print_function import os +import logging import paddle.fluid as fluid from paddlerec.core.utils import envs @@ -25,6 +26,10 @@ from paddlerec.core.utils.util import split_files, check_filelist __all__ = ["DatasetBase", "DataLoader", "QueueDataset"] +logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger("fluid") +logger.setLevel(logging.INFO) + class DatasetBase(object): """R @@ -83,7 +88,8 @@ class QueueDataset(DatasetBase): name = "dataset." + dataset_name + "." type_name = envs.get_global_env(name + "type") if envs.get_platform() != "LINUX": - print("platform ", envs.get_platform(), "Reader To Dataloader") + logger.info("platform ", + envs.get_platform(), "Reader To Dataloader") type_name = "DataLoader" if type_name == "DataLoader": @@ -126,7 +132,7 @@ class QueueDataset(DatasetBase): data_file_list=[], train_data_path=train_data_path) if (hidden_file_list is not None): - print( + logger.info( "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}". 
diff --git a/core/trainers/framework/dataset.py b/core/trainers/framework/dataset.py
index 239b568b..4bdae7b1 100644
--- a/core/trainers/framework/dataset.py
+++ b/core/trainers/framework/dataset.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import os
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -25,6 +26,10 @@ from paddlerec.core.utils.util import split_files, check_filelist
 
 __all__ = ["DatasetBase", "DataLoader", "QueueDataset"]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class DatasetBase(object):
     """R
@@ -83,7 +88,8 @@ class QueueDataset(DatasetBase):
         name = "dataset." + dataset_name + "."
         type_name = envs.get_global_env(name + "type")
         if envs.get_platform() != "LINUX":
-            print("platform ", envs.get_platform(), "Reader To Dataloader")
+            logger.info("platform {} Reader To Dataloader".format(
+                envs.get_platform()))
             type_name = "DataLoader"
 
         if type_name == "DataLoader":
@@ -126,7 +132,7 @@ class QueueDataset(DatasetBase):
                 data_file_list=[],
                 train_data_path=train_data_path)
             if (hidden_file_list is not None):
-                print(
+                logger.info(
                     "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
                     format(hidden_file_list))
@@ -143,7 +149,7 @@ class QueueDataset(DatasetBase):
         if need_split_files:
             file_list = split_files(file_list, context["fleet"].worker_index(),
                                     context["fleet"].worker_num())
-            print("File_list: {}".format(file_list))
+            logger.info("File_list: {}".format(file_list))
 
         dataset.set_filelist(file_list)
         for model_dict in context["phases"]:
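The platform message above originally passed print-style positional arguments to logger.info, which the logging module treats as %-interpolation arguments and fails to format; the fix builds the string with str.format before logging. logging also supports deferred %-style interpolation, which skips the string work entirely whenever the level is filtered out. Both calls below log the same message; the second is the more idiomatic form (a sketch, not what the patch uses):

    # eager: the string is built even if INFO records are being discarded
    logger.info("platform {} Reader To Dataloader".format(envs.get_platform()))

    # lazy: logging interpolates the argument only if the record is emitted
    logger.info("platform %s Reader To Dataloader", envs.get_platform())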
diff --git a/core/trainers/framework/instance.py b/core/trainers/framework/instance.py
index 32b39eb7..00db0689 100644
--- a/core/trainers/framework/instance.py
+++ b/core/trainers/framework/instance.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -24,6 +25,10 @@ __all__ = [
     "CollectiveInstance"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class InstanceBase(object):
     """R
@@ -38,7 +43,7 @@ class InstanceBase(object):
 
 class SingleInstance(InstanceBase):
     def __init__(self, context):
-        print("Running SingleInstance.")
+        logger.info("Running SingleInstance.")
         pass
 
     def instance(self, context):
@@ -47,7 +52,7 @@ class SingleInstance(InstanceBase):
 
 class PSInstance(InstanceBase):
     def __init__(self, context):
-        print("Running PSInstance.")
+        logger.info("Running PSInstance.")
         pass
 
     def instance(self, context):
@@ -61,7 +66,7 @@ class PSInstance(InstanceBase):
 
 class PslibInstance(InstanceBase):
     def __init__(self, context):
-        print("Running PslibInstance.")
+        logger.info("Running PslibInstance.")
         pass
 
     def instance(self, context):
@@ -73,7 +78,7 @@ class PslibInstance(InstanceBase):
 
 class CollectiveInstance(InstanceBase):
     def __init__(self, context):
-        print("Running CollectiveInstance.")
+        logger.info("Running CollectiveInstance.")
         pass
 
     def instance(self, context):
diff --git a/core/trainers/framework/network.py b/core/trainers/framework/network.py
index 7d7a8273..9a5eae83 100644
--- a/core/trainers/framework/network.py
+++ b/core/trainers/framework/network.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import os
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -26,6 +27,10 @@ __all__ = [
     "CollectiveNetwork", "FineTuningNetwork"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class NetworkBase(object):
     """R
@@ -43,7 +48,7 @@ class SingleNetwork(NetworkBase):
     """
 
     def __init__(self, context):
-        print("Running SingleNetwork.")
+        logger.info("Running SingleNetwork.")
         pass
 
     def build_network(self, context):
@@ -114,7 +119,7 @@ class FineTuningNetwork(NetworkBase):
     """
 
     def __init__(self, context):
-        print("Running FineTuningNetwork.")
+        logger.info("Running FineTuningNetwork.")
 
     def build_network(self, context):
         context["model"] = {}
@@ -193,7 +198,7 @@ class FineTuningNetwork(NetworkBase):
 
 class PSNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running PSNetwork.")
+        logger.info("Running PSNetwork.")
         pass
 
     def build_network(self, context):
@@ -285,7 +290,7 @@ class PSNetwork(NetworkBase):
 
 class PslibNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running PslibNetwork.")
+        logger.info("Running PslibNetwork.")
         pass
 
     def build_network(self, context):
@@ -357,7 +362,7 @@ class PslibNetwork(NetworkBase):
 
 class CollectiveNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running CollectiveNetwork.")
+        logger.info("Running CollectiveNetwork.")
         pass
 
     def build_network(self, context):
diff --git a/core/trainers/framework/runner.py b/core/trainers/framework/runner.py
index 79d7be66..854528d5 100644
--- a/core/trainers/framework/runner.py
+++ b/core/trainers/framework/runner.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import os
 import time
 import warnings
+import logging
 
 import numpy as np
 import paddle.fluid as fluid
@@ -27,6 +28,10 @@ __all__ = [
     "RunnerBase", "SingleRunner", "PSRunner", "CollectiveRunner", "PslibRunner"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 def as_numpy(tensor):
     """
@@ -169,7 +174,7 @@ class RunnerBase(object):
                     metrics.extend(metrics_rets)
 
                 if batch_id % fetch_period == 0 and batch_id != 0:
-                    print(metrics_format.format(*metrics))
+                    logger.info(metrics_format.format(*metrics))
                 batch_id += 1
         except fluid.core.EOFException:
             reader.reset()
@@ -365,7 +370,7 @@ class SingleRunner(RunnerBase):
     """
 
     def __init__(self, context):
-        print("Running SingleRunner.")
+        logger.info("Running SingleRunner.")
         pass
 
     def run(self, context):
@@ -381,7 +386,7 @@ class SingleRunner(RunnerBase):
                 result = self._run(context, model_dict)
                 end_time = time.time()
                 seconds = end_time - begin_time
-                message = "epoch {} done, use time: {}".format(epoch, seconds)
+                message = "epoch {} done, use time: {}s".format(epoch, seconds)
                 metrics_result = []
                 for key in metrics:
                     if isinstance(metrics[key], Metric):
@@ -394,7 +399,7 @@ class SingleRunner(RunnerBase):
                         metrics_result.append(_str)
                 if len(metrics_result) > 0:
                     message += ", global metrics: " + ", ".join(metrics_result)
-                print(message)
+                logger.info(message)
 
                 with fluid.scope_guard(context["model"][model_dict["name"]][
                         "scope"]):
@@ -409,7 +414,7 @@ class SingleRunner(RunnerBase):
 
 class PSRunner(RunnerBase):
     def __init__(self, context):
-        print("Running PSRunner.")
+        logger.info("Running PSRunner.")
         pass
 
     def run(self, context):
@@ -424,7 +429,7 @@ class PSRunner(RunnerBase):
             result = self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            message = "epoch {} done, use time: {}".format(epoch, seconds)
+            message = "epoch {} done, use time: {}s".format(epoch, seconds)
 
             # TODO, wait for PaddleCloudRoleMaker supports gloo
             from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
@@ -442,7 +447,7 @@ class PSRunner(RunnerBase):
                     metrics_result.append(_str)
             if len(metrics_result) > 0:
                 message += ", global metrics: " + ", ".join(metrics_result)
-            print(message)
+            logger.info(message)
             with fluid.scope_guard(context["model"][model_dict["name"]][
                     "scope"]):
                 train_prog = context["model"][model_dict["name"]][
@@ -456,7 +461,7 @@ class PSRunner(RunnerBase):
 
 class CollectiveRunner(RunnerBase):
     def __init__(self, context):
-        print("Running CollectiveRunner.")
+        logger.info("Running CollectiveRunner.")
         pass
 
     def run(self, context):
@@ -469,7 +474,7 @@ class CollectiveRunner(RunnerBase):
             self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            print("epoch {} done, use time: {}".format(epoch, seconds))
+            logger.info("epoch {} done, use time: {}s".format(epoch, seconds))
             with fluid.scope_guard(context["model"][model_dict["name"]][
                     "scope"]):
                 train_prog = context["model"][model_dict["name"]][
@@ -483,7 +488,7 @@ class CollectiveRunner(RunnerBase):
 
 class PslibRunner(RunnerBase):
     def __init__(self, context):
-        print("Running PSRunner.")
+        logger.info("Running PslibRunner.")
         pass
 
     def run(self, context):
@@ -497,7 +502,7 @@ class PslibRunner(RunnerBase):
             self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            print("epoch {} done, use time: {}".format(epoch, seconds))
+            logger.info("epoch {} done, use time: {}s".format(epoch, seconds))
         """
         # online Training Can do more, As shown below:
@@ -527,7 +532,7 @@ class PslibRunner(RunnerBase):
                 self._run(context, model_dict)
                 end_time = time.time()
                 seconds = end_time - begin_time
-                print("epoch {} done, use time: {}".format(epoch, seconds))
+                logger.info("epoch {} done, use time: {}".format(epoch, seconds))
                 with fluid.scope_guard(context["model"][model_dict["name"]]
                                        ["scope"]):
                     train_prog = context["model"][model_dict["name"]][
@@ -543,7 +548,7 @@ class PslibRunner(RunnerBase):
 
 class SingleInferRunner(RunnerBase):
     def __init__(self, context):
-        print("Running SingleInferRunner.")
+        logger.info("Running SingleInferRunner.")
         pass
 
     def run(self, context):
@@ -559,7 +564,7 @@ class SingleInferRunner(RunnerBase):
                 result = self._run(context, model_dict)
                 end_time = time.time()
                 seconds = end_time - begin_time
-                message = "Infer {} of epoch {} done, use time: {}s".format(
+                message = "Infer {} of epoch {} done, use time: {}s".format(
                     model_dict["name"], epoch_name, seconds)
                 metrics_result = []
                 for key in metrics:
@@ -573,14 +578,14 @@ class SingleInferRunner(RunnerBase):
                         metrics_result.append(_str)
                 if len(metrics_result) > 0:
                     message += ", global metrics: " + ", ".join(metrics_result)
-                print(message)
+                logger.info(message)
 
         context["status"] = "terminal_pass"
 
     def _load(self, context, model_dict, model_path):
         if model_path is None or model_path == "":
             return
-        print("load persistables from", model_path)
+        logger.info("load persistables from {}".format(model_path))
 
         with fluid.scope_guard(context["model"][model_dict["name"]]["scope"]):
             train_prog = context["model"][model_dict["name"]]["main_program"]
+ context["runner_name"] + ".init_model_path", None) if dirname is None or dirname == "": return - print("going to load ", dirname) + logger.info("going to load ", dirname) params_grads = self._get_params_grads(main_program) update_params = [p for p, _ in params_grads] @@ -169,7 +174,7 @@ class FineTuningStartup(StartupBase): fluid.io.load_vars(context["exe"], dirname, main_program, need_load_vars) - print("load from {} success".format(dirname)) + logger.info("load from {} success".format(dirname)) def startup(self, context): for model_dict in context["phases"]: @@ -187,7 +192,7 @@ class FineTuningStartup(StartupBase): class PSStartup(StartupBase): def __init__(self, context): - print("Running PSStartup.") + logger.info("Running PSStartup.") pass def startup(self, context): @@ -204,7 +209,7 @@ class PSStartup(StartupBase): class CollectiveStartup(StartupBase): def __init__(self, context): - print("Running CollectiveStartup.") + logger.info("Running CollectiveStartup.") pass def startup(self, context): @@ -222,7 +227,7 @@ class CollectiveStartup(StartupBase): class SingleInferStartup(StartupBase): def __init__(self, context): - print("Running SingleInferStartup.") + logger.info("Running SingleInferStartup.") pass def startup(self, context): diff --git a/core/trainers/framework/terminal.py b/core/trainers/framework/terminal.py index 96e762c3..03dc63dc 100644 --- a/core/trainers/framework/terminal.py +++ b/core/trainers/framework/terminal.py @@ -15,12 +15,17 @@ from __future__ import print_function import warnings +import logging import paddle.fluid as fluid from paddlerec.core.utils import envs __all__ = ["TerminalBase", "PSTerminalBase"] +logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger("fluid") +logger.setLevel(logging.INFO) + class TerminalBase(object): """R @@ -30,7 +35,7 @@ class TerminalBase(object): pass def terminal(self, context): - print("PaddleRec Finish") + logger.info("PaddleRec Finish") class PSTerminal(TerminalBase): @@ -42,4 +47,4 @@ class PSTerminal(TerminalBase): def terminal(self, context): context["fleet"].stop_worker() - print("PaddleRec Finish") + logger.info("PaddleRec Finish") diff --git a/core/trainers/general_trainer.py b/core/trainers/general_trainer.py index 20a7d54c..c45a3a21 100644 --- a/core/trainers/general_trainer.py +++ b/core/trainers/general_trainer.py @@ -17,10 +17,15 @@ General Trainer, applicable to many situations: Single/Cluster/Local_Cluster + P from __future__ import print_function import os +import logging from paddlerec.core.utils import envs from paddlerec.core.trainer import Trainer, EngineMode, FleetMode +logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger() +logger.setLevel(logging.INFO) + class GeneralTrainer(Trainer): """ @@ -34,7 +39,7 @@ class GeneralTrainer(Trainer): self.runner_env_name = "runner." 
diff --git a/core/trainers/framework/terminal.py b/core/trainers/framework/terminal.py
index 96e762c3..03dc63dc 100644
--- a/core/trainers/framework/terminal.py
+++ b/core/trainers/framework/terminal.py
@@ -15,12 +15,17 @@
 from __future__ import print_function
 
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
 
 __all__ = ["TerminalBase", "PSTerminalBase"]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class TerminalBase(object):
     """R
@@ -30,7 +35,7 @@ class TerminalBase(object):
         pass
 
     def terminal(self, context):
-        print("PaddleRec Finish")
+        logger.info("PaddleRec Finish")
 
 
 class PSTerminal(TerminalBase):
@@ -42,4 +47,4 @@ class PSTerminal(TerminalBase):
 
     def terminal(self, context):
         context["fleet"].stop_worker()
-        print("PaddleRec Finish")
+        logger.info("PaddleRec Finish")
diff --git a/core/trainers/general_trainer.py b/core/trainers/general_trainer.py
index 20a7d54c..c45a3a21 100644
--- a/core/trainers/general_trainer.py
+++ b/core/trainers/general_trainer.py
@@ -17,10 +17,15 @@ General Trainer, applicable to many situations: Single/Cluster/Local_Cluster + P
 from __future__ import print_function
 
 import os
+import logging
 
 from paddlerec.core.utils import envs
 from paddlerec.core.trainer import Trainer, EngineMode, FleetMode
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class GeneralTrainer(Trainer):
     """
@@ -34,7 +39,7 @@ class GeneralTrainer(Trainer):
         self.runner_env_name = "runner." + self._context["runner_name"]
 
     def processor_register(self):
-        print("processor_register begin")
+        logger.info("processor_register begin")
         self.regist_context_processor('uninit', self.instance)
         self.regist_context_processor('network_pass', self.network)
         self.regist_context_processor('startup_pass', self.startup)
diff --git a/core/utils/dataloader_instance.py b/core/utils/dataloader_instance.py
index 03e6f0a6..40825dd6 100755
--- a/core/utils/dataloader_instance.py
+++ b/core/utils/dataloader_instance.py
@@ -14,6 +14,7 @@
 from __future__ import print_function
 
 import os
+import logging
 from paddlerec.core.utils.envs import lazy_instance_by_fliename
 from paddlerec.core.utils.envs import get_global_env
 from paddlerec.core.utils.envs import get_runtime_environ
@@ -21,6 +22,10 @@ from paddlerec.core.reader import SlotReader
 from paddlerec.core.trainer import EngineMode
 from paddlerec.core.utils.util import split_files, check_filelist
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 def dataloader_by_name(readerclass,
                        dataset_name,
@@ -41,7 +46,7 @@ def dataloader_by_name(readerclass,
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
@@ -55,7 +60,7 @@ def dataloader_by_name(readerclass,
             "cluster_type"] == "K8S":  # for k8s mount mode, split files for every node
         need_split_files = True
-    print("need_split_files: {}".format(need_split_files))
+    logger.info("need_split_files: {}".format(need_split_files))
     if need_split_files:
         files = split_files(files, context["fleet"].worker_index(),
                             context["fleet"].worker_num())
@@ -103,7 +108,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
@@ -173,7 +178,7 @@ def slotdataloader(readerclass, train, yaml_file, context):
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
diff --git a/core/utils/dataset_holder.py b/core/utils/dataset_holder.py
index 70355a54..e89c1100 100755
--- a/core/utils/dataset_holder.py
+++ b/core/utils/dataset_holder.py
@@ -15,12 +15,17 @@
 import abc
 import datetime
 import time
+import logging
 
 import paddle.fluid as fluid
 
 from paddlerec.core.utils import fs as fs
 from paddlerec.core.utils import util as util
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class DatasetHolder(object):
     """
@@ -187,7 +192,7 @@ class TimeSplitDatasetHolder(DatasetHolder):
         windown_min = params['time_window_min']
         if begin_time not in self._datasets:
             while self.check_ready(begin_time, windown_min) == False:
-                print("dataset not ready, time:" + begin_time)
+                logger.info("dataset not ready, time:" + begin_time)
                 time.sleep(30)
             file_list = self.get_file_list(begin_time, windown_min,
                                            params['node_num'],
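Several of the converted messages are warnings by content ("Warning:please make sure there are no hidden files...") yet are emitted at INFO level; once logging is in place, the level itself can carry that signal instead of a string prefix. A sketch of the hidden-file check above using logger.warning, as a possible follow-up rather than part of this patch:

    if hidden_file_list is not None:
        logger.warning(
            "please make sure there are no hidden files in the dataset "
            "folder and check these hidden files: %s", hidden_file_list)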
diff --git a/core/utils/envs.py b/core/utils/envs.py
index ddcc9a94..eac73674 100755
--- a/core/utils/envs.py
+++ b/core/utils/envs.py
@@ -21,6 +21,11 @@ import sys
 import six
 import traceback
 import six
+import logging
+
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
 
 global_envs = {}
 global_envs_flatten = {}
@@ -104,7 +109,7 @@ def set_global_envs(envs):
                 global_envs[name] = "DataLoader"
 
     if get_platform() == "LINUX" and six.PY3:
-        print("QueueDataset can not support PY3, change to DataLoader")
+        logger.info("QueueDataset can not support PY3, change to DataLoader")
         for dataset in envs["dataset"]:
             name = ".".join(["dataset", dataset["name"], "type"])
             global_envs[name] = "DataLoader"
@@ -207,7 +212,7 @@ def lazy_instance_by_package(package, class_name):
         return instance
     except Exception as err:
         traceback.print_exc()
-        print('Catch Exception:%s' % str(err))
+        logger.info('Catch Exception:%s' % str(err))
         return None
@@ -223,7 +228,7 @@ def lazy_instance_by_fliename(abs, class_name):
         return instance
     except Exception as err:
         traceback.print_exc()
-        print('Catch Exception:%s' % str(err))
+        logger.info('Catch Exception:%s' % str(err))
         return None
diff --git a/core/utils/fs.py b/core/utils/fs.py
index fab84496..3e36532a 100755
--- a/core/utils/fs.py
+++ b/core/utils/fs.py
@@ -13,9 +13,14 @@
 # limitations under the License.
 
 import os
+import logging
 
 from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 def is_afs_path(path):
     """is_afs_path
@@ -177,4 +182,4 @@ class FileHandler(object):
             return self._hdfs_client.upload(dest_path, org_path)
         if org_is_afs and not dest_is_afs:
             return self._hdfs_client.download(org_path, dest_path)
-        print("Not Suppor hdfs cp currently")
+        logger.info("hdfs cp is not supported currently")
diff --git a/core/utils/util.py b/core/utils/util.py
index f6acfe20..f797e160 100755
--- a/core/utils/util.py
+++ b/core/utils/util.py
@@ -16,9 +16,14 @@ import datetime
 import os
 import sys
 import time
+import logging
 
 import numpy as np
 from paddle import fluid
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 def save_program_proto(path, program=None):
     if program is None:
@@ -146,9 +151,9 @@ def print_log(log_str, params):
     log_str = time_str + " " + log_str
     if 'master' in params and params['master']:
         if 'index' in params and params['index'] == 0:
-            print(log_str)
+            logger.info(log_str)
     else:
-        print(log_str)
+        logger.info(log_str)
     sys.stdout.flush()
     if 'stdout' in params:
         params['stdout'] += log_str + '\n'
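One inconsistency this patch leaves behind: some modules log through the root logger (logging.getLogger()) while others use a logger named "fluid", so levels and handlers end up controlled in two different places. The usual convention is one logger per module; a sketch, not what the patch does:

    import logging

    # __name__ scopes the logger to the module, e.g. "paddlerec.core.utils.util",
    # so whole subtrees can be reconfigured from a single parent logger.
    logger = logging.getLogger(__name__)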
diff --git a/core/utils/validation.py b/core/utils/validation.py
index 7448daf8..c393d901 100755
--- a/core/utils/validation.py
+++ b/core/utils/validation.py
@@ -12,8 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class ValueFormat:
     def __init__(self, value_type, value, value_handler, required=False):
@@ -41,64 +46,67 @@ class ValueFormat:
     def is_type_valid(self, name, value):
         if self.value_type == "int":
             if not isinstance(value, int):
-                print("\nattr {} should be int, but {} now\n".format(
+                logger.info("\nattr {} should be int, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "str":
             if not isinstance(value, str):
-                print("\nattr {} should be str, but {} now\n".format(
+                logger.info("\nattr {} should be str, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "strs":
             if not isinstance(value, list):
-                print("\nattr {} should be list(str), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(str), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, str):
-                    print("\nattr {} should be list(str), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(str), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
 
         elif self.value_type == "dict":
             if not isinstance(value, dict):
-                print("\nattr {} should be str, but {} now\n".format(
+                logger.info("\nattr {} should be dict, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "dicts":
             if not isinstance(value, list):
-                print("\nattr {} should be list(dist), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(dict), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, dict):
-                    print("\nattr {} should be list(dist), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(dict), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
 
         elif self.value_type == "ints":
             if not isinstance(value, list):
-                print("\nattr {} should be list(int), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(int), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, int):
-                    print("\nattr {} should be list(int), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(int), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
         else:
-            print("\nattr {}'s type is {}, can not be supported now\n".format(
-                name, type(value)))
+            logger.info("\nattr {}'s type is {}, can not be supported now\n".
+                        format(name, type(value)))
             return False
 
     def is_value_valid(self, name, value):
@@ -108,7 +116,7 @@ class ValueFormat:
 
 def in_value_handler(name, value, values):
     if value not in values:
-        print("\nattr {}'s value is {}, but {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -116,7 +124,7 @@ def in_value_handler(name, value, values):
 
 def eq_value_handler(name, value, values):
     if value != values:
-        print("\nattr {}'s value is {}, but == {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but == {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -124,7 +132,7 @@ def eq_value_handler(name, value, values):
 
 def ge_value_handler(name, value, values):
     if value < values:
-        print("\nattr {}'s value is {}, but >= {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but >= {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -132,7 +140,7 @@ def ge_value_handler(name, value, values):
 
 def le_value_handler(name, value, values):
     if value > values:
-        print("\nattr {}'s value is {}, but <= {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but <= {} is expected\n".format(
             name, value, values))
        return False
     return True
@@ -160,8 +168,8 @@ def yaml_validation(config):
 
     for required in require_checkers:
         if required not in _config.keys():
-            print("\ncan not find {} in yaml, which is required\n".format(
-                required))
+            logger.info("\ncan not find {} in yaml, which is required\n".
+                        format(required))
             return False
 
     for name, value in _config.items():
-- 
GitLab