diff --git a/core/engine/cluster/cluster.py b/core/engine/cluster/cluster.py
index a64e99e38b2df3033e480706bedd02eadea1dc90..4fb0f0091e9c1d691f2fa28ecbda3ff82cba0617 100644
--- a/core/engine/cluster/cluster.py
+++ b/core/engine/cluster/cluster.py
@@ -19,11 +19,16 @@ import copy
 import os
 import subprocess
 import warnings
+import logging
 
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.factory import TrainerFactory
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class ClusterEngine(Engine):
     def __init_impl__(self):
@@ -220,7 +225,8 @@ class ClusterEnvBase(object):
     def env_set(self):
         envs.set_runtime_environs(self.cluster_env)
         flattens = envs.flatten_environs(self.cluster_env)
-        print(envs.pretty_print_envs(flattens, ("Cluster Envs", "Value")))
+        logger.info(
+            envs.pretty_print_envs(flattens, ("Cluster Envs", "Value")))
 
 
 class PaddleCloudMpiEnv(ClusterEnvBase):
diff --git a/core/engine/local_cluster.py b/core/engine/local_cluster.py
index cf9b6032162a61b16e4f01552c23cff7312b3965..6b8eb215e974002c59d14817667b727edc691815 100755
--- a/core/engine/local_cluster.py
+++ b/core/engine/local_cluster.py
@@ -19,10 +19,15 @@ import copy
 import os
 import sys
 import subprocess
+import logging
 
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class LocalClusterEngine(Engine):
     def start_procs(self):
@@ -57,7 +62,8 @@ class LocalClusterEngine(Engine):
         ]
 
         factory = "paddlerec.core.factory"
-        cmd = [sys.executable, "-u", "-m", factory, self.trainer]
+        cmd = [sys.executable, "-u", "-m", factory,
+               self.trainer]
 
         for i in range(server_num):
             current_env.update({
@@ -145,7 +151,7 @@
             if len(log_fns) > 0:
                 log_fns[i].close()
             procs[i].terminate()
-            print(
+            logger.info(
                 "all workers already completed, you can view logs under the `{}` directory".
-                format(logs_dir), file=sys.stderr)
+                format(logs_dir))
 
diff --git a/core/engine/local_mpi.py b/core/engine/local_mpi.py
index 830bf28c4957e342d317070ab2060cde1de6d6a6..c45d362795c2944d612e260eda4d7832afc13e29 100755
--- a/core/engine/local_mpi.py
+++ b/core/engine/local_mpi.py
@@ -19,9 +19,14 @@ import copy
 import os
 import sys
 import subprocess
+import logging
 
 from paddlerec.core.engine.engine import Engine
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class LocalMPIEngine(Engine):
     def start_procs(self):
@@ -51,7 +56,6 @@
             if len(log_fns) > 0:
                 log_fns[i].close()
             procs[i].wait()
-            print(
-                "all workers and parameter servers already completed",
-                file=sys.stderr)
+            logger.info(
+                "all workers and parameter servers already completed")
 
diff --git a/core/factory.py b/core/factory.py
index 95e0e7778141ad76d1166205213bccdaae67aff7..97967ce4cc9f1fe63f541ddfd3a55715b5305f2b 100755
--- a/core/factory.py
+++ b/core/factory.py
@@ -14,8 +14,13 @@
 
 import os
 import sys
+import logging
 
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 trainer_abs = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), "trainers")
 trainers = {}
@@ -53,7 +58,7 @@ class TrainerFactory(object):
 
     @staticmethod
     def _build_trainer(yaml_path):
-        print(envs.pretty_print_envs(envs.get_global_envs()))
+        logger.info(envs.pretty_print_envs(envs.get_global_envs()))
         train_mode = envs.get_trainer()
         trainer_abs = trainers.get(train_mode, None)
diff --git a/core/trainer.py b/core/trainer.py
index bbba6250529283d24389e2719b7110f8aa321973..4ee633c7acf23ddef8ff447053317de5205b25c9 100755
--- a/core/trainer.py
+++ b/core/trainer.py
@@ -17,11 +17,16 @@ import os
 import time
 import sys
 import traceback
+import logging
 
 from paddle import fluid
 
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class EngineMode:
     """
@@ -88,7 +93,7 @@ class Trainer(object):
             phases.append(phase)
         self._context["phases"] = phases
 
-        print("PaddleRec: Runner {} Begin".format(self._runner_name))
+        logger.info("PaddleRec: Runner {} Begin".format(self._runner_name))
         self.which_engine()
         self.which_device()
         self.which_fleet_mode()
@@ -107,7 +112,7 @@
             self.device = Device.GPU
             gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
             self._place = fluid.CUDAPlace(gpu_id)
-            print("PaddleRec run on device GPU: {}".format(gpu_id))
+            logger.info("PaddleRec run on device GPU: {}".format(gpu_id))
             self._exe = fluid.Executor(self._place)
         elif device == "CPU":
             self.device = Device.CPU
@@ -169,7 +174,7 @@
 
     def which_cluster_type(self):
         cluster_type = os.getenv("PADDLEREC_CLUSTER_TYPE", "MPI")
-        print("PADDLEREC_CLUSTER_TYPE: {}".format(cluster_type))
+        logger.info("PADDLEREC_CLUSTER_TYPE: {}".format(cluster_type))
         if cluster_type and cluster_type.upper() == "K8S":
             self._context["cluster_type"] = "K8S"
         else:
@@ -184,7 +189,7 @@
             self.is_infer = False
         else:
             self.is_infer = True
-        print("Executor Mode: {}".format(executor_mode))
+        logger.info("Executor Mode: {}".format(executor_mode))
         self._context["is_infer"] = self.is_infer
 
     def legality_check(self):
@@ -224,7 +229,7 @@
         Return:
             None, just sleep in base
         """
-        print('unknow context_status:%s, do nothing' % context['status'])
+        logger.info('unknown context_status:%s, do nothing' % context['status'])
         time.sleep(60)
 
     def handle_processor_exception(self, context, exception):
@@ -233,9 +238,10 @@
         Return:
             bool exit_app or not
         """
-        print("\n--------------------------------\nPaddleRec Error Message "
-              "Summary:\n--------------------------------\n")
-        print(
-            'Exit PaddleRec. catch exception in precoss status: [%s], except: %s'
+        logger.info(
+            "\n--------------------------------\nPaddleRec Error Message "
+            "Summary:\n--------------------------------\n")
+        logger.info(
+            'Exit PaddleRec. catch exception in process status: [%s], except: %s'
             % (context['status'], str(exception)))
         return True
@@ -258,7 +264,7 @@
                 break
             except Exception as err:
                 traceback.print_exc()
-                print('Catch Exception:%s' % str(err))
+                logger.info('Catch Exception:%s' % str(err))
                 sys.stdout.flush()
                 self.handle_processor_exception(self._context, err)
                 sys.exit(type(err).__name__)
diff --git a/core/trainers/finetuning_trainer.py b/core/trainers/finetuning_trainer.py
index 4525a18867ff232121256c876c185c502427c130..e4434a47d498e817dd00dd7dada84165a1550cda 100644
--- a/core/trainers/finetuning_trainer.py
+++ b/core/trainers/finetuning_trainer.py
@@ -34,7 +34,7 @@ class FineTuningTrainer(Trainer):
         self.runner_env_name = "runner." + self._context["runner_name"]
 
     def processor_register(self):
-        print("processor_register begin")
+        logger.info("processor_register begin")
         self.regist_context_processor('uninit', self.instance)
         self.regist_context_processor('network_pass', self.network)
         self.regist_context_processor('startup_pass', self.startup)
diff --git a/core/trainers/framework/dataset.py b/core/trainers/framework/dataset.py
index 239b568be34793c5ddb0830e9cca06951da143f4..4bdae7b13899dacba07479486b9e3dbf8eedcf31 100644
--- a/core/trainers/framework/dataset.py
+++ b/core/trainers/framework/dataset.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import os
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -25,6 +26,10 @@ from paddlerec.core.utils.util import split_files, check_filelist
 
 __all__ = ["DatasetBase", "DataLoader", "QueueDataset"]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class DatasetBase(object):
     """R
@@ -83,7 +88,8 @@ class QueueDataset(DatasetBase):
         name = "dataset." + dataset_name + "."
         type_name = envs.get_global_env(name + "type")
         if envs.get_platform() != "LINUX":
-            print("platform ", envs.get_platform(), "Reader To Dataloader")
+            logger.info("platform {} Reader To Dataloader".format(
+                envs.get_platform()))
             type_name = "DataLoader"
 
         if type_name == "DataLoader":
@@ -126,7 +132,7 @@ class QueueDataset(DatasetBase):
                 data_file_list=[],
                 train_data_path=train_data_path)
             if (hidden_file_list is not None):
-                print(
+                logger.info(
                     "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
                     format(hidden_file_list))
@@ -143,7 +149,7 @@ class QueueDataset(DatasetBase):
         if need_split_files:
             file_list = split_files(file_list, context["fleet"].worker_index(),
                                     context["fleet"].worker_num())
-        print("File_list: {}".format(file_list))
+        logger.info("File_list: {}".format(file_list))
 
         dataset.set_filelist(file_list)
         for model_dict in context["phases"]:
diff --git a/core/trainers/framework/instance.py b/core/trainers/framework/instance.py
index 32b39eb7ad3063796cc9c9f6d22c904cd614d694..00db0689912c7e96296fa2d3d94d575f3add46d9 100644
--- a/core/trainers/framework/instance.py
+++ b/core/trainers/framework/instance.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -24,6 +25,10 @@ __all__ = [
     "CollectiveInstance"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class InstanceBase(object):
     """R
@@ -38,7 +43,7 @@ class InstanceBase(object):
 
 class SingleInstance(InstanceBase):
     def __init__(self, context):
-        print("Running SingleInstance.")
+        logger.info("Running SingleInstance.")
         pass
 
     def instance(self, context):
@@ -47,7 +52,7 @@ class SingleInstance(InstanceBase):
 
 class PSInstance(InstanceBase):
     def __init__(self, context):
-        print("Running PSInstance.")
+        logger.info("Running PSInstance.")
         pass
 
     def instance(self, context):
@@ -61,7 +66,7 @@ class PSInstance(InstanceBase):
 
 class PslibInstance(InstanceBase):
     def __init__(self, context):
-        print("Running PslibInstance.")
+        logger.info("Running PslibInstance.")
         pass
 
     def instance(self, context):
@@ -73,7 +78,7 @@ class PslibInstance(InstanceBase):
 
 class CollectiveInstance(InstanceBase):
     def __init__(self, context):
-        print("Running CollectiveInstance.")
+        logger.info("Running CollectiveInstance.")
         pass
 
     def instance(self, context):
diff --git a/core/trainers/framework/network.py b/core/trainers/framework/network.py
index 7d7a8273b6a402bd163f653a7beb3900de899ae3..9a5eae838a651640639dfea2f1ca0d8695f979d2 100644
--- a/core/trainers/framework/network.py
+++ b/core/trainers/framework/network.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import os
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -26,6 +27,10 @@ __all__ = [
     "CollectiveNetwork", "FineTuningNetwork"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class NetworkBase(object):
     """R
@@ -43,7 +48,7 @@ class SingleNetwork(NetworkBase):
     """
 
     def __init__(self, context):
-        print("Running SingleNetwork.")
+        logger.info("Running SingleNetwork.")
         pass
 
     def build_network(self, context):
@@ -114,7 +119,7 @@ class FineTuningNetwork(NetworkBase):
     """
 
    def __init__(self, context):
-        print("Running FineTuningNetwork.")
+        logger.info("Running FineTuningNetwork.")
 
     def build_network(self, context):
         context["model"] = {}
@@ -193,7 +198,7 @@ class FineTuningNetwork(NetworkBase):
 
 class PSNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running PSNetwork.")
+        logger.info("Running PSNetwork.")
         pass
 
     def build_network(self, context):
@@ -285,7 +290,7 @@ class PSNetwork(NetworkBase):
 
 class PslibNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running PslibNetwork.")
+        logger.info("Running PslibNetwork.")
         pass
 
     def build_network(self, context):
@@ -357,7 +362,7 @@
 
 class CollectiveNetwork(NetworkBase):
     def __init__(self, context):
-        print("Running CollectiveNetwork.")
+        logger.info("Running CollectiveNetwork.")
         pass
 
     def build_network(self, context):
diff --git a/core/trainers/framework/runner.py b/core/trainers/framework/runner.py
index 79d7be66e58d0c4244980cf4bf871f42984d186e..854528d55be9cd885744c62c774ec4bb7ffba82d 100644
--- a/core/trainers/framework/runner.py
+++ b/core/trainers/framework/runner.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import os
 import time
 import warnings
+import logging
 
 import numpy as np
 import paddle.fluid as fluid
@@ -27,6 +28,10 @@ __all__ = [
     "RunnerBase", "SingleRunner", "PSRunner", "CollectiveRunner", "PslibRunner"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 def as_numpy(tensor):
     """
@@ -169,7 +174,7 @@ class RunnerBase(object):
                         metrics.extend(metrics_rets)
 
                 if batch_id % fetch_period == 0 and batch_id != 0:
-                    print(metrics_format.format(*metrics))
+                    logger.info(metrics_format.format(*metrics))
                 batch_id += 1
         except fluid.core.EOFException:
             reader.reset()
@@ -365,7 +370,7 @@ class SingleRunner(RunnerBase):
     """
 
     def __init__(self, context):
-        print("Running SingleRunner.")
+        logger.info("Running SingleRunner.")
         pass
 
     def run(self, context):
@@ -381,7 +386,7 @@ class SingleRunner(RunnerBase):
                 result = self._run(context, model_dict)
                 end_time = time.time()
                 seconds = end_time - begin_time
-                message = "epoch {} done, use time: {}".format(epoch, seconds)
+                message = "epoch {} done, use time: {}s".format(epoch, seconds)
                 metrics_result = []
                 for key in metrics:
                     if isinstance(metrics[key], Metric):
@@ -394,7 +399,7 @@ class SingleRunner(RunnerBase):
                         metrics_result.append(_str)
                 if len(metrics_result) > 0:
                     message += ", global metrics: " + ", ".join(metrics_result)
-                print(message)
+                logger.info(message)
 
             with fluid.scope_guard(context["model"][model_dict["name"]][
                     "scope"]):
@@ -409,7 +414,7 @@
 
 class PSRunner(RunnerBase):
     def __init__(self, context):
-        print("Running PSRunner.")
+        logger.info("Running PSRunner.")
         pass
 
     def run(self, context):
@@ -424,7 +429,7 @@ class PSRunner(RunnerBase):
             result = self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            message = "epoch {} done, use time: {}".format(epoch, seconds)
+            message = "epoch {} done, use time: {}s".format(epoch, seconds)
 
             # TODO, wait for PaddleCloudRoleMaker supports gloo
             from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
@@ -442,7 +447,7 @@ class PSRunner(RunnerBase):
                     metrics_result.append(_str)
             if len(metrics_result) > 0:
                 message += ", global metrics: " + ", ".join(metrics_result)
-            print(message)
+            logger.info(message)
             with fluid.scope_guard(context["model"][model_dict["name"]][
                     "scope"]):
                 train_prog = context["model"][model_dict["name"]][
@@ -456,7 +461,7 @@
 
 class CollectiveRunner(RunnerBase):
     def __init__(self, context):
-        print("Running CollectiveRunner.")
+        logger.info("Running CollectiveRunner.")
         pass
 
     def run(self, context):
@@ -469,7 +474,7 @@ class CollectiveRunner(RunnerBase):
             self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            print("epoch {} done, use time: {}".format(epoch, seconds))
+            logger.info("epoch {} done, use time: {}s".format(epoch, seconds))
             with fluid.scope_guard(context["model"][model_dict["name"]][
                     "scope"]):
                 train_prog = context["model"][model_dict["name"]][
@@ -483,7 +488,7 @@
 
 class PslibRunner(RunnerBase):
     def __init__(self, context):
-        print("Running PSRunner.")
+        logger.info("Running PslibRunner.")
         pass
 
     def run(self, context):
@@ -497,7 +502,7 @@ class PslibRunner(RunnerBase):
             self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            print("epoch {} done, use time: {}".format(epoch, seconds))
+            logger.info("epoch {} done, use time: {}s".format(epoch, seconds))
         """
         # online Training Can do more, As shown below:
@@ -527,7 +532,7 @@ class PslibRunner(RunnerBase):
             self._run(context, model_dict)
             end_time = time.time()
             seconds = end_time - begin_time
-            print("epoch {} done, use time: {}".format(epoch, seconds))
+            logger.info("epoch {} done, use time: {}s".format(epoch, seconds))
             with fluid.scope_guard(context["model"][model_dict["name"]]
                                    ["scope"]):
                 train_prog = context["model"][model_dict["name"]][
@@ -543,7 +548,7 @@
 
 class SingleInferRunner(RunnerBase):
     def __init__(self, context):
-        print("Running SingleInferRunner.")
+        logger.info("Running SingleInferRunner.")
         pass
 
     def run(self, context):
@@ -559,7 +564,7 @@ class SingleInferRunner(RunnerBase):
                 result = self._run(context, model_dict)
                 end_time = time.time()
                 seconds = end_time - begin_time
-                message = "Infer {} of epoch {} done, use time: {}".format(
+                message = "Infer {} of epoch {} done, use time: {}s".format(
                     model_dict["name"], epoch_name, seconds)
                 metrics_result = []
                 for key in metrics:
@@ -573,14 +578,14 @@ class SingleInferRunner(RunnerBase):
                         metrics_result.append(_str)
                 if len(metrics_result) > 0:
                     message += ", global metrics: " + ", ".join(metrics_result)
-                print(message)
+                logger.info(message)
 
         context["status"] = "terminal_pass"
 
     def _load(self, context, model_dict, model_path):
         if model_path is None or model_path == "":
             return
-        print("load persistables from", model_path)
+        logger.info("load persistables from {}".format(model_path))
 
         with fluid.scope_guard(context["model"][model_dict["name"]]["scope"]):
             train_prog = context["model"][model_dict["name"]]["main_program"]
diff --git a/core/trainers/framework/startup.py b/core/trainers/framework/startup.py
index a38dbd5bb3c2cea268fc5551e10e488f2fbdabd6..30e298ca2945df2a38b24e7e6db3b6cdb73f35a4 100644
--- a/core/trainers/framework/startup.py
+++ b/core/trainers/framework/startup.py
@@ -15,6 +15,7 @@
 from __future__ import print_function
 
 import warnings
+import logging
 
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -25,6 +26,10 @@ __all__ = [
     "FineTuningStartup"
 ]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class StartupBase(object):
     """R
@@ -41,10 +46,10 @@ class StartupBase(object):
             "runner." + context["runner_name"] + ".init_model_path", None)
         if dirname is None or dirname == "":
             return
-        print("going to load ", dirname)
+        logger.info("going to load {}".format(dirname))
         fluid.io.load_persistables(
             context["exe"], dirname, main_program=main_program)
-        print("load from {} success".format(dirname))
+        logger.info("load from {} success".format(dirname))
 
 
 class SingleStartup(StartupBase):
@@ -52,7 +57,7 @@ class SingleStartup(StartupBase):
     """
 
     def __init__(self, context):
-        print("Running SingleStartup.")
+        logger.info("Running SingleStartup.")
         pass
 
     def startup(self, context):
@@ -79,7 +84,7 @@
-        self.self.op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName(
+        self.op_role_var_attr_name = core.op_proto_and_checker_maker.kOpRoleVarAttrName(
         )
 
-        print("Running SingleStartup.")
+        logger.info("Running FineTuningStartup.")
 
     def _is_opt_role_op(self, op):
         # NOTE: depend on oprole to find out whether this op is for
@@ -155,7 +160,7 @@ class FineTuningStartup(StartupBase):
             "runner." + context["runner_name"] + ".init_model_path", None)
         if dirname is None or dirname == "":
             return
-        print("going to load ", dirname)
+        logger.info("going to load {}".format(dirname))
 
         params_grads = self._get_params_grads(main_program)
         update_params = [p for p, _ in params_grads]
@@ -169,7 +174,7 @@ class FineTuningStartup(StartupBase):
 
         fluid.io.load_vars(context["exe"], dirname, main_program,
                            need_load_vars)
-        print("load from {} success".format(dirname))
+        logger.info("load from {} success".format(dirname))
 
     def startup(self, context):
         for model_dict in context["phases"]:
@@ -187,7 +192,7 @@ class FineTuningStartup(StartupBase):
 
 class PSStartup(StartupBase):
     def __init__(self, context):
-        print("Running PSStartup.")
+        logger.info("Running PSStartup.")
         pass
 
     def startup(self, context):
@@ -204,7 +209,7 @@ class PSStartup(StartupBase):
 
 class CollectiveStartup(StartupBase):
     def __init__(self, context):
-        print("Running CollectiveStartup.")
+        logger.info("Running CollectiveStartup.")
         pass
 
     def startup(self, context):
@@ -222,7 +227,7 @@ class CollectiveStartup(StartupBase):
 
 class SingleInferStartup(StartupBase):
     def __init__(self, context):
-        print("Running SingleInferStartup.")
+        logger.info("Running SingleInferStartup.")
         pass
 
     def startup(self, context):
diff --git a/core/trainers/framework/terminal.py b/core/trainers/framework/terminal.py
index 96e762c3c920f276feb2046a68d3eab49e65ccc6..03dc63dc11dffa81d6d31323ab0b508b41867e95 100644
--- a/core/trainers/framework/terminal.py
+++ b/core/trainers/framework/terminal.py
@@ -15,12 +15,17 @@
 from __future__ import print_function
 
 import warnings
+import logging
 
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
 
 __all__ = ["TerminalBase", "PSTerminalBase"]
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class TerminalBase(object):
     """R
@@ -30,7 +35,7 @@ class TerminalBase(object):
         pass
 
     def terminal(self, context):
-        print("PaddleRec Finish")
+        logger.info("PaddleRec Finish")
 
 
 class PSTerminal(TerminalBase):
@@ -42,4 +47,4 @@ class PSTerminal(TerminalBase):
 
     def terminal(self, context):
         context["fleet"].stop_worker()
-        print("PaddleRec Finish")
+        logger.info("PaddleRec Finish")
diff --git a/core/trainers/general_trainer.py b/core/trainers/general_trainer.py
index 20a7d54c7251a27d5a33a8e7d7904ec021a95500..c45a3a212fad05e08a50127437fa1f421ba50ae6 100644
--- a/core/trainers/general_trainer.py
+++ b/core/trainers/general_trainer.py
@@ -17,10 +17,15 @@ General Trainer, applicable to many situations: Single/Cluster/Local_Cluster + P
 from __future__ import print_function
 
 import os
+import logging
 
 from paddlerec.core.utils import envs
 from paddlerec.core.trainer import Trainer, EngineMode, FleetMode
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class GeneralTrainer(Trainer):
     """
@@ -34,7 +39,7 @@ class GeneralTrainer(Trainer):
         self.runner_env_name = "runner." + self._context["runner_name"]
 
     def processor_register(self):
-        print("processor_register begin")
+        logger.info("processor_register begin")
         self.regist_context_processor('uninit', self.instance)
         self.regist_context_processor('network_pass', self.network)
         self.regist_context_processor('startup_pass', self.startup)
diff --git a/core/utils/dataloader_instance.py b/core/utils/dataloader_instance.py
index 03e6f0a67884917e9af2d02d13eb86576620ceef..40825dd670c2811f97dd848375e9ae97b97b5aac 100755
--- a/core/utils/dataloader_instance.py
+++ b/core/utils/dataloader_instance.py
@@ -14,6 +14,7 @@
 from __future__ import print_function
 
 import os
+import logging
 from paddlerec.core.utils.envs import lazy_instance_by_fliename
 from paddlerec.core.utils.envs import get_global_env
 from paddlerec.core.utils.envs import get_runtime_environ
@@ -21,6 +22,10 @@ from paddlerec.core.reader import SlotReader
 from paddlerec.core.trainer import EngineMode
 from paddlerec.core.utils.util import split_files, check_filelist
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 def dataloader_by_name(readerclass,
                        dataset_name,
@@ -41,7 +46,7 @@ def dataloader_by_name(readerclass,
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
 
@@ -55,7 +60,7 @@ def dataloader_by_name(readerclass,
             "cluster_type"] == "K8S":  # for k8s mount mode, split files for every node
         need_split_files = True
-    print("need_split_files: {}".format(need_split_files))
+    logger.info("need_split_files: {}".format(need_split_files))
     if need_split_files:
         files = split_files(files, context["fleet"].worker_index(),
                             context["fleet"].worker_num())
@@ -103,7 +108,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
 
@@ -173,7 +178,7 @@ def slotdataloader(readerclass, train, yaml_file, context):
     hidden_file_list, files = check_filelist(
         hidden_file_list=[], data_file_list=[], train_data_path=data_path)
     if (hidden_file_list is not None):
-        print(
+        logger.info(
             "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
             format(hidden_file_list))
diff --git a/core/utils/dataset_holder.py b/core/utils/dataset_holder.py
index 70355a5489eaf5da79240d4828e8c3d7e25ce338..e89c1100119ccb9985736d73301ea14b707b9355 100755
--- a/core/utils/dataset_holder.py
+++ b/core/utils/dataset_holder.py
@@ -15,12 +15,17 @@
 import abc
 import datetime
 import time
+import logging
 
 import paddle.fluid as fluid
 
 from paddlerec.core.utils import fs as fs
 from paddlerec.core.utils import util as util
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 class DatasetHolder(object):
     """
@@ -187,7 +192,7 @@ class TimeSplitDatasetHolder(DatasetHolder):
         windown_min = params['time_window_min']
         if begin_time not in self._datasets:
             while self.check_ready(begin_time, windown_min) == False:
-                print("dataset not ready, time:" + begin_time)
+                logger.info("dataset not ready, time:" + begin_time)
                 time.sleep(30)
             file_list = self.get_file_list(begin_time, windown_min,
                                            params['node_num'],
diff --git a/core/utils/envs.py b/core/utils/envs.py
index ddcc9a94b3adc47cda2023c4d9e196b9fb16faeb..eac73674f628c018256f88dd958d1a1baf1bde2b 100755
--- a/core/utils/envs.py
+++ b/core/utils/envs.py
@@ -21,6 +21,12 @@ import sys
 import six
 import traceback
 import six
+import time
+import logging
+
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
 
 global_envs = {}
 global_envs_flatten = {}
@@ -104,7 +110,7 @@ def set_global_envs(envs):
                     global_envs[name] = "DataLoader"
 
     if get_platform() == "LINUX" and six.PY3:
-        print("QueueDataset can not support PY3, change to DataLoader")
+        logger.info("QueueDataset can not support PY3, change to DataLoader")
         for dataset in envs["dataset"]:
             name = ".".join(["dataset", dataset["name"], "type"])
             global_envs[name] = "DataLoader"
@@ -207,7 +213,7 @@ def lazy_instance_by_package(package, class_name):
         return instance
     except Exception as err:
         traceback.print_exc()
-        print('Catch Exception:%s' % str(err))
+        logger.info('Catch Exception:%s' % str(err))
         return None
 
 
@@ -223,7 +229,7 @@ def lazy_instance_by_fliename(abs, class_name):
         return instance
     except Exception as err:
         traceback.print_exc()
-        print('Catch Exception:%s' % str(err))
+        logger.info('Catch Exception:%s' % str(err))
         return None
 
 
diff --git a/core/utils/fs.py b/core/utils/fs.py
index fab84496c5761e4214f4e5bb3666960408abf68c..3e36532a7733047a089d37766c2f7ca89c268ac7 100755
--- a/core/utils/fs.py
+++ b/core/utils/fs.py
@@ -13,9 +13,15 @@
 # limitations under the License.
 
 import os
+import time
+import logging
 
 from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 def is_afs_path(path):
     """is_afs_path
@@ -177,4 +183,4 @@ class FileHandler(object):
             return self._hdfs_client.upload(dest_path, org_path)
         if org_is_afs and not dest_is_afs:
             return self._hdfs_client.download(org_path, dest_path)
-        print("Not Suppor hdfs cp currently")
+        logger.info("Not support hdfs cp currently")
diff --git a/core/utils/util.py b/core/utils/util.py
index f6acfe203612326a77f41326581583278dac4183..f797e160e5cc11a3e929c0a3bb894158664292bc 100755
--- a/core/utils/util.py
+++ b/core/utils/util.py
@@ -16,9 +16,14 @@ import datetime
 import os
 import sys
 import time
+import logging
 
 import numpy as np
 from paddle import fluid
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
 
 def save_program_proto(path, program=None):
     if program is None:
@@ -146,9 +151,9 @@ def print_log(log_str, params):
     log_str = time_str + " " + log_str
     if 'master' in params and params['master']:
         if 'index' in params and params['index'] == 0:
-            print(log_str)
+            logger.info(log_str)
     else:
-        print(log_str)
+        logger.info(log_str)
     sys.stdout.flush()
     if 'stdout' in params:
         params['stdout'] += log_str + '\n'
diff --git a/core/utils/validation.py b/core/utils/validation.py
index 7448daf8c763d9d6311356e369ddf4855d7eac97..c393d901a383a36dad769a2f65ec18c6b8431b6f 100755
--- a/core/utils/validation.py
+++ b/core/utils/validation.py
@@ -12,8 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import time
+import logging
 from paddlerec.core.utils import envs
 
+logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
 
 class ValueFormat:
     def __init__(self, value_type, value, value_handler, required=False):
@@ -41,64 +47,67 @@ class ValueFormat:
 
     def is_type_valid(self, name, value):
         if self.value_type == "int":
             if not isinstance(value, int):
-                print("\nattr {} should be int, but {} now\n".format(
+                logger.info("\nattr {} should be int, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "str":
             if not isinstance(value, str):
-                print("\nattr {} should be str, but {} now\n".format(
+                logger.info("\nattr {} should be str, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "strs":
             if not isinstance(value, list):
-                print("\nattr {} should be list(str), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(str), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, str):
-                    print("\nattr {} should be list(str), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(str), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
 
         elif self.value_type == "dict":
             if not isinstance(value, dict):
-                print("\nattr {} should be str, but {} now\n".format(
+                logger.info("\nattr {} should be dict, but {} now\n".format(
                     name, type(value)))
                 return False
             return True
 
         elif self.value_type == "dicts":
             if not isinstance(value, list):
-                print("\nattr {} should be list(dist), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(dict), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, dict):
-                    print("\nattr {} should be list(dist), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(dict), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
 
         elif self.value_type == "ints":
             if not isinstance(value, list):
-                print("\nattr {} should be list(int), but {} now\n".format(
-                    name, type(value)))
+                logger.info("\nattr {} should be list(int), but {} now\n".
+                            format(name, type(value)))
                 return False
             for v in value:
                 if not isinstance(v, int):
-                    print("\nattr {} should be list(int), but list({}) now\n".
-                          format(name, type(v)))
+                    logger.info(
+                        "\nattr {} should be list(int), but list({}) now\n".
+                        format(name, type(v)))
                     return False
             return True
         else:
-            print("\nattr {}'s type is {}, can not be supported now\n".format(
-                name, type(value)))
+            logger.info("\nattr {}'s type is {}, can not be supported now\n".
+                        format(name, type(value)))
             return False
 
     def is_value_valid(self, name, value):
@@ -108,7 +117,7 @@ class ValueFormat:
 
 def in_value_handler(name, value, values):
     if value not in values:
-        print("\nattr {}'s value is {}, but {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -116,7 +125,7 @@ def in_value_handler(name, value, values):
 
 def eq_value_handler(name, value, values):
     if value != values:
-        print("\nattr {}'s value is {}, but == {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but == {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -124,7 +133,7 @@ def eq_value_handler(name, value, values):
 
 def ge_value_handler(name, value, values):
     if value < values:
-        print("\nattr {}'s value is {}, but >= {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but >= {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -132,7 +141,7 @@ def ge_value_handler(name, value, values):
 
 def le_value_handler(name, value, values):
     if value > values:
-        print("\nattr {}'s value is {}, but <= {} is expected\n".format(
+        logger.info("\nattr {}'s value is {}, but <= {} is expected\n".format(
             name, value, values))
         return False
     return True
@@ -160,8 +169,8 @@ def yaml_validation(config):
 
     for required in require_checkers:
         if required not in _config.keys():
-            print("\ncan not find {} in yaml, which is required\n".format(
-                required))
+            logger.info("\ncan not find {} in yaml, which is required\n".
+                        format(required))
             return False
 
     for name, value in _config.items():
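
Note on the logging setup used throughout this patch: every touched module repeats the same three import-time lines (`logging.basicConfig(...)`, `logging.getLogger(...)`, `logger.setLevel(logging.INFO)`), some against the root logger and some against a logger named "fluid". Because `logging.basicConfig()` only has an effect the first time it runs in a process, all copies after the first are no-ops, and configuring loggers as an import side effect makes behavior depend on import order. A follow-up could hoist the setup into one shared helper; the sketch below is a suggestion only, and the module path `core/utils/log_util.py` and the name `get_logger` are hypothetical, not part of this patch.

    # core/utils/log_util.py -- hypothetical shared helper, not part of this patch
    import logging

    _FORMAT = "%(asctime)s - %(levelname)s - %(message)s"


    def get_logger(name=None, level=logging.INFO):
        """Return a logger using the shared PaddleRec log format.

        logging.basicConfig() is a no-op once the root logger has a
        handler, so calling this from many modules configures the
        process exactly once.
        """
        logging.basicConfig(format=_FORMAT)
        logger = logging.getLogger(name)
        logger.setLevel(level)
        return logger

Each module would then need only `logger = get_logger(__name__)`, which also avoids many modules re-targeting the same shared root and "fluid" loggers at import time.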