diff --git a/core/utils/dataset_holder.py b/core/utils/dataset_holder.py index 748ce4473ca106abc447497ba2d78b9d96cc22be..70355a5489eaf5da79240d4828e8c3d7e25ce338 100755 --- a/core/utils/dataset_holder.py +++ b/core/utils/dataset_holder.py @@ -66,6 +66,7 @@ class TimeSplitDatasetHolder(DatasetHolder): """ Dataset with time split dir. root_path/$DAY/$HOUR """ + def __init__(self, config): """ init data root_path, time_split_interval, data_path_format @@ -112,8 +113,8 @@ class TimeSplitDatasetHolder(DatasetHolder): True/False """ is_ready = True - data_time, windows_mins = self._format_data_time( - daytime_str, time_window_mins) + data_time, windows_mins = self._format_data_time(daytime_str, + time_window_mins) while time_window_mins > 0: file_path = self._path_generator.generate_path( 'donefile_path', {'time_format': data_time}) @@ -141,19 +142,19 @@ class TimeSplitDatasetHolder(DatasetHolder): list, data_shard[node_idx] """ data_file_list = [] - data_time, windows_mins = self._format_data_time( - daytime_str, time_window_mins) + data_time, windows_mins = self._format_data_time(daytime_str, + time_window_mins) while time_window_mins > 0: file_path = self._path_generator.generate_path( 'data_path', {'time_format': data_time}) sub_file_list = self._data_file_handler.ls(file_path) for sub_file in sub_file_list: sub_file_name = self._data_file_handler.get_file_name(sub_file) - if not sub_file_name.startswith( - self._config['filename_prefix']): + if not sub_file_name.startswith(self._config[ + 'filename_prefix']): continue - postfix = sub_file_name.split( - self._config['filename_prefix'])[1] + postfix = sub_file_name.split(self._config['filename_prefix'])[ + 1] if postfix.isdigit(): if int(postfix) % node_num == node_idx: data_file_list.append(sub_file) @@ -167,8 +168,8 @@ class TimeSplitDatasetHolder(DatasetHolder): def _alloc_dataset(self, file_list): """ """ - dataset = fluid.DatasetFactory().create_dataset( - self._config['dataset_type']) + dataset = fluid.DatasetFactory().create_dataset(self._config[ + 'dataset_type']) dataset.set_batch_size(self._config['batch_size']) dataset.set_thread(self._config['load_thread']) dataset.set_hdfs_config(self._config['fs_name'], @@ -207,8 +208,8 @@ class TimeSplitDatasetHolder(DatasetHolder): params['node_num'], params['node_idx']) self._datasets[begin_time] = self._alloc_dataset(file_list) - self._datasets[begin_time].preload_into_memory( - self._config['preload_thread']) + self._datasets[begin_time].preload_into_memory(self._config[ + 'preload_thread']) return True return False diff --git a/core/utils/envs.py b/core/utils/envs.py index 09691259b837891e7fa458f2491c1eebf866e575..15036f67a7f7f7a60a872baf0dd2ba332b926cd8 100755 --- a/core/utils/envs.py +++ b/core/utils/envs.py @@ -70,8 +70,8 @@ def set_global_envs(envs): nests = copy.deepcopy(namespace_nests) nests.append(k) fatten_env_namespace(nests, v) - elif (k == "dataset" or k == "phase" - or k == "runner") and isinstance(v, list): + elif (k == "dataset" or k == "phase" or + k == "runner") and isinstance(v, list): for i in v: if i.get("name") is None: raise ValueError("name must be in dataset list ", v) @@ -169,8 +169,8 @@ def pretty_print_envs(envs, header=None): def lazy_instance_by_package(package, class_name): try: - model_package = __import__(package, globals(), locals(), - package.split(".")) + model_package = __import__(package, + globals(), locals(), package.split(".")) instance = getattr(model_package, class_name) return instance except Exception, err: @@ -185,8 +185,8 @@ def lazy_instance_by_fliename(abs, class_name): sys.path.append(dirname) package = os.path.splitext(os.path.basename(abs))[0] - model_package = __import__(package, globals(), locals(), - package.split(".")) + model_package = __import__(package, + globals(), locals(), package.split(".")) instance = getattr(model_package, class_name) return instance except Exception, err: diff --git a/core/utils/util.py b/core/utils/util.py index 5b0ea22ff6003283ed1bd119cc6867c23166baa1..381d35cade663c89b93608eadc95601b234cdffe 100755 --- a/core/utils/util.py +++ b/core/utils/util.py @@ -175,6 +175,7 @@ class CostPrinter(object): """ For count cost time && print cost log """ + def __init__(self, callback, callback_params): """R """ @@ -210,6 +211,7 @@ class PathGenerator(object): """ generate path with template & runtime variables """ + def __init__(self, config): """R """ @@ -230,8 +232,8 @@ class PathGenerator(object): """ if template_name in self._templates: if 'time_format' in param: - str = param['time_format'].strftime( - self._templates[template_name]) + str = param['time_format'].strftime(self._templates[ + template_name]) return str.format(**param) return self._templates[template_name].format(**param) else: