From 45dd3491db4f9fcd9274418fbb0ad762a9e05262 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Thu, 20 Dec 2018 14:11:34 +0800 Subject: [PATCH] lookup table utils fix (#14730) * fix utils config, test=develop * reweite tools in lookup table utils, test=develop * merge develop, test=develop * add copy in setup, test=develop * update api spec, test=develop * add doc, test=develop * prettifying doc, test=develop * Update API.spec update api.spec, test=develop * Update lookup_table_utils.py test=develop * Update lookup_table_utils.py test=develop * merge develop, test=develop * merge develop, test=develop * init fix, test=develop * fix in downloads, test=develop * fix in setup.in, test=develop --- paddle/fluid/API.spec | 16 + python/paddle/fluid/contrib/__init__.py | 3 + python/paddle/fluid/contrib/utils/__init__.py | 9 +- .../paddle/fluid/contrib/utils/hdfs_utils.py | 301 ++++++++++-------- .../fluid/contrib/utils/lookup_table_utils.py | 183 +++++++---- python/setup.py.in | 2 +- 6 files changed, 313 insertions(+), 201 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 5e9901bb87c..170e0f83971 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -350,6 +350,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False)) +paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True)) +paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 
'overwrite'], varargs=None, keywords=None, defaults=(False,)) +paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5)) +paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,)) +paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True)) paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py index 3bf2fe5db0c..ece97b661fd 100644 --- a/python/paddle/fluid/contrib/__init__.py +++ b/python/paddle/fluid/contrib/__init__.py @@ -22,9 +22,12 @@ from . import op_frequence from .op_frequence import * from . import quantize from .quantize import * +from . import utils +from .utils import * __all__ = [] __all__ += decoder.__all__ __all__ += memory_usage_calc.__all__ __all__ += op_frequence.__all__ __all__ += quantize.__all__ +__all__ += utils.__all__ diff --git a/python/paddle/fluid/contrib/utils/__init__.py b/python/paddle/fluid/contrib/utils/__init__.py index 20b2cc381aa..1c1c2fb2270 100644 --- a/python/paddle/fluid/contrib/utils/__init__.py +++ b/python/paddle/fluid/contrib/utils/__init__.py @@ -13,10 +13,11 @@ # limitations under the License. from __future__ import print_function -#from . import lookup_table_utils -#from .lookup_table_utils import * +from . import lookup_table_utils +from .lookup_table_utils import * from . 
import hdfs_utils from .hdfs_utils import * -#__all__ = lookup_table_utils.__all__ -__all__ = hdfs_utils.__all__ +__all__ = [] +__all__ += lookup_table_utils.__all__ +__all__ += hdfs_utils.__all__ diff --git a/python/paddle/fluid/contrib/utils/hdfs_utils.py b/python/paddle/fluid/contrib/utils/hdfs_utils.py index baea57ccce0..35ddf97ff23 100644 --- a/python/paddle/fluid/contrib/utils/hdfs_utils.py +++ b/python/paddle/fluid/contrib/utils/hdfs_utils.py @@ -14,6 +14,7 @@ """HDFS Utils""" import os +import sys import subprocess import multiprocessing from datetime import datetime @@ -24,7 +25,7 @@ import errno import logging -__all__ = ["HDFSClient", "multi_download"] +__all__ = ["HDFSClient", "multi_download", "multi_upload"] logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') _logger = logging.getLogger("hdfs_utils") @@ -93,13 +94,15 @@ class HDFSClient(object): def upload(self, hdfs_path, local_path, overwrite=False, retry_times=5): """ - upload the local file to hdfs - Args: - hdfs_path: hdfs path, target path - local_path: local file path, source path - overwrite: will overwrite the original file - retry_times: max times retry to upload - Returns: + upload the local file to hdfs + + Args: + hdfs_path(str): the hdfs file path + local_path(str): the local file path + overwrite(bool|None): will overwrite the file on HDFS or not + retry_times(int|5): retry times + + Returns: True or False """ assert hdfs_path is not None @@ -109,7 +112,7 @@ class HDFSClient(object): _logger.warn( "The Local path: {} is dir and I will support it later, return". format(local_path)) - return + return False base = os.path.basename(local_path) if not self.is_exist(hdfs_path): @@ -141,14 +144,16 @@ class HDFSClient(object): def download(self, hdfs_path, local_path, overwrite=False, unzip=False): """ - download from hdfs - Args: - hdfs_path: hdfs path, target path - local_path: local file path, source path - overwrite: will remove original file and overwrite it. - unzip: ignore this param - Returns - True or False + download file from HDFS + + Args: + hdfs_path(str): the hdfs file path + local_path(str): the local file path + overwrite(bool|None): will overwrite the file on HDFS or not + unzip(bool|False): if the download file is compressed by zip, unzip it or not. + + Returns: + True or False """ _logger.info('Downloading %r to %r.', hdfs_path, local_path) _logger.info('Download of %s to %r complete.', hdfs_path, local_path) @@ -188,13 +193,13 @@ class HDFSClient(object): def is_exist(self, hdfs_path=None): """ - whether the remote hdfs path exists? - Args: - hdfs_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) - fs_name: The default values are the same as in the job configuration - fs_ugi: The default values are the same as in the job configuration - Returns: - True or False + whether the remote HDFS path exists + + Args: + hdfs_path(str): the hdfs file path + + Returns: + True or False """ exist_cmd = ['-test', '-e', hdfs_path] returncode, output, errors = self.__run_hdfs_cmd( @@ -211,13 +216,13 @@ class HDFSClient(object): def is_dir(self, hdfs_path=None): """ - whether the remote hdfs path exists? 
- Args: - remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp) - fs_name: The default values are the same as in the job configuration - fs_ugi: The default values are the same as in the job configuration - Returns: - True or False + whether the remote HDFS path is directory + + Args: + hdfs_path(str): the hdfs file path + + Returns: + True or False """ if not self.is_exist(hdfs_path): @@ -237,17 +242,17 @@ class HDFSClient(object): def delete(self, hdfs_path): """ - Remove a file or directory from HDFS. + Remove a file or directory from HDFS. + + whether the remote HDFS path exists Args: - param hdfs_path: HDFS path. - param recursive: Recursively delete files and directories. By default, - this method will raise an :class:`HdfsError` if trying to delete a - non-empty directory. + hdfs_path: HDFS path. + Returns: + True or False This function returns `True` if the deletion was successful and `False` if no file or directory previously existed at `hdfs_path`. - """ _logger.info('Deleting %r.', hdfs_path) @@ -273,16 +278,14 @@ class HDFSClient(object): def rename(self, hdfs_src_path, hdfs_dst_path, overwrite=False): """ - Rename a file or folder. - Args: - :param hdfs_src_path: Source path. - :param hdfs_dst_path: Destination path. If the path already exists and is - a directory, the source will be moved into it. If the path exists and is - a file, or if a parent destination directory is missing, this method will - raise an :class:`HdfsError`. + Move a file or folder on HDFS. + + Args: + hdfs_path(str): HDFS path. + overwrite(bool|False): If the path already exists and overwrite is False, will return False. + Returns: - This function returns `True` if the rename was successful and `False` if - rename was faild. + True or False """ assert hdfs_src_path is not None assert hdfs_dst_path is not None @@ -320,17 +323,20 @@ class HDFSClient(object): raise def makedirs(self, hdfs_path): - """Create a remote directory, recursively if necessary. + """ + Create a remote directory, recursively if necessary. + Args: - :param hdfs_path: Remote path. Intermediate directories will be created - appropriately. + hdfs_path(str): Remote path. Intermediate directories will be created appropriately. + Returns: - True if make a directories was successful, False when make a directiries was failed. + True or False """ _logger.info('Creating directories to %r.', hdfs_path) assert hdfs_path is not None if self.is_exist(hdfs_path): + _logger.error("HDFS path is exist: {}".format(hdfs_path)) return mkdirs_commands = ['-mkdir', hdfs_path] @@ -346,11 +352,13 @@ class HDFSClient(object): def ls(self, hdfs_path): """ - ls a hdfs_path. - Args: - :param hdfs_path: hdfs_path will be ls. + ls directory contents about HDFS hdfs_path + + Args: + hdfs_path(str): Remote HDFS path will be ls. + Returns: - This function returns a `list` that contaion all files in the hdfs_path. + List: a contents list about hdfs_path. """ assert hdfs_path is not None @@ -378,11 +386,15 @@ class HDFSClient(object): def lsr(self, hdfs_path, only_file=True, sort=True): """ - ls a hdfs_path sort by time. - Args: - :param hdfs_path: hdfs_path will be ls. + list directory contents about HDFS hdfs_path recursively + + Args: + hdfs_path(str): Remote HDFS path. + only_file(bool|True): will discard folders. + sort(bool|True): will be sorted by create time. + Returns: - This function returns a `list` that contaion all files sorted by time in the hdfs_path. + List: a contents list about hdfs_path. 
""" def sort_by_time(v1, v2): @@ -422,21 +434,106 @@ class HDFSClient(object): return ret_lines +def multi_download(client, + hdfs_path, + local_path, + trainer_id, + trainers, + multi_processes=5): + """ + Download files from HDFS using multi process. + + Args: + client(HDFSClient): instance of HDFSClient + hdfs_path(str): path on hdfs + local_path(str): path on local + trainer_id(int): current trainer id + trainers(int): all trainers number + multi_processes(int|5): the download data process at the same time, default=5 + + Returns: + List: + Download files in local folder. + """ + + def __subprocess_download(datas): + for data in datas: + re_path = os.path.relpath(os.path.dirname(data), hdfs_path) + if re_path == os.curdir: + sub_local_re_path = local_path + else: + sub_local_re_path = os.path.join(local_path, re_path) + client.download(data, sub_local_re_path) + + assert isinstance(client, HDFSClient) + + client.make_local_dirs(local_path) + _logger.info("Make local dir {} successfully".format(local_path)) + + all_need_download = client.lsr(hdfs_path, sort=True) + need_download = all_need_download[trainer_id::trainers] + _logger.info("Get {} files From all {} files need to be download from {}". + format(len(need_download), len(all_need_download), hdfs_path)) + + _logger.info("Start {} multi process to download datas".format( + multi_processes)) + procs = [] + for i in range(multi_processes): + process_datas = need_download[i::multi_processes] + p = multiprocessing.Process( + target=__subprocess_download, args=(process_datas, )) + procs.append(p) + p.start() + + # complete the processes + for proc in procs: + proc.join() + + _logger.info("Finish {} multi process to download datas".format( + multi_processes)) + + local_downloads = [] + for data in need_download: + data_name = os.path.basename(data) + re_path = os.path.relpath(os.path.dirname(data), hdfs_path) + if re_path == os.curdir: + local_re_path = os.path.join(local_path, data_name) + else: + local_re_path = os.path.join(local_path, re_path, data_name) + local_downloads.append(local_re_path) + + return local_downloads + + +def getfilelist(path): + rlist = [] + for dir, folder, file in os.walk(path): + for i in file: + t = os.path.join(dir, i) + rlist.append(t) + for r in rlist: + print(r) + + def multi_upload(client, hdfs_path, local_path, multi_processes=5, - overwrite=False): + overwrite=False, + sync=True): """ - Upload file to hdfs. + Upload files to HDFS using multi process. + Args: - :param overwrite: will overwrite hdfs file or not - :param multi_processes: the upload data process at the same time, default=5 - :param client: instance of HDFSClient - :param hdfs_path: path on hdfs - :param local_path: path on local + client(HDFSClient): instance of HDFSClient + hdfs_path(str): path on hdfs + local_path(str): path on local + multi_processes(int|5): the upload data process at the same time, default=5 + overwrite(bool|False): will overwrite file on HDFS or not + sync(bool|True): upload files sync or not. + Returns: - + None """ def __subprocess_upload(datas): @@ -446,13 +543,6 @@ def multi_upload(client, client.upload(hdfs_re_path, data, overwrite, retry_times=5) def get_local_files(path): - """ - Get all local files - Args: - path: local file path - Returns: - A list that contation all files in the path. 
- """ rlist = [] if not os.path.isdir(path): @@ -488,71 +578,6 @@ def multi_upload(client, multi_processes)) -def multi_download(client, - hdfs_path, - local_path, - trainer_id, - trainers, - file_cnt, - multi_processes=5): - """ - multi_download - Args: - :param client: instance of HDFSClient - :param hdfs_path: path on hdfs - :param local_path: path on local - :param trainer_id: current trainer id - :param trainers: all trainers number - :param file_cnt: all file number - :param multi_processes: the download data process at the same time, default=5 - :return: None - Returns: - A list that be downloaded. - """ - - def __subprocess_download(datas): - for data in datas: - re_path = os.path.relpath(os.path.dirname(data), hdfs_path) - local_re_path = os.path.join(local_path, re_path) - client.download(data, local_re_path) - - assert isinstance(client, HDFSClient) - - client.make_local_dirs(local_path) - _logger.info("Make local dir {} successfully".format(local_path)) - - all_need_download = client.lsr(hdfs_path, sort=True)[:file_cnt] - need_download = all_need_download[trainer_id::trainers] - _logger.info("Get {} files From all {} files need to be download from {}". - format(len(need_download), len(all_need_download), hdfs_path)) - - _logger.info("Start {} multi process to download datas".format( - multi_processes)) - procs = [] - for i in range(multi_processes): - process_datas = need_download[i::multi_processes] - p = multiprocessing.Process( - target=__subprocess_download, args=(process_datas, )) - procs.append(p) - p.start() - - # complete the processes - for proc in procs: - proc.join() - - _logger.info("Finish {} multi process to download datas".format( - multi_processes)) - - local_downloads = [] - for data in need_download: - data_name = os.path.basename(data) - re_path = os.path.relpath(os.path.dirname(data), hdfs_path) - local_re_path = os.path.join(local_path, re_path, data_name) - local_downloads.append(local_re_path) - - return local_downloads - - if __name__ == "__main__": hadoop_home = "/home/client/hadoop-client/hadoop/" diff --git a/python/paddle/fluid/contrib/utils/lookup_table_utils.py b/python/paddle/fluid/contrib/utils/lookup_table_utils.py index cc2418238f9..20e6328d81c 100644 --- a/python/paddle/fluid/contrib/utils/lookup_table_utils.py +++ b/python/paddle/fluid/contrib/utils/lookup_table_utils.py @@ -18,14 +18,12 @@ import os import time import logging -import paddle -import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid import io from paddle.fluid import Program __all__ = [ - "load_inference_model", "load_persistable_vars", + "load_persistables_for_increment", "load_persistables_for_inference", "convert_dist_to_sparse_program" ] @@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program): return prefetch_op_tuples -def convert_dist_to_sparse_program(main_program): - if not main_program._distributed_lookup_table: +def convert_dist_to_sparse_program(program): + """ + WARNING: this function will only be used for distributed training with distributed lookup table. + when we train model with distributed lookup table but want to do the local inference, we can use + this function to convert the train program with distributed lookup table to sparse lookup table. + + :param program(Program): the program must be the trainer program, which will be get by the distribute transpiler. + :return: + program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table. 
+ """ + if not program._distributed_lookup_table: _logger.warn( "There are no distributed lookup tables need to be converted") return # create table param and grad var in pserver program - origin_emb_var = "{}.origin".format(main_program._distributed_lookup_table) - emb_var = main_program._distributed_lookup_table - main_program.global_block()._rename_var(emb_var, origin_emb_var) - origin_param_var = main_program.global_block().vars[origin_emb_var] + origin_emb_var = "{}.origin".format(program._distributed_lookup_table) + emb_var = program._distributed_lookup_table + program.global_block()._rename_var(emb_var, origin_emb_var) + origin_param_var = program.global_block().vars[origin_emb_var] - param_var = main_program.global_block().create_var( + param_var = program.global_block().create_var( name=emb_var, shape=origin_param_var.shape, dtype=origin_param_var.dtype, @@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program): persistable=True) # parameter must be selected rows param_var.desc.set_type(core.VarDesc.VarType.SELECTED_ROWS) - main_program._sync_with_cpp() + program._sync_with_cpp() - prefetch_op_tuples = __get_prefetch_op_tuples(main_program) + prefetch_op_tuples = __get_prefetch_op_tuples(program) split_ids_id = prefetch_op_tuples[0] for idx in range(split_ids_id + 2, split_ids_id - 1, -1): - main_program.global_block()._remove_op(idx) - main_program.desc.flush() + program.global_block()._remove_op(idx) + program.desc.flush() in_out_pairs = zip(prefetch_op_tuples[1], prefetch_op_tuples[2]) for in_out_pair in in_out_pairs: idx = split_ids_id - ids = main_program.global_block().vars[in_out_pair[0]] - out = main_program.global_block().vars[in_out_pair[1]] - __insert_lookup_sparse_table_op(main_program, idx, ids, param_var, out) - main_program.desc.flush() - return main_program + ids = program.global_block().vars[in_out_pair[0]] + out = program.global_block().vars[in_out_pair[1]] + __insert_lookup_sparse_table_op(program, idx, ids, param_var, out) + program.desc.flush() + return program -def load_persistable_vars(executor, dirname, program, lookup_table_var): +def _load_persistable_vars(executor, dirname, program, lookup_table_vars): def _is_checkpoint_var(exclude_fluid_vars=None): """ the checkpoint will not save or load all the variables. @@ -159,8 +166,82 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var): return is_valid - def _load_lookup_table_vars(executor, dirname, main_program, - lookup_table_vars): + io.load_vars( + executor, + dirname=dirname, + main_program=program, + predicate=_is_checkpoint_var(lookup_table_vars), + filename=None) + + +def load_persistables_for_increment(dirname, executor, program, + lookup_table_var, lookup_table_var_path): + """ + WARNING: this function will only be used for distributed training with distributed lookup table. + for increment trainning, the pserver will not only load dense variables, + but also load the suitable lookup table var. Because of slice lookup table + var with HASH, we must load the correct slice var. + + + :param dirname(str): The directory path + :param executor(Executor): The executor to run for loading inference model. + :param program(Program): The parameter server program, which will run on Pserver. + :param lookup_table_var: the distributed lookup tables var name. + :param lookup_table_var_path: the the distributed lookup tables var location. 
+ :return: None + """ + + def __load_lookup_table_vars(executor, main_program, lookup_table_var, + lookup_table_var_path): + emb_var = main_program.global_block().var(lookup_table_var) + + load_program = Program() + load_block = load_program.global_block() + load_block.append_op( + type='load', + inputs={}, + outputs={'Out': [emb_var]}, + attrs={'file_path': lookup_table_var_path}) + executor.run(load_program) + + if not os.path.isdir(dirname): + raise ValueError("There is no directory named '%s'", dirname) + + if not os.path.exists(lookup_table_var_path): + raise ValueError("There is no file named '%s'", lookup_table_var_path) + + if not isinstance(program, Program): + raise ValueError("program must be an instance of fluid.Program") + + _logger.info("Start Load Sparse Program With " + "Distributed Lookup Table Vars from {}, time = {}".format( + dirname, time.ctime())) + + _load_persistable_vars(executor, dirname, program, [lookup_table_var]) + __load_lookup_table_vars(executor, program, lookup_table_var, + lookup_table_var_path) + + _logger.info("Finish Load Sparse Program With " + "Distributed Lookup Table Vars from {}, time = {}".format( + dirname, time.ctime())) + + +def load_persistables_for_inference(dirname, executor, program, + lookup_table_var_name): + """ + WARNING: this function will only be used for inference with distributed lookup table. + Inference with distributed lookup table is a little funky, this function will load distributed + lookup table vars into sparse var, can be used in local inference mode. + + :param dirname(str): The directory path + :param executor(Executor): The executor to run for loading inference model. + :param program(Program): The parameter server program, which will run on Pserver. + :param lookup_table_var_name: the distributed lookup tables var name. 
+ :return: None + """ + + def __load_lookup_table_vars(executor, dirname, main_program, + lookup_table_vars): if not os.path.isdir(dirname): raise ValueError("There is no directory named '%s'", dirname) @@ -209,48 +290,34 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var): global_block.append_op(type='delete_var', inputs={'X': sums}) executor.run(convert_program) - _logger.info("Start Load Sparse Program With " - "Distributed Lookup Table Vars from {}, time = {}".format( - dirname, time.ctime())) - - lookup_table_vars = [lookup_table_var] - - io.load_vars( - executor, - dirname=dirname, - main_program=program, - predicate=_is_checkpoint_var(lookup_table_vars), - filename=None) - - _load_lookup_table_vars(executor, dirname, program, lookup_table_vars) - - _logger.info("Finish Load Sparse Program With " - "Distributed Lookup Table Vars from {}, time = {}".format( - dirname, time.ctime())) - - -def load_inference_model(dirname, executor, lookup_table_var_name): if not os.path.isdir(dirname): raise ValueError("There is no directory named '%s'", dirname) - local_model = os.path.join(dirname, model_filename) + if program: + if not isinstance(program, Program): + raise ValueError("program must be an instance of fluid.Program") + else: + local_model = os.path.join(dirname, model_filename) - with open(local_model, "rb") as f: - program_desc_str = f.read() + with open(local_model, "rb") as f: + program_desc_str = f.read() - program = Program.parse_from_string(program_desc_str) + program = Program.parse_from_string(program_desc_str) - if not core._is_program_version_supported(program._version()): - raise ValueError("Unsupported program version: %d\n" % - program._version()) + if not core._is_program_version_supported(program._version()): + raise ValueError("Unsupported program version: %d\n" % + program._version()) - # Binary data also need version. - load_persistable_vars(executor, dirname, program, lookup_table_var_name) + _logger.info("Start Load Sparse Program With " + "Distributed Lookup Table Vars from {}, time = {}".format( + dirname, time.ctime())) + + _load_persistable_vars(executor, dirname, program, [lookup_table_var_name]) + __load_lookup_table_vars(executor, dirname, program, + [lookup_table_var_name]) - feed_target_names = program.desc.get_feed_target_names() - fetch_target_names = program.desc.get_fetch_target_names() - fetch_targets = [ - program.global_block().var(name) for name in fetch_target_names - ] + _logger.info("Finish Load Sparse Program With " + "Distributed Lookup Table Vars from {}, time = {}".format( + dirname, time.ctime())) - return [program, feed_target_names, fetch_targets] + return program diff --git a/python/setup.py.in b/python/setup.py.in index 22b9537a90e..5d5f2dd0f18 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -107,9 +107,9 @@ packages=['paddle', 'paddle.fluid.distributed', 'paddle.fluid.layers', 'paddle.fluid.contrib', - 'paddle.fluid.contrib.utils', 'paddle.fluid.contrib.decoder', 'paddle.fluid.contrib.quantize', + 'paddle.fluid.contrib.utils', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details'] -- GitLab
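Usage sketch for the HDFS client this patch exports from `paddle.fluid.contrib`. The client shells out to the local Hadoop installation, so a working `hadoop` client is assumed; the cluster address, credentials, and every path below are placeholders (`fs.default.name` and `hadoop.job.ugi` are standard Hadoop settings — adjust them for your cluster).

```python
# Minimal sketch: all addresses, credentials and paths are placeholders.
from paddle.fluid.contrib import HDFSClient

hadoop_home = "/home/client/hadoop-client/hadoop/"
configs = {
    "fs.default.name": "hdfs://namenode.example.com:54310",  # placeholder cluster
    "hadoop.job.ugi": "user,passwd",                         # placeholder credentials
}

client = HDFSClient(hadoop_home, configs)

# Basic single-path operations exposed by the client.
if not client.is_exist("/user/demo/data"):
    client.makedirs("/user/demo/data")

client.upload(
    hdfs_path="/user/demo/data/part-00000",
    local_path="./part-00000",
    overwrite=True,
    retry_times=5)

print(client.ls("/user/demo/data"))
```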
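`multi_download` shards the files under an HDFS directory across trainers (every `trainers`-th file starting at `trainer_id`) and fetches each shard with a pool of worker processes; `multi_upload` pushes a local directory back in the same multi-process fashion. A sketch for trainer 0 of a four-trainer job, reusing the placeholder client configuration from the previous snippet:

```python
from paddle.fluid.contrib import HDFSClient, multi_download, multi_upload

# Placeholder client; see the previous sketch for the config keys.
client = HDFSClient("/home/client/hadoop-client/hadoop/", {
    "fs.default.name": "hdfs://namenode.example.com:54310",
    "hadoop.job.ugi": "user,passwd",
})

# Trainer 0 of 4 downloads its slice of the training data with 5 processes.
local_files = multi_download(
    client,
    hdfs_path="/user/demo/train_data",
    local_path="./train_data",
    trainer_id=0,
    trainers=4,
    multi_processes=5)
print("downloaded:", local_files)

# Push locally produced output (e.g. a saved model) back to HDFS.
multi_upload(
    client,
    hdfs_path="/user/demo/output",
    local_path="./output",
    multi_processes=5,
    overwrite=True,
    sync=True)
```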
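The lookup-table helpers target models trained with a distributed lookup table: `convert_dist_to_sparse_program` rewrites the trainer program so the distributed table becomes a local sparse (SELECTED_ROWS) table, `load_persistables_for_increment` restores a parameter server together with its slice of the table for incremental training, and `load_persistables_for_inference` loads the table slices into a single sparse variable for local inference. A sketch of the inference path, assuming a placeholder model directory produced by the distributed training job and a placeholder embedding-variable name:

```python
import paddle.fluid as fluid
from paddle.fluid.contrib import load_persistables_for_inference

exe = fluid.Executor(fluid.CPUPlace())

# "./dist_model" and "emb" are placeholders for the directory saved by the
# distributed training job and the name of its lookup-table variable.
# With program=None the helper rebuilds the program from the model file
# stored in dirname before loading the persistables and the table slices.
inference_program = load_persistables_for_inference(
    dirname="./dist_model",
    executor=exe,
    program=None,
    lookup_table_var_name="emb")
```

For incremental training on a parameter server, `load_persistables_for_increment(dirname, executor, program, lookup_table_var, lookup_table_var_path)` additionally takes the path of the table slice belonging to that server, since the table is hash-partitioned and each server must load its own slice.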