From baec7a353fbb917de246e8b343a0aa3e88fbc3c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com>
Date: Thu, 13 Feb 2020 16:57:45 +0800
Subject: [PATCH] load inference model from memory buffer, test=release/1.7
 (#22562)

* 1. load model from memory 2. scale is no longer added when saving inference model test=develop

* raise ci coverage, test=develop

* supports saving weights to memory. test=develop

* raise ci coverage, test=develop

* fix PADDLE_ENFORCE messages, test=develop
---
 paddle/fluid/operators/save_combine_op.cc    |  19 ++-
 paddle/fluid/operators/save_combine_op.h     |  27 ++-
 paddle/fluid/pybind/pybind.cc                |   4 +
 python/paddle/fluid/io.py                    | 156 +++++++++++-------
 .../unittests/test_inference_model_io.py     |  55 ++++--
 5 files changed, 176 insertions(+), 85 deletions(-)

diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc
index 68144e603bd..a4ac4f009e8 100644
--- a/paddle/fluid/operators/save_combine_op.cc
+++ b/paddle/fluid/operators/save_combine_op.cc
@@ -71,6 +71,23 @@ to a file on disk.
         "The \"file_path\" where the LoDTensor variables will be saved.")
         .AddCustomChecker(
             [](const std::string& path) { return !path.empty(); });
+    AddAttr<bool>("save_to_memory",
+                  "(boolean, default false)"
+                  "If true, the variables will be saved to binary strings.")
+        .SetDefault(false);
+    AddOutput("Y",
+              "(RAW, default empty)."
+              "This output is used when saving variables to binary strings.")
+        .AsDispensable();
+  }
+};
+
+class SaveCombineOpInferVarType : public framework::VarTypeInference {
+ public:
+  void operator()(framework::InferVarTypeContext* ctx) const override {
+    for (auto& o : ctx->Output("Y")) {
+      ctx->SetType(o, framework::proto::VarType::RAW);
+    }
   }
 };
 
@@ -80,7 +97,7 @@ to a file on disk.
 namespace ops = paddle::operators;
 
 REGISTER_OPERATOR(save_combine, ops::SaveCombineOp,
-                  ops::SaveCombineOpProtoMaker);
+                  ops::SaveCombineOpProtoMaker, ops::SaveCombineOpInferVarType);
 
 REGISTER_OP_CPU_KERNEL(
     save_combine,
diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h
index 66648fffd4b..9ffc057294a 100644
--- a/paddle/fluid/operators/save_combine_op.h
+++ b/paddle/fluid/operators/save_combine_op.h
@@ -38,6 +38,8 @@ class SaveCombineOpKernel : public framework::OpKernel<T> {
     auto filename = ctx.Attr<std::string>("file_path");
     auto overwrite = ctx.Attr<bool>("overwrite");
     auto save_as_fp16 = ctx.Attr<bool>("save_as_fp16");
+    auto save_to_memory = ctx.Attr<bool>("save_to_memory");
+    auto output = ctx.Output<std::string>("Y");
 
     bool is_present = FileExists(filename);
     if (is_present && !overwrite) {
@@ -45,11 +47,7 @@ class SaveCombineOpKernel : public framework::OpKernel<T> {
                       filename, overwrite);
     }
 
-    MkDirRecursively(DirName(filename).c_str());
-    std::ofstream fout(filename, std::ios::binary);
-    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
-                   filename);
-
+    std::ostringstream ss;
     auto inp_var_names = ctx.InputNames("X");
     auto &inp_vars = ctx.MultiInputVar("X");
     PADDLE_ENFORCE_GT(static_cast<int>(inp_var_names.size()), 0,
@@ -82,12 +80,25 @@ class SaveCombineOpKernel : public framework::OpKernel<T> {
         // copy LoD info to the new tensor
         out.set_lod(tensor.lod());
         framework::TransDataType(in_kernel_type, out_kernel_type, tensor,
                                  &out);
-        framework::SerializeToStream(fout, out, dev_ctx);
+        framework::SerializeToStream(ss, out, dev_ctx);
       } else {
-        framework::SerializeToStream(fout, tensor, dev_ctx);
+        framework::SerializeToStream(ss, tensor, dev_ctx);
       }
     }
-    fout.close();
+    if (save_to_memory) {
+      PADDLE_ENFORCE_NE(output, nullptr,
+                        platform::errors::InvalidArgument(
+                            "Cannot find variable Y for save_combine_op"));
+      *output = ss.str();
+    } else {
+      MkDirRecursively(DirName(filename).c_str());
+      std::ofstream fout(filename, std::ios::binary);
+      PADDLE_ENFORCE_EQ(
+          static_cast<bool>(fout), true,
+          platform::errors::NotFound("Cannot open %s to write", filename));
+      fout << ss.str();
+      fout.close();
+    }
   }
 };
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 796a56975f4..8ec2cbea638 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -915,6 +915,10 @@ All parameter, weight, gradient are variables in Paddle.
             return self.GetMutable<LoDTensor>();
           },
           py::return_value_policy::reference)
+      .def("get_bytes",
+           [](Variable &self) {
+             return py::bytes(*self.GetMutable<std::string>());
+           })
       .def("get_lod_rank_table",
            [](Variable &self) { return self.GetMutable<LoDRankTable>(); },
            py::return_value_policy::reference)
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 73273acf7a2..fa442a749d1 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -35,6 +35,7 @@ from paddle.fluid.framework import Program, Parameter, default_main_program, def
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.log_helper import get_logger
 from . import reader
+from . import unique_name
 from .reader import *
 from . import core
 from .. import compat as cpt
@@ -213,7 +214,8 @@ def save_vars(executor,
 
     Args:
         executor(Executor): The executor to run for saving variables.
-        dirname(str): The folder where to save variables.
+        dirname(str, optional): The folder where to save variables.
+            When you need to save the parameter to the memory, set it to None.
         main_program(Program, optional): The program whose variables will be saved.
                                          If it is None, the default main program will
                                          be used automatically.
@@ -228,7 +230,8 @@ def save_vars(executor,
                       Default: None
 
     Returns:
-        None
+        str: When saving parameters to a file, returns None.
+            When saving parameters to memory, returns a binary string containing parameters.
 
     Raises:
         TypeError: If `main_program` is not an instance of Program nor None.
@@ -265,17 +268,21 @@ def save_vars(executor,
            fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog,
                               vars=None, predicate = name_has_fc)
            # all variables whose names contain "fc " are saved.
    """
-    save_dirname = os.path.normpath(dirname)
+    save_to_memory = False
+    if dirname is None and filename is None:
+        save_to_memory = True
+
     main_program = _get_valid_program(main_program)
 
     if vars is None:
-        save_vars(
+        return save_vars(
             executor,
             main_program=main_program,
-            dirname=save_dirname,
+            dirname=dirname,
             vars=list(filter(predicate, main_program.list_vars())),
             filename=filename)
     else:
+        params_var_name = unique_name.generate("saved_params")
         # give warning when there is no var in model
         if len(list(vars)) == 0:
             warnings.warn(
@@ -292,33 +299,45 @@ def save_vars(executor,
             if each_var.type == core.VarDesc.VarType.RAW:
                 continue
             new_var = _clone_var_in_block_(save_block, each_var)
-            if filename is None:
-                save_file_path = os.path.join(save_dirname, new_var.name)
-                save_file_path = os.path.normpath(save_file_path)
+            if filename is None and save_to_memory is False:
+                save_file_path = os.path.join(
+                    os.path.normpath(dirname), new_var.name)
                 save_block.append_op(
                     type='save',
                     inputs={'X': [new_var]},
                     outputs={},
-                    attrs={'file_path': save_file_path})
+                    attrs={'file_path': os.path.normpath(save_file_path)})
             else:
                 save_var_map[new_var.name] = new_var
 
-        if filename is not None:
+        if filename is not None or save_to_memory:
             save_var_list = []
             for name in sorted(save_var_map.keys()):
                 save_var_list.append(save_var_map[name])
 
+            save_path = str()
+            if save_to_memory is False:
+                save_path = os.path.join(os.path.normpath(dirname), filename)
+
+            saved_params = save_block.create_var(
+                type=core.VarDesc.VarType.RAW, name=params_var_name)
+            saved_params.desc.set_persistable(True)
             save_block.append_op(
                 type='save_combine',
                 inputs={'X': save_var_list},
-                outputs={},
-                attrs={'file_path': os.path.join(save_dirname, filename)})
+                outputs={'Y': saved_params},
+                attrs={
+                    'file_path': save_path,
+                    'save_to_memory': save_to_memory
+                })
 
         #NOTE(zhiqiu): save op will add variable kLookupTablePath in save_program.desc,
         # which leads to diff on save_program and its desc. Call _sync_with_cpp
         # to keep consistency.
         save_program._sync_with_cpp()
         executor.run(save_program)
+        if save_to_memory:
+            return global_scope().find_var(params_var_name).get_bytes()
 
 
 def save_params(executor, dirname, main_program=None, filename=None):
@@ -346,7 +365,8 @@ def save_params(executor, dirname, main_program=None, filename=None):
     Args:
         executor(Executor): The executor to run for saving parameters, You can
                             refer to :ref:`api_guide_executor_en`.
-        dirname(str): The saving directory path.
+        dirname(str, optional): The saving directory path.
+            When you need to save the parameter to the memory, set it to None.
         main_program(Program, optional): The program whose parameters will be
                                          saved. You can refer to
                                          :ref:`api_guide_Program_en` for more
@@ -359,7 +379,8 @@
                           Default: None
 
     Returns:
-        None
+        str: When saving parameters to a file, returns None.
+            When saving parameters to memory, returns a binary string containing parameters.
 
     Examples:
         .. code-block:: python
@@ -381,7 +402,7 @@ def save_params(executor, dirname, main_program=None, filename=None):
            # The parameters weights and bias of the fc layer in the network are going to
            # be saved in different files in the path "./my_paddle_model"
    """
-    save_vars(
+    return save_vars(
        executor,
        dirname=dirname,
        main_program=main_program,
@@ -558,7 +579,8 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
        executor(Executor): The executor to run for saving persistable variables.
                            You can refer to :ref:`api_guide_executor_en` for
                            more details.
-        dirname(str): The saving directory path.
+        dirname(str, optional): The saving directory path.
+            When you need to save the parameter to the memory, set it to None.
        main_program(Program, optional): The program whose persistbale variables
                                         will be saved. You can refer to
                                         :ref:`api_guide_Program_en` for more details.
@@ -570,7 +592,8 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
                      Default: None.
 
    Returns:
-        None
+        str: When saving parameters to a file, returns None.
+            When saving parameters to memory, returns a binary string containing parameters.
 
    Examples:
        .. code-block:: python
@@ -594,10 +617,10 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
           # "./my_paddle_model"
    """
    if main_program and main_program._is_distributed:
-        _save_distributed_persistables(
+        return _save_distributed_persistables(
            executor, dirname=dirname, main_program=main_program)
    else:
-        save_vars(
+        return save_vars(
            executor,
            dirname=dirname,
            main_program=main_program,
@@ -687,7 +710,11 @@ def load_vars(executor,
            # And all the variables are supposed to be saved in separate files.
 
    """
-    load_dirname = os.path.normpath(dirname)
+    vars_from_memory = False
+    if dirname is not None:
+        dirname = os.path.normpath(dirname)
+    else:
+        vars_from_memory = True
 
    if vars is None:
        if main_program is None:
@@ -697,7 +724,7 @@ def load_vars(executor,
 
        load_vars(
            executor,
-            dirname=load_dirname,
+            dirname=dirname,
            main_program=main_program,
            vars=list(filter(predicate, main_program.list_vars())),
            filename=filename)
@@ -724,13 +751,15 @@ def load_vars(executor,
                ))
            new_var = _clone_var_in_block_(load_block, each_var)
            if filename is None:
+                if dirname is None:
+                    raise ValueError(
+                        "The directory path and params cannot be None at the same time."
+                    )
                load_block.append_op(
                    type='load',
                    inputs={},
                    outputs={'Out': [new_var]},
-                    attrs={
-                        'file_path': os.path.join(load_dirname, new_var.name)
-                    })
+                    attrs={'file_path': os.path.join(dirname, new_var.name)})
            else:
                load_var_map[new_var.name] = new_var
 
@@ -739,11 +768,17 @@ def load_vars(executor,
        for name in sorted(load_var_map.keys()):
            load_var_list.append(load_var_map[name])
 
+        if vars_from_memory is False:
+            filename = os.path.join(dirname, filename)
+
        load_block.append_op(
            type='load_combine',
            inputs={},
            outputs={"Out": load_var_list},
-            attrs={'file_path': os.path.join(load_dirname, filename)})
+            attrs={
+                'file_path': filename,
+                'model_from_memory': vars_from_memory
+            })
    executor.run(load_prog)
 
    # check var shape
@@ -1129,17 +1164,6 @@ def save_inference_model(dirname,
                    )
                break
 
-    # fix the bug that the activation op's output as target will be pruned.
-    # will affect the inference performance.
-    # TODO(Superjomn) add an IR pass to remove 1-scale op.
-    with program_guard(main_program):
-        uniq_target_vars = []
-        for i, var in enumerate(target_vars):
-            if isinstance(var, Variable):
-                var = layers.scale(
-                    var, 1., name="save_infer_model/scale_{}".format(i))
-            uniq_target_vars.append(var)
-        target_vars = uniq_target_vars
     target_var_name_list = [var.name for var in target_vars]
 
     # when a pserver and a trainer running on the same machine, mkdir may conflict
@@ -1223,19 +1247,22 @@ def load_inference_model(dirname,
     You can refer to :ref:`api_guide_model_save_reader_en` for more details.
 
     Args:
-        dirname(str): The given directory path.
+        dirname(str): One of the following:
+                      - The given directory path.
+                      - Set to None when reading the model from memory.
         executor(Executor): The executor to run for loading inference model.
                             See :ref:`api_guide_executor_en` for more details about it.
-        model_filename(str, optional): The name of file to load the inference program.
-                                       If it is None, the default filename
-                                       ``__model__`` will be used.
-                                       Default: ``None``.
-        params_filename(str, optional): The name of file to load all parameters.
-                                        It is only used for the case that all
-                                        parameters were saved in a single binary
-                                        file. If parameters were saved in separate
-                                        files, set it as ``None``.
-                                        Default: ``None``.
+        model_filename(str, optional): One of the following:
+                                       - The name of file to load the inference program.
+                                       - If it is None, the default filename ``__model__`` will be used.
+                                       - When ``dirname`` is ``None``, it must be set to a string containing model.
+                                       Default: ``None``.
+        params_filename(str, optional): It is only used for the case that all
+                                        parameters were saved in a single binary file. One of the following:
+                                        - The name of file to load all parameters.
+                                        - When ``dirname`` is ``None``, it must be set to a string containing all the parameters.
+                                        - If parameters were saved in separate files, set it as ``None``.
+                                        Default: ``None``.
         pserver_endpoints(list, optional): It is only needed by the distributed
                                            inference. If using a distributed look
                                            up table during the training,
@@ -1303,21 +1330,32 @@ def load_inference_model(dirname,
            # fetch_targets, we can use an executor to run the inference
            # program for getting the inference result.
    """
-    load_dirname = os.path.normpath(dirname)
-    if not os.path.isdir(load_dirname):
-        raise ValueError("There is no directory named '%s'", dirname)
+    load_from_memory = False
+    if dirname is not None:
+        load_dirname = os.path.normpath(dirname)
+        if not os.path.isdir(load_dirname):
+            raise ValueError("There is no directory named '%s'", dirname)
 
-    if model_filename is not None:
-        model_filename = os.path.basename(model_filename)
-    else:
-        model_filename = "__model__"
-    model_filename = os.path.join(load_dirname, model_filename)
+        if model_filename is None:
+            model_filename = '__model__'
 
-    if params_filename is not None:
-        params_filename = os.path.basename(params_filename)
+        model_filename = os.path.join(load_dirname,
+                                      os.path.basename(model_filename))
+
+        if params_filename is not None:
+            params_filename = os.path.basename(params_filename)
 
-    with open(model_filename, "rb") as f:
-        program_desc_str = f.read()
+        with open(model_filename, "rb") as f:
+            program_desc_str = f.read()
+    else:
+        load_from_memory = True
+        if params_filename is None:
+            raise ValueError(
+                "The path of params cannot be None when the directory path is None."
+            )
+        load_dirname = dirname
+        program_desc_str = model_filename
+        params_filename = params_filename
 
     program = Program.parse_from_string(program_desc_str)
     if not core._is_program_version_supported(program._version()):
diff --git a/python/paddle/fluid/tests/unittests/test_inference_model_io.py b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
index 7060449041c..f9408dfee53 100644
--- a/python/paddle/fluid/tests/unittests/test_inference_model_io.py
+++ b/python/paddle/fluid/tests/unittests/test_inference_model_io.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import unittest
+import os
 import six
 import numpy as np
 import paddle.fluid.core as core
@@ -27,13 +28,20 @@ import paddle.fluid.layers as layers
 import paddle.fluid.optimizer as optimizer
 from paddle.fluid.compiler import CompiledProgram
 from paddle.fluid.framework import Program, program_guard
-from paddle.fluid.io import save_inference_model, load_inference_model
+from paddle.fluid.io import save_inference_model, load_inference_model, save_persistables
 from paddle.fluid.transpiler import memory_optimize
 
 
 class TestBook(unittest.TestCase):
+    class InferModel(object):
+        def __init__(self, list):
+            self.program = list[0]
+            self.feed_var_names = list[1]
+            self.fetch_vars = list[2]
+
     def test_fit_line_inference_model(self):
         MODEL_DIR = "./tmp/inference_model"
+        UNI_MODEL_DIR = "./tmp/inference_model1"
 
         init_program = Program()
         program = Program()
@@ -65,30 +73,43 @@ class TestBook(unittest.TestCase):
                           'y': tensor_y},
                 fetch_list=[avg_cost])
 
+        # Separated model and unified model
         save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
+        save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
+                             program, 'model', 'params')
+        main_program = program.clone()._prune_with_input(
+            feeded_var_names=["x", "y"], targets=[avg_cost])
+        params_str = save_persistables(exe, None, main_program, None)
+
         expected = exe.run(program,
                            feed={'x': tensor_x,
                                  'y': tensor_y},
                            fetch_list=[avg_cost])[0]
 
         six.moves.reload_module(executor)  # reload to build a new scope
-        exe = executor.Executor(place)
 
-        [infer_prog, feed_var_names, fetch_vars] = load_inference_model(
-            MODEL_DIR, exe)
-
-        outs = exe.run(
-            infer_prog,
-            feed={feed_var_names[0]: tensor_x,
-                  feed_var_names[1]: tensor_y},
-            fetch_list=fetch_vars)
-        actual = outs[0]
-
-        self.assertEqual(feed_var_names, ["x", "y"])
-        self.assertEqual(len(fetch_vars), 1)
-        print("fetch %s" % str(fetch_vars[0]))
-        self.assertTrue("scale" in str(fetch_vars[0]))
-        self.assertEqual(expected, actual)
+        model_0 = self.InferModel(load_inference_model(MODEL_DIR, exe))
+        with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
+            model_str = f.read()
+        model_1 = self.InferModel(
+            load_inference_model(None, exe, model_str, params_str))
+
+        for model in [model_0, model_1]:
+            outs = exe.run(model.program,
+                           feed={
+                               model.feed_var_names[0]: tensor_x,
+                               model.feed_var_names[1]: tensor_y
+                           },
+                           fetch_list=model.fetch_vars)
+            actual = outs[0]
+
+            self.assertEqual(model.feed_var_names, ["x", "y"])
+            self.assertEqual(len(model.fetch_vars), 1)
+            print("fetch %s" % str(model.fetch_vars[0]))
+            self.assertEqual(expected, actual)
+
+        self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
+                          model_str, None)
 
 
 class TestSaveInferenceModel(unittest.TestCase):
-- 
GitLab
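
For reviewers who want to try the new path end to end, here is a minimal usage sketch of the in-memory workflow this patch enables, closely mirroring the updated unit test. It is not part of the patch itself: the tiny fc network, the "./tmp/inference_model1" directory and the 'model'/'params' file names are illustrative assumptions.

    # Usage sketch (assumes Paddle Fluid 1.7 with this patch applied).
    import os
    import numpy as np
    import paddle.fluid as fluid

    init_program = fluid.Program()
    program = fluid.Program()
    with fluid.program_guard(program, init_program):
        x = fluid.layers.data(name='x', shape=[2], dtype='float32')
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        y_predict = fluid.layers.fc(input=x, size=1, act=None)
        avg_cost = fluid.layers.mean(
            fluid.layers.square_error_cost(input=y_predict, label=y))

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(init_program)

    # Save the unified inference model to disk ('model' + 'params'), and the
    # persistable parameters to an in-memory binary string: with dirname=None,
    # save_persistables now returns the serialized weights instead of writing files.
    UNI_MODEL_DIR = "./tmp/inference_model1"
    fluid.io.save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
                                  program, 'model', 'params')
    pruned = program.clone()._prune_with_input(
        feeded_var_names=["x", "y"], targets=[avg_cost])
    params_str = fluid.io.save_persistables(exe, None, pruned, None)

    # Load the inference model back entirely from memory buffers: dirname=None
    # makes load_inference_model treat model_filename/params_filename as the
    # serialized program and parameters themselves.
    with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
        model_str = f.read()
    [infer_prog, feed_names, fetch_vars] = fluid.io.load_inference_model(
        None, exe, model_str, params_str)

    tensor_x = np.random.rand(3, 2).astype("float32")
    tensor_y = np.random.rand(3, 1).astype("float32")
    outs = exe.run(infer_prog,
                   feed={feed_names[0]: tensor_x, feed_names[1]: tensor_y},
                   fetch_list=fetch_vars)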
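Under the hood, dirname=None routes save_vars/save_persistables through the new 'save_to_memory' attribute of the save_combine op and returns the result via the Variable.get_bytes() binding, while load_inference_model(None, ...) forwards the buffers to load_combine with 'model_from_memory' enabled.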