Unverified · Commit 3f896dce · authored by wangxiaoning, committed by GitHub

[fluid clean] replace fluid.io.load_inference_model in util_factory.py (#49156)

* add index sample fp16 support

* remove fluid APIs in distributed_strategy.py and role_maker.py

* Revert "remove fluid APIs in distributed_strategy.py and role_maker.py"

This reverts commit 223bbee990d3bf69e252fc3c0f19e3873550a264.

* move load_inference_model to distributed

* fix origin develop codes diff

* move _endpoints_replacement

* delete line

* reset line

* add unittest case of load_inference_model

* fix unittest

* fix unittest

* fix coverage

* fix coverage
Parent 945f777f
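At the call site this change is a one-line swap, as the util_factory.py hunk below shows. A minimal before/after sketch of the migration, assuming an inference model has already been saved under ./infer_model (the path and the variable names here are illustrative, not taken from this diff):

import paddle
from paddle.distributed.io import load_inference_model_distributed

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

# Before: the fluid API this PR cleans up.
# program, feed_names, fetch_targets = paddle.fluid.io.load_inference_model(
#     "./infer_model", exe)

# After: the replacement now living under paddle.distributed.io.
program, feed_names, fetch_targets = load_inference_model_distributed(
    "./infer_model", exe
)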
@@ -498,7 +498,7 @@ class UtilBase:
inference_program,
feed_target_names,
fetch_targets,
) = paddle.fluid.io.load_inference_model(
) = paddle.distributed.io.load_inference_model_distributed(
config.dump_model_dir,
exe,
model_filename=model_filename,
......
@@ -15,7 +15,8 @@
import os
import paddle
from paddle.fluid.framework import Program
from paddle.fluid.framework import Program, static_only
from paddle.fluid.io import load_persistables
from paddle.framework import core, dygraph_not_support
@@ -286,3 +287,146 @@ def save_persistables(executor, dirname, main_program=None, filename=None):
predicate=is_persistable,
filename=filename,
)
@static_only
def load_inference_model_distributed(
dirname,
executor,
model_filename=None,
params_filename=None,
pserver_endpoints=None,
):
"""
Load the inference model from a given directory. Through this API you can get the model
structure (Inference Program) and model parameters. If you only want to load
parameters of the pre-trained model, please use the :ref:`api_fluid_io_load_params` API.
You can refer to :ref:`api_guide_model_save_reader_en` for more details.
Args:
dirname(str): One of the following:
- The given directory path.
- Set to None when reading the model from memory.
executor(Executor): The executor to run for loading inference model.
See :ref:`api_guide_executor_en` for more details about it.
model_filename(str, optional): One of the following:
- The name of file to load the inference program.
- If it is None, the default filename ``__model__`` will be used.
- When ``dirname`` is ``None``, it must be set to a string containing model.
Default: ``None``.
params_filename(str, optional): It is only used for the case that all
parameters were saved in a single binary file. One of the following:
- The name of file to load all parameters.
- When ``dirname`` is ``None``, it must be set to a string containing all the parameters.
- If parameters were saved in separate files, set it as ``None``.
Default: ``None``.
pserver_endpoints(list, optional): It is only needed for distributed inference.
If a distributed lookup table was used during training,
that table is also needed by the inference process. Its value is
a list of pserver endpoints.
Returns:
list: The return of this API is a list with three elements:
(program, feed_target_names, fetch_targets). The `program` is a
``Program`` (refer to :ref:`api_guide_Program_en`), which is used for inference.
The `feed_target_names` is a list of ``str``, which contains names of variables
that need to feed data in the inference program. The `fetch_targets` is a list of
``Variable`` (refer to :ref:`api_guide_Program_en`). It contains variables from which
we can get inference results.
Examples:
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
# Build the model
main_prog = fluid.Program()
startup_prog = fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
w = paddle.create_parameter(shape=[784, 200], dtype='float32')
b = paddle.create_parameter(shape=[200], dtype='float32')
hidden_w = paddle.matmul(x=data, y=w)
hidden_b = fluid.layers.elementwise_add(hidden_w, b)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
# Save the inference model
path = "./infer_model"
fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'],
target_vars=[hidden_b], executor=exe, main_program=main_prog)
# Demo one. No need to set the distributed lookup table, because the
# training doesn't use a distributed lookup table.
[inference_program, feed_target_names, fetch_targets] = (
paddle.distributed.io.load_inference_model_distributed(dirname=path, executor=exe))
tensor_img = np.array(np.random.random((1, 64, 784)), dtype=np.float32)
results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
# Demo two. If the training uses a distributed lookup table, the pserver
# endpoints list should be supplied when loading the inference model.
# The below is just an example.
endpoints = ["127.0.0.1:2023","127.0.0.1:2024"]
[dist_inference_program, dist_feed_target_names, dist_fetch_targets] = (
paddle.distributed.io.load_inference_model_distributed(dirname=path,
executor=exe,
pserver_endpoints=endpoints))
# In this example, the inference program was saved in the file
# "./infer_model/__model__" and parameters were saved in
# separate files under the directory "./infer_model".
# By the inference program, feed_target_names and
# fetch_targets, we can use an executor to run the inference
# program for getting the inference result.
"""
load_from_memory = False
if dirname is not None:
load_dirname = os.path.normpath(dirname)
if not os.path.isdir(load_dirname):
raise ValueError("There is no directory named '%s'" % dirname)
if model_filename is None:
model_filename = '__model__'
model_filename = os.path.join(
load_dirname, os.path.basename(model_filename)
)
if params_filename is not None:
params_filename = os.path.basename(params_filename)
with open(model_filename, "rb") as f:
program_desc_str = f.read()
else:
load_from_memory = True
if params_filename is None:
raise ValueError(
"The path of params cannot be None when the directory path is None."
)
load_dirname = dirname
program_desc_str = model_filename
params_filename = params_filename
program = Program.parse_from_string(program_desc_str)
if not core._is_program_version_supported(program._version()):
raise ValueError(
"Unsupported program version: %d\n" % program._version()
)
# Binary data also need versioning.
load_persistables(executor, load_dirname, program, params_filename)
feed_target_names = program.desc.get_feed_target_names()
fetch_target_names = program.desc.get_fetch_target_names()
fetch_targets = [
program.global_block().var(name) for name in fetch_target_names
]
return [program, feed_target_names, fetch_targets]
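When dirname is None, the function takes the serialized program and the packed parameters directly from memory instead of reading a directory, which is the path the new unit test exercises with model_str and params_str. A minimal sketch of that in-memory path, continuing from the docstring example above (exe, main_prog and hidden_b are assumed to be defined as shown there) and assuming all parameters were saved into a single file:

import os
import paddle.fluid as fluid
from paddle.distributed.io import load_inference_model_distributed

# Save the program and the packed parameters into two named files.
mem_path = "./infer_model_mem"
fluid.io.save_inference_model(
    dirname=mem_path, feeded_var_names=['img'], target_vars=[hidden_b],
    executor=exe, main_program=main_prog,
    model_filename="model", params_filename="params")

# Read both back as bytes ...
with open(os.path.join(mem_path, "model"), "rb") as f:
    model_str = f.read()
with open(os.path.join(mem_path, "params"), "rb") as f:
    params_str = f.read()

# ... and load the inference model without touching the directory again.
[program, feed_target_names, fetch_targets] = load_inference_model_distributed(
    None, exe, model_str, params_str)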
@@ -26,6 +26,7 @@ import paddle.fluid.core as core
import paddle.fluid.executor as executor
import paddle.fluid.layers as layers
import paddle.fluid.optimizer as optimizer
from paddle.distributed.io import load_inference_model_distributed
from paddle.fluid.compiler import CompiledProgram
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.io import (
@@ -112,8 +113,12 @@ class TestBook(unittest.TestCase):
model_1 = InferModel(
load_inference_model(None, exe, model_str, params_str)
)
model_2 = InferModel(load_inference_model_distributed(MODEL_DIR, exe))
model_3 = InferModel(
load_inference_model_distributed(None, exe, model_str, params_str)
)
for model in [model_0, model_1]:
for model in [model_0, model_1, model_2, model_3]:
outs = exe.run(
model.program,
feed={
@@ -139,6 +144,14 @@ class TestBook(unittest.TestCase):
model_str,
None,
)
self.assertRaises(
ValueError,
load_inference_model_distributed,
None,
exe,
model_str,
None,
)
class TestSaveInferenceModel(unittest.TestCase):
@@ -530,6 +543,12 @@ class TestLoadInferenceModelError(unittest.TestCase):
self.assertRaises(
ValueError, load_inference_model, './test_not_exist_dir', exe
)
self.assertRaises(
ValueError,
load_inference_model_distributed,
'./test_not_exist_dir',
exe,
)
if __name__ == '__main__':
......