Update save inference model to support dygraph (#25894)

* update save_inference_model for hapi * update save_inference_model to support dygraph * fix comments * fix comments * test=develop * test, test=develop * fix dim test, test=develop * test, test=develop * add test_export_deploy_model_dynamic * fix unittest for hapi: save_inference_model * fix code style * accept review by guoshengCS * fix coverage rate * update doc for save_inference_model and copyright * change test model back to LeNet() in test_export_deploy_model * copy jit.save, use LeNet() to test export deploy model * add return value for dygraph, and fix doc error * corrected the doc writing * Delete redundant import and correct import order in sample code. * remove 'fluid' and add prepare() and fit() in sample code * correct usage of API 2.0 in sample code * fix sample code bugs * fix code style bugs * fix test_model.py bugs * set for_inference=True * correct usage for static.InputSpec * update doc for model.save * correct usage of API 2.0 * rename param name for model.save * correct for_inference as training

Update save inference model to support dygraph (#25894)
* update save_inference_model for hapi * update save_inference_model to support dygraph * fix comments * fix comments * test=develop * test, test=develop * fix dim test, test=develop * test, test=develop * add test_export_deploy_model_dynamic * fix unittest for hapi: save_inference_model * fix code style * accept review by guoshengCS * fix coverage rate * update doc for save_inference_model and copyright * change test model back to LeNet() in test_export_deploy_model * copy jit.save, use LeNet() to test export deploy model * add return value for dygraph, and fix doc error * corrected the doc writing * Delete redundant import and correct import order in sample code. * remove 'fluid' and add prepare() and fit() in sample code * correct usage of API 2.0 in sample code * fix sample code bugs * fix code style bugs * fix test_model.py bugs * set for_inference=True * correct usage for static.InputSpec * update doc for model.save * correct usage of API 2.0 * rename param name for model.save * correct for_inference as training
761ed17d · LiuChiachi · GitHub · d32beea2 · 761ed17d · 761ed17d
Showing with 261 additions and 79 deletions

python/paddle/incubate/hapi/model.py python/paddle/incubate/hapi/model.py +197 -52

python/paddle/incubate/hapi/tests/test_model.py python/paddle/incubate/hapi/tests/test_model.py +64 -27

未找到文件。
--- a/python/paddle/incubate/hapi/model.py
+++ b/python/paddle/incubate/hapi/model.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -26,17 +26,22 @@ from collections import Iterable
 import paddle
 from paddle import fluid
+from paddle.fluid import core
+from paddle.fluid.framework import in_dygraph_mode, Variable, ParamBase, _current_expected_place
 # Note: Use alias `Input` temporarily before releasing hapi feature.
 from paddle.static import InputSpec as Input
-from paddle.fluid.framework import in_dygraph_mode, Variable
 from paddle.fluid.executor import global_scope
 from paddle.fluid.io import is_belong_to_optimizer
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator, FunctionSpec
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
 from paddle.fluid.incubate.fleet.base import role_maker
+from paddle.fluid.executor import scope_guard, Executor
 from paddle.io import DataLoader, Dataset
+from paddle.fluid.dygraph.layers import Layer
 from paddle.metric import Metric
 from .distributed import DistributedBatchSampler, _all_gather, prepare_distributed_context, _parallel_context_initialized
@@ -846,24 +851,32 @@ class Model(object):
        """
        return self._adapter.test_batch(inputs)
-    def save(self, path):
+    def save(self, path, training=True):
        """  
-        This function saves parameters, optimizer infomation to path.
+        This function saves either the parameters and optimizer information, or
+        the model and parameters for inference only, to path. Which one is saved
+        depends on the parameter `training`.
-        The parameters contains all the trainable Variable, will save to
+        If `training` is set to True, the parameters saved contain all 
-        a file with suffix ".pdparams".
+        the trainable Variable, will save to a file with suffix ".pdparams".
        The optimizer information contains all the variable used by optimizer.
        For Adam optimizer, contains beta1, beta2, momentum etc. All the
        information will save to a file with suffix ".pdopt". (If the optimizer
        has no variables to save (like SGD), the file will not be generated).
+        This function will silently overwrite existing file at the target location.
-        This function will silently overwrite existing file
+        If `training` is set to False, only inference model will be saved. It 
-        at the target location.
+        should be noted that before using `save`, you should run the model, and
+        the saved input shape is the same as the shape of the input it was run with.
+        `@paddle.jit.to_static` must be added on `forward` function of your layer 
+        in dynamic mode now and these will be optimized later.
        Args:
            path (str): The file prefix to save model. The format is
                'dirname/file_prefix' or 'file_prefix'. If it is an empty str, an
                exception will be raised.
+            training (bool, optional): Whether to save for training. If not, save
+                for inference only. Default: True.
        Returns:
            None
@@ -871,24 +884,46 @@ class Model(object):
        Examples:
            .. code-block:: python
                import paddle
                import paddle.incubate.hapi as hapi
+                from paddle.nn import Linear
+                from paddle.incubate.hapi.datasets.mnist import MNIST as MnistDataset
-              class MyNet(paddle.nn.Layer):
+                class Mnist(paddle.nn.Layer):
                    def __init__(self):
                        super(Mnist, self).__init__()
-                      self._fc = paddle.nn.Linear(784, 1, act='softmax')
+                        self._fc = Linear(784, 1, act='softmax')
+                  @paddle.jit.to_static # If save for inference in dygraph, need this
                  def forward(self, x):
                      y = self._fc(x)
                      return y
+                dynamic = True # False
                device = hapi.set_device('cpu')
-              paddle.disable_static(device)
+                # if use static graph, do not set
-              model = hapi.Model(MyNet())
+                paddle.disable_static(device) if dynamic else None
-              model.save('checkpoint/test')
+                # inputs and labels are not required for dynamic graph.
+                input = hapi.Input([None, 784], 'float32', 'x')
+                label = hapi.Input([None, 1], 'int64', 'label')
+                model = hapi.Model(Mnist(), input, label)
+                optim = paddle.optimizer.SGD(learning_rate=1e-3,
+                    parameter_list=model.parameters())
+                model.prepare(optim,
+                                paddle.nn.CrossEntropyLoss(),
+                                hapi.metrics.Accuracy())
+                mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
+                model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
+                model.save('checkpoint/test') # save for training
+                model.save('inference_model', False) # save for inference
        """
        if ParallelEnv().local_rank == 0:
+            if not training:
+                self._save_inference_model(path)
+            else:
                self._adapter.save(path)
    def load(self, path, skip_mismatch=False, reset_optimizer=False):
@@ -1474,13 +1509,17 @@ class Model(object):
        cbks.on_end('test', logs)
        return outputs
-    def save_inference_model(self,
+    def _save_inference_model(self,
                              save_dir,
                              model_filename=None,
                              params_filename=None,
                              model_only=False):
        """
-        Save inference model must in static mode.
+        Save inference model can be in static or dynamic mode.
+        It should be noted that before using `save_inference_model`, you should
+        run the model, and the saved input shape is the same as that of the input
+        it was run with. `@paddle.jit.to_static` must be added on `forward` function of
+        your layer in dynamic mode now and these will be optimized later.
        Args:
            save_dir (str): The directory path to save the inference model.
@@ -1496,21 +1535,127 @@ class Model(object):
        Returns:
            list: The fetch variables' name list
        Examples:
        .. code-block:: python
+            import numpy as np
+            import paddle
+            from paddle.static import InputSpec
            import paddle.incubate.hapi as hapi
+            from paddle.nn import Linear
+            from paddle.incubate.hapi.datasets.mnist import MNIST as MnistDataset
-            input = hapi.Input([-1, 1, 28, 28], 'float32', 'image')
+            class Mnist(Layer):
-            model = hapi.Model(hapi.vision.LeNet(), input)
+                def __init__(self, classifier_act=None):
-            model.prepare()
+                    super(Mnist, self).__init__()
+                    self.fc = Linear(input_dim=784, output_dim=10, act="softmax")
+                @paddle.jit.to_static # In static mode, you need to delete this.
+                def forward(self, inputs):
+                    outputs = self.fc(inputs)
+                    return outputs
+            dynamic = True # False
+            device = hapi.set_device('gpu')
+            # if use static graph, do not set
+            paddle.disable_static(device) if dynamic else None
+            # inputs and labels are not required for dynamic graph.
+            input = InputSpec([None, 784], 'float32', 'x')
+            label = InputSpec([None, 1], 'int64', 'label')
+            model = hapi.Model(Mnist(), input, label)
+            optim = paddle.optimizer.SGD(learning_rate=1e-3,
+                parameter_list=model.parameters())
+            model.prepare(optim,
+                            paddle.nn.CrossEntropyLoss(),
+                            hapi.metrics.Accuracy())
+            mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
+            model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
            model.save_inference_model('inference_model')
        """
-        assert not fluid.in_dygraph_mode(
-        ), 'Save inference model must in static mode!'
+        def get_inout_spec(all_vars, return_name=False):
+            result_list = []
+            valid_vars = [var for var in all_vars if isinstance(var, Variable)]
+            result_list = valid_vars
+            if return_name:
+                result_list = [var.name for var in result_list]
+            return result_list
+        # TODO:
+        # 1. Make it unnecessary for users to run the model before calling `save_inference_model` in dygraph.
+        # 2. Save the correct input shape; for now the interface stores the shape of the
+        #    inputs that the user fed to the model when running it.
+        # 3. Make it unnecessary for users to add `@paddle.jit.to_static` in dynamic mode.
+        if fluid.in_dygraph_mode():
+            layer = self.network
+            fluid.disable_dygraph()
+            # 1. input check
+            prog_translator = ProgramTranslator()
+            if not prog_translator.enable_declarative:
+                raise RuntimeError(
+                    "save_inference_model doesn't work when setting ProgramTranslator.enable=False."
+                )
+            if not isinstance(layer, Layer):
+                raise TypeError(
+                    "The input layer should be 'Layer', but received layer type is %s."
+                    % type(layer))
+            # 2. get program of declarative Layer.forward
+            prog_cache = prog_translator.get_program_cache()
+            # make dummy args & kwargs, to get expected FunctionSpec
+            layer_func = FunctionSpec(type(layer).forward, [layer], {})
+            concrete_program, _ = prog_cache.get_program(layer_func)
+            # NOTE: we maintain the mapping of variable name to
+            # structured name, the buffer variable (non-persistable)
+            # saved to inference program may not be needed by dygraph Layer,
+            # we only record the state_dict variable's structured name
+            state_names_dict = dict()
+            for structured_name, var in layer.state_dict().items():
+                state_names_dict[var.name] = structured_name
+            # 3. share parameters from Layer to scope & record var info
+            scope = core.Scope()
+            extra_var_info = dict()
+            for param_or_buffer in concrete_program.parameters:
+                # share to scope
+                param_or_buffer_tensor = scope.var(
+                    param_or_buffer.name).get_tensor()
+                src_tensor = param_or_buffer.value().get_tensor()
+                param_or_buffer_tensor._share_data_with(src_tensor)
+                # record var info
+                extra_info_dict = dict()
+                if param_or_buffer.name in state_names_dict:
+                    extra_info_dict['structured_name'] = state_names_dict[
+                        param_or_buffer.name]
+                extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient
+                if isinstance(param_or_buffer, ParamBase):
+                    extra_info_dict['trainable'] = param_or_buffer.trainable
+                extra_var_info[param_or_buffer.name] = extra_info_dict
+            # 4. build input & output spec
+            input_var_names = get_inout_spec(concrete_program.inputs, True)
+            output_vars = get_inout_spec(concrete_program.outputs)
+            # 5. save inference model
+            with scope_guard(scope):
+                return fluid.io.save_inference_model(
+                    dirname=save_dir,
+                    feeded_var_names=input_var_names,
+                    target_vars=output_vars,
+                    executor=Executor(_current_expected_place()),
+                    main_program=concrete_program.main_program.clone(),
+                    model_filename=model_filename,
+                    params_filename=params_filename,
+                    program_only=model_only)
+        else:
            prog = self._adapter._progs.get('test', None)
            assert prog, \
                "Model is not ready, please call `model.prepare()` first"

--- a/python/paddle/incubate/hapi/tests/test_model.py
+++ b/python/paddle/incubate/hapi/tests/test_model.py
@@ -33,6 +33,8 @@ from paddle.metric import Accuracy
 from paddle.incubate.hapi.datasets import MNIST
 from paddle.incubate.hapi.vision.models import LeNet
 from paddle.incubate.hapi.distributed import DistributedBatchSampler, prepare_distributed_context
+from paddle.fluid.dygraph.jit import declarative
+from paddle.fluid.dygraph.dygraph_to_static.program_translator import ProgramTranslator
 class LeNetDygraph(fluid.dygraph.Layer):
@@ -65,6 +67,37 @@ class LeNetDygraph(fluid.dygraph.Layer):
        return x
+class LeNetDeclarative(fluid.dygraph.Layer):
+    def __init__(self, num_classes=10, classifier_activation=None):
+        super(LeNetDeclarative, self).__init__()
+        self.num_classes = num_classes
+        self.features = Sequential(
+            Conv2d(
+                1, 6, 3, stride=1, padding=1),
+            ReLU(),
+            Pool2D(2, 'max', 2),
+            Conv2d(
+                6, 16, 5, stride=1, padding=0),
+            ReLU(),
+            Pool2D(2, 'max', 2))
+        if num_classes > 0:
+            self.fc = Sequential(
+                Linear(400, 120),
+                Linear(120, 84),
+                Linear(
+                    84, 10, act=classifier_activation))
+    @declarative
+    def forward(self, inputs):
+        x = self.features(inputs)
+        if self.num_classes > 0:
+            x = fluid.layers.flatten(x, 1)
+            x = self.fc(x)
+        return x
 class MnistDataset(MNIST):
    def __init__(self, mode, return_label=True, sample_num=None):
        super(MnistDataset, self).__init__(mode=mode)
@@ -335,7 +368,6 @@ class TestModelFunction(unittest.TestCase):
            model = Model(net, inputs, labels)
            model.prepare(optim2, loss=CrossEntropyLoss(reduction="sum"))
            loss, = model.train_batch([data], [label])
            np.testing.assert_allclose(loss.flatten(), ref.flatten())
            fluid.disable_dygraph() if dynamic else None
@@ -445,32 +477,37 @@ class TestModelFunction(unittest.TestCase):
            fluid.disable_dygraph() if dynamic else None
    def test_export_deploy_model(self):
-        net = LeNet()
+        for dynamic in [True, False]:
-        inputs = [Input([-1, 1, 28, 28], 'float32', 'image')]
+            fluid.enable_dygraph() if dynamic else None
+            # paddle.disable_static() if dynamic else None
+            prog_translator = ProgramTranslator()
+            prog_translator.enable(False) if not dynamic else None
+            net = LeNetDeclarative()
+            inputs = [Input([None, 1, 28, 28], 'float32', 'x')]
            model = Model(net, inputs)
            model.prepare()
            save_dir = tempfile.mkdtemp()
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            tensor_img = np.array(
                np.random.random((1, 1, 28, 28)), dtype=np.float32)
            ori_results = model.test_batch(tensor_img)
+            model.save(save_dir, training=False)
-        model.save_inference_model(save_dir)
+            fluid.disable_dygraph() if dynamic else None
            place = fluid.CPUPlace() if not fluid.is_compiled_with_cuda(
            ) else fluid.CUDAPlace(0)
+            new_scope = fluid.Scope()
+            with fluid.scope_guard(new_scope):
                exe = fluid.Executor(place)
                [inference_program, feed_target_names, fetch_targets] = (
                    fluid.io.load_inference_model(
                        dirname=save_dir, executor=exe))
                results = exe.run(inference_program,
                                  feed={feed_target_names[0]: tensor_img},
                                  fetch_list=fetch_targets)
+                np.testing.assert_allclose(
-        np.testing.assert_allclose(results, ori_results, rtol=1e-6)
+                    results, ori_results, rtol=1e-5, atol=1e-7)
                shutil.rmtree(save_dir)