[cherry-pick] fix save_dygraph & save & DataParallel (#22303)

* cherry-pick #22266 * cherry-pick #22169 test=release/1.7, test=develop

[cherry-pick] fix save_dygraph & save & DataParallel (#22303)
* cherry-pick #22266 * cherry-pick #22169 test=release/1.7, test=develop
da24e423 · songyouwei · hong · c396f276 · da24e423 · da24e423
6 changed files
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -82,7 +82,12 @@ def save_dygraph(state_dict, model_path):
        name_table[k] = v.name
    model_dict["StructuredToParameterName@@"] = name_table
-    with open(model_path + suffix, 'wb') as f:
+    file_name = model_path + suffix
+    dir_name = os.path.dirname(file_name)
+    if dir_name and not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+    with open(file_name, 'wb') as f:
        pickle.dump(model_dict, f)
@@ -113,7 +118,7 @@ def load_dygraph(model_path, keep_name_table=False):
                adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
                                             parameter_list = emb.parameters() )
                state_dict = adam.state_dict()
-                fluid.save_dygraph( state_dict, "padle_dy")
+                fluid.save_dygraph( state_dict, "paddle_dy")
                para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")

--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
    def _is_data_parallel_mode(self):
        return self._strategy.nranks > 1
+    def state_dict(self,
+                   destination=None,
+                   include_sublayers=True,
+                   structured_name_prefix=""):
+        '''
+        Get all parameters of self._layers and its sub-layers, and collect them into a dict.
+        Parameters:
+            destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start
+                                                 with structured_name_prefix. Default: ""
+        Returns:
+            dict: a dict containing all the parameters of self._layers
+        Examples:
+            .. code-block:: python
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+        '''
+        return self._layers.state_dict(
+            destination=destination,
+            include_sublayers=include_sublayers,
+            structured_name_prefix=structured_name_prefix)
+    def set_dict(self,
+                 stat_dict,
+                 include_sublayers=True,
+                 use_structured_name=True):
+        '''
+        Set the parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.
+        Parameters:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. 
+                                                  Default: True
+        Returns:
+            None
+        Examples:
+            .. code-block:: python
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+                    emb.set_dict( para_state_dict )
+        '''
+        self._layers.set_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
+    def load_dict(self,
+                  stat_dict,
+                  include_sublayers=True,
+                  use_structured_name=True):
+        '''
+        Set the parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.
+        This API will be deprecated. Please use set_dict instead.
+        Parameters:
+            stat_dict(dict) : Dict containing all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
+        Returns:
+            None
+        Examples:
+            .. code-block:: python
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+                    emb.load_dict( para_state_dict )
+        '''
+        self._layers.load_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -1518,6 +1518,10 @@ def save(program, model_path):
    assert base_name != "", \
        "model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str"
+    dir_name = os.path.dirname(model_path)
+    if dir_name and not os.path.exists(dir_name):
+        os.makedirs(dir_name)
    def get_tensor(var):
        t = global_scope().find_var(var.name).get_tensor()
        return np.array(t)

--- a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import contextlib
+import unittest
+import numpy as np
+import six
+import unittest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.dygraph as dygraph
+from paddle.fluid.dygraph.nn import Linear
+import paddle.fluid.core as core
+class MLP(fluid.Layer):
+    def __init__(self, param_attr=None, bias_attr=None):
+        super(MLP, self).__init__()
+        self._linear1 = Linear(784, 10)
+        self._linear2 = Linear(10, 10)
+    def forward(self, inputs):
+        y = self._linear1(inputs)
+        y = self._linear2(y)
+        return y
+class TestDataParallelStateDict(unittest.TestCase):
+    def test_data_parallel_state_dict(self):
+        with fluid.dygraph.guard():
+            strategy = dygraph.parallel.prepare_context()
+            mlp = MLP()
+            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)
+            single_state = mlp.state_dict()
+            parallel_state = parallel_mlp.state_dict()
+            base_para = {}
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            for k, v in single_state.items():
+                self.assertTrue(k in parallel_state)
+                self.assertTrue(
+                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
+                base_para[k] = v.numpy()
+            for k, v in parallel_state.items():
+                np_t = v.numpy()
+                var = v.value().get_tensor()
+                var.set(np.zeros_like(np_t), place)
+                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
+            parallel_mlp.set_dict(base_para)
+            parallel_state = parallel_mlp.state_dict()
+            for k, v in parallel_state.items():
+                self.assertTrue(np.array_equal(v.numpy(), base_para[k]))
+            parallel_mlp.load_dict(base_para)
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -14,6 +14,7 @@
 from __future__ import print_function
+import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -879,9 +880,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
        with fluid.dygraph.guard():
            emb = fluid.dygraph.Embedding([10, 10])
            state_dict = emb.state_dict()
-            fluid.save_dygraph(state_dict, "emb_dy")
+            fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
-            para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy")
+            para_state_dict, opti_state_dict = fluid.load_dygraph(
+                os.path.join('saved_dy', 'emb_dy'))
            self.assertTrue(opti_state_dict == None)

--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -609,7 +609,7 @@ class TestProgramStatePartial(unittest.TestCase):
                    self.assertTrue(np.sum(np.abs(t)) != 0)
                    base_map[var.name] = t
-            fluid.save(main_program, "./test_1")
+            fluid.save(main_program, os.path.join('some_dir', 'test_1'))
            # set var to zero
            for var in main_program.list_vars():
@@ -623,7 +623,8 @@ class TestProgramStatePartial(unittest.TestCase):
                    self.assertTrue(np.sum(np.abs(new_t)) == 0)
            #fluid.load(test_program, "./test_1", None )
-            program_state = fluid.load_program_state("./test_1")
+            program_state = fluid.load_program_state(
+                os.path.join('some_dir', 'test_1'))
            fluid.set_program_state(test_program, program_state)
            for var in test_program.list_vars():