From 737334989beaa95bab7f150937994e2a3440cd78 Mon Sep 17 00:00:00 2001 From: hong <43953930+phlrain@users.noreply.github.com> Date: Wed, 15 Jan 2020 15:20:58 +0800 Subject: [PATCH] State dict do not count data parallel layers (#22169) * DataParallel state dict don't include _layers.; test=develop * add unit test of data parallel; test=develop * add load state test; test=develop --- python/paddle/fluid/dygraph/parallel.py | 113 ++++++++++++++++++ .../test_imperative_data_parallel.py | 82 +++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 76a3d2c5dc..ac75a5dded 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -254,3 +254,116 @@ class DataParallel(layers.Layer): def _is_data_parallel_mode(self): return self._strategy.nranks > 1 + + def state_dict(self, + destination=None, + include_sublayers=True, + structured_name_prefix=""): + ''' + Get all parameters of self._layers and its sub-layers, and set all the parameters into a dict + + Parameters: + destination(dict, optional) : If provided, all the parameters will be set to this dict. Default: None + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start + with structured_name_prefix + + Returns: + dict: a dict containing all the parameters of self._layers + + Examples: + .. 
code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + ''' + + return self._layers.state_dict( + destination=destination, + include_sublayers=include_sublayers, + structured_name_prefix=structured_name_prefix) + + def set_dict(self, + stat_dict, + include_sublayers=True, + use_structured_name=True): + ''' + Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict + + Parameters: + stat_dict(dict) : Dict that contains all the parameters + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. + Default: True + Returns: + None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + + emb.set_dict( para_state_dict ) + + ''' + + self._layers.set_dict( + stat_dict, + include_sublayers=include_sublayers, + use_structured_name=use_structured_name) + + def load_dict(self, + stat_dict, + include_sublayers=True, + use_structured_name=True): + ''' + Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict + + This API will be deprecated. 
Please use set_dict instead + + Parameters: + stat_dict(dict) : Dict that contains all the parameters + include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True + use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. + Default: True + Returns: + None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + with fluid.dygraph.guard(): + strategy=dygraph.parallel.prepare_context() + emb = fluid.dygraph.Embedding([10, 10]) + emb = dygraph.parallel.DataParallel(emb, strategy) + + state_dict = emb.state_dict() + fluid.save_dygraph( state_dict, "paddle_dy") + + para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + + emb.load_dict( para_state_dict ) + + ''' + + self._layers.load_dict( + stat_dict, + include_sublayers=include_sublayers, + use_structured_name=use_structured_name) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py new file mode 100644 index 0000000000..d645a0a5ce --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import print_function + +import contextlib +import unittest +import numpy as np +import six +import unittest + +import paddle +import paddle.fluid as fluid +import paddle.fluid.dygraph as dygraph +from paddle.fluid.dygraph.nn import Linear +import paddle.fluid.core as core + + +class MLP(fluid.Layer): + def __init__(self, param_attr=None, bias_attr=None): + super(MLP, self).__init__() + + self._linear1 = Linear(784, 10) + self._linear2 = Linear(10, 10) + + def forward(self, inputs): + y = self._linear1(inputs) + y = self._linear2(y) + return y + + +class TestDataParallelStateDict(unittest.TestCase): + def test_data_parallel_state_dict(self): + with fluid.dygraph.guard(): + strategy = dygraph.parallel.prepare_context() + mlp = MLP() + parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy) + + single_state = mlp.state_dict() + parallel_state = parallel_mlp.state_dict() + + base_para = {} + place = fluid.CPUPlace() if not core.is_compiled_with_cuda( + ) else fluid.CUDAPlace(0) + for k, v in single_state.items(): + self.assertTrue(k in parallel_state) + + self.assertTrue( + np.array_equal(v.numpy(), parallel_state[k].numpy())) + + base_para[k] = v.numpy() + + for k, v in parallel_state.items(): + np_t = v.numpy() + var = v.value().get_tensor() + var.set(np.zeros_like(np_t), place) + + self.assertTrue(np.sum(np.abs(v.numpy())) == 0) + + parallel_mlp.set_dict(base_para) + + parallel_state = parallel_mlp.state_dict() + + for k, v in parallel_state.items(): + self.assertTrue(np.array_equal(v.numpy(), base_para[k])) + + parallel_mlp.load_dict(base_para) + + +if __name__ == '__main__': + unittest.main() -- GitLab