diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py
index 5ed4e2d412e49408df6077c0320c597be783d385..27658ba3d46baffa4b64cdf07931c638b9dee086 100644
--- a/python/paddle/fluid/dygraph/checkpoint.py
+++ b/python/paddle/fluid/dygraph/checkpoint.py
@@ -82,7 +82,12 @@ def save_dygraph(state_dict, model_path):
         name_table[k] = v.name
     model_dict["StructuredToParameterName@@"] = name_table
 
-    with open(model_path + suffix, 'wb') as f:
+    file_name = model_path + suffix
+    dir_name = os.path.dirname(file_name)
+    if dir_name and not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+
+    with open(file_name, 'wb') as f:
         pickle.dump(model_dict, f)
 
 
@@ -113,7 +118,7 @@ def load_dygraph(model_path, keep_name_table=False):
                 adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
                                              parameter_list = emb.parameters() )
                 state_dict = adam.state_dict()
-                fluid.save_dygraph( state_dict, "padle_dy")
+                fluid.save_dygraph( state_dict, "paddle_dy")
 
                 para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
 
diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py
index 76a3d2c5dcb8d2c3ce7a8379db043d89f3d0fe63..ac75a5dded024bbe6f4a2231db832a3d22c50f98 100644
--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -254,3 +254,116 @@ class DataParallel(layers.Layer):
 
     def _is_data_parallel_mode(self):
         return self._strategy.nranks > 1
+
+    def state_dict(self,
+                   destination=None,
+                   include_sublayers=True,
+                   structured_name_prefix=""):
+        '''
+        Get all parameters of self._layers and its sub-layers, and set all the parameters into a dict.
+
+        Parameters:
+            destination(dict, optional) : If provided, all the parameters will be set into this dict. Default: None
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            structured_name_prefix(str, optional): If not an empty str, all the keys in the state dict will start
+                                                 with structured_name_prefix
+
+        Returns:
+            dict: a dict that contains all the parameters of self._layers
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+
+        '''
+        # Forward every argument unchanged to the wrapped layer's state_dict.
+        return self._layers.state_dict(
+            destination=destination,
+            include_sublayers=include_sublayers,
+            structured_name_prefix=structured_name_prefix)
+
+    def set_dict(self,
+                 stat_dict,
+                 include_sublayers=True,
+                 use_structured_name=True):
+        '''
+        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.
+
+        Parameters:
+            stat_dict(dict) : Dict that contains all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+
+                    emb.set_dict( para_state_dict )
+
+        '''
+        # Forward every argument unchanged to the wrapped layer's set_dict.
+        self._layers.set_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
+
+    def load_dict(self,
+                  stat_dict,
+                  include_sublayers=True,
+                  use_structured_name=True):
+        '''
+        Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensors in stat_dict.
+
+        This API will be deprecated. Please use set_dict instead.
+
+        Parameters:
+            stat_dict(dict) : Dict that contains all the parameters
+            include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True
+            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key.
+                                                  Default: True
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle.fluid as fluid
+                with fluid.dygraph.guard():
+                    strategy = fluid.dygraph.parallel.prepare_context()
+                    emb = fluid.dygraph.Embedding([10, 10])
+                    emb = fluid.dygraph.parallel.DataParallel(emb, strategy)
+
+                    state_dict = emb.state_dict()
+                    fluid.save_dygraph( state_dict, "paddle_dy")
+
+                    para_state_dict, _ = fluid.load_dygraph( "paddle_dy")
+
+                    emb.load_dict( para_state_dict )
+
+        '''
+        # Forward every argument unchanged to the wrapped layer's load_dict.
+        self._layers.load_dict(
+            stat_dict,
+            include_sublayers=include_sublayers,
+            use_structured_name=use_structured_name)
diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py
index 0436137a80cb1032a37e6285833dad9d8ee1e258..73273acf7a2676e5542c4c5563b3ec2f5b801294 100644
--- a/python/paddle/fluid/io.py
+++ b/python/paddle/fluid/io.py
@@ -1518,6 +1518,10 @@ def save(program, model_path):
     assert base_name != "", \
         "model_path MUST be format of dirname/filename [dirname\\filename in Window], Now filename is empty str"
 
+    dir_name = os.path.dirname(model_path)
+    if dir_name and not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+
     def get_tensor(var):
         t = global_scope().find_var(var.name).get_tensor()
         return np.array(t)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
new file mode 100644
index 0000000000000000000000000000000000000000..d645a0a5ceb60f89e50eac3a83ff00f7b1e3927a
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_data_parallel.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import contextlib
+import unittest
+import numpy as np
+import six
+import unittest
+
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.dygraph as dygraph
+from paddle.fluid.dygraph.nn import Linear
+import paddle.fluid.core as core
+
+
+class MLP(fluid.Layer):
+    def __init__(self, param_attr=None, bias_attr=None):
+        super(MLP, self).__init__()
+        # NOTE(review): param_attr and bias_attr are accepted but never used below.
+        self._linear1 = Linear(784, 10)
+        self._linear2 = Linear(10, 10)
+
+    def forward(self, inputs):
+        y = self._linear1(inputs)
+        y = self._linear2(y)
+        return y
+
+
+class TestDataParallelStateDict(unittest.TestCase):
+    def test_data_parallel_state_dict(self):
+        with fluid.dygraph.guard():
+            strategy = dygraph.parallel.prepare_context()
+            mlp = MLP()
+            parallel_mlp = dygraph.parallel.DataParallel(mlp, strategy)
+            # Compare the wrapped layer's state dict with the wrapper's.
+            single_state = mlp.state_dict()
+            parallel_state = parallel_mlp.state_dict()
+
+            base_para = {}
+            place = fluid.CPUPlace() if not core.is_compiled_with_cuda(
+            ) else fluid.CUDAPlace(0)
+            for k, v in single_state.items():
+                self.assertTrue(k in parallel_state)
+
+                self.assertTrue(
+                    np.array_equal(v.numpy(), parallel_state[k].numpy()))
+                # Record the value from v.numpy() for the restore check below.
+                base_para[k] = v.numpy()
+            # Overwrite every parameter with zeros and check it reads back as zero.
+            for k, v in parallel_state.items():
+                np_t = v.numpy()
+                var = v.value().get_tensor()
+                var.set(np.zeros_like(np_t), place)
+
+                self.assertTrue(np.sum(np.abs(v.numpy())) == 0)
+            # After set_dict, each parameter is expected to equal its recorded value.
+            parallel_mlp.set_dict(base_para)
+
+            parallel_state = parallel_mlp.state_dict()
+
+            for k, v in parallel_state.items():
+                self.assertTrue(np.array_equal(v.numpy(), base_para[k]))
+            # NOTE(review): load_dict is only called here; its effect is not asserted.
+            parallel_mlp.load_dict(base_para)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
index 01327ac647ffc69122e1fa489f88f3e207d6d2b2..6a621b8c75c0e3da85b2b6a03eece610c8094bf2 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py
@@ -14,6 +14,7 @@
 
 from __future__ import print_function
 
+import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -879,9 +880,10 @@ class TestDygraphPtbRnn(unittest.TestCase):
         with fluid.dygraph.guard():
             emb = fluid.dygraph.Embedding([10, 10])
             state_dict = emb.state_dict()
-            fluid.save_dygraph(state_dict, "emb_dy")
+            fluid.save_dygraph(state_dict, os.path.join('saved_dy', 'emb_dy'))
 
-            para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy")
+            para_state_dict, opti_state_dict = fluid.load_dygraph(
+                os.path.join('saved_dy', 'emb_dy'))
 
             self.assertTrue(opti_state_dict == None)
 
diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py
index 0dd767edc4c15cb496eeb7c3976224b5a0c6f9e0..24b61f514ce86d8a6e6d4726e847825c0af5c523 100644
--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -609,7 +609,7 @@ class TestProgramStatePartial(unittest.TestCase):
                     self.assertTrue(np.sum(np.abs(t)) != 0)
                     base_map[var.name] = t
 
-            fluid.save(main_program, "./test_1")
+            fluid.save(main_program, os.path.join('some_dir', 'test_1'))
 
             # set var to zero
             for var in main_program.list_vars():
@@ -623,7 +623,8 @@ class TestProgramStatePartial(unittest.TestCase):
                     self.assertTrue(np.sum(np.abs(new_t)) == 0)
 
             #fluid.load(test_program, "./test_1", None )
-            program_state = fluid.load_program_state("./test_1")
+            program_state = fluid.load_program_state(
+                os.path.join('some_dir', 'test_1'))
             fluid.set_program_state(test_program, program_state)
 
             for var in test_program.list_vars():