diff --git a/python/paddle/distribution.py b/python/paddle/distribution.py index 7f0d71e3877f7c0c5ac8cd85d7eba6db60cfd718..d866f74b0e8b3b3a1bf9115b2389187b5fcde4f2 100644 --- a/python/paddle/distribution.py +++ b/python/paddle/distribution.py @@ -105,7 +105,7 @@ class Distribution(object): for arg in args: if isinstance(arg, float): arg = [arg] - if not isinstance(arg, (list, np.ndarray, tensor.Variable)): + if not isinstance(arg, (list, tuple, np.ndarray, tensor.Variable)): raise TypeError( "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}". format(type(arg))) @@ -190,8 +190,8 @@ class Uniform(Distribution): [broadcasting](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/beginners_guide/basic_concept/broadcasting_en.html) (e.g., `high - low` is a valid operation). Args: - low(int|float|list|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor - high(int|float|list|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor + low(int|float|list|tuple|numpy.ndarray|Tensor): The lower boundary of uniform distribution. The data type is int, float, list, tuple, numpy.ndarray or Tensor. + high(int|float|list|tuple|numpy.ndarray|Tensor): The higher boundary of uniform distribution. The data type is int, float, list, tuple, numpy.ndarray or Tensor. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -229,10 +229,10 @@ class Uniform(Distribution): def __init__(self, low, high, name=None): if not in_dygraph_mode(): check_type(low, 'low', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Uniform') check_type(high, 'high', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Uniform') self.all_arg_is_float = False @@ -409,8 +409,8 @@ class Normal(Distribution): * :math:`Z`: is the normalization constant. Args: - loc(int|float|list|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. - scale(int|float|list|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. + loc(int|float|list|tuple|numpy.ndarray|Tensor): The mean of normal distribution. The data type is int, float, list, tuple, numpy.ndarray or Tensor. + scale(int|float|list|tuple|numpy.ndarray|Tensor): The std of normal distribution. The data type is int, float, list, tuple, numpy.ndarray or Tensor. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -451,10 +451,10 @@ class Normal(Distribution): def __init__(self, loc, scale, name=None): if not in_dygraph_mode(): check_type(loc, 'loc', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Normal') check_type(scale, 'scale', - (int, float, np.ndarray, tensor.Variable, list), + (int, float, np.ndarray, tensor.Variable, list, tuple), 'Normal') self.batch_size_unknown = False @@ -655,7 +655,7 @@ class Categorical(Distribution): * :math:`[x=i]` : it evaluates to 1 if :math:`x==i` , 0 otherwise. Args: - logits(list|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. 
+ logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -702,11 +702,12 @@ class Categorical(Distribution): def __init__(self, logits, name=None): """ Args: - logits(list|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. + logits(list|tuple|numpy.ndarray|Tensor): The logits input of categorical distribution. The data type is float32 or float64. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. """ if not in_dygraph_mode(): - check_type(logits, 'logits', (np.ndarray, tensor.Variable, list), + check_type(logits, 'logits', + (np.ndarray, tensor.Variable, list, tuple), 'Categorical') self.name = name if name is not None else 'Categorical' diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 572ebb26d73cb435aaa1fb2d69b059511c193818..25412a86a8b940b9cba7210fbd17271955295bd1 100755 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -1036,7 +1036,7 @@ def _append_backward_ops_(block, val(list) the op path of block(index) """ if callbacks is not None: - assert (isinstance(callbacks, list)) + assert (isinstance(callbacks, (list, tuple))) for cb in callbacks: if not hasattr(cb, '__call__'): raise ValueError("'callback' must be a callable object.") @@ -1157,7 +1157,7 @@ def _append_backward_ops_(block, new_op_desc._set_attr(op_role_attr_name, backward) grad_to_var["__current_op_desc__"] = new_op_desc if callbacks is not None: - assert (isinstance(callbacks, list)) + assert (isinstance(callbacks, (list, tuple))) for cb in callbacks: cb(block=target_block, context=grad_to_var) @@ -1380,7 +1380,7 @@ def append_backward(loss, Parameters: loss(Tensor): The loss Tensor of the network. - parameter_list(list[Tensor|str], optional): List of Parameters or Parameter.names + parameter_list(list[Tensor|str]|tuple[Tensor|str], optional): List/Tuple of Parameters or Parameter.names that need to be updated by optimizers. If it is None, all parameters will be updated. @@ -1391,7 +1391,7 @@ def append_backward(loss, be automatically added into this set. If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set. Default: None. - callbacks(list[callable object], optional): List of callback functions. + callbacks(list[callable object]|tuple[callable object], optional): List/Tuple of callback functions. The callbacks are used for doing some custom jobs during backward part building. All @@ -1477,7 +1477,7 @@ def append_backward(loss, int(core.op_proto_and_checker_maker.OpRole.Loss)) if callbacks is not None: - check_type(callbacks, 'callbacks', list, + check_type(callbacks, 'callbacks', (list, tuple), 'paddle.static.append_backward') program = loss.block.program @@ -1823,9 +1823,9 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. 
Args: - targets(Tensor|list[Tensor]): The target Tensors - inputs(Tensor|list[Tensor]): The input Tensors - target_gradients (Tensor|list[Tensor], optional): The gradient Tensors + targets(Tensor|list[Tensor]|tuple[Tensor]): The target Tensors + inputs(Tensor|list[Tensor]|tuple[Tensor]): The input Tensors + target_gradients (Tensor|list[Tensor]|tuple[Tensor], optional): The gradient Tensors of targets which has the same shape with targets, If None, ones will be created for them. no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients @@ -1962,9 +1962,9 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): Backpropagate the gradients of targets to inputs. Args: - targets (Tensor|list[Tensor]): The target Tensors. - inputs (Tensor|list[Tensor]): The input Tensors. - target_gradients (Tensor|list[Tensor], optional): The gradient Tensor + targets (Tensor|list[Tensor]|tuple[Tensor]): The target Tensors. + inputs (Tensor|list[Tensor]|tuple[Tensor]): The input Tensors. + target_gradients (Tensor|list[Tensor]|tuple[Tensor], optional): The gradient Tensor of targets which has the same shape with targets, If None, ones will be created for them. no_grad_set (set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients @@ -1992,12 +1992,12 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None): z = paddle.static.gradients([y], x) print(z) # [var x@GRAD : fluid.VarType.LOD_TENSOR.shape(-1L, 2L, 8L, 8L).astype(VarType.FP32)] """ - check_type(targets, 'targets', (framework.Variable, list), + check_type(targets, 'targets', (framework.Variable, list, tuple), 'paddle.static.gradients') - check_type(inputs, 'inputs', (framework.Variable, list), + check_type(inputs, 'inputs', (framework.Variable, list, tuple), 'paddle.static.gradients') check_type(target_gradients, 'target_gradients', ( - framework.Variable, list, type(None)), 'paddle.static.gradients') + framework.Variable, list, tuple, type(None)), 'paddle.static.gradients') outs = calc_gradient(targets, inputs, target_gradients, no_grad_set) return _as_list(outs) diff --git a/python/paddle/fluid/dygraph/container.py b/python/paddle/fluid/dygraph/container.py index 345b71d8999ebd6c6a4e587a1f0c4f803c32c929..c7ea412fec1b77cf0dd86c187250e8ac499a800b 100644 --- a/python/paddle/fluid/dygraph/container.py +++ b/python/paddle/fluid/dygraph/container.py @@ -29,7 +29,7 @@ class Sequential(Layer): The argument passed to the constructor can be iterable Layers or iterable name Layer pairs. Parameters: - *layers(tuple): Layers or iterable name Layer pairs. + layers(Layer|list|tuple): Layers or iterable name Layer pairs, where each pair can be given as a list or a tuple. Examples: .. code-block:: python @@ -59,7 +59,7 @@ class Sequential(Layer): def __init__(self, *layers): super(Sequential, self).__init__() - if len(layers) > 0 and isinstance(layers[0], tuple): + if len(layers) > 0 and isinstance(layers[0], (list, tuple)): for name, layer in layers: self.add_sublayer(name, layer) else: diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index 40ab19184c9c8c4e2e6ca6753bb3dcb3b459b2eb..4c7c7b17eb1c47e7f6f3bda6d32a90892033704f 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -168,7 +168,7 @@ def declarative(function=None, input_spec=None): Args: function (callable): callable imperative function. 
- input_spec(list[InputSpec]): list of InputSpec to specific the shape/dtype/name + input_spec(list[InputSpec]|tuple[InputSpec]): list/tuple of InputSpec to specify the shape/dtype/name information of each input Tensor. Returns: @@ -525,7 +525,7 @@ def save(layer, path, input_spec=None, **configs): Args: layer (Layer): The Layer to be saved. path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. - input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward + input_spec (list[InputSpec|Tensor]|tuple[InputSpec|Tensor], optional): Describes the input of the saved model's forward method, which can be described by InputSpec or example Tensor. If None, all input variables of the original Layer's forward method would be the inputs of the saved model. Default None. **configs (dict, optional): Other save configuration options for compatibility. We do not @@ -654,7 +654,7 @@ def save(layer, path, input_spec=None, **configs): raise ValueError( "If there are static functions other than 'forward' that need to be saved, the input 'input_spec' should be None, but received the type of 'input_spec' is %s." % type(input_spec)) - if not isinstance(input_spec, list): + if not isinstance(input_spec, (list, tuple)): raise TypeError( "The input input_spec should be 'list', but received input_spec's type is %s." % type(input_spec)) diff --git a/python/paddle/fluid/tests/unittests/test_distribution.py b/python/paddle/fluid/tests/unittests/test_distribution.py index d5790811df94f3938faeeb6efa1cb51090366787..f1c12c90490c2513e945900f67030ac5c22c3409 100644 --- a/python/paddle/fluid/tests/unittests/test_distribution.py +++ b/python/paddle/fluid/tests/unittests/test_distribution.py @@ -301,6 +301,41 @@ class UniformTest9(UniformTest): name='values', shape=[dims], dtype='float32') +class UniformTest10(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are list. + self.low_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.high_np = np.random.uniform( + 5.0, 15.0, (batch_size, dims)).astype('float32').tolist() + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class UniformTest11(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are tuple. + self.low_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.high_np = tuple( + np.random.uniform(5.0, 15.0, (batch_size, dims)).astype('float32') + .tolist()) + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + class NormalNumpy(DistributionNumpy): def __init__(self, loc, scale): self.loc = np.array(loc) self.scale = np.array(scale) @@ -673,6 +708,66 @@ class NormalTest8(NormalTest): name='other_scale', shape=[dims], dtype='float64') +class NormalTest9(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are list. 
+ self.loc_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = self.scale_np.tolist() + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, + dims).astype('float32').tolist() + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.other_scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + self.other_scale_np = self.other_scale_np.tolist() + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class NormalTest10(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are tuple. + self.loc_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = tuple(self.scale_np.tolist()) + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = tuple( + np.random.randn(batch_size, dims).astype('float32').tolist()) + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.other_scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + self.other_scale_np = tuple(self.other_scale_np.tolist()) + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + class CategoricalNumpy(DistributionNumpy): def __init__(self, logits): self.logits = np.array(logits).astype('float32') @@ -961,6 +1056,38 @@ class CategoricalTest7(CategoricalTest): return np_probs +class CategoricalTest8(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): + # input logits is 2-D list + # value used in probs and log_prob method is 1-D Tensor + self.logits = self.logits_np.tolist() + self.other_logits = self.other_logits_np.tolist() + self.value = paddle.to_tensor(self.value_np) + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.logits_static = self.logits_np.tolist() + self.other_logits_static = self.other_logits_np.tolist() + self.value_static = fluid.data( + name='value', shape=self.value_shape, dtype='int64') + + +class CategoricalTest9(CategoricalTest): + def init_dynamic_data(self, batch_size, dims): + # input logits is 2-D tuple + # value used in probs and log_prob method is 1-D Tensor + self.logits = tuple(self.logits_np.tolist()) + self.other_logits = tuple(self.other_logits_np.tolist()) + self.value = paddle.to_tensor(self.value_np) + + def 
init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.logits_static = tuple(self.logits_np.tolist()) + self.other_logits_static = tuple(self.other_logits_np.tolist()) + self.value_static = fluid.data( + name='value', shape=self.value_shape, dtype='int64') + + class DistributionTestError(unittest.TestCase): def test_distribution_error(self): distribution = Distribution() diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index ba2abd72500788c4bbacf3c12d4ba711da1b01f3..89755d0365f2cb64ed2fd561ebcf169a89fc8e20 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -303,6 +303,12 @@ class TestDropoutFAPI(unittest.TestCase): mode='downscale_in_infer') res10 = paddle.nn.functional.dropout(x=input, p=1., training=True) res11 = paddle.fluid.layers.dropout(x=input, dropout_prob=0.) + res12 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') in_np = np.random.random([40, 40]).astype("float32") res_np = in_np @@ -310,7 +316,8 @@ class TestDropoutFAPI(unittest.TestCase): exe = fluid.Executor(place) res_list = [ - res1, res2, res3, res4, res5, res6, res7, res8, res9, res11 + res1, res2, res3, res4, res5, res6, res7, res8, res9, res11, + res12 ] for res in res_list: fetches = exe.run(fluid.default_main_program(), @@ -388,9 +395,16 @@ class TestDropoutFAPI(unittest.TestCase): x=input, p=1., training=True) dropout = paddle.fluid.dygraph.Dropout(p=0, ) res11 = dropout(input) + res12 = paddle.nn.functional.dropout( + x=input, + p=0., + axis=(0, 1), + training=False, + mode='upscale_in_train') res_list = [ - res1, res2, res3, res4, res5, res6, res7, res8, res9, res11 + res1, res2, res3, res4, res5, res6, res7, res8, res9, res11, + res12 ] for res in res_list: self.assertTrue(np.allclose(res.numpy(), res_np)) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py index 846c84c8a58b5c4c437270be525af2f0fa5608c2..972f1b64e1407129db84459fb1d4fd4640a9ab0d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_container_sequential.py @@ -55,6 +55,41 @@ class TestImperativeContainerSequential(unittest.TestCase): loss2 = fluid.layers.reduce_mean(res2) loss2.backward() + def test_sequential_list_params(self): + data = np.random.uniform(-1, 1, [5, 10]).astype('float32') + with fluid.dygraph.guard(): + data = fluid.dygraph.to_variable(data) + model1 = fluid.dygraph.Sequential( + fluid.Linear(10, 1), fluid.Linear(1, 2)) + res1 = model1(data) + self.assertListEqual(res1.shape, [5, 2]) + model1[1] = fluid.Linear(1, 3) + res1 = model1(data) + self.assertListEqual(res1.shape, [5, 3]) + loss1 = fluid.layers.reduce_mean(res1) + loss1.backward() + + l1 = fluid.Linear(10, 1) + l2 = fluid.Linear(1, 3) + model2 = fluid.dygraph.Sequential(['l1', l1], ['l2', l2]) + self.assertEqual(len(model2), 2) + res2 = model2(data) + self.assertTrue(l1 is model2.l1) + self.assertListEqual(res2.shape, res1.shape) + self.assertEqual(len(model1.parameters()), len(model2.parameters())) + del model2['l2'] + self.assertEqual(len(model2), 1) + res2 = model2(data) + self.assertListEqual(res2.shape, [5, 1]) + model2.add_sublayer('l3', fluid.Linear(1, 3)) + model2.add_sublayer('l4', fluid.Linear(3, 4)) + 
self.assertEqual(len(model2), 3) + res2 = model2(data) + self.assertListEqual(res2.shape, [5, 4]) + + loss2 = fluid.layers.reduce_mean(res2) + loss2.backward() + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 08ec516ba95b0e53131a7742e870ad5de8ddf516..9ec78366226f81f31b30f0b9d5d6e00e564d62ee 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -718,6 +718,18 @@ class TestAssign(unittest.TestCase): self.assertTrue((linear_3.weight.numpy() == [2.0, 2.0]).all(), '') + def test_assign_initializer_dygraph_4(self): + """Test assign initializer in dygraph model. + """ + paddle.disable_static() + + weight_attr_4 = paddle.framework.ParamAttr( + name="linear_weight_4", + initializer=paddle.nn.initializer.Assign((2, 2))) + linear_4 = paddle.nn.Linear(2, 2, weight_attr=weight_attr_4) + + self.assertTrue((linear_4.weight.numpy() == [2.0, 2.0]).all(), '') + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index bf9912c89cb8736c17516d4498535f157fb2c914..16adcb8f241ea2d1734282c72e644f13eba10341 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -158,6 +158,22 @@ class LinearNetMultiInput(fluid.dygraph.Layer): return x_out, y_out, loss +class LinearNetMultiInput1(fluid.dygraph.Layer): + def __init__(self, in_size, out_size): + super(LinearNetMultiInput1, self).__init__() + self._linear1 = Linear(in_size, out_size) + self._linear2 = Linear(in_size, out_size) + + @declarative(input_spec=(InputSpec( + [None, 8], dtype='float32'), InputSpec( + [None, 8], dtype='float32'))) + def forward(self, x, y): + x_out = self._linear1(x) + y_out = self._linear2(y) + loss = fluid.layers.mean(x_out + y_out) + return x_out, y_out, loss + + class MultiLoadingLinearNet(fluid.dygraph.Layer): def __init__(self, size, model_path): super(MultiLoadingLinearNet, self).__init__() @@ -542,6 +558,42 @@ class TestSaveLoadWithInputSpec(unittest.TestCase): # 4. assert pred_x == pred_xx self.assertTrue(np.allclose(pred_x.numpy(), pred_xx.numpy())) + def test_multi_in_out1(self): + net = LinearNetMultiInput1(8, 8) + + model_path = "multi_inout1.output_spec1/model" + # 1. check inputs and outputs + self.assertTrue(len(net.forward.inputs) == 2) + input_x = net.forward.inputs[0] + input_y = net.forward.inputs[1] + self.assertTrue(input_x.shape == (-1, 8)) + self.assertTrue(input_y.shape == (-1, 8)) + + # 2. prune loss + output_spec = net.forward.outputs[:2] + paddle.jit.save(net, model_path, output_spec=output_spec) + + # 3. load to infer + infer_layer = paddle.jit.load(model_path) + x = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + y = fluid.dygraph.to_variable( + np.random.random((4, 8)).astype('float32')) + # 4. predict + pred_x, pred_y = infer_layer(x, y) + + # 1. prune y and loss + model_path = "multi_inout1.output_spec2/model" + output_spec = net.forward.outputs[:1] + paddle.jit.save(net, model_path, (input_x, ), output_spec=output_spec) + # 2. load again + infer_layer2 = paddle.jit.load(model_path) + # 3. predict + pred_xx = infer_layer2(x) + + # 4. 
assert pred_x == pred_xx + self.assertTrue(np.allclose(pred_x.numpy(), pred_xx.numpy())) + class TestJitSaveLoadConfig(unittest.TestCase): def setUp(self): diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 6cd879c388c1f6f8d5914e144841e8678119cb45..5a33d5b58dc1ae8e2b7a0878973d56ba28e3fa9a 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -236,7 +236,7 @@ def _update_input_info(inputs): if isinstance(inputs, Input): shapes = [list(inputs.shape)] dtypes = [inputs.dtype] - elif isinstance(inputs, list): + elif isinstance(inputs, (list, tuple)): shapes = [list(input.shape) for input in inputs] dtypes = [input.dtype for input in inputs] elif isinstance(inputs, dict): @@ -895,12 +895,12 @@ class Model(object): Args: network (paddle.nn.Layer): The network is an instance of paddle.nn.Layer. - inputs (InputSpec|list|dict|None): `inputs`, entry points of network, - could be a InputSpec instance, or lits of InputSpec instances, + inputs (InputSpec|list|tuple|dict|None): `inputs`, entry points of network, + could be an InputSpec instance, or list/tuple of InputSpec instances, or dict ({name: InputSpec}), and it couldn't be None in static graph. - labels (InputSpec|list|None): `labels`, entry points of network, - could be a InputSpec instnace or lits of InputSpec instances, + labels (InputSpec|list|tuple|None): `labels`, entry points of network, + could be an InputSpec instance or list/tuple of InputSpec instances, or None. For static graph, if labels is required in loss, labels must be set. Otherwise, it could be None. @@ -994,9 +994,10 @@ class Model(object): self.stop_training = False if not in_dygraph_mode(): - if not isinstance(inputs, (list, dict, Input)): + if not isinstance(inputs, (list, tuple, dict, Input)): raise TypeError( - "'inputs' must be list or dict, and couldn't be None.") + "'inputs' must be list or tuple or dict, and couldn't be None." + ) elif inputs: self._input_info = _update_input_info(inputs) diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 0859d05af1cf90404024e5dcfe2a2b9e49ea54b1..5e8dc15cb4a301fbcc0c976656122bcccfeeedfd 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -764,8 +764,8 @@ def dropout(x, Args: x (Tensor): The input tensor. The data type is float32 or float64. - p (float | int): Probability of setting units to zero. Default 0.5. - axis (int | list): The axis along which the dropout is performed. Default None. + p (float|int): Probability of setting units to zero. Default 0.5. + axis (int|list|tuple): The axis along which the dropout is performed. Default None. training (bool): A flag indicating whether it is in train phrase or not. Default True. mode(str): ['upscale_in_train'(default) | 'downscale_in_infer']. 
@@ -896,7 +896,7 @@ def dropout(x, if mode not in ('downscale_in_infer', 'upscale_in_train'): raise ValueError( "mode argument should be 'downscale_in_infer' or 'upscale_in_train'") - if axis and not isinstance(axis, (int, list)): + if axis and not isinstance(axis, (int, list, tuple)): raise TypeError("datatype of axis argument should be int or list") if axis == None: # commonly used dropout @@ -955,7 +955,7 @@ def dropout(x, #get mask shape input_shape = x.shape - drop_axes = [axis] if isinstance(axis, int) else axis + drop_axes = [axis] if isinstance(axis, int) else list(axis) if min(drop_axes) < 0 or max(drop_axes) > len(input_shape) - 1: raise ValueError("axis value should be greater than or equal to 0 and less than dimensions of x:{}, but get axis value:{} " \ .format(len(input_shape), max(drop_axes))) diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index a33301230e89e14b4d0d7c87bf7fa2dcc55ef179..94c4ddc1938823653db6ef78a823430928c724ff 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -26,7 +26,7 @@ class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. Args: - value (Tensor|numpy.ndarray|list): numpy array, list, or tensor to initialize the parameter. + value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or tensor to initialize the parameter. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -87,10 +87,10 @@ class Assign(NumpyArrayInitializer): def __init__(self, value, name=None): import numpy - check_type(value, 'value', (numpy.ndarray, list, framework.Variable), - 'Assign') + check_type(value, 'value', + (numpy.ndarray, list, tuple, framework.Variable), 'Assign') - if (isinstance(value, list)): + if (isinstance(value, (list, tuple))): value = numpy.array(value) # TODO: value is already is a tensor, accounting efficiency maybe it does not need to convert tensor to numpy data and then initialized. diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 2f71e5470fd951b8621b1a5f85b9633fb90f5068..db0a5a5cab3aa778203273aec800a660f382d0e7 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -680,8 +680,8 @@ class Dropout(layers.Layer): In dygraph mode, please use ``eval()`` to switch to evaluation mode, where dropout is disabled. Parameters: - p (float | int): Probability of setting units to zero. Default: 0.5 - axis (int | list): The axis along which the dropout is performed. Default None. + p (float|int): Probability of setting units to zero. Default: 0.5 + axis (int|list|tuple): The axis along which the dropout is performed. Default None. mode(str, optional): ['upscale_in_train'(default) | 'downscale_in_infer'] 1. 
upscale_in_train(default), upscale the output at training time diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 10ceb48796903864b979cc21534206d2d936cbcd..ae574a8241bfffccea7c9d0e7fe71a83a710e778 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -172,6 +172,12 @@ class TestModel(unittest.TestCase): def test_fit_static(self): self.fit(False) + def test_fit_dynamic_with_tuple_input(self): + self.fit_with_tuple_input(True) + + def test_fit_static_with_tuple_input(self): + self.fit_with_tuple_input(False) + def test_fit_dynamic_with_rank(self): self.fit(True, 2, 0) @@ -240,6 +246,53 @@ class TestModel(unittest.TestCase): model.fit(train_loader, val_loader) fluid.disable_dygraph() if dynamic else None + def fit_with_tuple_input(self, dynamic, num_replicas=None, rank=None): + fluid.enable_dygraph(self.device) if dynamic else None + seed = 333 + paddle.seed(seed) + paddle.framework.random._manual_program_seed(seed) + + net = LeNet() + optim_new = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=net.parameters()) + model = Model(net, inputs=tuple(self.inputs), labels=tuple(self.labels)) + model.prepare( + optim_new, + loss=CrossEntropyLoss(reduction="sum"), + metrics=Accuracy()) + model.fit(self.train_dataset, batch_size=64, shuffle=False) + + result = model.evaluate(self.val_dataset, batch_size=64) + np.testing.assert_allclose(result['acc'], self.acc1) + + train_sampler = DistributedBatchSampler( + self.train_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + val_sampler = DistributedBatchSampler( + self.val_dataset, + batch_size=64, + shuffle=False, + num_replicas=num_replicas, + rank=rank) + + train_loader = fluid.io.DataLoader( + self.train_dataset, + batch_sampler=train_sampler, + places=self.device, + return_list=True) + + val_loader = fluid.io.DataLoader( + self.val_dataset, + batch_sampler=val_sampler, + places=self.device, + return_list=True) + + model.fit(train_loader, val_loader) + fluid.disable_dygraph() if dynamic else None + def evaluate(self, dynamic): fluid.enable_dygraph(self.device) if dynamic else None model = Model(LeNet(), self.inputs, self.labels) diff --git a/python/paddle/tests/test_transforms.py b/python/paddle/tests/test_transforms.py index 47977bdf5352bb867b69ed648492fb9a060a13c9..5086a12d945bcc107aef82d0c9b80edb2e42766b 100644 --- a/python/paddle/tests/test_transforms.py +++ b/python/paddle/tests/test_transforms.py @@ -454,6 +454,18 @@ class TestFunctional(unittest.TestCase): np.testing.assert_equal(rotated_np_img.shape, np.array(rotated_pil_img).shape) + def test_rotate1(self): + np_img = (np.random.rand(28, 28, 3) * 255).astype('uint8') + pil_img = Image.fromarray(np_img).convert('RGB') + + rotated_np_img = F.rotate( + np_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) + rotated_pil_img = F.rotate( + pil_img, 80, expand=True, center=[0, 0], fill=[0, 0, 0]) + + np.testing.assert_equal(rotated_np_img.shape, + np.array(rotated_pil_img).shape) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/vision/transforms/functional.py b/python/paddle/vision/transforms/functional.py index da90e4907e410a1a8587f33812c515f5106526fd..c65c2423d131900b62e5ee191288f029ae708776 100644 --- a/python/paddle/vision/transforms/functional.py +++ b/python/paddle/vision/transforms/functional.py @@ -538,10 +538,10 @@ def rotate(img, If true, expands the output image to make it large enough to hold the entire rotated image. 
If false or omitted, make the output image the same size as the input image. Note that the expand flag assumes rotation around the center and no translation. - center (2-tuple, optional): Optional center of rotation. + center (2-list|2-tuple, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. + fill (3-list|3-tuple or int): RGB pixel fill value for area outside the rotated image. If int, it is used for all channels respectively. @@ -568,6 +568,11 @@ def rotate(img, 'img should be PIL Image or ndarray with dim=[2 or 3]. Got {}'. format(type(img))) + if isinstance(center, list): + center = tuple(center) + if isinstance(fill, list): + fill = tuple(fill) + if _is_pil_image(img): return F_pil.rotate(img, angle, interpolation, expand, center, fill) else:
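
Note for reviewers (not part of the patch itself): the hunks above only relax type checks and update docstrings, so the behavioural change is easiest to see from a small usage sketch. The snippet below assumes a build that already contains this change; module paths follow the files touched above, and the tuple arguments are illustrative values rather than ones taken from the new tests.

import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.distribution import Categorical, Normal, Uniform

paddle.disable_static()  # dygraph mode, matching the new unit tests

# Distribution constructors now accept tuples in addition to lists, ndarrays and Tensors.
uniform = Uniform(low=(0.0, 1.0), high=(2.0, 3.0))
normal = Normal(loc=(0.0, 0.0), scale=(1.0, 2.0))
categorical = Categorical(logits=(0.1, 0.3, 0.6))

# paddle.nn.functional.dropout now accepts a tuple for `axis` (previously int or list only).
x = paddle.rand([4, 8])
y = F.dropout(x, p=0.5, axis=(0, 1), training=True, mode='upscale_in_train')

# Sequential now accepts name-Layer pairs given as lists as well as tuples.
model = fluid.dygraph.Sequential(
    ['fc1', paddle.nn.Linear(8, 4)],
    ['fc2', paddle.nn.Linear(4, 2)])
print(model(x).shape)  # [4, 2]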