diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index f09097b57bd7192923b638647ea7213b2c8d638a..df42516a43eaab8543928570cde233b5f34bcb29 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -676,20 +676,23 @@ class MSRAInitializer(Initializer):
 
     .. math::
 
-        x = \sqrt{\\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
 
     In case of Normal distribution, the mean is 0 and the standard deviation
     is
 
     .. math::
 
-        \sqrt{\\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
 
     Args:
         uniform (bool): whether to use uniform or normal distribution
-        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
         seed (int32): random seed
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -706,7 +709,12 @@ class MSRAInitializer(Initializer):
 
     """
 
-    def __init__(self, uniform=True, fan_in=None, seed=0):
+    def __init__(self,
+                 uniform=True,
+                 fan_in=None,
+                 seed=0,
+                 negative_slope=0.0,
+                 nonlinearity='relu'):
         """Constructor for MSRAInitializer
         """
         assert uniform is not None
@@ -715,6 +723,8 @@ class MSRAInitializer(Initializer):
         self._uniform = uniform
         self._fan_in = fan_in
         self._seed = seed
+        self._negative_slope = negative_slope
+        self._nonlinearity = nonlinearity
 
     def __call__(self, var, block=None):
         """Initialize the input tensor with MSRA initialization.
@@ -755,13 +765,16 @@ class MSRAInitializer(Initializer):
 
         if framework._non_static_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
+
                 out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                 -limit, 'max', limit, 'seed',
                                                 self._seed, 'dtype',
                                                 int(out_dtype))
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 if in_dygraph_mode():
                     place = _current_expected_place()
                     out_var = _C_ops.final_state_gaussian_random(
@@ -783,7 +796,8 @@ class MSRAInitializer(Initializer):
             return None
         else:
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 op = block.append_op(type="uniform_random",
                                      inputs={},
                                      outputs={"Out": out_var},
@@ -797,7 +811,8 @@ class MSRAInitializer(Initializer):
                                      stop_gradient=True)
 
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 op = block.append_op(type="gaussian_random",
                                      outputs={"Out": out_var},
                                      attrs={
diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py
index 456496571924e080b23477efd83e386ce4508876..8760c15096f8f6d707f31683327aaafe56a7ed70 100644
--- a/python/paddle/nn/initializer/kaiming.py
+++ b/python/paddle/nn/initializer/kaiming.py
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
 
     .. math::
 
-        \sqrt{\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
 
     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming normal Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -56,10 +59,12 @@ class KaimingNormal(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None):
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
         super(KaimingNormal, self).__init__(uniform=False,
                                             fan_in=fan_in,
-                                            seed=0)
+                                            seed=0,
+                                            negative_slope=negative_slope,
+                                            nonlinearity=nonlinearity)
 
 
 class KaimingUniform(MSRAInitializer):
@@ -76,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
 
     .. math::
 
-        x = \sqrt{\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
 
     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming uniform Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -99,7 +107,9 @@ class KaimingUniform(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None):
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
         super(KaimingUniform, self).__init__(uniform=True,
                                              fan_in=fan_in,
-                                             seed=0)
+                                             seed=0,
+                                             negative_slope=negative_slope,
+                                             nonlinearity=nonlinearity)
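Note that with the default arguments (`nonlinearity='relu'`, `negative_slope=0.0`) the gain is `sqrt(2)`, so the new expressions reduce to the old `sqrt(6.0 / fan_in)` limit and `sqrt(2.0 / fan_in)` standard deviation, leaving existing behaviour unchanged. All the new branches route through `calculate_gain`, which this diff neither defines nor imports (that hunk is not shown here). The following is only a sketch of what such a helper computes, assuming the conventional Kaiming gain table; the real `calculate_gain` lives elsewhere in the tree and may differ in detail.

```python
import math


def calculate_gain(nonlinearity, param=None):
    """Sketch: recommended gain for the given non-linear function."""
    if nonlinearity in ('linear', 'conv1d', 'conv2d', 'conv3d', 'sigmoid'):
        return 1.0
    if nonlinearity == 'tanh':
        return 5.0 / 3.0
    if nonlinearity == 'relu':
        return math.sqrt(2.0)
    if nonlinearity == 'leaky_relu':
        # param is the negative slope of the rectifier; the diff above
        # defaults it to 0.0, which makes the gain equal to relu's sqrt(2).
        negative_slope = 0.0 if param is None else param
        return math.sqrt(2.0 / (1.0 + negative_slope ** 2))
    raise ValueError("Unsupported nonlinearity: {}".format(nonlinearity))
```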
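Assuming the PR lands as shown, the new keyword arguments would be exercised through the public initializers like this (the layer sizes and the 0.1 slope are illustrative only):

```python
import paddle

# Kaiming normal initialization matched to a LeakyReLU(0.1) network.
weight_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.KaimingNormal(
        negative_slope=0.1, nonlinearity='leaky_relu'))
linear = paddle.nn.Linear(4, 8, weight_attr=weight_attr)

# fan_in is inferred as 4, so the sample std should sit roughly around
# gain / sqrt(fan_in) = sqrt(2 / 1.01) / 2, i.e. about 0.70.
print(float(linear.weight.numpy().std()))
```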