diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index f09097b57bd7192923b638647ea7213b2c8d638a..df42516a43eaab8543928570cde233b5f34bcb29 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -676,20 +676,23 @@ class MSRAInitializer(Initializer):
 
     .. math::
 
-        x = \sqrt{\\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
 
     In case of Normal distribution, the mean is 0 and the standard deviation
     is
 
     .. math::
 
-        \sqrt{\\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
 
     Args:
         uniform (bool): whether to use uniform or normal distribution
-        fan_in (float32|None): fan_in for MSRAInitializer. If None, it is\
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
         seed (int32): random seed
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -706,7 +709,12 @@ class MSRAInitializer(Initializer):
 
     """
 
-    def __init__(self, uniform=True, fan_in=None, seed=0):
+    def __init__(self,
+                 uniform=True,
+                 fan_in=None,
+                 seed=0,
+                 negative_slope=0.0,
+                 nonlinearity='relu'):
         """Constructor for MSRAInitializer
         """
         assert uniform is not None
@@ -715,6 +723,8 @@ class MSRAInitializer(Initializer):
         self._uniform = uniform
         self._fan_in = fan_in
         self._seed = seed
+        self._negative_slope = negative_slope
+        self._nonlinearity = nonlinearity
 
     def __call__(self, var, block=None):
         """Initialize the input tensor with MSRA initialization.
@@ -755,13 +765,16 @@ class MSRAInitializer(Initializer):
 
         if framework._non_static_mode():
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
+
                 out_var = _C_ops.uniform_random('shape', out_var.shape, 'min',
                                                 -limit, 'max', limit, 'seed',
                                                 self._seed, 'dtype',
                                                 int(out_dtype))
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 if in_dygraph_mode():
                     place = _current_expected_place()
                     out_var = _C_ops.final_state_gaussian_random(
@@ -783,7 +796,8 @@ class MSRAInitializer(Initializer):
             return None
         else:
             if self._uniform:
-                limit = math.sqrt(6.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                limit = gain * math.sqrt(3.0 / float(fan_in))
                 op = block.append_op(type="uniform_random",
                                      inputs={},
                                      outputs={"Out": out_var},
@@ -797,7 +811,8 @@ class MSRAInitializer(Initializer):
                                      stop_gradient=True)
 
             else:
-                std = math.sqrt(2.0 / float(fan_in))
+                gain = calculate_gain(self._nonlinearity, self._negative_slope)
+                std = gain / math.sqrt(float(fan_in))
                 op = block.append_op(type="gaussian_random",
                                      outputs={"Out": out_var},
                                      attrs={
diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py
index 456496571924e080b23477efd83e386ce4508876..8760c15096f8f6d707f31683327aaafe56a7ed70 100644
--- a/python/paddle/nn/initializer/kaiming.py
+++ b/python/paddle/nn/initializer/kaiming.py
@@ -33,11 +33,14 @@ class KaimingNormal(MSRAInitializer):
 
     .. math::
 
-        \sqrt{\frac{2.0}{fan\_in}}
+        \frac{gain}{\sqrt{fan\_in}}
 
     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming normal Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -56,10 +59,12 @@ class KaimingNormal(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None):
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
         super(KaimingNormal, self).__init__(uniform=False,
                                             fan_in=fan_in,
-                                            seed=0)
+                                            seed=0,
+                                            negative_slope=negative_slope,
+                                            nonlinearity=nonlinearity)
 
 
 class KaimingUniform(MSRAInitializer):
@@ -76,11 +81,14 @@ class KaimingUniform(MSRAInitializer):
 
     .. math::
 
-        x = \sqrt{\frac{6.0}{fan\_in}}
+        x = gain \times \sqrt{\frac{3}{fan\_in}}
 
     Args:
-        fan_in (float32|None, optional): fan_in for Kaiming uniform Initializer. If None, it is
-            inferred from the variable. default is None.
+        fan_in (float32|None): fan_in (in_features) of the trainable Tensor.\
+            If None, it will be inferred automatically. If you do not want to use\
+            the in_features of the Tensor, you can set 'fan_in' yourself. default is None.
+        negative_slope (float, optional): negative slope of the rectifier (only used with 'leaky_relu'). default is 0.0.
+        nonlinearity (str, optional): name of the non-linear function. default is 'relu'.
 
     Note:
         It is recommended to set fan_in to None for most cases.
@@ -99,7 +107,9 @@ class KaimingUniform(MSRAInitializer):
 
     """
 
-    def __init__(self, fan_in=None):
+    def __init__(self, fan_in=None, negative_slope=0.0, nonlinearity='relu'):
         super(KaimingUniform, self).__init__(uniform=True,
                                              fan_in=fan_in,
-                                             seed=0)
+                                             seed=0,
+                                             negative_slope=negative_slope,
+                                             nonlinearity=nonlinearity)
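Note that with the default arguments (`nonlinearity='relu'`, `negative_slope=0.0`) the gain is `sqrt(2)`, so the new expressions reduce to the old `sqrt(6.0 / fan_in)` limit and `sqrt(2.0 / fan_in)` standard deviation, leaving existing behaviour unchanged. All the new branches route through `calculate_gain`, which this diff neither defines nor imports (that hunk is not shown here). The following is only a sketch of what such a helper computes, assuming the conventional Kaiming gain table; the real `calculate_gain` lives elsewhere in the tree and may differ in detail.

```python
import math


def calculate_gain(nonlinearity, param=None):
    """Sketch: recommended gain for the given non-linear function."""
    if nonlinearity in ('linear', 'conv1d', 'conv2d', 'conv3d', 'sigmoid'):
        return 1.0
    if nonlinearity == 'tanh':
        return 5.0 / 3.0
    if nonlinearity == 'relu':
        return math.sqrt(2.0)
    if nonlinearity == 'leaky_relu':
        # param is the negative slope of the rectifier; the diff above
        # defaults it to 0.0, which makes the gain equal to relu's sqrt(2).
        negative_slope = 0.0 if param is None else param
        return math.sqrt(2.0 / (1.0 + negative_slope ** 2))
    raise ValueError("Unsupported nonlinearity: {}".format(nonlinearity))
```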
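Assuming the PR lands as shown, the new keyword arguments would be exercised through the public initializers like this (the layer sizes and the 0.1 slope are illustrative only):

```python
import paddle

# Kaiming normal initialization matched to a LeakyReLU(0.1) network.
weight_attr = paddle.ParamAttr(
    initializer=paddle.nn.initializer.KaimingNormal(
        negative_slope=0.1, nonlinearity='leaky_relu'))
linear = paddle.nn.Linear(4, 8, weight_attr=weight_attr)

# fan_in is inferred as 4, so the sample std should sit roughly around
# gain / sqrt(fan_in) = sqrt(2 / 1.01) / 2, i.e. about 0.70.
print(float(linear.weight.numpy().std()))
```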