Commit 323a0483 authored by Q qiaolongfei

add doc for BilinearInitializer MSRAInitializer

Parent 86092a97
@@ -19,10 +19,10 @@ from framework import convert_np_dtype_to_dtype_
 from core import VarDesc
 
 __all__ = [
-    'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
-    'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
-    'NormalInitializer', 'XavierInitializer', 'BilinearInitializer',
-    'MSRAInitializer'
+    'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA',
+    'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer',
+    'UniformInitializer', 'NormalInitializer', 'XavierInitializer',
+    'BilinearInitializer', 'MSRAInitializer'
 ]
 
 _force_init_on_cpu_ = False
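The practical effect of this hunk is that the short name `MSRA` is now exported alongside `MSRAInitializer`; the module defines such short aliases for each initializer, so both names should refer to the same class. A minimal check, assuming a Paddle Fluid installation of this vintage:

```python
import paddle.fluid as fluid

# 'MSRA' is now part of __all__; per the module's alias pattern it names
# the same class as 'MSRAInitializer' (assumption, not verified here).
assert fluid.initializer.MSRA is fluid.initializer.MSRAInitializer
```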
@@ -353,30 +353,42 @@ class MSRAInitializer(Initializer):
     """Implements the MSRA initializer a.k.a. Kaiming Initializer
 
     This class implements the weight initialization from the paper
-    Delving Deep into Rectifiers: Surpassing Human-Level Performance on
-    ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
-    and Jian Sun. This is a robust initialization method that particularly
-    considers the rectifier nonlinearities. In case of Uniform distribution,
-    the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
-    distribution, the mean is 0 and the standard deviation
-    is sqrt(2/ fan_in).
-
-    References:
-        [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
-            on ImageNet Classification
-            (https://arxiv.org/abs/1502.01852)
+    `Delving Deep into Rectifiers: Surpassing Human-Level Performance on
+    ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
+    by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
+    robust initialization method that particularly considers the rectifier
+    nonlinearities. In case of Uniform distribution, the range is [-x, x], where
+
+    .. math::
+
+        x = \sqrt{\\frac{6.0}{fan\_in}}
+
+    In case of Normal distribution, the mean is 0 and the standard deviation
+    is
+
+    .. math::
+
+        \sqrt{\\frac{2.0}{fan\_in}}
+
+    Args:
+        uniform (bool): whether to use uniform or normal distribution
+        fan_in (float): fan_in for MSRAInitializer. If None, it is\
+        inferred from the variable.
+        seed (int): random seed
+
+    Note:
+        It is recommended to set fan_in to None for most cases.
+
+    Examples:
+        .. code-block:: python
+
+            fc = fluid.layers.fc(
+                input=queries, size=10,
+                param_attr=fluid.initializer.MSRA(uniform=False))
     """
 
     def __init__(self, uniform=True, fan_in=None, seed=0):
         """Constructor for MSRAInitializer
-
-        Args:
-            uniform: whether to use uniform or normal distribution
-            fan_in: fan_in for MSRAInitializer. If None, it is
-                    inferred from the variable.
-            seed: random seed
-
-        Note: It is recommended to set fan_in to None for most cases.
         """
         assert uniform is not None
         assert seed is not None
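The two formulas the new docstring adds are easy to sanity-check outside Paddle. Below is a minimal NumPy sketch of the sampling the docstring describes; `msra_sample` and the inference of fan_in from the first dimension are illustrative assumptions, not Paddle's implementation:

```python
import numpy as np

def msra_sample(shape, uniform=True, fan_in=None, seed=0):
    # Sketch of MSRA/Kaiming sampling per the docstring's formulas.
    rng = np.random.RandomState(seed)
    if fan_in is None:
        # Assumption for this sketch: for a (fan_in, fan_out) weight
        # matrix, take fan_in from dim 0; Paddle infers it from the
        # variable's shape in a similar spirit.
        fan_in = shape[0]
    if uniform:
        limit = np.sqrt(6.0 / fan_in)   # range [-x, x], x = sqrt(6 / fan_in)
        return rng.uniform(-limit, limit, size=shape)
    std = np.sqrt(2.0 / fan_in)         # mean 0, std = sqrt(2 / fan_in)
    return rng.normal(0.0, std, size=shape)

w = msra_sample((128, 10), uniform=False)
print(w.std())  # should be close to sqrt(2 / 128) = 0.125
```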
@@ -436,34 +448,37 @@ class MSRAInitializer(Initializer):
 
 class BilinearInitializer(Initializer):
-    """Implements the bilinear initializer.
-
+    """
     This initializer can be used in transposed convolution operator to
     act as upsampling. Users can upsample a feature map with shape of
     (B, C, H, W) by any integer factor. The usage is:
 
-    >>> factor = 2
-    >>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
-    >>>                    initializer=Bilinear())
-    >>> conv_up = fluid.layers.conv2d_transpose(
-    >>>     input,
-    >>>     num_filters=C,
-    >>>     output_size=None,
-    >>>     filter_size=2 * factor - factor % 2,
-    >>>     padding=ceil((factor - 1) / 2.),
-    >>>     stride=factor,
-    >>>     groups=C,
-    >>>     param_attr=w_attr,
-    >>>     bias_attr=False)
-
+    Examples:
+
+        .. code-block:: python
+
+            factor = 2
+            w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
+                               initializer=Bilinear())
+            conv_up = fluid.layers.conv2d_transpose(
+                input,
+                num_filters=C,
+                output_size=None,
+                filter_size=2 * factor - factor % 2,
+                padding=ceil((factor - 1) / 2.),
+                stride=factor,
+                groups=C,
+                param_attr=w_attr,
+                bias_attr=False)
 
-    Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
-    convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`,
+    Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
+    convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`.
     This initializer will set a (K, K) interpolation kernel for every channel
     of the filter identically. The resulting shape of the output feature map
     will be (B, C, factor * H, factor * W). Note that the learning rate and the
     weight decay are set to 0 in order to keep coefficient values of bilinear
     interpolation unchanged during training.
     """
 
     def __init__(self):
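The docstring describes a (K, K) interpolation kernel replicated across all channels, with K = 2 * factor - factor % 2. Below is a hedged NumPy sketch of one standard construction of such a kernel (the classic FCN-style bilinear upsampling filter); `bilinear_kernel` is a hypothetical helper for illustration, not Paddle's code:

```python
import numpy as np

def bilinear_kernel(k):
    # Standard bilinear upsampling kernel of size (k, k): each entry is
    # (1 - |i/f - c|) * (1 - |j/f - c|), the FCN-style construction.
    f = np.ceil(k / 2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)
    i, j = np.ogrid[:k, :k]
    return (1 - np.abs(i / f - c)) * (1 - np.abs(j / f - c))

factor = 2
k = 2 * factor - factor % 2   # K = 4, the docstring's filter_size formula
kern = bilinear_kernel(k)
print(kern)                   # rows/cols taper as 0.25, 0.75, 0.75, 0.25

# Replicated per channel into a (C, 1, K, K) filter for groups=C
# conv2d_transpose; with stride=factor and padding=ceil((factor-1)/2),
# the output height is (H-1)*factor - 2*ceil((factor-1)/2) + K = factor*H.
C = 3
w = np.broadcast_to(kern, (C, 1, k, k))
```

With the learning rate and weight decay pinned to 0 as in the docstring's example, this kernel stays fixed during training, so the layer behaves as deterministic bilinear upsampling.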
@@ -480,7 +495,7 @@ class BilinearInitializer(Initializer):
            be added.
 
         Returns:
-            the initialization op
+            Operator: the initialization op
 
         Raises:
             ValueError: If type of `var` and `block` is not right.