diff --git a/python/paddle/fluid/tests/unittests/test_initializer_nn.py b/python/paddle/fluid/tests/unittests/test_initializer_nn.py index 6ad19658fd20376cbdd1d370fd0268c163183f4f..edf52c6ff902fd4786e010c90aa3c21b79ff7267 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer_nn.py +++ b/python/paddle/fluid/tests/unittests/test_initializer_nn.py @@ -104,5 +104,97 @@ class TestConstantInitializer(unittest.TestCase): self.test_constant_initializer_dygraph("float16") +class TestKaimingInitializer(unittest.TestCase): + def static_test_kaiming_initializer_common(self, + init_inst, + dtype="float32", + uniform=False, + is_conv=False): + paddle.enable_static() + program = framework.Program() + block = program.global_block() + shape_mat = [5, 10, 15, 20] if is_conv else [5, 10] + for _ in range(2): + param = block.create_parameter( + dtype="float32", + shape=shape_mat, + lod_level=0, + name="param", + initializer=init_inst) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + if uniform: + self.assertEqual(init_op.type, 'uniform_random') + if is_conv: + receptive_field_size = float(15 * 20) + limit = np.sqrt(6.0 / (param.shape[1] * receptive_field_size)) + else: + limit = np.sqrt(6.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + else: + self.assertEqual(init_op.type, 'gaussian_random') + if is_conv: + receptive_field_size = float(15 * 20) + std = np.sqrt(2.0 / (param.shape[1] * receptive_field_size)) + else: + std = np.sqrt(2.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + paddle.disable_static() + + def dygraph_test_kaiming_initializer_common(self, + init_inst, + dtype="float32", + uniform=False): + linear = nn.Linear(40, 20, weight_attr=init_inst) + + def test_kaiming_dygraph(self): + self.dygraph_test_kaiming_initializer_common( + 
init_inst=initializer.KaimingUniform(), + dtype="float32", + uniform=True) + self.dygraph_test_kaiming_initializer_common( + init_inst=initializer.KaimingNormal(), + dtype="float32", + uniform=False) + + def test_kaiming_uniform_initializer_static(self): + """Test Kaiming uniform initializer for matrix multiply. + """ + self.static_test_kaiming_initializer_common( + init_inst=initializer.KaimingUniform(), + dtype="float32", + uniform=True, + is_conv=False) + + def test_kaiming_uniform_initializer_conv_static(self): + """Test Kaiming uniform initializer for convolutions. + """ + self.static_test_kaiming_initializer_common( + init_inst=initializer.KaimingUniform(), + dtype="float32", + uniform=True, + is_conv=True) + + def test_kaiming_normal_initializer_static(self): + """Test Kaiming normal initializer for matrix multiply. + """ + self.static_test_kaiming_initializer_common( + init_inst=initializer.KaimingNormal(), + dtype="float32", + uniform=False, + is_conv=False) + + def test_kaiming_normal_initializer_conv_static(self): + """Test Kaiming normal initializer for convolutions. 
+ """ + self.static_test_kaiming_initializer_common( + init_inst=initializer.KaimingNormal(), + dtype="float32", + uniform=False, + is_conv=True) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index db0f5dbff2b80bfb1db95bdeed20f937dc8b242a..bd072164e1b45595548e02bce2d83c4e10bfe7ef 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -14,7 +14,6 @@ # TODO: define the initializers to create a Parameter in neural network from ...fluid.initializer import Bilinear #DEFINE_ALIAS -from ...fluid.initializer import MSRA #DEFINE_ALIAS from ...fluid.initializer import Normal #DEFINE_ALIAS from ...fluid.initializer import TruncatedNormal #DEFINE_ALIAS from ...fluid.initializer import Uniform #DEFINE_ALIAS @@ -23,9 +22,12 @@ from ...fluid.initializer import Xavier #DEFINE_ALIAS from . import constant from .constant import Constant #DEFINE_ALIAS +from . import kaiming +from .kaiming import KaimingNormal #DEFINE_ALIAS +from .kaiming import KaimingUniform #DEFINE_ALIAS + __all__ = [ 'Bilinear', - 'MSRA', 'Normal', 'TruncatedNormal', 'Uniform', @@ -33,3 +35,4 @@ __all__ = [ ] __all__ += constant.__all__ +__all__ += kaiming.__all__ diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py new file mode 100644 index 0000000000000000000000000000000000000000..f0c6880e89d8eb6e1a0ecf166a0a926f8a1d87c3 --- /dev/null +++ b/python/paddle/nn/initializer/kaiming.py @@ -0,0 +1,103 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO: define the initializers of Kaiming functions in neural network +from ...fluid.initializer import MSRAInitializer + +__all__ = ['KaimingUniform', 'KaimingNormal'] + + +class KaimingNormal(MSRAInitializer): + """Implements the Kaiming Normal initializer + + This class implements the weight initialization from the paper + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification <https://arxiv.org/abs/1502.01852>`_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. math:: + + \sqrt{\\frac{2.0}{fan\_in}} + + Args: + fan_in (float32|None): fan_in for Kaiming normal Initializer. If None, it is\ + inferred from the variable. default is None. + + Note: + It is recommended to set fan_in to None for most cases. + + Examples: + .. 
code-block:: python + + import paddle + import paddle.nn as nn + + linear = nn.Linear(2, + 4, + weight_attr=nn.initializer.KaimingNormal()) + data = paddle.rand([30, 10, 2], dtype='float32') + res = linear(data) + + """ + + def __init__(self, fan_in=None): + super(KaimingNormal, self).__init__( + uniform=False, fan_in=fan_in, seed=0) + + +class KaimingUniform(MSRAInitializer): + """Implements the Kaiming Uniform initializer + + This class implements the weight initialization from the paper + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification <https://arxiv.org/abs/1502.01852>`_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. + + In case of Uniform distribution, the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in}} + + Args: + fan_in (float32|None): fan_in for Kaiming uniform Initializer. If None, it is\ + inferred from the variable. default is None. + + Note: + It is recommended to set fan_in to None for most cases. + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + + linear = nn.Linear(2, + 4, + weight_attr=nn.initializer.KaimingUniform()) + data = paddle.rand([30, 10, 2], dtype='float32') + res = linear(data) + + """ + + def __init__(self, fan_in=None): + super(KaimingUniform, self).__init__( + uniform=True, fan_in=fan_in, seed=0)