From 4a2b0ae4d38c1414510c275889eff1d07a709139 Mon Sep 17 00:00:00 2001 From: Abhinav Arora Date: Tue, 21 Nov 2017 19:53:22 +0530 Subject: [PATCH] Implementing the MSRA initializer for rectifier units --- python/paddle/v2/fluid/initializer.py | 83 ++++++++++++++ .../paddle/v2/fluid/tests/test_initializer.py | 104 ++++++++++++++++++ 2 files changed, 187 insertions(+) diff --git a/python/paddle/v2/fluid/initializer.py b/python/paddle/v2/fluid/initializer.py index ded144ecd5d..1a9d804ee7e 100644 --- a/python/paddle/v2/fluid/initializer.py +++ b/python/paddle/v2/fluid/initializer.py @@ -285,3 +285,86 @@ class XavierInitializer(Initializer): }) var.op = op return op + + +class MSRAInitializer(Initializer): + """Implements the MSRA initializer a.k.a. Kaiming Initializer + + This class implements the weight initialization from the paper + Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren + and Jian Sun. This is a robust initialization method that particularly + considers the rectifier nonlinearities. In case of Uniform distribution, + the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal + distribution, the mean is 0 and the standard deviation + is sqrt(2/ fan_in). + + References: + [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance + on ImageNet Classification + (https://arxiv.org/abs/1502.01852) + """ + + def __init__(self, uniform=True, fan_in=None, seed=0): + """Constructor for MSRAInitializer + + Args: + uniform: whether to use uniform or normal distribution + fan_in: fan_in for MSRAInitializer. If None, it is + inferred from the variable. + seed: random seed + + Note: It is recommended to set fan_in to None for most cases. + """ + assert uniform is not None + assert seed is not None + super(MSRAInitializer, self).__init__() + self._uniform = uniform + self._fan_in = fan_in + self._seed = seed + + def __call__(self, var, block): + """Add MSRA initialization ops for a variable + + Args: + var: Variable that needs to be initialized + block: The block in which initialization ops + should be added + + Returns: + the initialization op + """ + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + f_in, f_out = self._compute_fans(var) + + # If fan_in is passed, use it + fan_in = f_in if self._fan_in is None else self._fan_in + + if self._uniform: + limit = np.sqrt(6.0 / float(fan_in)) + op = block.prepend_op( + type="uniform_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "min": -limit, + "max": limit, + "seed": self._seed + }) + + else: + std = np.sqrt(2.0 / float(fan_in)) + op = block.prepend_op( + type="gaussian_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "data_type": int(var.data_type), + "mean": 0.0, + "std": std, + "seed": self._seed + }) + var.op = op + return op diff --git a/python/paddle/v2/fluid/tests/test_initializer.py b/python/paddle/v2/fluid/tests/test_initializer.py index f2eb79b2096..6c20203f8ec 100644 --- a/python/paddle/v2/fluid/tests/test_initializer.py +++ b/python/paddle/v2/fluid/tests/test_initializer.py @@ -223,5 +223,109 @@ class TestXavierInitializer(unittest.TestCase): self.assertEqual(init_op.attr('seed'), 134) +class TestMSRAInitializer(unittest.TestCase): + def test_uniform_msra_initializer(self): + """Test MSRA initializer with uniform distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_uniform_msra_initializer_conv(self): + """Test MSRA initializer with uniform distribution on + for convolutions. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + receptive_field_size = float(15 * 20) + limit = np.sqrt(6.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer(self): + """Test MSRA initializer with normal distribution on + for matrix multiply. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + std = np.sqrt(2.0 / param.shape[0]) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_normal_msra_initializer_conv(self): + """Test MSRA initializer with normal distribution on + for convolutions. + """ + program = framework.Program() + block = program.global_block() + param = block.create_parameter( + dtype="float32", + shape=[5, 10, 15, 20], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer(uniform=False)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'gaussian_random') + receptive_field_size = float(15 * 20) + std = np.sqrt(2.0 / (param.shape[1] * receptive_field_size)) + self.assertAlmostEqual(init_op.attr('mean'), 0.0, delta=DELTA) + self.assertAlmostEqual(init_op.attr('std'), std, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 0) + + def test_msra_initializer_supplied_arguments(self): + """Test the MSRA initializer with supplied arguments + """ + program = framework.Program() + block = program.global_block() + block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="param", + initializer=initializer.MSRAInitializer( + fan_in=12, seed=134)) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'uniform_random') + limit = np.sqrt(6.0 / 12) + self.assertAlmostEqual(init_op.attr('min'), -limit, delta=DELTA) + self.assertAlmostEqual(init_op.attr('max'), limit, delta=DELTA) + self.assertEqual(init_op.attr('seed'), 134) + + if __name__ == '__main__': unittest.main() -- GitLab