From f8c6dadae154ed41a8b9092cbbee13587846c063 Mon Sep 17 00:00:00 2001
From: Abhinav Arora
Date: Wed, 25 Oct 2017 19:52:13 -0700
Subject: [PATCH] Implementing the python wrapper for Adamax optimizer (#5061)

---
 python/paddle/v2/framework/optimizer.py      | 110 +++++++++++++++++-
 .../v2/framework/tests/test_optimizer.py     |  49 ++++++++
 2 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py
index 3ad87d7bf1..e9df5483e2 100644
--- a/python/paddle/v2/framework/optimizer.py
+++ b/python/paddle/v2/framework/optimizer.py
@@ -4,7 +4,8 @@ import paddle.v2.framework.framework as framework
 from paddle.v2.framework.backward import append_backward_ops
 
 __all__ = [
-    'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
+    'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
+    'AdamaxOptimizer'
 ]
 
 
@@ -399,7 +400,7 @@ class AdamOptimizer(Optimizer):
                                        param_and_grad[0])
         moment2 = self._get_accumulator(self._moment2_acc_str,
                                         param_and_grad[0])
-        # create the momentum optimize op
+        # create the adam optimize op
         adam_op = block.append_op(
             type=self.type,
             inputs={
@@ -442,3 +443,108 @@ class AdamOptimizer(Optimizer):
             attrs={"scale": self._beta2})
 
         return [scale_beta1, scale_beta2]
+
+
+class AdamaxOptimizer(Optimizer):
+    """Implements the Adamax Optimizer
+    """
+    _moment_acc_str = "moment"
+    _inf_norm_acc_str = "inf_norm"
+
+    def __init__(self,
+                 learning_rate=0.001,
+                 beta1=0.9,
+                 beta2=0.999,
+                 epsilon=1e-8):
+        assert learning_rate is not None
+        assert beta1 is not None
+        assert beta2 is not None
+        assert epsilon is not None
+        super(AdamaxOptimizer, self).__init__()
+        self.type = "adamax"
+        self._learning_rate = learning_rate
+        self._beta1 = beta1
+        self._beta2 = beta2
+        self._epsilon = epsilon
+
+    def _initialize_tensors(self, block):
+        assert isinstance(block, framework.Block)
+        lr_shape = [1]
+        # create a variable for learning_rate
+        self._lr = block.create_var(
+            dtype="float32", shape=lr_shape, lod_level=0)
+
+        # create an op to init the learning_rate
+        # FIXME: Fix when Initialization design has been implemented
+        # https://github.com/PaddlePaddle/Paddle/pull/4852
+        block.append_op(
+            type="fill_constant",
+            outputs={"Out": self._lr},
+            attrs={"shape": lr_shape,
+                   "value": self._learning_rate})
+
+    def _create_accumulators(self, block, parameters):
+        assert isinstance(block, framework.Block)
+
+        global_block = block.program.global_block()
+        # Create beta1 power accumulator tensor
+        beta_shape = [1]
+        self._beta1_pow_acc = global_block.create_var(
+            dtype="float32", shape=beta_shape, lod_level=0)
+
+        # Initialize beta1 power accumulator
+        # FIXME: Fix when Initialization design has been implemented
+        # https://github.com/PaddlePaddle/Paddle/pull/4852
+        global_block.append_op(
+            type="fill_constant",
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"shape": beta_shape,
+                   "value": self._beta1})
+
+        # Create accumulator tensors for first moment and infinity norm
+        for p in parameters:
+            self._add_accumulator(block, self._moment_acc_str, p, 'float32')
+            self._add_accumulator(block, self._inf_norm_acc_str, p, 'float32')
+
+    def _append_optimize_op(self, block, param_and_grad):
+        assert isinstance(block, framework.Block)
+
+        moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
+        inf_norm = self._get_accumulator(self._inf_norm_acc_str,
+                                         param_and_grad[0])
+        # create the adamax optimize op
+        adamax_op = block.append_op(
+            type=self.type,
+            inputs={
+                "Param": param_and_grad[0],
+                "Grad": param_and_grad[1],
+                "LearningRate": self._lr,
+                "Moment": moment,
+                "InfNorm": inf_norm,
+                "Beta1Pow": self._beta1_pow_acc
+            },
+            outputs={
+                "ParamOut": param_and_grad[0],
+                "MomentOut": moment,
+                "InfNormOut": inf_norm
+            },
+            attrs={
+                "beta1": self._beta1,
+                "beta2": self._beta2,
+                "epsilon": self._epsilon
+            })
+
+        return adamax_op
+
+    def _finish_update(self, block):
+        """Update Beta1 Power accumulator
+        """
+        assert isinstance(block, framework.Block)
+        global_block = block.program.global_block()
+        scale_beta1 = global_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        return [scale_beta1]
diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py
index d1527e70c0..6dfd94e8c8 100644
--- a/python/paddle/v2/framework/tests/test_optimizer.py
+++ b/python/paddle/v2/framework/tests/test_optimizer.py
@@ -196,5 +196,54 @@ class TestAdamOptimizer(unittest.TestCase):
         self.assertTrue(mul_x.name in moment2_acc)
 
 
+class TestAdamaxOptimizer(unittest.TestCase):
+    class MockAdamax(optimizer.AdamaxOptimizer):
+        def get_accumulators(self):
+            return self._accumulators
+
+        def get_moment_str(self):
+            return self._moment_acc_str
+
+        def get_inf_norm_str(self):
+            return self._inf_norm_acc_str
+
+    def test_adamax_optimizer(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        adamax_optimizer = self.MockAdamax(
+            learning_rate=0.01, beta1=0.9, beta2=0.999)
+        params_grads = append_backward_ops(mul_out)
+        self.assertEqual(len(params_grads), 1)
+        self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
+        opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out)
+        self.assertEqual(len(opts), 2)
+        adamax_op = opts[0]
+        self.assertEqual(adamax_op.type, "adamax")
+
+        # Check accumulators
+        accumulators = adamax_optimizer.get_accumulators()
+        self.assertEqual(len(accumulators), 2)
+        self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
+        self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
+        moment_acc = accumulators[adamax_optimizer.get_moment_str()]
+        inf_norm_acc = accumulators[adamax_optimizer.get_inf_norm_str()]
+        self.assertEqual(len(moment_acc), 1)
+        self.assertEqual(len(inf_norm_acc), 1)
+        self.assertTrue(mul_x.name in moment_acc)
+        self.assertTrue(mul_x.name in inf_norm_acc)
+
+
 if __name__ == '__main__':
     unittest.main()
-- 
GitLab
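For context, below is a minimal usage sketch of the new AdamaxOptimizer, assembled only from the v2 framework API that the unit test in this patch already exercises (framework.Program, block.append_op, append_backward_ops, create_optimization_pass); it is illustrative and not part of the change itself. Adamax is the infinity-norm variant of Adam, which is why the wrapper keeps a first-moment and an infinity-norm accumulator per parameter but only a beta1 power accumulator globally, and why _finish_update scales just that one accumulator.

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops

# Build a one-op program (mul.x * mul.y -> mul.out), mirroring the unit test.
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x,
            "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

# Append gradient ops for mul_out, then let the Adamax wrapper append its
# update ops (one adamax op per parameter, plus the beta1 power scale op
# added by _finish_update).
params_grads = append_backward_ops(mul_out)
adamax = optimizer.AdamaxOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = adamax.create_optimization_pass(params_grads, mul_out)

As in test_adamax_optimizer, opts ends up holding two ops for this single-parameter program: the adamax update op itself and the scale op that advances the beta1 power accumulator.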