Unverified commit d7bf0668, authored by kexinzhao, committed by GitHub

Adding interface for decayed adagrad optimizer (#5644)

* add decayed adagrad python code

* fix typo and order

* small fix
Parent 1db1a0dc
@@ -9,7 +9,7 @@ from paddle.v2.fluid.layer_helper import LayerHelper
__all__ = [
    'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
-   'AdamaxOptimizer'
+   'AdamaxOptimizer', 'DecayedAdagradOptimizer'
]
@@ -85,7 +85,7 @@ class Optimizer(object):
        """
        if (name in self._accumulators and
                param.name in self._accumulators[name]):
-            raise Exception("Accumulator {} already exists for parmeter {}".
+            raise Exception("Accumulator {} already exists for parameter {}".
                             format(name, param.name))
        assert isinstance(self.helper, LayerHelper)
@@ -307,7 +307,7 @@ class AdagradOptimizer(Optimizer):
        moment_acc = self._get_accumulator(self._moment_acc_str,
                                           param_and_grad[0])
-        # create the adagrad optimizer op
+        # Create the adagrad optimizer op
        adagrad_op = block.append_op(
            type=self.type,
            inputs={
@@ -510,3 +510,51 @@ class AdamaxOptimizer(Optimizer):
            attrs={"scale": self._beta1})
        return [scale_beta1]
class DecayedAdagradOptimizer(Optimizer):
    """Simple Decayed Adagrad optimizer with moment state
    """
    _moment_acc_str = "moment"

    def __init__(self,
                 learning_rate,
                 decay=0.95,
                 epsilon=1.0e-6,
                 global_step=None):
        assert learning_rate is not None
        assert decay is not None
        assert epsilon is not None

        super(DecayedAdagradOptimizer, self).__init__(global_step)
        self.type = "decayed_adagrad"
        self._learning_rate = learning_rate
        self._decay = decay
        self._epsilon = epsilon

    def _create_accumulators(self, block, parameters):
        assert isinstance(block, framework.Block)

        for p in parameters:
            self._add_accumulator(self._moment_acc_str, p)

    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, framework.Block)

        moment_acc = self._get_accumulator(self._moment_acc_str,
                                           param_and_grad[0])

        # Create the decayed adagrad optimizer op
        decayed_adagrad_op = block.append_op(
            type=self.type,
            inputs={
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "Moment": moment_acc,
                "LearningRate": self._create_param_lr(param_and_grad)
            },
            outputs={"ParamOut": param_and_grad[0],
                     "MomentOut": moment_acc},
            attrs={"epsilon": self._epsilon})

        return decayed_adagrad_op
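The decayed_adagrad op that the class appends is implemented as a C++ operator; the Python wrapper above only declares its inputs, outputs, and the epsilon attribute. For reference, below is a minimal NumPy sketch of the standard decayed-Adagrad update such an op applies; the function name and exact kernel details are illustrative assumptions, not code from this commit.

import numpy as np

def decayed_adagrad_step(param, grad, moment, learning_rate,
                         decay=0.95, epsilon=1.0e-6):
    # Keep an exponentially decayed sum of squared gradients rather than the
    # unbounded running sum used by plain Adagrad.
    moment_out = decay * moment + (1.0 - decay) * grad * grad
    # Scale the step by the root of the decayed accumulator, as in Adagrad.
    param_out = param - learning_rate * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out

Because old squared gradients fade under the decay factor, the effective learning rate does not shrink toward zero over long runs, which is the main practical difference from plain Adagrad.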
@@ -198,7 +198,7 @@ class TestAdagradOptimizer(unittest.TestCase):
        adagrad_op = opts[0]
        self.assertEqual(adagrad_op.type, "adagrad")

-        # check accumulators
+        # Check accumulators
        accumulators = adagrad_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(adagrad_optimizer.get_moment_str() in accumulators)

@@ -331,5 +331,59 @@ class TestAdamaxOptimizer(unittest.TestCase):
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
class TestDecayedAdagradOptimizer(unittest.TestCase):
    class MockDecayedAdagrad(optimizer.DecayedAdagradOptimizer):
        def get_accumulators(self):
            return self._accumulators

        def get_moment_str(self):
            return self._moment_acc_str

    def test_decayed_adagrad_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        learning_rate = 0.01
        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
        params_grads = append_backward_ops(mul_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
        opts = decayed_adagrad_optimizer.create_optimization_pass(
            params_grads, mul_out, init_program)
        self.assertEqual(len(opts), 1)
        decayed_adagrad_op = opts[0]
        self.assertEqual(decayed_adagrad_op.type, "decayed_adagrad")

        # Check accumulators
        accumulators = decayed_adagrad_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(
            decayed_adagrad_optimizer.get_moment_str() in accumulators)
        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
if __name__ == '__main__':
    unittest.main()
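For context, here is a sketch of how the new optimizer can be wired into a program, following the same explicit backward/optimization-pass pattern the test above exercises; the imports and the toy mul program are assumptions modeled on the test module rather than part of this commit.

import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops

init_program = framework.Program()
program = framework.Program()
block = program.global_block()

# A toy forward pass out = x * y, with x as the trainable parameter.
x = block.create_parameter(dtype="float32", shape=[5, 10], lod_level=0, name="x")
y = block.create_var(dtype="float32", shape=[10, 8], lod_level=0, name="y")
out = block.create_var(dtype="float32", shape=[5, 8], lod_level=0, name="out")
block.append_op(
    type="mul",
    inputs={"X": x, "Y": y},
    outputs={"Out": out},
    attrs={"x_num_col_dims": 1})

# Append the backward ops, then let the optimizer add its update ops
# (and the fill_constant initializers that land in init_program).
opt = optimizer.DecayedAdagradOptimizer(
    learning_rate=0.01, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(out)
opt_ops = opt.create_optimization_pass(params_grads, out, init_program)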