Commit d78d1193 authored by Abhinav Arora, committed by GitHub

Adding python wrapper for adam operator (#5021)

* Adding Adam Python wrapper

* Adding tests for Python Adam wrapper
Parent 046b8151
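For context, here is a minimal usage sketch of the new wrapper, assembled from the unit test added in this commit. The toy mul network, the variable names, and the call sequence are taken from that test; the optimizer import path mirrors the framework import in optimizer.py and is assumed here, so treat this as an illustration rather than a prescribed API.

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

# Build a toy program with a single mul op whose input mul.x is a parameter
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
    dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
    dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
    dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
    type="mul",
    inputs={"X": mul_x,
            "Y": mul_y},
    outputs={"Out": mul_out},
    attrs={"x_num_col_dims": 1})

# Append backward ops, then the adam update ops plus the beta-power updates
adam = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
params_grads = adam.create_backward_pass(mul_out)
opts = adam.create_optimization_pass(params_grads, mul_out)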
import paddle.v2.framework.framework as framework
from collections import defaultdict
__all__ = ['SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer']
__all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
]
class Optimizer(object):
@@ -43,6 +45,19 @@ class Optimizer(object):
"""
pass
def _finish_update(self, block):
"""Finish any custom updates needed
before completing an optimization step
Args:
block: the block in which the loss variable is present
Returns:
list of finish ops or None
"""
pass
def _add_accumulator(self, block, name, param, dtype=None, fill_value=0.0):
"""Utility function to add an accumulator for a parameter
@@ -137,15 +152,17 @@ class Optimizer(object):
parameters_and_grads: a list of (variable, gradient) pair to update.
Returns:
optimization_op_list: a list of optimization operators that will update
parameters using gradients.
return_op_list: a list of operators that will complete one step of
optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage
their internal state.
"""
# This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that
# the subclass will implement the _append_optimize_op method and the
# _initialize_tensors method. The subclass can extend the
# _create_accumulators method if it needs to create accumulators
# for parameters.
# for parameters and extend the _finish_update method to add custom ops.
# Create any accumulators
self._create_accumulators(loss.block,
@@ -160,7 +177,17 @@ class Optimizer(object):
param_and_grad)
optimize_ops.append(optimize_op)
return optimize_ops
# Returned list of ops can include more ops in addition
# to optimization ops
return_ops = optimize_ops
# Get custom finish ops for subclasses
# FIXME: Need to fix this once we figure out how to handle dependencies
finish_ops = self._finish_update(loss.block)
if finish_ops is not None:
return_ops += finish_ops
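# For a single parameter, AdamOptimizer ends up returning
# [adam_op, scale_beta1, scale_beta2] from here: the parameter update op
# plus the two _finish_update ops that advance the beta power accumulators
# (the test added below asserts len(opts) == 3 accordingly).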
return return_ops
def minimize(self, loss, parameter_list=None, no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`.
@@ -329,3 +356,124 @@ class AdagradOptimizer(Optimizer):
attrs={"epsilon": self._epsilon})
return adagrad_op
class AdamOptimizer(Optimizer):
"""Implements the Adam Optimizer
"""
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
def __init__(self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(AdamOptimizer, self).__init__()
self.type = "adam"
self._learning_rate = learning_rate
self._beta1 = beta1
self._beta2 = beta2
self._epsilon = epsilon
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = block.create_var(
dtype="float32", shape=lr_shape, lod_level=0)
# create an op to init the learning_rate
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
block.append_op(
type="fill_constant",
outputs={"Out": self._lr},
attrs={"shape": lr_shape,
"value": self._learning_rate})
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
global_block = block.program.global_block()
# Create beta1 and beta2 power tensors
beta_shape = [1]
# Create variables for beta1 and beta2 powers
self._beta1_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)
self._beta2_pow_acc = global_block.create_var(
dtype="float32", shape=beta_shape, lod_level=0)
# Initialize beta1 and beta2 power accumulators
# FIXME: Fix when Initialization design has been implemented
# https://github.com/PaddlePaddle/Paddle/pull/4852
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta1_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta1})
global_block.append_op(
type="fill_constant",
outputs={"Out": self._beta2_pow_acc},
attrs={"shape": beta_shape,
"value": self._beta2})
# Create accumulator tensors for first and second moments
for p in parameters:
self._add_accumulator(block, self._moment1_acc_str, p, 'float32')
self._add_accumulator(block, self._moment2_acc_str, p, 'float32')
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
moment1 = self._get_accumulator(self._moment1_acc_str,
param_and_grad[0])
moment2 = self._get_accumulator(self._moment2_acc_str,
param_and_grad[0])
# create the adam optimize op
adam_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": self._lr,
"Moment1": moment1,
"Moment2": moment2,
"Beta1Pow": self._beta1_pow_acc,
"Beta2Pow": self._beta2_pow_acc
},
outputs={
"ParamOut": param_and_grad[0],
"Moment1Out": moment1,
"Moment2Out": moment2
},
attrs={
"beta1": self._beta1,
"beta2": self._beta2,
"epsilon": self._epsilon
})
return adam_op
def _finish_update(self, block):
"""Update Beta1 and Beta2 Power accumulators
"""
assert isinstance(block, framework.Block)
global_block = block.program.global_block()
scale_beta1 = global_block.append_op(
type="scale",
inputs={"X": self._beta1_pow_acc},
outputs={"Out": self._beta1_pow_acc},
attrs={"scale": self._beta1})
scale_beta2 = global_block.append_op(
type="scale",
inputs={"X": self._beta2_pow_acc},
outputs={"Out": self._beta2_pow_acc},
attrs={"scale": self._beta2})
return [scale_beta1, scale_beta2]
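For reference, the NumPy sketch below shows what the ops wired above compute together. It follows the standard Adam update (Kingma and Ba): the adam op consumes Param, Grad, Moment1, Moment2 and the Beta1Pow/Beta2Pow accumulators (each initialized to beta1 and beta2 and multiplied by them in _finish_update after every step), while the exact kernel arithmetic lives in the C++ adam operator, not in this wrapper. The function name and the NumPy usage are local to this sketch.

import numpy as np

def adam_reference_step(param, grad, moment1, moment2, beta1_pow, beta2_pow,
                        lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Moment1Out / Moment2Out: biased first and second moment estimates
    moment1 = beta1 * moment1 + (1.0 - beta1) * grad
    moment2 = beta2 * moment2 + (1.0 - beta2) * grad * grad
    # Bias correction uses the running powers of beta1 and beta2
    lr_t = lr * np.sqrt(1.0 - beta2_pow) / (1.0 - beta1_pow)
    # ParamOut
    param = param - lr_t * moment1 / (np.sqrt(moment2) + epsilon)
    # _finish_update: the scale ops advance the power accumulators
    beta1_pow = beta1_pow * beta1
    beta2_pow = beta2_pow * beta2
    return param, moment1, moment2, beta1_pow, beta2_pow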
@@ -110,5 +110,54 @@ class TestAdagradOptimizer(unittest.TestCase):
self.assertTrue(mul_x.name in moment_acc)
class TestAdamOptimizer(unittest.TestCase):
class MockAdam(optimizer.AdamOptimizer):
def get_accumulators(self):
return self._accumulators
def get_moment1_str(self):
return self._moment1_acc_str
def get_moment2_str(self):
return self._moment2_acc_str
def test_adam_optimizer(self):
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
adam_optimizer = self.MockAdam(
learning_rate=0.01, beta1=0.9, beta2=0.999)
params_grads = adam_optimizer.create_backward_pass(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out)
self.assertEqual(len(opts), 3)
adam_op = opts[0]
self.assertEqual(adam_op.type, "adam")
# Check accumulators
accumulators = adam_optimizer.get_accumulators()
self.assertEqual(len(accumulators), 2)
self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
moment2_acc = accumulators[adam_optimizer.get_moment2_str()]
self.assertEqual(len(moment1_acc), 1)
self.assertEqual(len(moment2_acc), 1)
self.assertTrue(mul_x.name in moment1_acc)
self.assertTrue(mul_x.name in moment2_acc)
if __name__ == '__main__':
unittest.main()