Unverified · Commit 79c5a461, authored by Abhinav Arora, committed by GitHub

Handling global step increment in optimizer python wrapper (#5097)

* Adding the increment op for global step

* Changing list to single op as per code review feedback
Parent 6783dcee
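For context, here is a minimal usage sketch of the new global_step argument, distilled from the unit test added further down in this commit. The two import paths are assumptions about the package layout of this revision; everything else mirrors the test code shown below.

    # Hedged sketch: mirrors test_sgd_optimizer_with_global_step below.
    # The import paths are assumed and may differ in your checkout.
    import paddle.v2.framework.framework as framework
    import paddle.v2.framework.optimizer as optimizer

    program = framework.Program()
    block = program.global_block()
    # A trivial forward pass (a single mul op) standing in for a real model.
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x,
                "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})

    # A one-element float variable that the optimizer bumps every iteration.
    global_step = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="step")
    sgd = optimizer.SGDOptimizer(learning_rate=0.01, global_step=global_step)
    # Besides the usual parameter-update ops, minimize() now also appends one
    # "increment" op that adds 1 to global_step each iteration.
    opts = sgd.minimize(mul_out)

When no global_step is passed (the default), behaviour is unchanged and only the parameter-update ops are returned.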
@@ -18,7 +18,8 @@ class Optimizer(object):
     but need to use one of it's implementation.
     """
 
-    def __init__(self):
+    def __init__(self, global_step=None):
+        self._global_step = global_step
         # Dictionary of accumulators. Some optimizer subclasses need to
         # allocate and manage extra variables associated with the parameters
         # to train. These variables are called accumulators.
@@ -109,6 +110,26 @@ class Optimizer(object):
                 format(name, param.name))
         return self._accumulators[name][param.name]
 
+    def _increment_global_step(self, block):
+        """Increment the global step by 1 after every iteration
+
+        Args:
+            block: the block in which the loss variable is present
+
+        Returns:
+            list with global_step increment op as its only element
+        """
+        assert isinstance(block, framework.Block)
+        assert self._global_step is not None
+        # create the increment op
+        increment_op = block.append_op(
+            type="increment",
+            inputs={"X": self._global_step},
+            outputs={"Out": self._global_step},
+            attrs={"step": 1.0})
+
+        return increment_op
+
     def create_optimization_pass(self, parameters_and_grads, loss):
         """Add optimization operators to update gradients to variables.
@@ -152,6 +173,8 @@ class Optimizer(object):
         if finish_ops is not None:
             return_ops += finish_ops
 
+        if self._global_step is not None:
+            return_ops.append(self._increment_global_step(loss.block))
         return return_ops
 
     def minimize(self, loss, parameter_list=None, no_grad_set=None):
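The effect of the two added lines: when a global_step variable was supplied to the constructor, the op list returned by create_optimization_pass (and therefore by minimize) gains one trailing increment op. Continuing the sketch from the top of this page, where sgd and mul_out are the illustrative objects defined there:

    # Illustrative check of the return value; mirrors the assertions in the
    # new unit test at the bottom of this commit.
    opts = sgd.minimize(mul_out)
    assert len(opts) == 2
    assert [op.type for op in opts] == ["sgd", "increment"]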
@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
     """ Simple SGD optimizer without any state.
     """
 
-    def __init__(self, learning_rate):
+    def __init__(self, learning_rate, global_step=None):
         assert learning_rate is not None
-        super(SGDOptimizer, self).__init__()
+        super(SGDOptimizer, self).__init__(global_step)
         self.type = "sgd"
         self._learning_rate = learning_rate
@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
     """
     _velocity_acc_str = "velocity"
 
-    def __init__(self, learning_rate, momentum, use_nesterov=False):
+    def __init__(self,
+                 learning_rate,
+                 momentum,
+                 use_nesterov=False,
+                 global_step=None):
         assert learning_rate is not None
         assert momentum is not None
-        super(MomentumOptimizer, self).__init__()
+        super(MomentumOptimizer, self).__init__(global_step)
         self.type = "momentum"
         self._learning_rate = learning_rate
         self._momentum = momentum
@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
 
-    def __init__(self, learning_rate, epsilon=1.0e-6):
+    def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
         assert learning_rate is not None
         assert epsilon is not None
-        super(AdagradOptimizer, self).__init__()
+        super(AdagradOptimizer, self).__init__(global_step)
         self.type = "adagrad"
         self._learning_rate = learning_rate
         self._epsilon = epsilon
...@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer): ...@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer):
learning_rate=0.001, learning_rate=0.001,
beta1=0.9, beta1=0.9,
beta2=0.999, beta2=0.999,
epsilon=1e-8): epsilon=1e-8,
global_step=None):
assert learning_rate is not None assert learning_rate is not None
assert beta1 is not None assert beta1 is not None
assert beta2 is not None assert beta2 is not None
assert epsilon is not None assert epsilon is not None
super(AdamOptimizer, self).__init__() super(AdamOptimizer, self).__init__(global_step)
self.type = "adam" self.type = "adam"
self._learning_rate = learning_rate self._learning_rate = learning_rate
self._beta1 = beta1 self._beta1 = beta1
...@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer): ...@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer):
learning_rate=0.001, learning_rate=0.001,
beta1=0.9, beta1=0.9,
beta2=0.999, beta2=0.999,
epsilon=1e-8): epsilon=1e-8,
global_step=None):
assert learning_rate is not None assert learning_rate is not None
assert beta1 is not None assert beta1 is not None
assert beta2 is not None assert beta2 is not None
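Each built-in optimizer above is changed the same way: accept global_step as an extra keyword argument and forward it to the base class. A hypothetical user-defined optimizer would follow the identical pattern; MyOptimizer, its "my_op" type, and the import path are illustrative and not part of this commit:

    # Hypothetical subclass showing the forwarding pattern; illustrative only.
    from paddle.v2.framework.optimizer import Optimizer  # assumed import path

    class MyOptimizer(Optimizer):
        def __init__(self, learning_rate, global_step=None):
            assert learning_rate is not None
            super(MyOptimizer, self).__init__(global_step)
            self.type = "my_op"
            self._learning_rate = learning_rate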
......
@@ -27,6 +27,32 @@ class TestOptimizer(unittest.TestCase):
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
 
+    def test_sgd_optimizer_with_global_step(self):
+        program = framework.Program()
+        block = program.global_block()
+        mul_x = block.create_parameter(
+            dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
+        mul_y = block.create_var(
+            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
+        mul_out = block.create_var(
+            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        block.append_op(
+            type="mul",
+            inputs={"X": mul_x,
+                    "Y": mul_y},
+            outputs={"Out": mul_out},
+            attrs={"x_num_col_dims": 1})
+        global_step = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="step")
+        sgd_optimizer = optimizer.SGDOptimizer(
+            learning_rate=0.01, global_step=global_step)
+        opts = sgd_optimizer.minimize(mul_out)
+        self.assertEqual(len(opts), 2)
+        sgd_op = opts[0]
+        self.assertEqual(sgd_op.type, "sgd")
+        increment_op = opts[1]
+        self.assertEqual(increment_op.type, "increment")
+
 class TestMomentumOptimizer(unittest.TestCase):
     class MockMomentum(optimizer.MomentumOptimizer):
......