Unverified commit 79c5a461, authored by Abhinav Arora and committed by GitHub

Handling global step increment in optimizer python wrapper (#5097)

* Adding the increment op for global step

* Changing list to single op as per code review feedback
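For context, a minimal usage sketch of the new keyword. The import paths below are assumptions based on the test file in this diff and may differ between Paddle versions; only the `global_step=` argument and the extra `increment` op are what this commit actually adds.

```python
# Sketch only: import paths are assumed from the test file in this diff
# and may differ between Paddle versions.
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

program = framework.Program()
block = program.global_block()

# A one-element float variable used as the global step counter.
global_step = block.create_var(
    dtype="float32", shape=[1], lod_level=0, name="step")

# Passing global_step makes the optimizer append one "increment" op that
# adds 1.0 to the counter after the update ops of every iteration.
sgd = optimizer.SGDOptimizer(learning_rate=0.01, global_step=global_step)
# opts = sgd.minimize(loss)  # returns [sgd_op, ..., increment_op]
```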
Parent 6783dcee
......@@ -18,7 +18,8 @@ class Optimizer(object):
but need to use one of it's implementation.
"""
def __init__(self):
def __init__(self, global_step=None):
self._global_step = global_step
# Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators.
......@@ -109,6 +110,26 @@ class Optimizer(object):
format(name, param.name))
return self._accumulators[name][param.name]
def _increment_global_step(self, block):
"""Increment the global step by 1 after every iteration
Args:
block: the block in which the loss variable is present
Returns:
list with global_step increment op as its only element
"""
assert isinstance(block, framework.Block)
assert self._global_step is not None
# create the increment op
increment_op = block.append_op(
type="increment",
inputs={"X": self._global_step},
outputs={"Out": self._global_step},
attrs={"step": 1.0})
return increment_op
def create_optimization_pass(self, parameters_and_grads, loss):
"""Add optimization operators to update gradients to variables.
......@@ -152,6 +173,8 @@ class Optimizer(object):
if finish_ops is not None:
return_ops += finish_ops
if self._global_step is not None:
return_ops.append(self._increment_global_step(loss.block))
return return_ops
def minimize(self, loss, parameter_list=None, no_grad_set=None):
......@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
def __init__(self, learning_rate):
def __init__(self, learning_rate, global_step=None):
assert learning_rate is not None
super(SGDOptimizer, self).__init__()
super(SGDOptimizer, self).__init__(global_step)
self.type = "sgd"
self._learning_rate = learning_rate
......@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
"""
_velocity_acc_str = "velocity"
def __init__(self, learning_rate, momentum, use_nesterov=False):
def __init__(self,
learning_rate,
momentum,
use_nesterov=False,
global_step=None):
assert learning_rate is not None
assert momentum is not None
super(MomentumOptimizer, self).__init__()
super(MomentumOptimizer, self).__init__(global_step)
self.type = "momentum"
self._learning_rate = learning_rate
self._momentum = momentum
......@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
"""
_moment_acc_str = "moment"
def __init__(self, learning_rate, epsilon=1.0e-6):
def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
assert learning_rate is not None
assert epsilon is not None
super(AdagradOptimizer, self).__init__()
super(AdagradOptimizer, self).__init__(global_step)
self.type = "adagrad"
self._learning_rate = learning_rate
self._epsilon = epsilon
......@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(AdamOptimizer, self).__init__()
super(AdamOptimizer, self).__init__(global_step)
self.type = "adam"
self._learning_rate = learning_rate
self._beta1 = beta1
......@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
......
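The `_increment_global_step` helper above appends a single `increment` op whose output aliases its input, so the counter is bumped by the `step` attribute (1.0) once per optimization pass. A plain NumPy analogue of that semantics, purely illustrative and not Paddle code:

```python
import numpy as np

def increment(x, step=1.0):
    # Out = X + step; in the optimizer, X and Out are both the global_step
    # variable, so the counter is updated in place once per iteration.
    return x + step

global_step = np.array([0.0], dtype="float32")
for _ in range(3):
    global_step = increment(global_step, step=1.0)
print(global_step)  # [3.]
```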
......@@ -27,6 +27,32 @@ class TestOptimizer(unittest.TestCase):
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
def test_sgd_optimizer_with_global_step(self):
program = framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
mul_y = block.create_var(
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
global_step = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="step")
sgd_optimizer = optimizer.SGDOptimizer(
learning_rate=0.01, global_step=global_step)
opts = sgd_optimizer.minimize(mul_out)
self.assertEqual(len(opts), 2)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
increment_op = opts[1]
self.assertEqual(increment_op.type, "increment")
class TestMomentumOptimizer(unittest.TestCase):
class MockMomentum(optimizer.MomentumOptimizer):
......
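As the diff shows, the same keyword is added to the other optimizer wrappers (Momentum, Adagrad, Adam, Adamax), so one shared counter variable works regardless of the update rule. A sketch, with the same assumed import paths as above:

```python
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer

block = framework.Program().global_block()
step = block.create_var(dtype="float32", shape=[1], lod_level=0, name="step")

# Each constructor simply forwards global_step to the base Optimizer.
momentum = optimizer.MomentumOptimizer(
    learning_rate=0.01, momentum=0.9, global_step=step)
adagrad = optimizer.AdagradOptimizer(learning_rate=0.01, global_step=step)
adam = optimizer.AdamOptimizer(learning_rate=0.001, global_step=step)
adamax = optimizer.AdamaxOptimizer(learning_rate=0.001, global_step=step)
```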