Commit dd0008d5 authored by Yu Yang, committed by GitHub

Extract apply_backward_pass to backward.py (#5026)

* Extract apply_backward_pass to backward.py

Rename apply_backward_pass to append_backward_ops

* Fix CI

* Update design doc
Parent fd2eb550
@@ -65,20 +65,6 @@ class Optimizer(object):
     def __init__(self):
         pass
 
-    def create_backward_pass(self, loss, parameter_list=None):
-        """
-        create and add gradient Operators in BlockDesc to Compute gradients of `loss`
-        for parameters in parameter_list
-
-        Args:
-          loss: an variable generated by cost function.
-          parameter_list: parameters that need to compute gradient and update to optimize the lost.
-
-        Returns:
-          list of (parameters, gradients) pair.
-        """
-        return None
-
     def create_optimization_pass(self, parameters_and_grads):
         """Add optimization operators to update gradients to variables.
 
@@ -93,7 +79,7 @@ class Optimizer(object):
     def minimize(self, loss, parameter_list):
         """Add operations to minimize `loss` by updating `parameter_list`.
 
-        This method combines interface `create_backward_pass()` and
+        This method combines interface `append_backward_ops()` and
         `create_optimization_pass()` into one.
         """
         params_grads = self.create_backward_pass(loss, parameter_list)
......
+from paddle.v2.framework import framework as framework
+
+__all__ = ['append_backward_ops']
+
+
+def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
+    """
+    Create and add gradient Operators in BlockDesc to compute
+    gradients of `loss` for parameters in parameter_list
+
+    :param loss: an variable generated by cost function.
+    :type loss: Variable
+    :param no_grad_set: variable that should not create gradient
+    :type no_grad_set: set
+    :param parameter_list: parameters that need to compute gradient and
+    update to optimize the lost.
+    :type: list
+    :return: list of (parameters, gradients) pair.
+    :rtype: list[Variable]
+    """
+    assert isinstance(loss, framework.Variable)
+    param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
+                                                        set())
+    if parameter_list is not None:
+        parameters = parameter_list
+    else:
+        params = loss.block.program.global_block().all_parameters()
+        parameters = [param.name for param in params]
+    params_and_grads = []
+    for param in parameters:
+        if param not in param_grad_map:
+            raise ValueError("param %s is not in map" % param)
+        grad_info = param_grad_map[param]
+        grad_block = loss.block.program.block(grad_info[1])
+        if not grad_block.has_var(grad_info[0]):
+            raise ValueError("grad block[{0}] did not have grad var {1}".format(
+                grad_info[1], grad_info[0]))
+        # Get the param var from the global block
+        param_var = loss.block.program.global_block().var(param)
+        grad_var = grad_block.var(grad_info[0])
+        if loss.block.has_var(grad_info[0]):
+            params_and_grads.append((param_var, grad_var))
+        else:
+            params_and_grads.append((param_var, None))
+    return params_and_grads
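
For reference, a minimal usage sketch of the new function, modeled on the unit-test changes further down. The toy `mul` program, the variable names, and the shapes are illustrative only and are not part of this commit; treat the snippet as a sketch against this revision of the v2 framework API.

    import paddle.v2.framework.framework as framework
    from paddle.v2.framework.backward import append_backward_ops

    program = framework.Program()
    block = program.global_block()
    # One parameter, one plain input, and the "loss" variable produced by a mul op.
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x,
                "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})

    # Appends gradient ops to the program and returns [(parameter, gradient), ...]
    # for every parameter in the global block (or in an explicit parameter_list).
    params_grads = append_backward_ops(mul_out)
    grad_names = [grad.name if grad is not None else None
                  for param, grad in params_grads]
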
-import paddle.v2.framework.framework as framework
 from collections import defaultdict
+import paddle.v2.framework.framework as framework
+from paddle.v2.framework.backward import append_backward_ops
 
 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
 ]
@@ -105,45 +107,6 @@ class Optimizer(object):
                             format(name, param.name))
         return self._accumulators[name][param.name]
 
-    def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None):
-        """Create and add gradient Operators in BlockDesc to compute
-        gradients of `loss` for parameters in parameter_list
-
-        Args:
-          loss: an variable generated by cost function.
-          no_grad_set: variable that should not create gradient
-          parameter_list: parameters that need to compute gradient and
-          update to optimize the lost.
-
-        Returns:
-          list of (parameters, gradients) pair.
-        """
-        assert isinstance(loss, framework.Variable)
-        param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
-                                                            set())
-        if parameter_list is not None:
-            parameters = parameter_list
-        else:
-            params = loss.block.program.global_block().all_parameters()
-            parameters = [param.name for param in params]
-        params_and_grads = []
-        for param in parameters:
-            if param not in param_grad_map:
-                raise Exception("param %s is not in map" % param)
-            grad_info = param_grad_map[param]
-            grad_block = loss.block.program.block(grad_info[1])
-            if not grad_block.has_var(grad_info[0]):
-                raise Exception("grad block[%d] did not have grad var %s" %
-                                grad_info[1], grad_info[0])
-            # Get the param var from the global block
-            param_var = loss.block.program.global_block().var(param)
-            grad_var = grad_block.var(grad_info[0])
-            if loss.block.has_var(grad_info[0]):
-                params_and_grads.append((param_var, grad_var))
-            else:
-                params_and_grads.append((param_var, None))
-        return params_and_grads
-
     def create_optimization_pass(self, parameters_and_grads, loss):
         """Add optimization operators to update gradients to variables.
@@ -192,11 +155,11 @@ class Optimizer(object):
     def minimize(self, loss, parameter_list=None, no_grad_set=None):
         """Add operations to minimize `loss` by updating `parameter_list`.
 
-        This method combines interface `create_backward_pass()` and
+        This method combines interface `append_backward_ops()` and
         `create_optimization_pass()` into one.
         """
-        params_grads = self.create_backward_pass(loss, parameter_list,
-                                                 no_grad_set or set())
+        params_grads = append_backward_ops(loss, parameter_list, no_grad_set or
+                                           set())
         optimize_ops = self.create_optimization_pass(params_grads, loss)
         return optimize_ops
......
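
The caller-facing path is unchanged: a concrete optimizer's `minimize()` still returns the update ops, it now simply delegates gradient generation to `append_backward_ops()`. A hedged sketch of that path, reusing the same kind of toy `mul` program as above (names and shapes are illustrative, not part of this commit):

    import paddle.v2.framework.framework as framework
    import paddle.v2.framework.optimizer as optimizer

    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x,
                "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})

    sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
    # Internally: append_backward_ops(mul_out, ...) followed by
    # create_optimization_pass(params_grads, mul_out).
    opts = sgd_optimizer.minimize(mul_out)
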
@@ -2,6 +2,7 @@ import unittest
 import paddle.v2.framework.framework as framework
 import paddle.v2.framework.optimizer as optimizer
+from paddle.v2.framework.backward import append_backward_ops
 
 
 class TestOptimizer(unittest.TestCase):
@@ -51,7 +52,7 @@ class TestMomentumOptimizer(unittest.TestCase):
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
         momentum_optimizer = self.MockMomentum(learning_rate=0.01, momentum=0.2)
-        params_grads = momentum_optimizer.create_backward_pass(mul_out)
+        params_grads = append_backward_ops(mul_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         opts = momentum_optimizer.create_optimization_pass(params_grads,
@@ -93,7 +94,7 @@ class TestAdagradOptimizer(unittest.TestCase):
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
         adagrad_optimizer = self.MockAdagrad(learning_rate=0.01, epsilon=1.0e-6)
-        params_grads = adagrad_optimizer.create_backward_pass(mul_out)
+        params_grads = append_backward_ops(mul_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
         opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out)
@@ -138,7 +139,7 @@ class TestAdamOptimizer(unittest.TestCase):
             attrs={"x_num_col_dims": 1})
         adam_optimizer = self.MockAdam(
             learning_rate=0.01, beta1=0.9, beta2=0.999)
-        params_grads = adam_optimizer.create_backward_pass(mul_out)
+        params_grads = append_backward_ops(mul_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
         opts = adam_optimizer.create_optimization_pass(params_grads, mul_out)
......