提交 dd0008d5 编写于 作者: Y Yu Yang 提交者: GitHub

Extract apply_backward_pass to backward.py (#5026)

* Extract apply_backward_pass to backward.py

Rename apply_backward_pass to append_backward_ops

* Fix CI

* Update design doc
上级 fd2eb550
......@@ -65,20 +65,6 @@ class Optimizer(object):
def __init__(self):
pass
def create_backward_pass(self, loss, parameter_list=None):
"""
create and add gradient Operators in BlockDesc to Compute gradients of `loss`
for parameters in parameter_list
Args:
loss: an variable generated by cost function.
parameter_list: parameters that need to compute gradient and update to optimize the lost.
Returns:
list of (parameters, gradients) pair.
"""
return None
def create_optimization_pass(self, parameters_and_grads):
"""Add optimization operators to update gradients to variables.
......@@ -93,7 +79,7 @@ class Optimizer(object):
def minimize(self, loss, parameter_list):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `create_backward_pass()` and
This method combines interface `append_backward_ops()` and
`create_optimization_pass()` into one.
"""
params_grads = self.create_backward_pass(loss, parameter_list)
......
from paddle.v2.framework import framework as framework
__all__ = ['append_backward_ops']
def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
"""
Create and add gradient Operators in BlockDesc to compute
gradients of `loss` for parameters in parameter_list
:param loss: an variable generated by cost function.
:type loss: Variable
:param no_grad_set: variable that should not create gradient
:type no_grad_set: set
:param parameter_list: parameters that need to compute gradient and
update to optimize the lost.
:type: list
:return: list of (parameters, gradients) pair.
:rtype: list[Variable]
"""
assert isinstance(loss, framework.Variable)
param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
set())
if parameter_list is not None:
parameters = parameter_list
else:
params = loss.block.program.global_block().all_parameters()
parameters = [param.name for param in params]
params_and_grads = []
for param in parameters:
if param not in param_grad_map:
raise ValueError("param %s is not in map" % param)
grad_info = param_grad_map[param]
grad_block = loss.block.program.block(grad_info[1])
if not grad_block.has_var(grad_info[0]):
raise ValueError("grad block[{0}] did not have grad var {1}".format(
grad_info[1], grad_info[0]))
# Get the param var from the global block
param_var = loss.block.program.global_block().var(param)
grad_var = grad_block.var(grad_info[0])
if loss.block.has_var(grad_info[0]):
params_and_grads.append((param_var, grad_var))
else:
params_and_grads.append((param_var, None))
return params_and_grads
import paddle.v2.framework.framework as framework
from collections import defaultdict
import paddle.v2.framework.framework as framework
from paddle.v2.framework.backward import append_backward_ops
__all__ = [
'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer'
]
......@@ -105,45 +107,6 @@ class Optimizer(object):
format(name, param.name))
return self._accumulators[name][param.name]
def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None):
"""Create and add gradient Operators in BlockDesc to compute
gradients of `loss` for parameters in parameter_list
Args:
loss: an variable generated by cost function.
no_grad_set: variable that should not create gradient
parameter_list: parameters that need to compute gradient and
update to optimize the lost.
Returns:
list of (parameters, gradients) pair.
"""
assert isinstance(loss, framework.Variable)
param_grad_map = loss.block.program.append_backward(loss, no_grad_set or
set())
if parameter_list is not None:
parameters = parameter_list
else:
params = loss.block.program.global_block().all_parameters()
parameters = [param.name for param in params]
params_and_grads = []
for param in parameters:
if param not in param_grad_map:
raise Exception("param %s is not in map" % param)
grad_info = param_grad_map[param]
grad_block = loss.block.program.block(grad_info[1])
if not grad_block.has_var(grad_info[0]):
raise Exception("grad block[%d] did not have grad var %s" %
grad_info[1], grad_info[0])
# Get the param var from the global block
param_var = loss.block.program.global_block().var(param)
grad_var = grad_block.var(grad_info[0])
if loss.block.has_var(grad_info[0]):
params_and_grads.append((param_var, grad_var))
else:
params_and_grads.append((param_var, None))
return params_and_grads
def create_optimization_pass(self, parameters_and_grads, loss):
"""Add optimization operators to update gradients to variables.
......@@ -192,11 +155,11 @@ class Optimizer(object):
def minimize(self, loss, parameter_list=None, no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `create_backward_pass()` and
This method combines interface `append_backward_ops()` and
`create_optimization_pass()` into one.
"""
params_grads = self.create_backward_pass(loss, parameter_list,
no_grad_set or set())
params_grads = append_backward_ops(loss, parameter_list, no_grad_set or
set())
optimize_ops = self.create_optimization_pass(params_grads, loss)
return optimize_ops
......
......@@ -2,6 +2,7 @@ import unittest
import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.backward import append_backward_ops
class TestOptimizer(unittest.TestCase):
......@@ -51,7 +52,7 @@ class TestMomentumOptimizer(unittest.TestCase):
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
momentum_optimizer = self.MockMomentum(learning_rate=0.01, momentum=0.2)
params_grads = momentum_optimizer.create_backward_pass(mul_out)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(params_grads,
......@@ -93,7 +94,7 @@ class TestAdagradOptimizer(unittest.TestCase):
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
adagrad_optimizer = self.MockAdagrad(learning_rate=0.01, epsilon=1.0e-6)
params_grads = adagrad_optimizer.create_backward_pass(mul_out)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out)
......@@ -138,7 +139,7 @@ class TestAdamOptimizer(unittest.TestCase):
attrs={"x_num_col_dims": 1})
adam_optimizer = self.MockAdam(
learning_rate=0.01, beta1=0.9, beta2=0.999)
params_grads = adam_optimizer.create_backward_pass(mul_out)
params_grads = append_backward_ops(mul_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册