From dd0008d57f94b2b1db217e69ff6a4bd25812e739 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 24 Oct 2017 14:41:26 -0700 Subject: [PATCH] Extract apply_backward_pass to backward.py (#5026) * Extract apply_backward_pass to backward.py Rename apply_backward_pass to append_backward_ops * Fix CI * Update design doc --- doc/design/optimizer.md | 16 +----- python/paddle/v2/framework/backward.py | 45 +++++++++++++++++ python/paddle/v2/framework/optimizer.py | 49 +++---------------- .../v2/framework/tests/test_optimizer.py | 7 +-- 4 files changed, 56 insertions(+), 61 deletions(-) create mode 100644 python/paddle/v2/framework/backward.py diff --git a/doc/design/optimizer.md b/doc/design/optimizer.md index 17440fae502..202b4b65103 100644 --- a/doc/design/optimizer.md +++ b/doc/design/optimizer.md @@ -65,20 +65,6 @@ class Optimizer(object): def __init__(self): pass - def create_backward_pass(self, loss, parameter_list=None): - """ - create and add gradient Operators in BlockDesc to Compute gradients of `loss` - for parameters in parameter_list - - Args: - loss: an variable generated by cost function. - parameter_list: parameters that need to compute gradient and update to optimize the lost. - - Returns: - list of (parameters, gradients) pair. - """ - return None - def create_optimization_pass(self, parameters_and_grads): """Add optimization operators to update gradients to variables. @@ -93,7 +79,7 @@ class Optimizer(object): def minimize(self, loss, parameter_list): """Add operations to minimize `loss` by updating `parameter_list`. - This method combines interface `create_backward_pass()` and + This method combines interface `append_backward_ops()` and `create_optimization_pass()` into one. """ params_grads = self.create_backward_pass(loss, parameter_list) diff --git a/python/paddle/v2/framework/backward.py b/python/paddle/v2/framework/backward.py new file mode 100644 index 00000000000..6827792cb35 --- /dev/null +++ b/python/paddle/v2/framework/backward.py @@ -0,0 +1,45 @@ +from paddle.v2.framework import framework as framework + +__all__ = ['append_backward_ops'] + + +def append_backward_ops(loss, parameter_list=None, no_grad_set=None): + """ + Create and add gradient Operators in BlockDesc to compute + gradients of `loss` for parameters in parameter_list + + :param loss: an variable generated by cost function. + :type loss: Variable + :param no_grad_set: variable that should not create gradient + :type no_grad_set: set + :param parameter_list: parameters that need to compute gradient and + update to optimize the lost. + :type: list + :return: list of (parameters, gradients) pair. + :rtype: list[Variable] + """ + assert isinstance(loss, framework.Variable) + param_grad_map = loss.block.program.append_backward(loss, no_grad_set or + set()) + if parameter_list is not None: + parameters = parameter_list + else: + params = loss.block.program.global_block().all_parameters() + parameters = [param.name for param in params] + params_and_grads = [] + for param in parameters: + if param not in param_grad_map: + raise ValueError("param %s is not in map" % param) + grad_info = param_grad_map[param] + grad_block = loss.block.program.block(grad_info[1]) + if not grad_block.has_var(grad_info[0]): + raise ValueError("grad block[{0}] did not have grad var {1}".format( + grad_info[1], grad_info[0])) + # Get the param var from the global block + param_var = loss.block.program.global_block().var(param) + grad_var = grad_block.var(grad_info[0]) + if loss.block.has_var(grad_info[0]): + params_and_grads.append((param_var, grad_var)) + else: + params_and_grads.append((param_var, None)) + return params_and_grads diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index f7d35ca0658..a86908c6489 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -1,6 +1,8 @@ -import paddle.v2.framework.framework as framework from collections import defaultdict +import paddle.v2.framework.framework as framework +from paddle.v2.framework.backward import append_backward_ops + __all__ = [ 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer' ] @@ -105,45 +107,6 @@ class Optimizer(object): format(name, param.name)) return self._accumulators[name][param.name] - def create_backward_pass(self, loss, parameter_list=None, no_grad_set=None): - """Create and add gradient Operators in BlockDesc to compute - gradients of `loss` for parameters in parameter_list - - Args: - loss: an variable generated by cost function. - no_grad_set: variable that should not create gradient - parameter_list: parameters that need to compute gradient and - update to optimize the lost. - - Returns: - list of (parameters, gradients) pair. - """ - assert isinstance(loss, framework.Variable) - param_grad_map = loss.block.program.append_backward(loss, no_grad_set or - set()) - if parameter_list is not None: - parameters = parameter_list - else: - params = loss.block.program.global_block().all_parameters() - parameters = [param.name for param in params] - params_and_grads = [] - for param in parameters: - if param not in param_grad_map: - raise Exception("param %s is not in map" % param) - grad_info = param_grad_map[param] - grad_block = loss.block.program.block(grad_info[1]) - if not grad_block.has_var(grad_info[0]): - raise Exception("grad block[%d] did not have grad var %s" % - grad_info[1], grad_info[0]) - # Get the param var from the global block - param_var = loss.block.program.global_block().var(param) - grad_var = grad_block.var(grad_info[0]) - if loss.block.has_var(grad_info[0]): - params_and_grads.append((param_var, grad_var)) - else: - params_and_grads.append((param_var, None)) - return params_and_grads - def create_optimization_pass(self, parameters_and_grads, loss): """Add optimization operators to update gradients to variables. @@ -192,11 +155,11 @@ class Optimizer(object): def minimize(self, loss, parameter_list=None, no_grad_set=None): """Add operations to minimize `loss` by updating `parameter_list`. - This method combines interface `create_backward_pass()` and + This method combines interface `append_backward_ops()` and `create_optimization_pass()` into one. """ - params_grads = self.create_backward_pass(loss, parameter_list, - no_grad_set or set()) + params_grads = append_backward_ops(loss, parameter_list, no_grad_set or + set()) optimize_ops = self.create_optimization_pass(params_grads, loss) return optimize_ops diff --git a/python/paddle/v2/framework/tests/test_optimizer.py b/python/paddle/v2/framework/tests/test_optimizer.py index 4b267598efb..eb5d49bcbaf 100644 --- a/python/paddle/v2/framework/tests/test_optimizer.py +++ b/python/paddle/v2/framework/tests/test_optimizer.py @@ -2,6 +2,7 @@ import unittest import paddle.v2.framework.framework as framework import paddle.v2.framework.optimizer as optimizer +from paddle.v2.framework.backward import append_backward_ops class TestOptimizer(unittest.TestCase): @@ -51,7 +52,7 @@ class TestMomentumOptimizer(unittest.TestCase): outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) momentum_optimizer = self.MockMomentum(learning_rate=0.01, momentum=0.2) - params_grads = momentum_optimizer.create_backward_pass(mul_out) + params_grads = append_backward_ops(mul_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) opts = momentum_optimizer.create_optimization_pass(params_grads, @@ -93,7 +94,7 @@ class TestAdagradOptimizer(unittest.TestCase): outputs={"Out": mul_out}, attrs={"x_num_col_dims": 1}) adagrad_optimizer = self.MockAdagrad(learning_rate=0.01, epsilon=1.0e-6) - params_grads = adagrad_optimizer.create_backward_pass(mul_out) + params_grads = append_backward_ops(mul_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out) @@ -138,7 +139,7 @@ class TestAdamOptimizer(unittest.TestCase): attrs={"x_num_col_dims": 1}) adam_optimizer = self.MockAdam( learning_rate=0.01, beta1=0.9, beta2=0.999) - params_grads = adam_optimizer.create_backward_pass(mul_out) + params_grads = append_backward_ops(mul_out) self.assertEqual(len(params_grads), 1) self.assertEqual(len(adam_optimizer.get_accumulators()), 0) opts = adam_optimizer.create_optimization_pass(params_grads, mul_out) -- GitLab