提交 5383f3c4 编写于 作者: F fengjiayi

pass test_machine_translation.py

上级 4d59b5ac
......@@ -79,7 +79,7 @@ class Optimizer(object):
def minimize(self, loss, parameter_list):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `append_backward_ops()` and
This method combines interface `append_backward()` and
`create_optimization_pass()` into one.
"""
params_grads = self.create_backward_pass(loss, parameter_list)
......
......@@ -3,7 +3,7 @@ from . import core
import collections
import pdb
__all__ = ['append_backward_ops']
__all__ = ['append_backward']
def _rename_arg_(op_desc_list, old_name, new_name, begin_idx=None,
......@@ -57,11 +57,10 @@ def _append_grad_suffix_(name):
return name + core.grad_var_suffix()
def _backward_impl_(target,
def _append_backward_ops_(target,
block,
target_block,
no_grad_set,
grad_info_map,
callback=None):
grad_op_descs = []
grad_to_var = dict()
......@@ -71,11 +70,10 @@ def _backward_impl_(target,
if each_op.has_attr("sub_block"):
sub_block_idx = each_op.block_attr("sub_block")
sub_block = program.block(sub_block_idx)
original_block_idx = program.current_block_idx
grad_sub_block = program.create_block(parent_idx=sub_block_idx)
program.current_block_idx = original_block_idx
_backward_impl_(target, sub_block, grad_sub_block, no_grad_set,
grad_info_map, callback)
sub_grad_to_var = _append_backward_ops_(
target, sub_block, grad_sub_block, no_grad_set, callback)
grad_to_var = dict(grad_to_var, **sub_grad_to_var)
grad_sub_block_list.append(grad_sub_block.desc)
grad_op_desc, op_grad_to_var = core.get_grad_op_desc(
each_op.desc, no_grad_set[block.idx], grad_sub_block_list)
......@@ -143,20 +141,7 @@ def _backward_impl_(target,
"fill_zeros_like", {"X": [_strip_grad_suffix_(arg)]}, {"Y": [arg]},
{})
grad_op_descs.insert(ele[1], fill_zeros_like_op)
# create new gradient variables in the target block desc
new_vars = set()
for op_desc in grad_op_descs:
for grad_var_name in op_desc.output_arg_names():
grad_var_name = grad_var_name.encode("ascii")
if target_block.desc.has_var_recursive(
grad_var_name) or grad_var_name == core.empty_var_name():
continue
target_block.desc.var(grad_var_name)
new_vars.add(grad_var_name)
if not grad_to_var.has_key(grad_var_name):
continue
grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name,
target_block)
if target_block.idx == 0:
grad_target_name = _append_grad_suffix_(target.name)
target_block.desc.var(grad_target_name.encode("ascii"))
......@@ -171,20 +156,40 @@ def _backward_impl_(target,
"value": 1.0,
"dtype": core.DataType.FP32
}))
# insert backward operators to target_block
for op_desc in grad_op_descs:
op_desc.infer_var_type(target_block.desc)
op_desc.infer_shape(target_block.desc)
for arg in op_desc.output_arg_names():
if arg in new_vars:
_infer_var_data_type_(arg, target_block)
new_op_desc = target_block.desc.append_op()
new_op_desc.copy_from(op_desc)
target_block.sync_with_cpp()
return grad_to_var
def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
for op_idx in range(start_op_idx, block.desc.op_size()):
op_desc = block.desc.op(op_idx)
if op_desc.has_attr("sub_block"):
sub_block = block.program.block(op_desc.block_attr("sub_block"))
_append_backward_vars_(sub_block, 0, grad_to_var, grad_info_map)
new_vars = set()
# create new gradient variables
for grad_var_name in op_desc.output_arg_names():
grad_var_name = grad_var_name.encode("ascii")
if block.desc.has_var_recursive(
grad_var_name) or grad_var_name == core.empty_var_name():
continue
block.desc.var(grad_var_name)
new_vars.add(grad_var_name)
if not grad_to_var.has_key(grad_var_name):
continue
grad_info_map[grad_to_var[grad_var_name]] = (grad_var_name, block)
# infer_shape and infer_type
op_desc.infer_var_type(block.desc)
op_desc.infer_shape(block.desc)
for arg in op_desc.output_arg_names():
if arg in new_vars:
_infer_var_data_type_(arg, block)
def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
def append_backward(loss, parameter_list=None, no_grad_set=None):
"""
Create and add gradient Operators in BlockDesc to compute
gradients of `loss` for parameters in parameter_list
......@@ -201,9 +206,9 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
"""
assert isinstance(loss, framework.Variable)
program = loss.block.program
if no_grad_set is None:
no_grad_set = dict()
program = loss.block.program
assert isinstance(program, framework.Program)
for block in program.blocks:
assert isinstance(block, framework.Block)
......@@ -215,14 +220,20 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
no_grad_set[block.idx] = block_no_grad_set
grad_info_map = dict()
root_block = loss.block.program.block(0)
root_block = program.block(0)
_backward_impl_(loss, root_block, root_block, no_grad_set, grad_info_map)
fwd_op_num = root_block.desc.op_size()
current_block_idx = program.current_block_idx
grad_to_var = _append_backward_ops_(loss, root_block, root_block,
no_grad_set)
_append_backward_vars_(root_block, fwd_op_num, grad_to_var, grad_info_map)
program.current_block_idx = current_block_idx
program.sync_with_cpp()
if parameter_list is not None:
parameters = parameter_list
else:
params = loss.block.program.global_block().all_parameters()
params = program.global_block().all_parameters()
parameters = [param.name for param in params]
params_and_grads = []
for param in parameters:
......@@ -234,7 +245,7 @@ def append_backward_ops(loss, parameter_list=None, no_grad_set=None):
raise ValueError("grad block[{0}] did not have grad var {1}".format(
grad_info[1], grad_info[0]))
# Get the param var from the global block
param_var = loss.block.program.global_block().var(param)
param_var = program.global_block().var(param)
grad_var = grad_block.var(grad_info[0])
if loss.block.has_var(grad_info[0]):
params_and_grads.append((param_var, grad_var))
......
from collections import defaultdict
import framework
from backward import append_backward_ops
from backward import append_backward
from framework import unique_name
from initializer import Constant
from layer_helper import LayerHelper
......@@ -195,10 +195,10 @@ class Optimizer(object):
no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `append_backward_ops()` and
This method combines interface `append_backward()` and
`create_optimization_pass()` into one.
"""
params_grads = append_backward_ops(loss, parameter_list, no_grad_set)
params_grads = append_backward(loss, parameter_list, no_grad_set)
# Add regularization if any
params_grads = append_regularization_ops(params_grads,
self.regularization)
......
......@@ -4,7 +4,7 @@ import random
import itertools
import paddle.v2.fluid.core as core
import collections
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.op import Operator
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import Program, OpProtoHolder
......@@ -493,7 +493,7 @@ class OpTest(unittest.TestCase):
op_loss.desc.infer_var_type(block.desc)
op_loss.desc.infer_shape(block.desc)
param_grad_list = append_backward_ops(
param_grad_list = append_backward(
loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)
feed_dict = {
......
......@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program
import numpy
......@@ -64,7 +64,7 @@ class TestArrayReadWrite(unittest.TestCase):
total_sum = layers.sums(input=[a_sum, x_sum])
total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)
append_backward_ops(total_sum_scaled)
append_backward(total_sum_scaled)
g_vars = map(default_main_program().global_block().var,
[each_x.name + "@GRAD" for each_x in x])
......
......@@ -3,7 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import default_startup_program, default_main_program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
import numpy
......@@ -26,7 +26,7 @@ class ConditionalBlock(unittest.TestCase):
outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
print outs
loss = layers.mean(x=out)
append_backward_ops(loss=loss)
append_backward(loss=loss)
outs = exe.run(
feed={'X': x},
fetch_list=[
......
......@@ -4,7 +4,7 @@ import numpy
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase):
......@@ -172,7 +172,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
mean = layers.mean(x=result, main_program=program)
append_backward_ops(mean)
append_backward(mean)
tensor = core.LoDTensor()
tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
......
......@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
class TestOptimizer(unittest.TestCase):
......@@ -102,7 +102,7 @@ class TestMomentumOptimizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(
......@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
learning_rate = 0.01
momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(
......@@ -209,7 +209,7 @@ class TestAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
......@@ -269,7 +269,7 @@ class TestAdamOptimizer(unittest.TestCase):
learning_rate = 0.01
adam_optimizer = self.MockAdam(
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
......@@ -331,7 +331,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
learning_rate = 0.01
adamax_optimizer = self.MockAdamax(
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
......@@ -390,7 +390,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass(
......
......@@ -3,7 +3,7 @@ import unittest
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
import numpy as np
import paddle.v2.fluid.core as core
......@@ -177,7 +177,7 @@ class RecurrentOpTest1(unittest.TestCase):
def test_backward(self):
self.check_forward()
append_backward_ops(self.output)
append_backward(self.output)
ana_grad = [np.array(x) for x in self.backward()]
......
......@@ -3,7 +3,7 @@ import unittest
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.regularizer as regularizer
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
class TestL2DecayRegularizer(unittest.TestCase):
......@@ -33,7 +33,7 @@ class TestL2DecayRegularizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads)
......@@ -70,7 +70,7 @@ class TestL1DecayRegularizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads)
......
......@@ -2,7 +2,7 @@ import unittest
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
import numpy as np
import paddle.v2.fluid.core as core
......
......@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program
import numpy
......@@ -35,7 +35,7 @@ class TestShrinkRNNMemory(unittest.TestCase):
self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2]))
mem3_mean = layers.mean(x=mem3)
append_backward_ops(loss=mem3_mean)
append_backward(loss=mem3_mean)
x_grad = exe.run(
feed={'x': tensor},
fetch_list=[main_program.global_block().var('x@GRAD')])[0]
......
......@@ -4,7 +4,7 @@ import numpy as np
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase):
......@@ -150,7 +150,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
main_program=program)
mean = layers.mean(x=out, main_program=program)
append_backward_ops(mean)
append_backward(mean)
tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
......
......@@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor
import paddle.v2.fluid.core as core
from paddle.v2.fluid.backward import append_backward_ops
from paddle.v2.fluid.backward import append_backward
import numpy
......@@ -46,7 +46,7 @@ class TestWhileOp(unittest.TestCase):
sum_result = layers.array_read(array=mem_array, i=i)
loss = layers.mean(x=sum_result)
append_backward_ops(loss)
append_backward(loss)
cpu = core.CPUPlace()
exe = Executor(cpu)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册