Unverified commit 569f7c47, authored by Qiao Longfei, committed by GitHub

enforce shape of backward target to be {1} (#5745)

* enforce shape of backward target to be {1}

* fix test_regularizer.py

* rm unused code

* fix backward_test

* fix a type bug

* fix test_program
Parent 6cfcf624
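
For context, a minimal sketch of the pattern the updated Python tests follow after this change: AppendBackward now enforces that the backward target has shape {1}, so a non-scalar output (e.g. mul.out with shape [5, 8]) is first reduced with a mean op and the scalar mean.out is passed to the optimizer. The sketch reuses the fluid test APIs visible in the diff below (framework.Program, block.append_op, SGDOptimizer.minimize); it is illustrative only and not part of the commit.

    import paddle.v2.fluid.framework as framework
    import paddle.v2.fluid.optimizer as optimizer

    init_program = framework.Program()
    program = framework.Program()
    block = program.global_block()
    mul_x = block.create_parameter(
        dtype="float32", shape=[5, 10], lod_level=0, name="mul.x")
    mul_y = block.create_var(
        dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
    mul_out = block.create_var(
        dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
    # mul_out is [5, 8]; using it directly as the backward target would now
    # trip the "target should be scalar" enforce added in backward.cc.
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(
        type="mul",
        inputs={"X": mul_x,
                "Y": mul_y},
        outputs={"Out": mul_out},
        attrs={"x_num_col_dims": 1})
    block.append_op(
        type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
    sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
    # mean_out has shape [1], so the enforced scalar-target check passes.
    opts = sgd_optimizer.minimize(mean_out, init_program)
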
@@ -513,19 +513,14 @@ ParamGradInfoMap AppendBackward(
const int root_block_idx = 0;
auto root_block = program_desc.MutableBlock(root_block_idx);
// insert fill one op for target
// TODO(qiao) add some check to the target.
std::string fill_one_op_out = GradVarName(target.Name());
std::vector<int64_t> target_shape_desc = target.Shape();
std::vector<int> target_shape;
std::transform(target_shape_desc.begin(), target_shape_desc.end(),
std::back_inserter(target_shape),
[](int64_t dim) { return static_cast<int>(dim); });
bool is_scalar = target.Shape() == std::vector<int64_t>{1};
PADDLE_ENFORCE(is_scalar, "target should be scalar");
VLOG(3) << "backward from loss=" << target.Name()
<< " data_type=" << target.GetDataType();
std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", target_shape},
{{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)},
{"data_type", target.GetDataType()}}));
// infer var type of fill_one_op
@@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) {
op->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out");
target.SetShape({1});
auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -544,6 +545,7 @@ TEST(Backward, default_attribute) {
op->CheckAttrs();
auto target = f::VarDescBind("out");
target.SetShape({1});
AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) {
op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
@@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) {
op4->SetOutput("Out", {"out4"});
auto target = f::VarDescBind("out4");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"});
@@ -730,6 +734,7 @@ TEST(Backward, var_no_grad) {
op2->SetOutput("Z", {"z2"});
auto target = f::VarDescBind("z2");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"});
@@ -810,6 +815,7 @@ TEST(Backward, shared_var) {
op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {});
@@ -888,6 +894,7 @@ TEST(Backward, half_backward) {
op1->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out");
target.SetShape({1});
size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"b"});
f::OpDescBind *fill_op = block->AllOps()[forward_len];
@@ -16,14 +16,18 @@ class TestOptimizer(unittest.TestCase):
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mul",
inputs={"X": mul_x,
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
opts = sgd_optimizer.minimize(mul_out, init_program)
opts = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 1)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
@@ -44,12 +48,16 @@ class TestOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
global_step = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="step")
learning_rate = 0.01
sgd_optimizer = optimizer.SGDOptimizer(
learning_rate=learning_rate, global_step=global_step)
opts = sgd_optimizer.minimize(mul_out, init_program)
opts = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 2)
sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd")
@@ -90,7 +98,11 @@ class TestMomentumOptimizer(unittest.TestCase):
learning_rate = 0.01
momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2)
params_grads = append_backward_ops(mul_out)
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(
@@ -132,10 +144,14 @@ class TestMomentumOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mul_out)
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass(
@@ -186,10 +202,14 @@ class TestAdagradOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out)
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -242,10 +262,14 @@ class TestAdamOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
adam_optimizer = self.MockAdam(
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out)
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -300,10 +324,14 @@ class TestAdamaxOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
adamax_optimizer = self.MockAdamax(
learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out)
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -355,10 +383,14 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out)
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass(
import unittest
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.framework import g_main_program
@@ -98,21 +97,26 @@ class TestProgram(unittest.TestCase):
"Y": add_y},
outputs={"Out": add_out},
attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": add_out}, outputs={"Out": mean_out})
self.assertEqual(mul_op.idx, 0)
self.assertEqual(add_op.idx, 1)
param_to_grad = prog.append_backward(add_out, set())
param_to_grad = prog.append_backward(mean_out, set())
def grad_name(name):
return name + "@GRAD"
for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"):
for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
"mean.out"):
self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
self.assertEqual(param_to_grad[var_name][1], 0)
expect_ops = [
"mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
"mul_grad"
"mul", "elementwise_add", "mean", "fill_constant", "mean_grad",
"elementwise_add_grad", "mul_grad"
]
actual_ops = []
for op in block.ops:
@@ -29,7 +29,11 @@ class TestL2DecayRegularizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
params_grads = append_backward_ops(mul_out)
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads)
@@ -62,7 +66,11 @@ class TestL1DecayRegularizer(unittest.TestCase):
"Y": mul_y},
outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1})
params_grads = append_backward_ops(mul_out)
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads)