未验证 提交 569f7c47 编写于 作者: Q Qiao Longfei 提交者: GitHub

enforce shape of backward target to be {1} (#5745)

* enforce shape of backward target to be {1}

* fix test_regularizer.py

* rm unused code

* fix backward_test

* fix a type bug

* fix test_program
上级 6cfcf624
...@@ -513,19 +513,14 @@ ParamGradInfoMap AppendBackward( ...@@ -513,19 +513,14 @@ ParamGradInfoMap AppendBackward(
const int root_block_idx = 0; const int root_block_idx = 0;
auto root_block = program_desc.MutableBlock(root_block_idx); auto root_block = program_desc.MutableBlock(root_block_idx);
// insert fill one op for target
// TODO(qiao) add some check to the target.
std::string fill_one_op_out = GradVarName(target.Name()); std::string fill_one_op_out = GradVarName(target.Name());
std::vector<int64_t> target_shape_desc = target.Shape(); bool is_scalar = target.Shape() == std::vector<int64_t>{1};
std::vector<int> target_shape; PADDLE_ENFORCE(is_scalar, "target should be scalar");
std::transform(target_shape_desc.begin(), target_shape_desc.end(),
std::back_inserter(target_shape),
[](int64_t dim) { return static_cast<int>(dim); });
VLOG(3) << "backward from loss=" << target.Name() VLOG(3) << "backward from loss=" << target.Name()
<< " data_type=" << target.GetDataType(); << " data_type=" << target.GetDataType();
std::unique_ptr<OpDescBind> fill_one_op( std::unique_ptr<OpDescBind> fill_one_op(
new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}}, new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
{{"shape", target_shape}, {{"shape", std::vector<int>{1}},
{"value", static_cast<float>(1.0)}, {"value", static_cast<float>(1.0)},
{"data_type", target.GetDataType()}})); {"data_type", target.GetDataType()}}));
// infer var type of fill_one_op // infer var type of fill_one_op
......
...@@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) { ...@@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) {
op->SetOutput("Out", {"out"}); op->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out"); auto target = f::VarDescBind("out");
target.SetShape({1});
auto var_to_grad = AppendBackward(program, target, {}); auto var_to_grad = AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL); ASSERT_EQ(block->AllOps().size(), 3UL);
...@@ -544,6 +545,7 @@ TEST(Backward, default_attribute) { ...@@ -544,6 +545,7 @@ TEST(Backward, default_attribute) {
op->CheckAttrs(); op->CheckAttrs();
auto target = f::VarDescBind("out"); auto target = f::VarDescBind("out");
target.SetShape({1});
AppendBackward(program, target, {}); AppendBackward(program, target, {});
ASSERT_EQ(block->AllOps().size(), 3UL); ASSERT_EQ(block->AllOps().size(), 3UL);
...@@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) { ...@@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) {
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3"); auto target = f::VarDescBind("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {}); auto var_to_grad = AppendBackward(program, target, {});
...@@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) { ...@@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) {
op4->SetOutput("Out", {"out4"}); op4->SetOutput("Out", {"out4"});
auto target = f::VarDescBind("out4"); auto target = f::VarDescBind("out4");
target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"out3"}); auto var_to_grad = AppendBackward(program, target, {"out3"});
...@@ -730,6 +734,7 @@ TEST(Backward, var_no_grad) { ...@@ -730,6 +734,7 @@ TEST(Backward, var_no_grad) {
op2->SetOutput("Z", {"z2"}); op2->SetOutput("Z", {"z2"});
auto target = f::VarDescBind("z2"); auto target = f::VarDescBind("z2");
target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"z1"}); auto var_to_grad = AppendBackward(program, target, {"z1"});
...@@ -810,6 +815,7 @@ TEST(Backward, shared_var) { ...@@ -810,6 +815,7 @@ TEST(Backward, shared_var) {
op3->SetOutput("Out", {"out3"}); op3->SetOutput("Out", {"out3"});
auto target = f::VarDescBind("out3"); auto target = f::VarDescBind("out3");
target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {}); auto var_to_grad = AppendBackward(program, target, {});
...@@ -888,6 +894,7 @@ TEST(Backward, half_backward) { ...@@ -888,6 +894,7 @@ TEST(Backward, half_backward) {
op1->SetOutput("Out", {"out"}); op1->SetOutput("Out", {"out"});
auto target = f::VarDescBind("out"); auto target = f::VarDescBind("out");
target.SetShape({1});
size_t forward_len = block->AllOps().size(); size_t forward_len = block->AllOps().size();
auto var_to_grad = AppendBackward(program, target, {"b"}); auto var_to_grad = AppendBackward(program, target, {"b"});
f::OpDescBind *fill_op = block->AllOps()[forward_len]; f::OpDescBind *fill_op = block->AllOps()[forward_len];
......
...@@ -16,14 +16,18 @@ class TestOptimizer(unittest.TestCase): ...@@ -16,14 +16,18 @@ class TestOptimizer(unittest.TestCase):
dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
mul_out = block.create_var( mul_out = block.create_var(
dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op( block.append_op(
type="mul", type="mul",
inputs={"X": mul_x, inputs={"X": mul_x,
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
opts = sgd_optimizer.minimize(mul_out, init_program) opts = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 1) self.assertEqual(len(opts), 1)
sgd_op = opts[0] sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd") self.assertEqual(sgd_op.type, "sgd")
...@@ -44,12 +48,16 @@ class TestOptimizer(unittest.TestCase): ...@@ -44,12 +48,16 @@ class TestOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
global_step = block.create_var( global_step = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="step") dtype="float32", shape=[1], lod_level=0, name="step")
learning_rate = 0.01 learning_rate = 0.01
sgd_optimizer = optimizer.SGDOptimizer( sgd_optimizer = optimizer.SGDOptimizer(
learning_rate=learning_rate, global_step=global_step) learning_rate=learning_rate, global_step=global_step)
opts = sgd_optimizer.minimize(mul_out, init_program) opts = sgd_optimizer.minimize(mean_out, init_program)
self.assertEqual(len(opts), 2) self.assertEqual(len(opts), 2)
sgd_op = opts[0] sgd_op = opts[0]
self.assertEqual(sgd_op.type, "sgd") self.assertEqual(sgd_op.type, "sgd")
...@@ -90,7 +98,11 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -90,7 +98,11 @@ class TestMomentumOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
momentum_optimizer = self.MockMomentum( momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2) learning_rate=learning_rate, momentum=0.2)
params_grads = append_backward_ops(mul_out) mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
...@@ -132,10 +144,14 @@ class TestMomentumOptimizer(unittest.TestCase): ...@@ -132,10 +144,14 @@ class TestMomentumOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01 learning_rate = 0.01
momentum_optimizer = self.MockMomentum( momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2, use_nesterov=True) learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mul_out) params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
...@@ -186,10 +202,14 @@ class TestAdagradOptimizer(unittest.TestCase): ...@@ -186,10 +202,14 @@ class TestAdagradOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01 learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad( adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6) learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out) params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -242,10 +262,14 @@ class TestAdamOptimizer(unittest.TestCase): ...@@ -242,10 +262,14 @@ class TestAdamOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01 learning_rate = 0.01
adam_optimizer = self.MockAdam( adam_optimizer = self.MockAdam(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out) params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0) self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -300,10 +324,14 @@ class TestAdamaxOptimizer(unittest.TestCase): ...@@ -300,10 +324,14 @@ class TestAdamaxOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01 learning_rate = 0.01
adamax_optimizer = self.MockAdamax( adamax_optimizer = self.MockAdamax(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mul_out) params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
...@@ -355,10 +383,14 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): ...@@ -355,10 +383,14 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
learning_rate = 0.01 learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad( decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mul_out) params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass( opts = decayed_adagrad_optimizer.create_optimization_pass(
......
import unittest import unittest
import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import Program from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.framework import g_main_program from paddle.v2.fluid.framework import g_main_program
...@@ -98,21 +97,26 @@ class TestProgram(unittest.TestCase): ...@@ -98,21 +97,26 @@ class TestProgram(unittest.TestCase):
"Y": add_y}, "Y": add_y},
outputs={"Out": add_out}, outputs={"Out": add_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": add_out}, outputs={"Out": mean_out})
self.assertEqual(mul_op.idx, 0) self.assertEqual(mul_op.idx, 0)
self.assertEqual(add_op.idx, 1) self.assertEqual(add_op.idx, 1)
param_to_grad = prog.append_backward(add_out, set()) param_to_grad = prog.append_backward(mean_out, set())
def grad_name(name): def grad_name(name):
return name + "@GRAD" return name + "@GRAD"
for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"): for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
"mean.out"):
self.assertEqual(param_to_grad[var_name][0], grad_name(var_name)) self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
self.assertEqual(param_to_grad[var_name][1], 0) self.assertEqual(param_to_grad[var_name][1], 0)
expect_ops = [ expect_ops = [
"mul", "elementwise_add", "fill_constant", "elementwise_add_grad", "mul", "elementwise_add", "mean", "fill_constant", "mean_grad",
"mul_grad" "elementwise_add_grad", "mul_grad"
] ]
actual_ops = [] actual_ops = []
for op in block.ops: for op in block.ops:
......
...@@ -29,7 +29,11 @@ class TestL2DecayRegularizer(unittest.TestCase): ...@@ -29,7 +29,11 @@ class TestL2DecayRegularizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
params_grads = append_backward_ops(mul_out) mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops) count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads) params_grads = optimizer.append_regularization_ops(params_grads)
...@@ -62,7 +66,11 @@ class TestL1DecayRegularizer(unittest.TestCase): ...@@ -62,7 +66,11 @@ class TestL1DecayRegularizer(unittest.TestCase):
"Y": mul_y}, "Y": mul_y},
outputs={"Out": mul_out}, outputs={"Out": mul_out},
attrs={"x_num_col_dims": 1}) attrs={"x_num_col_dims": 1})
params_grads = append_backward_ops(mul_out) mean_out = block.create_var(
dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
count_ops = len(block.ops) count_ops = len(block.ops)
params_grads = optimizer.append_regularization_ops(params_grads) params_grads = optimizer.append_regularization_ops(params_grads)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册