diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 00d9dd238ec5328be28f58f8118daad3a039e08c..b9018ecdba8303fd6b37c87edd99e192aa604228 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -513,19 +513,14 @@ ParamGradInfoMap AppendBackward(
   const int root_block_idx = 0;
   auto root_block = program_desc.MutableBlock(root_block_idx);

-  // insert fill one op for target
-  // TODO(qiao) add some check to the target.
   std::string fill_one_op_out = GradVarName(target.Name());
-  std::vector<int64_t> target_shape_desc = target.Shape();
-  std::vector<int> target_shape;
-  std::transform(target_shape_desc.begin(), target_shape_desc.end(),
-                 std::back_inserter(target_shape),
-                 [](int64_t dim) { return static_cast<int>(dim); });
+  bool is_scalar = target.Shape() == std::vector<int64_t>{1};
+  PADDLE_ENFORCE(is_scalar, "target should be scalar");
   VLOG(3) << "backward from loss=" << target.Name()
           << " data_type=" << target.GetDataType();
   std::unique_ptr<OpDescBind> fill_one_op(
       new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
-                     {{"shape", target_shape},
+                     {{"shape", std::vector<int>{1}},
                       {"value", static_cast<float>(1.0)},
                       {"data_type", target.GetDataType()}}));
   // infer var type of fill_one_op
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index d485cdf6109274377ad0057223bdd8401e964aa7..2b858f5ea0874d7bf1a9cf38529f5d0d70cca7f2 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -508,6 +508,7 @@ TEST(Backward, simple_single_op) {
   op->SetOutput("Out", {"out"});

   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   auto var_to_grad = AppendBackward(program, target, {});

   ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -544,6 +545,7 @@ TEST(Backward, default_attribute) {
   op->CheckAttrs();

   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   AppendBackward(program, target, {});

   ASSERT_EQ(block->AllOps().size(), 3UL);
@@ -581,6 +583,7 @@ TEST(Backward, simple_mult_op) {
   op3->SetOutput("Out", {"out3"});

   auto target = f::VarDescBind("out3");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {});

@@ -670,6 +673,7 @@ TEST(Backward, intermedia_var_no_grad) {
   op4->SetOutput("Out", {"out4"});

   auto target = f::VarDescBind("out4");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"out3"});

@@ -730,6 +734,7 @@ TEST(Backward, var_no_grad) {
   op2->SetOutput("Z", {"z2"});

   auto target = f::VarDescBind("z2");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"z1"});

@@ -810,6 +815,7 @@ TEST(Backward, shared_var) {
   op3->SetOutput("Out", {"out3"});

   auto target = f::VarDescBind("out3");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {});

@@ -888,6 +894,7 @@ TEST(Backward, half_backward) {
   op1->SetOutput("Out", {"out"});

   auto target = f::VarDescBind("out");
+  target.SetShape({1});
   size_t forward_len = block->AllOps().size();
   auto var_to_grad = AppendBackward(program, target, {"b"});
   f::OpDescBind *fill_op = block->AllOps()[forward_len];
diff --git a/python/paddle/v2/fluid/tests/test_optimizer.py b/python/paddle/v2/fluid/tests/test_optimizer.py
index 7b4237e7fdf5990019ddd85967036ceb598c33df..2459dfd664300d405edb36c4ca906c1769b5e7d2 100644
--- a/python/paddle/v2/fluid/tests/test_optimizer.py
+++ b/python/paddle/v2/fluid/tests/test_optimizer.py
@@ -16,14 +16,18 @@ class TestOptimizer(unittest.TestCase):
             dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
         mul_out = block.create_var(
             dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
         block.append_op(
             type="mul",
             inputs={"X": mul_x,
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
-        opts = sgd_optimizer.minimize(mul_out, init_program)
+        opts = sgd_optimizer.minimize(mean_out, init_program)
         self.assertEqual(len(opts), 1)
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
@@ -44,12 +48,16 @@ class TestOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         global_step = block.create_var(
             dtype="float32", shape=[1], lod_level=0, name="step")
         learning_rate = 0.01
         sgd_optimizer = optimizer.SGDOptimizer(
             learning_rate=learning_rate, global_step=global_step)
-        opts = sgd_optimizer.minimize(mul_out, init_program)
+        opts = sgd_optimizer.minimize(mean_out, init_program)
         self.assertEqual(len(opts), 2)
         sgd_op = opts[0]
         self.assertEqual(sgd_op.type, "sgd")
@@ -90,7 +98,11 @@ class TestMomentumOptimizer(unittest.TestCase):
         learning_rate = 0.01
         momentum_optimizer = self.MockMomentum(
             learning_rate=learning_rate, momentum=0.2)
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         opts = momentum_optimizer.create_optimization_pass(
@@ -132,10 +144,14 @@ class TestMomentumOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         momentum_optimizer = self.MockMomentum(
             learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
         opts = momentum_optimizer.create_optimization_pass(
@@ -186,10 +202,14 @@ class TestAdagradOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adagrad_optimizer = self.MockAdagrad(
             learning_rate=learning_rate, epsilon=1.0e-6)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
         opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -242,10 +262,14 @@ class TestAdamOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adam_optimizer = self.MockAdam(
             learning_rate=learning_rate, beta1=0.9, beta2=0.999)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
         opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -300,10 +324,14 @@ class TestAdamaxOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         adamax_optimizer = self.MockAdamax(
             learning_rate=learning_rate, beta1=0.9, beta2=0.999)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
         opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
@@ -355,10 +383,14 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
         learning_rate = 0.01
         decayed_adagrad_optimizer = self.MockDecayedAdagrad(
             learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
-        params_grads = append_backward_ops(mul_out)
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
         opts = decayed_adagrad_optimizer.create_optimization_pass(
diff --git a/python/paddle/v2/fluid/tests/test_program.py b/python/paddle/v2/fluid/tests/test_program.py
index ef2daf6916e14c015a39ae0193948e7ff6531449..e9bcefd21569aaa9225c676ea03b5c8e37d00333 100644
--- a/python/paddle/v2/fluid/tests/test_program.py
+++ b/python/paddle/v2/fluid/tests/test_program.py
@@ -1,6 +1,5 @@
 import unittest

-import paddle.v2.fluid.core as core
 from paddle.v2.fluid.framework import Program
 from paddle.v2.fluid.framework import g_main_program

@@ -98,21 +97,26 @@ class TestProgram(unittest.TestCase):
                     "Y": add_y},
             outputs={"Out": add_out},
             attrs={"x_num_col_dims": 1})
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": add_out}, outputs={"Out": mean_out})

         self.assertEqual(mul_op.idx, 0)
         self.assertEqual(add_op.idx, 1)
-        param_to_grad = prog.append_backward(add_out, set())
+        param_to_grad = prog.append_backward(mean_out, set())

         def grad_name(name):
             return name + "@GRAD"

-        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out"):
+        for var_name in ("mul.x", "mul.y", "mul.out", "add.y", "add.out",
+                         "mean.out"):
             self.assertEqual(param_to_grad[var_name][0], grad_name(var_name))
             self.assertEqual(param_to_grad[var_name][1], 0)

         expect_ops = [
-            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
-            "mul_grad"
+            "mul", "elementwise_add", "mean", "fill_constant", "mean_grad",
+            "elementwise_add_grad", "mul_grad"
         ]
         actual_ops = []
         for op in block.ops:
diff --git a/python/paddle/v2/fluid/tests/test_regularizer.py b/python/paddle/v2/fluid/tests/test_regularizer.py
index f5d1eb3b96211bd7c7335dbe116a1d765d7bae50..24baf55e90c98f39bab926e8c85a791eee5ed4a4 100644
--- a/python/paddle/v2/fluid/tests/test_regularizer.py
+++ b/python/paddle/v2/fluid/tests/test_regularizer.py
@@ -29,7 +29,11 @@ class TestL2DecayRegularizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         count_ops = len(block.ops)
         params_grads = optimizer.append_regularization_ops(params_grads)
@@ -62,7 +66,11 @@ class TestL1DecayRegularizer(unittest.TestCase):
                     "Y": mul_y},
             outputs={"Out": mul_out},
             attrs={"x_num_col_dims": 1})
-        params_grads = append_backward_ops(mul_out)
+        mean_out = block.create_var(
+            dtype="float32", shape=[1], lod_level=0, name="mean.out")
+        block.append_op(
+            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
+        params_grads = append_backward_ops(mean_out)
         self.assertEqual(len(params_grads), 1)
         count_ops = len(block.ops)
         params_grads = optimizer.append_regularization_ops(params_grads)
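Note for downstream callers: AppendBackward now enforces that the backward target has shape [1], so code that previously passed a non-scalar output to minimize or append_backward_ops must first reduce it to a scalar, which is what every updated test above does with a mean op. A minimal sketch of that pattern, assuming the block, mul_out, init_program, and optimizer objects are already set up exactly as in test_optimizer.py (the names here are illustrative, not new API):

    # Reduce the non-scalar op output to a scalar loss before minimizing it.
    mean_out = block.create_var(
        dtype="float32", shape=[1], lod_level=0, name="mean.out")
    block.append_op(
        type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})

    sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.01)
    # Passing a non-scalar target (e.g. mul_out) would now trip the C++ check
    # PADDLE_ENFORCE(is_scalar, "target should be scalar") in AppendBackward.
    opts = sgd_optimizer.minimize(mean_out, init_program)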