diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt
index 312df0fd71e32e9aa2c56802784ab9f1fbfd5ee5..c8d9dac21d995d92b9d50436d42e47b63ea55f58 100644
--- a/paddle/framework/CMakeLists.txt
+++ b/paddle/framework/CMakeLists.txt
@@ -44,7 +44,7 @@ cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_co
 cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto backward)
 set(EXECUTOR_TEST_OP elementwise_add_op gaussian_random_op feed_op fetch_op
-    mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op)
+    mul_op sum_op squared_l2_distance_op fill_constant_op sgd_op mean_op)
 if(WITH_GPU)
   nv_test(executor_test SRCS executor_test.cc DEPS executor ${EXECUTOR_TEST_OP})
 else()
diff --git a/paddle/framework/backward.cc b/paddle/framework/backward.cc
index 1e20789a1f1b520e33c99b0f8740fbbcf2e792fa..5ebb0a5880a707c6aa541c573f4b6ea0b4eaea49 100644
--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -273,6 +273,21 @@ static bool AllGradInSet(const std::vector<std::string>& names,
   return true;
 }
 
+static void CreateGradVarInBlock(BlockDescBind* block_desc,
+                                 size_t grad_op_start_index) {
+  auto ops = block_desc->AllOps();
+  for (size_t op_index = grad_op_start_index; op_index < ops.size();
+       ++op_index) {
+    for (const auto& output : ops[op_index]->Outputs()) {
+      for (const auto& real_output : output.second) {
+        if (!block_desc->HasVar(real_output)) {
+          block_desc->Var(real_output);
+        }
+      }
+    }
+  }
+}
+
 std::vector<std::unique_ptr<OpDescBind>> MakeOpGrad(
     const std::unique_ptr<OpDescBind>& op_desc,
     std::unordered_set<std::string>* no_grad_vars,
@@ -326,15 +341,16 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
   std::unordered_map<std::string, std::vector<size_t>> dup_out_ops;
   size_t grad_desc_idx = 0;
   std::vector<std::unique_ptr<OpDescBind>> backward_descs;
+
   for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
     std::vector<std::unique_ptr<OpDescBind>> op_grads =
         MakeOpGrad(*it, no_grad_vars, grad_to_var);
     if ((*it)->Type() == "recurrent") {
       PADDLE_ENFORCE_EQ(
-          op_grads.size(), size_t(1),
+          op_grads.size(), static_cast<size_t>(1),
           "rnn_op's gradient process should contain only one op.");
-      int step_block_idx = (*it)->GetBlockAttr("stop_block");
+      int step_block_idx = (*it)->GetBlockAttr("step_block");
       auto backward_block_op_descs = MakeBlockBackward(
           program_desc, step_block_idx, no_grad_vars, grad_to_var);
       BlockDescBind* backward_block = program_desc.AppendBlock(*cur_block);
@@ -380,10 +396,11 @@ std::vector<std::unique_ptr<OpDescBind>> MakeBlockBackward(
     backward_descs.insert(backward_descs.begin() + p.first + 1,
                           std::move(p.second));
   }
+
   return backward_descs;
 }
 
-void AppendBackward(ProgramDescBind& program_desc,
+void AppendBackward(ProgramDescBind& program_desc, const VarDescBind& target,
                     const std::unordered_set<std::string>& no_grad_vars) {
   std::unordered_set<std::string> no_grad_var_names;
   no_grad_var_names.reserve(no_grad_vars.size() + 1);
@@ -391,13 +408,34 @@ void AppendBackward(ProgramDescBind& program_desc,
   for (auto& name : no_grad_vars) {
     no_grad_var_names.insert(GradVarName(name));
   }
+
   const int root_block_idx = 0;
+  auto root_block = program_desc.Block(root_block_idx);
+  auto& all_ops = root_block->ops_;
+
+  // insert fill one op for target
+  std::string fill_one_op_out = GradVarName(target.Name());
+  std::unique_ptr<OpDescBind> fill_one_op(
+      new OpDescBind("fill_constant", {}, {{"Out", {fill_one_op_out}}},
+                     {{"shape", std::vector<int>{1}},
+                      {"value", static_cast<float>(1.0)},
+                      {"dataType", framework::DataType::FP32}}));
+  all_ops.push_back(std::move(fill_one_op));
+  size_t forward_op_num = all_ops.size();
+  size_t forward_block_num = program_desc.Size();
   std::unordered_map<std::string, std::string> grad_to_var;
   auto backward_op_descs =
       MakeBlockBackward(program_desc, root_block_idx, &no_grad_var_names, &grad_to_var);
-  auto& forw_op_descs = program_desc.Block(root_block_idx)->ops_;
   for (auto& ptr : backward_op_descs) {
-    forw_op_descs.push_back(std::move(ptr));
+    all_ops.push_back(std::move(ptr));
+  }
+  root_block->Var(fill_one_op_out);
+
+  // create grad_var for all blocks in this program
+  CreateGradVarInBlock(root_block, forward_op_num);
+  for (size_t block_index = forward_block_num;
+       block_index < program_desc.Size(); ++block_index) {
+    CreateGradVarInBlock(program_desc.Block(block_index), 0);
   }
 }
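
With the hunks above, AppendBackward becomes self-contained: it seeds the gradient chain by injecting a fill_constant op that sets target@GRAD to 1, appends the generated grad ops to the root block, and then walks every block created by the pass (including step blocks appended for recurrent ops) so CreateGradVarInBlock registers a VarDesc for each grad-op output. A minimal sketch of the intended call pattern; the wrapper function and the variable name "loss" are illustrative, not part of the patch:

    // Sketch: driving the reworked AppendBackward (names illustrative).
    // `program` is assumed to already contain the forward ops that produce
    // a scalar variable named "loss" in its root block.
    void BuildBackwardPass(paddle::framework::ProgramDescBind& program) {
      namespace f = paddle::framework;
      f::VarDescBind target("loss");  // only the name is consulted
      f::AppendBackward(program, target, /*no_grad_vars=*/{});
      // The root block now holds, in order:
      //   [forward ops] [fill_constant: loss@GRAD = 1] [grad ops]
    }
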
diff --git a/paddle/framework/backward.h b/paddle/framework/backward.h
index f1ab8056450c96f0a1b671e1efa46c4c68f9ea15..2c95d18ef7e2d997679bff442bf89d6364eb13ea 100644
--- a/paddle/framework/backward.h
+++ b/paddle/framework/backward.h
@@ -29,7 +29,7 @@ extern std::unique_ptr<OperatorBase> Backward(
 
 // TODO(jiayi): Add target as parameter and generate backward op
 // according to target.
-void AppendBackward(ProgramDescBind& program_desc,
+void AppendBackward(ProgramDescBind& program_desc, const VarDescBind& target,
                     const std::unordered_set<std::string>& no_grad_vars);
 
 }  // namespace framework
diff --git a/paddle/framework/backward_test.cc b/paddle/framework/backward_test.cc
index 9b15331dff380a2c437e1cc42da92d7fd239d31c..d9ecfe0e801cdfab70b9a120a1cf3a0c2eb73a95 100644
--- a/paddle/framework/backward_test.cc
+++ b/paddle/framework/backward_test.cc
@@ -18,6 +18,7 @@
 #include "paddle/framework/block_desc.h"
 #include "paddle/framework/op_desc.h"
 #include "paddle/framework/op_registry.h"
+#include "paddle/framework/var_desc.h"
 #include "paddle/operators/net_op.h"
 
 namespace paddle {
@@ -468,10 +469,14 @@ TEST(Backward, simple_single_op) {
   op->SetInput("b", {"b"});
   op->SetOutput("Out", {"out"});
 
-  AppendBackward(program, {});
+  auto target = f::VarDescBind("out");
+  AppendBackward(program, target, {});
 
-  ASSERT_EQ(block->AllOps().size(), 2UL);
-  f::OpDescBind *grad_op = block->AllOps()[1];
+  ASSERT_EQ(block->AllOps().size(), 3UL);
+  f::OpDescBind *fill_op = block->AllOps()[1];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDescBind *grad_op = block->AllOps()[2];
   EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
   ASSERT_EQ(grad_op->InputNames().size(), 1UL);
   ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
@@ -494,13 +499,17 @@ TEST(Backward, default_attribute) {
   op->SetOutput("Out", {"out"});
   op->CheckAttrs();
 
-  AppendBackward(program, {});
+  auto target = f::VarDescBind("out");
+  AppendBackward(program, target, {});
 
-  ASSERT_EQ(block->AllOps().size(), 2UL);
+  ASSERT_EQ(block->AllOps().size(), 3UL);
   EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
   EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
 
-  f::OpDescBind *grad_op = block->AllOps()[1];
+  f::OpDescBind *fill_op = block->AllOps()[1];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDescBind *grad_op = block->AllOps()[2];
   ASSERT_EQ(grad_op->Type(), "mul_grad");
   EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
   EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
@@ -528,10 +537,15 @@ TEST(Backward, simple_mult_op) {
   op3->SetInput("b", {"b3"});
   op3->SetOutput("Out", {"out3"});
 
-  AppendBackward(program, {});
+  auto target = f::VarDescBind("out3");
+  size_t forward_len = block->AllOps().size();
+  AppendBackward(program, target, {});
 
-  ASSERT_EQ(block->AllOps().size(), 6UL);
-  f::OpDescBind *grad_op1 = block->AllOps()[5];
+  ASSERT_EQ(block->AllOps().size(), 6UL + 1);
+  f::OpDescBind *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
"fill_constant"); + + f::OpDescBind *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -542,7 +556,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op2 = block->AllOps()[4]; + f::OpDescBind *grad_op2 = block->AllOps()[5]; EXPECT_EQ(grad_op2->Type(), "mul_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -556,7 +570,7 @@ TEST(Backward, simple_mult_op) { EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); - f::OpDescBind *grad_op3 = block->AllOps()[3]; + f::OpDescBind *grad_op3 = block->AllOps()[4]; EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); @@ -596,10 +610,15 @@ TEST(Backward, intermedia_var_no_grad) { op4->SetInput("Y", {"out3"}); op4->SetOutput("Out", {"out4"}); - AppendBackward(program, {"out3"}); + auto target = f::VarDescBind("out4"); + size_t forward_len = block->AllOps().size(); + AppendBackward(program, target, {"out3"}); - ASSERT_EQ(block->AllOps().size(), 6UL); - f::OpDescBind *grad_op1 = block->AllOps()[5]; + ASSERT_EQ(block->AllOps().size(), 7UL); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); @@ -610,7 +629,7 @@ TEST(Backward, intermedia_var_no_grad) { EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); - f::OpDescBind *grad_op4 = block->AllOps()[4]; + f::OpDescBind *grad_op4 = block->AllOps()[5]; EXPECT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); @@ -642,10 +661,15 @@ TEST(Backward, var_no_grad) { op2->SetOutput("Y", {"y2"}); op2->SetOutput("Z", {"z2"}); - AppendBackward(program, {"z1"}); + auto target = f::VarDescBind("z2"); + size_t forward_len = block->AllOps().size(); + AppendBackward(program, target, {"z1"}); - ASSERT_EQ(block->AllOps().size(), 5UL); - f::OpDescBind *grad_op2 = block->AllOps()[2]; + ASSERT_EQ(block->AllOps().size(), 6UL); + f::OpDescBind *fill_op = block->AllOps()[forward_len]; + EXPECT_EQ(fill_op->Type(), "fill_constant"); + + f::OpDescBind *grad_op2 = block->AllOps()[3]; ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); @@ -661,7 +685,7 @@ TEST(Backward, var_no_grad) { std::vector({f::GradVarName("y1")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); - f::OpDescBind *fill_zero_op = block->AllOps()[3]; + f::OpDescBind *fill_zero_op = block->AllOps()[4]; ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); @@ -669,7 +693,7 @@ TEST(Backward, var_no_grad) { EXPECT_EQ(fill_zero_op->Output("Y"), std::vector({std::string("z1") + f::kZeroVarSuffix})); - f::OpDescBind *grad_op1 = block->AllOps()[4]; + f::OpDescBind *grad_op1 = block->AllOps()[5]; ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 6UL); 
   ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
@@ -709,10 +733,15 @@ TEST(Backward, shared_var) {
   op3->SetInput("b", {"b3"});
   op3->SetOutput("Out", {"out3"});
 
-  AppendBackward(program, {});
+  auto target = f::VarDescBind("out3");
+  size_t forward_len = block->AllOps().size();
+  AppendBackward(program, target, {});
 
-  ASSERT_EQ(block->AllOps().size(), 7UL);
-  f::OpDescBind *grad_op3 = block->AllOps()[3];
+  ASSERT_EQ(block->AllOps().size(), 8UL);
+  f::OpDescBind *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
+
+  f::OpDescBind *grad_op3 = block->AllOps()[4];
   ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
   ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
   ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
@@ -723,7 +752,7 @@ TEST(Backward, shared_var) {
   EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
             std::vector<std::string>({f::GradVarName("b3")}));
 
-  f::OpDescBind *grad_op4 = block->AllOps()[4];
+  f::OpDescBind *grad_op4 = block->AllOps()[5];
   ASSERT_EQ(grad_op4->Type(), "mul_grad");
   ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
   ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
@@ -737,7 +766,7 @@ TEST(Backward, shared_var) {
   EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
             std::vector<std::string>({f::GradVarName("y2")}));
 
-  f::OpDescBind *sum_op = block->AllOps()[5];
+  f::OpDescBind *sum_op = block->AllOps()[6];
   ASSERT_EQ(sum_op->Type(), "sum");
   ASSERT_EQ(sum_op->InputNames().size(), 1UL);
   ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
@@ -747,7 +776,7 @@ TEST(Backward, shared_var) {
   EXPECT_EQ(sum_op->Output("Out"),
             std::vector<std::string>({f::GradVarName("out1")}));
 
-  f::OpDescBind *grad_op1 = block->AllOps()[6];
+  f::OpDescBind *grad_op1 = block->AllOps()[7];
   ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
   ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
   ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
@@ -769,7 +798,11 @@ TEST(Backward, half_backward) {
   op1->SetInput("Y", {"b"});
   op1->SetOutput("Out", {"out"});
 
-  AppendBackward(program, {"b"});
+  auto target = f::VarDescBind("out");
+  size_t forward_len = block->AllOps().size();
+  AppendBackward(program, target, {"b"});
+  f::OpDescBind *fill_op = block->AllOps()[forward_len];
+  EXPECT_EQ(fill_op->Type(), "fill_constant");
 
   auto ops = block->AllOps();
-  ASSERT_EQ(2UL, ops.size());
-}
\ No newline at end of file
+  ASSERT_EQ(3UL, ops.size());
+}
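
Every test update above follows one scaffold: record the op count before the call, expect the injected fill_constant at exactly that index, and shift each grad-op index up by one. Condensed into a single sketch (the helper is hypothetical, using the tests' f:: alias for paddle::framework):

    // Hypothetical condensation of the scaffold shared by the tests above.
    void ExpectFillThenGrads(f::BlockDescBind *block, f::ProgramDescBind &program,
                             const f::VarDescBind &target) {
      size_t forward_len = block->AllOps().size();  // ops before backward
      f::AppendBackward(program, target, {});
      // fill_constant lands immediately after the last forward op; grad ops
      // follow it in reverse order of their forward counterparts.
      f::OpDescBind *fill_op = block->AllOps()[forward_len];
      EXPECT_EQ(fill_op->Type(), "fill_constant");
    }
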
diff --git a/paddle/framework/block_desc.h b/paddle/framework/block_desc.h
index 1dd18a902e97dd4146c4f1fce568635f420a36b2..e1424c4bdec338dc2fe085c464c13018d2324ed6 100644
--- a/paddle/framework/block_desc.h
+++ b/paddle/framework/block_desc.h
@@ -39,7 +39,7 @@ class BlockDescBind {
       std::unordered_map<std::string, std::string> *grad_to_var);
 
   friend void AppendBackward(
-      ProgramDescBind &program_desc,
+      ProgramDescBind &program_desc, const VarDescBind &target,
       const std::unordered_set<std::string> &no_grad_vars);
 
   BlockDescBind(ProgramDescBind *prog, BlockDesc *desc)
diff --git a/paddle/framework/executor_test.cc b/paddle/framework/executor_test.cc
index 6a8db8c66aa38d0c9a6d6f6f17d865a5e6ece652..fcd2e47cff57fcc6c177be77d7e14b167a28f4ae 100644
--- a/paddle/framework/executor_test.cc
+++ b/paddle/framework/executor_test.cc
@@ -34,6 +34,7 @@ USE_OP(mul);
 USE_OP(sum);
 USE_OP(squared_l2_distance);
 USE_OP(fill_constant);
+USE_OP(mean);
 USE_OP(sgd);
 
 using namespace paddle::platform;
@@ -45,9 +46,10 @@ void AddOp(const std::string& type, const VariableNameMap& inputs,
   // insert output
   for (auto kv : outputs) {
     for (auto v : kv.second) {
-      auto var = block->Var(v);
-      var->SetType(VarDesc::LOD_TENSOR);
-      var->SetDataType(paddle::framework::DataType::FP32);
+      if (!block->HasVar(v)) {
+        auto var = block->Var(v);
+        var->SetDataType(paddle::framework::DataType::FP32);
+      }
     }
   }
 
@@ -147,12 +155,12 @@ class ExecutorTesterRandom : public ::testing::Test {
     AddOp("squared_l2_distance", {{"X", {"a"}}, {"Y", {"a_out"}}},
           {{"Out", {"l2_distance"}}, {"sub_result", {"l2_distance_sub"}}}, {},
           root_block);
+    AddOp("mean", {{"X", {"l2_distance"}}}, {{"Out", {"mean_out"}}}, {},
+          root_block);
 
     // backward
-    AddOp("fill_constant", {}, {{"Out", {"l2_distance@GRAD"}}},
-          {{"shape", std::vector<int>{batch_size, 1}}, {"value", float(1.0)}},
-          root_block);
-    AppendBackward(program, {});
+    auto target = VarDescBind("mean_out");
+    AppendBackward(program, target, {});
 
     // update
     AddOp("fill_constant", {}, {{"Out", {"learning_rate"}}},
diff --git a/paddle/framework/var_desc.h b/paddle/framework/var_desc.h
index 8ffb858eb244dc06c39a9543de3e037bcdf28f68..688a46f83982fc464c7602ec1041ad3f42122211 100644
--- a/paddle/framework/var_desc.h
+++ b/paddle/framework/var_desc.h
@@ -54,7 +54,10 @@ inline void VectorToRepeated(const std::vector<bool> &vec,
 
 class VarDescBind {
  public:
-  explicit VarDescBind(const std::string &name) { desc_.set_name(name); }
+  explicit VarDescBind(const std::string &name) {
+    desc_.set_name(name);
+    desc_.set_type(VarDesc::LOD_TENSOR);
+  }
 
   VarDesc *Proto() { return &desc_; }
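
Defaulting the type in the constructor above is what lets call sites drop their explicit SetType calls; the executor_test.cc branch earlier only sets the data type for the same reason, assuming BlockDescBind::Var builds its descriptor through this constructor. A small sketch of the resulting idiom (the helper and the variable name are illustrative only):

    // Sketch: with the new default, a fresh descriptor is already typed
    // as LOD_TENSOR; only the data type still needs to be set.
    void EnsureFP32Var(paddle::framework::BlockDescBind *block,
                       const std::string &name) {
      if (!block->HasVar(name)) {  // guard against re-creating an existing var
        auto var = block->Var(name);
        var->SetDataType(paddle::framework::DataType::FP32);
      }
    }
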
diff --git a/paddle/pybind/protobuf.cc b/paddle/pybind/protobuf.cc
index b4512dae9090a977c7241cdb7b23a30745c52562..b6327f8500bbbb66575d3bc928b38ab208296a44 100644
--- a/paddle/pybind/protobuf.cc
+++ b/paddle/pybind/protobuf.cc
@@ -118,9 +118,9 @@ void BindProgramDesc(py::module &m) {
       .def("append_block", &ProgramDescBind::AppendBlock,
            py::return_value_policy::reference)
       .def("append_backward",
-           [](ProgramDescBind &program_desc,
+           [](ProgramDescBind &program_desc, const VarDescBind &target,
               const std::unordered_set<std::string> &no_grad_vars) {
-             AppendBackward(program_desc, no_grad_vars);
+             AppendBackward(program_desc, target, no_grad_vars);
            })
       .def("block", &ProgramDescBind::Block, py::return_value_policy::reference)
       .def("num_blocks", &ProgramDescBind::Size)
diff --git a/python/paddle/v2/framework/framework.py b/python/paddle/v2/framework/framework.py
index 0909da5378d3b314d525958dc7c5e5bf4f662c5d..7f5da571ba6d1c6f3570cbf5aecbcfa06d9a6862 100644
--- a/python/paddle/v2/framework/framework.py
+++ b/python/paddle/v2/framework/framework.py
@@ -176,6 +176,18 @@ class Operator(object):
         proto = OpProtoHolder.instance().get_op_proto(type)
 
         if inputs is not None:
+            given = set()
+            need = set()
+            for n in inputs:
+                given.add(n)
+            for m in proto.inputs:
+                need.add(m.name)
+            if not given == need:
+                raise ValueError(
+                    "Incorrect setting for input(s) of operator \"%s\". Need: [%s] Given: [%s]"
+                    % (type, ", ".join(str(e) for e in need), ", ".join(
+                        str(e) for e in given)))
+
             for in_proto in proto.inputs:
                 in_argus = inputs[in_proto.name]
                 if not isinstance(in_argus, list):
@@ -190,6 +202,18 @@ class Operator(object):
             self.desc.set_input(in_proto.name, in_argu_names)
 
         if outputs is not None:
+            given = set()
+            need = set()
+            for n in outputs:
+                given.add(n)
+            for m in proto.outputs:
+                need.add(m.name)
+            if not given == need:
+                raise ValueError(
+                    "Incorrect setting for output(s) of operator \"%s\". Need: [%s] Given: [%s]"
+                    % (type, ", ".join(str(e) for e in need), ", ".join(
+                        str(e) for e in given)))
+
             for out_proto in proto.outputs:
                 out_argus = outputs[out_proto.name]
                 if not isinstance(out_argus, list):
diff --git a/python/paddle/v2/framework/tests/test_program.py b/python/paddle/v2/framework/tests/test_program.py
index 64b781e6ea21bff90646d312a157d60852f276df..c5674382a484a91268e0139ba5588b123531210e 100644
--- a/python/paddle/v2/framework/tests/test_program.py
+++ b/python/paddle/v2/framework/tests/test_program.py
@@ -51,11 +51,14 @@ class TestProgram(unittest.TestCase):
         sum_op_desc.set_input("Y", ["b1"])
         sum_op_desc.set_output("Out", ["out2"])
 
+        target = block.new_var("out2")
+
         expect_ops = [
-            "mul", "elementwise_add", "elementwise_add_grad", "mul_grad"
+            "mul", "elementwise_add", "fill_constant", "elementwise_add_grad",
+            "mul_grad"
         ]
         actual_ops = []
-        prog.append_backward(set())
+        prog.append_backward(target, set())
         for op in block.all_ops():
             actual_ops.append(op.type())
         print(actual_ops)