/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/framework/backward.h" #include #include "paddle/framework/block_desc.h" #include "paddle/framework/op_desc.h" #include "paddle/framework/op_registry.h" #include "paddle/framework/var_desc.h" #include "paddle/operators/net_op.h" USE_NO_KERNEL_OP(fill_constant); namespace paddle { namespace framework { using DeviceContext = platform::DeviceContext; class NoneOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; protected: void InferShape(framework::InferShapeContext *ctx) const override {} }; template class NoneKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override {} }; class RowWiseAddOpMaker : public OpProtoAndCheckerMaker { public: RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "Input X of Add"); AddInput("b", "Bias of Add"); AddOutput("Out", "Out of Add"); AddComment("Add Op"); } }; class RowWiseAddGradMaker : public SingleGradOpDescMaker { public: using SingleGradOpDescMaker::SingleGradOpDescMaker; protected: std::unique_ptr Apply() const override { auto grad_op = new OpDesc(); grad_op->SetInput(GradVarName("Out"), OutputGrad("Out")); grad_op->SetOutput(GradVarName("X"), InputGrad("X")); grad_op->SetOutput(GradVarName("b"), InputGrad("b")); grad_op->SetType("rowwise_add_grad"); return std::unique_ptr(grad_op); } }; class MulOpMaker : public OpProtoAndCheckerMaker { public: MulOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "A"); AddInput("Y", "B"); AddOutput("Out", "Out"); AddAttr("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); AddAttr("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1); AddComment("Mul"); } }; class SigmoidOpMaker : public OpProtoAndCheckerMaker { public: SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "X"); AddOutput("Out", "Y"); AddComment("Sigmoid"); } }; class NoGradOpMaker : public OpProtoAndCheckerMaker { public: NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "X input"); AddOutput("Out", "Y output"); AddComment("NoGradOp, same input output. no Grad"); } }; class FcOp : public operators::NetOp { public: FcOp(const std::string &type, const VariableNameMap &inputs, const VariableNameMap &outputs, const AttributeMap &attrs) : NetOp(type, inputs, outputs, attrs) { AppendOp(OpRegistry::CreateOp( "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}}, {{"Out", {Output("mul_result")}}}, AttributeMap{})); auto input_b = Inputs("b"); std::string before_act = "mul_result"; if (input_b.size() != 0) { AppendOp(OpRegistry::CreateOp( "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}}, {{"Out", {Output("add_result")}}}, AttributeMap{})); before_act = "add_result"; } else { auto out_varname = Output("add_result"); if (out_varname != kEmptyVarName) { this->Rename(out_varname, kEmptyVarName); } } AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}}, {{"Out", {Output("Out")}}}, AttributeMap{})); CompleteAddOp(false); } }; class FcOpMaker : public OpProtoAndCheckerMaker { public: FcOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "x"); AddInput("W", "w"); AddInput("b", "b"); AddOutput("mul_result", "").AsIntermediate(); AddOutput("add_result", "").AsIntermediate(); AddOutput("Out", ""); AddComment(""); } }; class ManyOutputOpMaker : public OpProtoAndCheckerMaker { public: ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("x", "x"); AddOutput("y", "y"); AddOutput("z", "z"); AddComment(""); } }; class FillZeroOpMaker : public OpProtoAndCheckerMaker { public: FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "x"); AddOutput("Y", "out"); AddComment(""); } }; class SumOpMaker : public framework::OpProtoAndCheckerMaker { public: SumOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "the input tensors of sum operator.").AsDuplicable(); AddOutput("Out", "the output tensor of sum operator."); AddComment(""); } }; class MultInOutOpMaker : public OpProtoAndCheckerMaker { public: MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", "x"); AddInput("H", "h"); AddOutput("Y", "y"); AddOutput("Z", "z"); AddComment(""); } }; class MinusGradOpDescMaker : public GradOpDescMakerBase { public: using GradOpDescMakerBase::GradOpDescMakerBase; std::vector> operator()() const override { std::vector> retv; auto x_g = InputGrad("X"); if (!x_g.empty()) { auto *op_desc = new OpDesc(); op_desc->SetType("scale"); op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetOutput("Out", x_g); op_desc->SetAttr("scale", 1.0f); retv.emplace_back(op_desc); } auto y_g = InputGrad("Y"); if (!y_g.empty()) { auto *op_desc = new OpDesc(); op_desc->SetType("scale"); op_desc->SetInput("X", OutputGrad("Out")); op_desc->SetOutput("Out", y_g); op_desc->SetAttr("scale", -1.0f); retv.emplace_back(op_desc); } return retv; } }; class MinusOpMaker : public OpProtoAndCheckerMaker { public: MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { AddInput("X", ""); AddInput("Y", ""); AddOutput("Out", ""); AddComment("minus for unittest"); } }; } // namespace framework } // namespace paddle namespace f = paddle::framework; namespace ops = paddle::operators; using EnforceNotMet = paddle::platform::EnforceNotMet; // rowwise_add REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker, f::RowWiseAddGradMaker); REGISTER_OP_CPU_KERNEL(rowwise_add, f::NoneKernel); REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp); REGISTER_OP_CPU_KERNEL(rowwise_add_grad, f::NoneKernel); // mul REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp); REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel); REGISTER_OP_CPU_KERNEL(mul_grad, f::NoneKernel); // sigmoid REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp); REGISTER_OP_CPU_KERNEL(sigmoid, f::NoneKernel); REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker); // fill_zeros_like REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker); REGISTER_OP_CPU_KERNEL(fill_zeros_like, f::NoneKernel); // sum REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp); REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel); REGISTER_OP_CPU_KERNEL(sum_grad, f::NoneKernel); // fc REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker); // many_output_op REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker, many_output_op_grad, f::NoneOp); // mult_in_out REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad, f::NoneOp); REGISTER_OP_CPU_KERNEL(mult_in_out, f::NoneKernel); REGISTER_OP_CPU_KERNEL(mult_in_out_grad, f::NoneKernel); // minus REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker); REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel); // scale REGISTER_OPERATOR(scale, f::NoneOp); REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel); TEST(Backward, simple_op_not_need_grad) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, f::AttributeMap{}); ASSERT_NE(fwd, nullptr); auto gop = f::Backward(*fwd, {"x"}); ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName); auto no_input_gop = f::Backward(*fwd, {"x", "b"}); ASSERT_NE(no_input_gop, nullptr); ASSERT_TRUE(no_input_gop->IsNetOp()); ASSERT_EQ(0UL, static_cast(no_input_gop.get())->ops_.size()); } TEST(Backward, net_fc_backward_normal) { std::shared_ptr fwd = f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}}, {{"mul_result", {"mul_res"}}, {"add_result", {"add_re"}}, {"Out", {"out"}}}, f::AttributeMap{}); ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, std::unordered_set{}); ASSERT_TRUE(gop->IsNetOp()); auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); ASSERT_EQ(3UL, net->ops_.size()); f::OperatorBase &d_sigmoid = *net->ops_[0]; ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); f::OperatorBase &d_add = *net->ops_[1]; ASSERT_EQ("rowwise_add_grad", d_add.Type()); f::OperatorBase &d_mul = *net->ops_[2]; ASSERT_EQ("mul_grad", d_mul.Type()); } TEST(Backward, net_fc_backward_not_have_b) { std::shared_ptr fwd = f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}}, {{"mul_result", {"mul_res"}}, {"add_result", {"add_res"}}, {"Out", {"tmp"}}}, f::AttributeMap{}); ASSERT_NE(fwd, nullptr); std::shared_ptr gop = f::Backward(*fwd, std::unordered_set{}); ASSERT_TRUE(gop->IsNetOp()); auto net = static_cast(gop.get()); ASSERT_NO_THROW(net->DebugString()); ASSERT_EQ(2UL, net->ops_.size()); f::OperatorBase &d_sigmoid = *net->ops_[0]; ASSERT_EQ("sigmoid_grad", d_sigmoid.Type()); f::OperatorBase &d_mul = *net->ops_[1]; ASSERT_EQ("mul_grad", d_mul.Type()); } TEST(Backward, net_input_of_network_not_need_grad) { ops::NetOp net; net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}}, {{"mul_result", {"mul_tmp_0"}}, {"add_result", {"add_tmp_0"}}, {"Out", {"hidden0"}}}, f::AttributeMap{})); net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}}, {{"mul_result", {"mul_tmp_1"}}, {"add_result", {"add_tmp_1"}}, {"Out", {"hidden1"}}}, f::AttributeMap{})); net.CompleteAddOp(); auto bwd = Backward(net, {"x"}); // x@GRAD is not need. ASSERT_TRUE(bwd->IsNetOp()); auto bwd_net = static_cast(bwd.get()); auto output_vars = bwd_net->OutputVars(true); std::unordered_set all_outputs = std::unordered_set(output_vars.begin(), output_vars.end()); all_outputs.erase(f::kEmptyVarName); for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) { ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end()); } // Not Generated X ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end()); ASSERT_EQ(2UL, bwd_net->ops_.size()); ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp()); auto first_fc_grad = static_cast(bwd_net->ops_[1].get()); ASSERT_EQ(3UL, first_fc_grad->ops_.size()); ASSERT_EQ(f::kEmptyVarName, first_fc_grad->ops_[2]->Output(f::GradVarName("X"))); } TEST(Backward, net_shared_weight) { ops::NetOp net; net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}}, {{"Out", {"out"}}}, f::AttributeMap{})); net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}}, {{"Out", {"FinalOut"}}}, f::AttributeMap{})); net.CompleteAddOp(); auto bwd = f::Backward(net, std::unordered_set{}); ASSERT_TRUE(bwd->IsNetOp()); auto bwd_net = static_cast(bwd.get()); ASSERT_EQ(3UL, bwd_net->ops_.size()); ASSERT_EQ("sum", bwd_net->ops_[2]->Type()); } TEST(Backward, op_all_input_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, f::AttributeMap{}); auto backward = f::Backward(*fwd, {"x", "b"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } TEST(Backward, op_all_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}}, {{"Out", {"out"}}}, f::AttributeMap{}); auto backward = f::Backward(*fwd, {"out"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); ASSERT_TRUE(net->ops_.empty()); } TEST(Backward, op_part_of_output_are_not_need) { auto fwd = f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}}, {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{}); auto backward = f::Backward(*fwd, {"Z"}); ASSERT_TRUE(backward->IsNetOp()); auto net = static_cast(backward.get()); ASSERT_EQ(net->ops_.size(), 2UL); auto &fill_zero = *net->ops_[0]; ASSERT_EQ("fill_zeros_like", fill_zero.Type()); ASSERT_EQ(1UL, fill_zero.Inputs("X").size()); ASSERT_EQ("Z", fill_zero.Input("X")); ASSERT_EQ(1UL, fill_zero.Outputs("Out").size()); ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out")); auto &d_many_out = *net->ops_[1]; ASSERT_EQ("many_output_op_grad", d_many_out.Type()); ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, d_many_out.Input(f::GradVarName("z"))); ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y"))); ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x"))); } TEST(Backward, op_part_of_input_are_not_need) { auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}}, {{"Out", {"out"}}}, f::AttributeMap{}); auto backward = f::Backward(*fwd, {"a"}); auto &grad_mul = *backward; ASSERT_EQ(grad_mul.Type(), "mul_grad"); ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL); ASSERT_EQ(grad_mul.Outputs().size(), 2UL); ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName); ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b")); ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out")); ASSERT_EQ(grad_mul.Input("X"), "a"); ASSERT_EQ(grad_mul.Input("Y"), "b"); ASSERT_EQ(grad_mul.Input("Out"), "out"); } TEST(Backward, linear_net_intermediate_variable_has_no_grad) { ops::NetOp net; net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}}, {{"mul_result", {"mul_out1"}}, {"add_result", {"add_out1"}}, {"Out", {"out1"}}}, f::AttributeMap{})); net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}}, {{"mul_result", {"mul_out2"}}, {"add_result", {"tmp_out2"}}, {"Out", {"out2"}}}, f::AttributeMap{})); net.AppendOp(f::OpRegistry::CreateOp( "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}}, {{"mul_result", {"mul_out3"}}, {"add_result", {"tmp_out3"}}, {"Out", {"out3"}}}, f::AttributeMap{})); net.CompleteAddOp(); auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"}); ASSERT_TRUE(backward->IsNetOp()); auto bwd_net = static_cast(backward.get()); ASSERT_EQ(bwd_net->ops_.size(), 3UL); auto &grad_fc = *bwd_net->ops_[0]; const char *all = paddle::operators::NetOp::kAll; EXPECT_EQ(grad_fc.Inputs(all).size(), 2UL /* external input number */ + 1UL /* external output number*/ + 1UL /* number of gradient of external output*/ + 2UL /* internal variable number*/ ); EXPECT_EQ(grad_fc.Outputs(all).size(), 2UL /* input number of mul*/ + 2UL /* input number of rowwise_add*/ + 1UL /* input number of sigmod */ - 1UL /* out2 is not needed*/); EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL); EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL); } TEST(Backward, simple_single_op) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op = block->AppendOp(); op->SetType("rowwise_add"); op->SetInput("X", {"x"}); op->SetInput("b", {"b"}); op->SetOutput("Out", {"out"}); auto target = f::VarDesc("out"); target.SetShape({1}); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 3UL); f::OpDesc *fill_op = block->AllOps()[1]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op = block->AllOps()[2]; EXPECT_EQ(grad_op->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op->InputNames().size(), 1UL); ASSERT_EQ(grad_op->OutputNames().size(), 2UL); EXPECT_EQ(grad_op->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out")})); EXPECT_EQ(grad_op->Output(f::GradVarName("X")), std::vector({f::GradVarName("x")})); EXPECT_EQ(grad_op->Output(f::GradVarName("b")), std::vector({f::GradVarName("b")})); EXPECT_EQ(var_to_grad.size(), 3UL); EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2)); EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2)); EXPECT_TRUE(block->HasVar(f::GradVarName("b"))); EXPECT_TRUE(block->HasVar(f::GradVarName("x"))); } TEST(Backward, default_attribute) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op = block->AppendOp(); op->SetType("mul"); op->SetInput("X", {"x"}); op->SetInput("Y", {"y"}); op->SetOutput("Out", {"out"}); op->CheckAttrs(); auto target = f::VarDesc("out"); target.SetShape({1}); AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 3UL); EXPECT_EQ(boost::get(op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(op->GetAttr("y_num_col_dims")), 1); f::OpDesc *fill_op = block->AllOps()[1]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op = block->AllOps()[2]; ASSERT_EQ(grad_op->Type(), "mul_grad"); EXPECT_EQ(boost::get(grad_op->GetAttr("x_num_col_dims")), 1); EXPECT_EQ(boost::get(grad_op->GetAttr("y_num_col_dims")), 1); } TEST(Backward, simple_mult_op) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"out1"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out2"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); auto target = f::VarDesc("out3"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 6UL + 1); f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); f::OpDesc *grad_op2 = block->AllOps()[5]; EXPECT_EQ(grad_op2->Type(), "mul_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 4UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); EXPECT_EQ(grad_op2->Input("X"), std::vector({"out1"})); EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); EXPECT_EQ(grad_op2->Input("Out"), std::vector({"out2"})); EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out2")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); f::OpDesc *grad_op3 = block->AllOps()[4]; EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out3")})); EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), std::vector({f::GradVarName("out2")})); EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); EXPECT_EQ(var_to_grad.size(), 7UL); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); EXPECT_EQ(var_to_grad.at("out1"), f::GradVarInfo(f::GradVarName("out1"), 0, 5)); EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); EXPECT_EQ(var_to_grad.at("out2"), f::GradVarInfo(f::GradVarName("out2"), 0, 4)); EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); EXPECT_TRUE(block->HasVar(f::GradVarName("out2"))); EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); } TEST(Backward, intermedia_var_no_grad) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"x2"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out2"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); f::OpDesc *op4 = block->AppendOp(); op4->SetType("mul"); op4->SetInput("X", {"out1"}); op4->SetInput("Y", {"out3"}); op4->SetOutput("Out", {"out4"}); auto target = f::VarDesc("out4"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"out3"}); ASSERT_EQ(block->AllOps().size(), 7UL); f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op1 = block->AllOps()[6]; EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); f::OpDesc *grad_op4 = block->AllOps()[5]; EXPECT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); EXPECT_EQ(grad_op4->Input("Y"), std::vector({"out3"})); EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out4"})); EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out4")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector()); EXPECT_EQ(var_to_grad.size(), 4UL); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6)); EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6)); EXPECT_EQ(var_to_grad.at("out1"), f::GradVarInfo(f::GradVarName("out1"), 0, 5)); EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); } TEST(Backward, var_no_grad) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op1 = block->AppendOp(); op1->SetType("mult_in_out"); op1->SetInput("X", {"x1"}); op1->SetInput("H", {"h1"}); op1->SetOutput("Y", {"y1"}); op1->SetOutput("Z", {"z1"}); f::OpDesc *op2 = block->AppendOp(); op2->SetType("mult_in_out"); op2->SetInput("X", {"y1"}); op2->SetInput("H", {"z1"}); op2->SetOutput("Y", {"y2"}); op2->SetOutput("Z", {"z2"}); auto target = f::VarDesc("z2"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"z1"}); ASSERT_EQ(block->AllOps().size(), 6UL); f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op2 = block->AllOps()[3]; ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op2->InputNames().size(), 6UL); ASSERT_EQ(grad_op2->OutputNames().size(), 2UL); EXPECT_EQ(grad_op2->Input("X"), std::vector({"y1"})); EXPECT_EQ(grad_op2->Input("H"), std::vector({"z1"})); EXPECT_EQ(grad_op2->Input("Y"), std::vector({"y2"})); EXPECT_EQ(grad_op2->Input("Z"), std::vector({"z2"})); EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")), std::vector({f::GradVarName("z2")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("X")), std::vector({f::GradVarName("y1")})); EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector()); f::OpDesc *fill_zero_op = block->AllOps()[4]; ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like"); ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL); ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL); EXPECT_EQ(fill_zero_op->Input("X"), std::vector({"z1"})); EXPECT_EQ(fill_zero_op->Output("Out"), std::vector({std::string("z1") + f::kZeroVarSuffix})); f::OpDesc *grad_op1 = block->AllOps()[5]; ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 6UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); EXPECT_EQ(grad_op1->Input("X"), std::vector({"x1"})); EXPECT_EQ(grad_op1->Input("H"), std::vector({"h1"})); EXPECT_EQ(grad_op1->Input("Y"), std::vector({"y1"})); EXPECT_EQ(grad_op1->Input("Z"), std::vector({"z1"})); EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")), std::vector({f::GradVarName("y1")})); EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")), std::vector({std::string("z1") + f::kZeroVarSuffix})); EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("H")), std::vector({f::GradVarName("h1")})); EXPECT_EQ(var_to_grad.size(), 4UL); EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3)); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5)); EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5)); EXPECT_TRUE(block->HasVar(f::GradVarName("y1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("h1"))); } TEST(Backward, shared_var) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); f::OpDesc *op1 = block->AppendOp(); op1->SetType("rowwise_add"); op1->SetInput("X", {"x1"}); op1->SetInput("b", {"b1"}); op1->SetOutput("Out", {"out1"}); f::OpDesc *op2 = block->AppendOp(); op2->SetType("mul"); op2->SetInput("X", {"out1"}); op2->SetInput("Y", {"y2"}); op2->SetOutput("Out", {"out2"}); f::OpDesc *op3 = block->AppendOp(); op3->SetType("rowwise_add"); op3->SetInput("X", {"out1"}); op3->SetInput("b", {"b3"}); op3->SetOutput("Out", {"out3"}); auto target = f::VarDesc("out3"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, std::unordered_set{}); ASSERT_EQ(block->AllOps().size(), 8UL); f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); f::OpDesc *grad_op3 = block->AllOps()[4]; ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op3->InputNames().size(), 1UL); ASSERT_EQ(grad_op3->OutputNames().size(), 2UL); EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out3")})); EXPECT_EQ(grad_op3->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1") + "@RENAME@0"})); EXPECT_EQ(grad_op3->Output(f::GradVarName("b")), std::vector({f::GradVarName("b3")})); f::OpDesc *grad_op4 = block->AllOps()[5]; ASSERT_EQ(grad_op4->Type(), "mul_grad"); ASSERT_EQ(grad_op4->InputNames().size(), 4UL); ASSERT_EQ(grad_op4->OutputNames().size(), 2UL); EXPECT_EQ(grad_op4->Input("X"), std::vector({"out1"})); EXPECT_EQ(grad_op4->Input("Y"), std::vector({"y2"})); EXPECT_EQ(grad_op4->Input("Out"), std::vector({"out2"})); EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out2")})); EXPECT_EQ(grad_op4->Output(f::GradVarName("X")), std::vector({f::GradVarName("out1") + "@RENAME@1"})); EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector({f::GradVarName("y2")})); f::OpDesc *sum_op = block->AllOps()[6]; ASSERT_EQ(sum_op->Type(), "sum"); ASSERT_EQ(sum_op->InputNames().size(), 1UL); ASSERT_EQ(sum_op->OutputNames().size(), 1UL); EXPECT_EQ(sum_op->Input("X"), std::vector({f::GradVarName("out1") + "@RENAME@0", f::GradVarName("out1") + "@RENAME@1"})); EXPECT_EQ(sum_op->Output("Out"), std::vector({f::GradVarName("out1")})); f::OpDesc *grad_op1 = block->AllOps()[7]; ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad"); ASSERT_EQ(grad_op1->InputNames().size(), 1UL); ASSERT_EQ(grad_op1->OutputNames().size(), 2UL); EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")), std::vector({f::GradVarName("out1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("X")), std::vector({f::GradVarName("x1")})); EXPECT_EQ(grad_op1->Output(f::GradVarName("b")), std::vector({f::GradVarName("b1")})); EXPECT_EQ(var_to_grad.size(), 6UL); EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4)); EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5)); EXPECT_EQ(var_to_grad.at("out1"), f::GradVarInfo(f::GradVarName("out1"), 0, 6)); EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7)); EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7)); EXPECT_TRUE(block->HasVar(f::GradVarName("b3"))); EXPECT_TRUE(block->HasVar(f::GradVarName("y2"))); EXPECT_TRUE(block->HasVar(f::GradVarName("out1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("x1"))); EXPECT_TRUE(block->HasVar(f::GradVarName("b1"))); } TEST(Backward, half_backward) { f::ProgramDesc program; f::BlockDesc *block = program.MutableBlock(0); auto *op1 = block->AppendOp(); op1->SetType("minus"); op1->SetInput("X", {"a"}); op1->SetInput("Y", {"b"}); op1->SetOutput("Out", {"out"}); auto target = f::VarDesc("out"); target.SetShape({1}); size_t forward_len = block->AllOps().size(); auto var_to_grad = AppendBackward(program, target, {"b"}); f::OpDesc *fill_op = block->AllOps()[forward_len]; EXPECT_EQ(fill_op->Type(), "fill_constant"); auto ops = block->AllOps(); ASSERT_EQ(3UL, ops.size()); EXPECT_EQ(var_to_grad.size(), 2UL); EXPECT_EQ(var_to_grad.at("a"), f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1)); }