diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 4d54754cec00dc435000138d4f297af243813fc3..af4d375e314277fa1f0239bf031a39c3d47eace1 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -195,8 +195,7 @@ cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
 cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry proto_desc)
-cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS op_registry proto_desc op_info memory_optimize_helper)
-
+cc_test(inplace_op_inference_test SRCS inplace_op_inference_test.cc DEPS inplace_op_pass op_registry proto_desc op_info memory_optimize_helper pass_builder)
 cc_library(selected_rows SRCS selected_rows.cc DEPS tensor)
 cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows)
diff --git a/paddle/fluid/framework/details/inplace_op_pass.cc b/paddle/fluid/framework/details/inplace_op_pass.cc
index afbda33b0662e7831b7ea0d44dc7ae4ff3694b1c..79150f719e379ca4e2b87d2e7db1b2daeee9aa67 100644
--- a/paddle/fluid/framework/details/inplace_op_pass.cc
+++ b/paddle/fluid/framework/details/inplace_op_pass.cc
@@ -156,7 +156,6 @@ void InplacePass::ApplyImpl(ir::Graph* graph) const {
       continue;
     TryInplaceOpInputOutput(op, graph);
   }
-  // graph->ResolveHazard(var_nodes_);
 }
 
 void InplacePass::InplaceModifyDesc(const std::string& var,
@@ -168,7 +167,7 @@ void InplacePass::InplaceModifyDesc(const std::string& var,
     auto* op_desc = op->Op();
     op_desc->RenameInput(var, cache_var);
     op_desc->RenameOutput(var, cache_var);
-    if (op_desc->Block()->HasVar(var)) op_desc->Block()->RemoveVar(var);
+    op_desc->Flush();
   }
 }
 
@@ -265,8 +264,6 @@ void InplacePass::WithdrawModify(const NodeSwapQueue& nodes,
 void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
                                           ir::Graph* graph) const {
   VLOG(4) << "Try to inplace op " << op->Name();
-  // PADDLE_ENFORCE(op->Op() != nullptr && op->Op()->Block() != nullptr,
-  //                "op_desc is nullptr");
   // some pre-requirements need to be met if the op is to be inplaced.
   PADDLE_ENFORCE(op->Op() != nullptr, "op_desc is nullptr");
@@ -446,19 +443,20 @@ bool GraphView::CheckDeps(ir::Node* var, ir::Node* current_op) const {
 
 // check if op2 depends on op1's output
 bool GraphView::CheckOpDeps(ir::Node* op1, ir::Node* op2) const {
-  auto print_op = [&](ir::Node* op, const char* name) {
-    std::ostringstream os;
-    os << " " << name << " : " << op->Name() << " ";
-    os << "Input args : ";
-    for (auto& arg : op->inputs) os << arg->Name() << " ";
-    os << "Output args : ";
-    for (auto& arg : op->outputs) os << arg->Name() << " ";
-    os << "Level : " << op_level_.at(op);
-    VLOG(4) << os.str();
-  };
-  print_op(op1, "OP1");
-  print_op(op2, "OP2");
-
+  if (VLOG_IS_ON(4)) {
+    auto print_op = [&](ir::Node* op, const char* name) {
+      std::ostringstream os;
+      os << " " << name << " : " << op->Name() << " ";
+      os << "Input args : ";
+      for (auto& arg : op->inputs) os << arg->Name() << " ";
+      os << "Output args : ";
+      for (auto& arg : op->outputs) os << arg->Name() << " ";
+      os << "Level : " << op_level_.at(op);
+      VLOG(4) << os.str();
+    };
+    print_op(op1, "OP1");
+    print_op(op2, "OP2");
+  }
   if (op1 == op2) return true;
   if (op_level_.at(op1) >= op_level_.at(op2)) return false;
diff --git a/paddle/fluid/framework/details/memory_optimize_helper_test.cc b/paddle/fluid/framework/details/memory_optimize_helper_test.cc
index 453943af0f123a08b870f11dacb78a5fbd954a56..3fb02f69b1bb65a74a2e5f69e9de7994b4d012db 100644
--- a/paddle/fluid/framework/details/memory_optimize_helper_test.cc
+++ b/paddle/fluid/framework/details/memory_optimize_helper_test.cc
@@ -142,16 +142,15 @@ TEST(OrderedSet, FindBestFitNode) {
   for (auto& node : nodes) {
     pool.Insert(node.get());
   }
-  // FIXME(liuwei1031) this API has changed,
-  // disable these tests temporarily
-  // FindNextBestFitNode
-  // auto* n = nodes[0].get();
-  // auto* cache = pool.FindBestFitNode(n);
-  // PADDLE_ENFORCE(cache->Name() == "a");
-  // cache = pool.FindNextBestFitNode(n, cache);
-  // PADDLE_ENFORCE(cache->Name() == "c");
-  // cache = pool.FindNextBestFitNode(n, cache);
-  // PADDLE_ENFORCE(cache->Name() == "b");
+
+  auto* n = nodes[0].get();
+  auto* cache = pool.FindBestFitNode(n);
+  ASSERT_TRUE(cache->Name() == "a" || cache->Name() == "c");
+  auto* cache_b = pool.FindNextBestFitNode(n, cache);
+  ASSERT_TRUE(cache_b->Name() != cache->Name());
+  ASSERT_TRUE(cache_b->Name() == "a" || cache_b->Name() == "c");
+  cache = pool.FindNextBestFitNode(n, cache_b);
+  ASSERT_TRUE(cache == nullptr);
 }
 
 }  // namespace details
diff --git a/paddle/fluid/framework/inplace_op_inference_test.cc b/paddle/fluid/framework/inplace_op_inference_test.cc
index c93e562955fb36ddc4363fac862f3942758af35d..a9b3b889229ee46bf66063c8381bdd02c7229cbd 100644
--- a/paddle/fluid/framework/inplace_op_inference_test.cc
+++ b/paddle/fluid/framework/inplace_op_inference_test.cc
@@ -12,9 +12,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
+#include <algorithm>
 #include <iterator>
+#include <memory>
 #include <string>
+#include <vector>
 #include "gtest/gtest.h"
+#include "paddle/fluid/framework/details/inplace_op_pass.h"
+#include "paddle/fluid/framework/ir/pass_builder.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -165,118 +170,147 @@ REGISTER_OPERATOR(multi_out_grad, f::NOP, f::MultiOutGradInplaceInToOut,
 namespace paddle {
 namespace framework {
 
-// TEST(InferInplace, SingleOpInplaceInToOut) {
-//   ProgramDesc prog;
-//   auto* op = prog.MutableBlock(0)->AppendOp();
-//   op->SetType("single_op");
-//   op->SetInput("X", {"test2_a", "test2_b", "test2_c"});
-//   op->SetOutput("Out", {"test2_out"});
-//
-//   prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128});
-//   prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_out");
-//   prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 128, 128});
-//
-//   auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
-//   auto in_to_outs = infer_inplace(*op);
-//   EXPECT_EQ(in_to_outs.size(), 1ul);
-//   auto it = in_to_outs.begin();
-//   EXPECT_EQ(it->first, "test2_a");
-//   EXPECT_EQ(it->second, "test2_out");
-// }
-//
-// TEST(InferInplace, SingleGradOpInplaceInToOut) {
-//   ProgramDesc prog;
-//   auto* op = prog.MutableBlock(0)->AppendOp();
-//   op->SetType("single_op_grad");
-//   op->SetInput(GradVarName("Out"), {"test2_out"});
-//   op->SetOutput(GradVarName("X"), {"test2_a", "test2_b", "test2_c"});
-//
-//   prog.MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_a")->SetShape({32, 16, 1024, 1024});
-//   prog.MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("test2_out");
-//   prog.MutableBlock(0)->Var("test2_out")->SetShape({32, 16, 1024, 1024});
-//
-//   auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_;
-//   auto in_to_outs = infer_inplace(*op);
-//   EXPECT_EQ(in_to_outs.size(), 1ul);
-//   auto it = in_to_outs.begin();
-//   EXPECT_EQ(it->first, "test2_out");
-//   EXPECT_EQ(it->second, "test2_a");
-// }
-//
-// TEST(InferInplace, MultiOutInplaceInToOut) {
-//   ProgramDesc prog;
-//   auto* op = prog.MutableBlock(0)->AppendOp();
-//   op->SetType("multi_out_op");
-//   op->SetInput("X", {"a0", "a1"});
-//   op->SetInput("Y", {"b0"});
-//   op->SetInput("Z", {"c0", "c1"});
-//   op->SetOutput("Out", {"o0"});
-//   op->SetOutput("YOut", {"y0"});
-//   op->SetOutput("ZOut", {"z0"});
-//
-//   prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR);
-//   prog.MutableBlock(0)->Var("o0");
-//   prog.MutableBlock(0)->Var("y0");
-//   prog.MutableBlock(0)->Var("z0");
-//   prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024});
-//   prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024});
-//   prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024});
-//   prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024});
-//   prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024});
-// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// EXPECT_EQ(in_to_outs.size(), 3ul); -// std::unordered_map expects = { -// {"a0", "o0"}, {"b0", "y0"}, {"c0", "z0"}, -// }; -// EXPECT_TRUE(expects == in_to_outs); -// } -// -// TEST(InferInplace, MultiGradInplaceInToOut) { -// ProgramDesc prog; -// auto* op = prog.MutableBlock(0)->AppendOp(); -// op->SetType("multi_out_grad"); -// op->SetInput(GradVarName("Out"), {"o0"}); -// op->SetInput(GradVarName("YOut"), {"y0"}); -// op->SetInput(GradVarName("ZOut"), {"z0"}); -// op->SetOutput(GradVarName("X"), {"a0", "a1"}); -// op->SetOutput(GradVarName("Y"), {"b0"}); -// op->SetOutput(GradVarName("Z"), {"c0", "c1"}); -// -// prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); -// prog.MutableBlock(0)->Var("o0"); -// prog.MutableBlock(0)->Var("y0"); -// prog.MutableBlock(0)->Var("z0"); -// prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); -// prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); -// -// auto& infer_inplace = OpInfoMap::Instance().Get(op->Type()).infer_inplace_; -// auto in_to_outs = infer_inplace(*op); -// -// EXPECT_EQ(in_to_outs.size(), 3ul); -// std::unordered_map expects = { -// {"o0", "a0"}, {"y0", "b0"}, {"z0", "c0"}, -// }; -// EXPECT_TRUE(expects == in_to_outs); -// } +void FakeSuccData(ProgramDesc* prog) { // NOLINT + prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); + prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_out"); + prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 32, 128, 128}); +} + +void FakeNoInplaceData(ProgramDesc* prog) { // NOLINT + prog->MutableBlock(0)->Var("test2_a")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_a")->SetShape({32, 64, 128, 128}); + prog->MutableBlock(0)->Var("test2_b")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_c")->SetType(proto::VarType::LOD_TENSOR); + prog->MutableBlock(0)->Var("test2_out"); + prog->MutableBlock(0)->Var("test2_out")->SetShape({64, 31, 128, 128}); +} + +ir::Node* GetNodeFromGraph(ir::Graph* g, std::string name) { + ir::Node* op_node = nullptr; + for (auto& item : g->Nodes()) { + if (item->Name() == name) { + op_node = item; + break; + } + } + return op_node; +} + +std::unique_ptr test_SingleOpInplaceInToOut( + std::unique_ptr g) { + std::unique_ptr pass(new details::InplacePass()); + ir::Node* op_node = GetNodeFromGraph(g.get(), "single_op"); + EXPECT_NE(op_node, nullptr); + pass->Apply(g.get()); + return g; +} + +TEST(InferInplace, SingleOpInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("single_op"); + op->SetInput("X", {"test2_a", "test2_b", 
"test2_c"}); + op->SetOutput("Out", {"test2_out"}); + + FakeSuccData(&prog); + std::unique_ptr g(new ir::Graph(prog)); + g = test_SingleOpInplaceInToOut(std::move(g)); + auto op_node = GetNodeFromGraph(g.get(), "single_op"); + + EXPECT_EQ(op_node->outputs[0]->Name(), "test2_a"); +} + +TEST(InferInplace, SingleOpInplaceInToOutNoInplace) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("single_op"); + op->SetInput("X", {"test2_a", "test2_b", "test2_c"}); + op->SetOutput("Out", {"test2_out"}); + + FakeNoInplaceData(&prog); + std::unique_ptr g(new ir::Graph(prog)); + g = test_SingleOpInplaceInToOut(std::move(g)); + auto op_node = GetNodeFromGraph(g.get(), "single_op"); + + EXPECT_EQ(op_node->outputs[0]->Name(), "test2_out"); +} + +TEST(InferInplace, MultiOutInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("multi_out_op"); + op->SetInput("X", {"a0", "a1"}); + op->SetInput("Y", {"b0"}); + op->SetInput("Z", {"c0", "c1"}); + op->SetOutput("Out", {"o0"}); + op->SetOutput("YOut", {"y0"}); + op->SetOutput("ZOut", {"z0"}); + + prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("o0"); + prog.MutableBlock(0)->Var("y0"); + prog.MutableBlock(0)->Var("z0"); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 16, 1024, 1024}); + + std::unique_ptr g(new ir::Graph(prog)); + std::unique_ptr pass(new details::InplacePass()); + pass->Apply(g.get()); + auto op_node = GetNodeFromGraph(g.get(), "multi_out_op"); + ASSERT_TRUE(op_node != nullptr); + EXPECT_EQ(op_node->outputs[0]->Name(), "a0"); + EXPECT_EQ(op_node->outputs[1]->Name(), "b0"); + EXPECT_EQ(op_node->outputs[2]->Name(), "c0"); +} + +TEST(InferInplace, MultiGradInplaceInToOut) { + ProgramDesc prog; + auto* op = prog.MutableBlock(0)->AppendOp(); + op->SetType("multi_out_grad"); + op->SetInput(GradVarName("Out"), {"o0"}); + op->SetInput(GradVarName("YOut"), {"y0"}); + op->SetInput(GradVarName("ZOut"), {"z0"}); + op->SetOutput(GradVarName("X"), {"a0", "a1"}); + op->SetOutput(GradVarName("Y"), {"b0"}); + op->SetOutput(GradVarName("Z"), {"c0", "c1"}); + + prog.MutableBlock(0)->Var("a0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("b0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c0")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("c1")->SetType(proto::VarType::LOD_TENSOR); + prog.MutableBlock(0)->Var("o0"); + prog.MutableBlock(0)->Var("y0"); + prog.MutableBlock(0)->Var("z0"); + prog.MutableBlock(0)->Var("a0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("b0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("c0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("o0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("y0")->SetShape({32, 16, 1024, 1024}); + prog.MutableBlock(0)->Var("z0")->SetShape({32, 15, 1024, 1024}); + + std::unique_ptr g(new ir::Graph(prog)); + std::unique_ptr 
+  pass->Apply(g.get());
+  auto op_node = GetNodeFromGraph(g.get(), "multi_out_grad");
+  ASSERT_TRUE(op_node != nullptr);
+  EXPECT_EQ(op_node->outputs[0]->Name(), "o0");
+  EXPECT_EQ(op_node->outputs[2]->Name(), "y0");
+  EXPECT_EQ(op_node->outputs[3]->Name(), "c0");
+
+  std::unordered_map<std::string, std::string> expects = {
+      {"o0", "a0"}, {"y0", "b0"}, {"z0", "c0"},
+  };
+}
 
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index b0ac73f9f52076a9303417bc1b19208ba6e6f2ec..e6628da9f360ea45e31d6b905065109f9664a17f 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -56,8 +56,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
   }
 }
 
-static DDim GetDims(const Scope& scope, const std::string& name,
-                    bool get_actual_dim = false) {
+static DDim GetDimsDebug(const Scope& scope, const std::string& name,
+                         bool get_actual_dim = false) {
   Variable* var = scope.FindVar(name);
   if (var == nullptr) {
     return DDim({-1});
@@ -65,9 +65,9 @@ static DDim GetDims(const Scope& scope, const std::string& name,
 
   if (var->IsType<LoDTensor>()) {
     const LoDTensor& tensor = var->Get<LoDTensor>();
-    // if (UNLIKELY(!tensor.IsInitialized())) {
-    //   return DDim({-1});
-    // }
+    if (UNLIKELY(!tensor.IsInitialized())) {
+      return DDim({-1});
+    }
     return tensor.dims();
   } else if (var->IsType<SelectedRows>()) {
     if (get_actual_dim) {
@@ -123,7 +123,7 @@ static int GetRowSize(const Scope& scope, const std::string& name) {
   return -1;
 }
 
-static LoD GetLoD(const Scope& scope, const std::string& name) {
+static LoD GetLoDDebug(const Scope& scope, const std::string& name) {
   Variable* var = scope.FindVar(name);
   auto default_lod = LoD({{}});
 
@@ -133,9 +133,9 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
 
  if (var->IsType<LoDTensor>()) {
     const LoDTensor& tensor = var->Get<LoDTensor>();
-    // if (UNLIKELY(!tensor.IsInitialized())) {
-    //   return default_lod;
-    // }
+    if (UNLIKELY(!tensor.IsInitialized())) {
+      return default_lod;
+    }
     return tensor.lod();
   } else {
     return default_lod;
@@ -274,8 +274,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
         }
         std::string dtype = GetDtype(*scope, var_name);
         ss << ":" << dtype;
-        ss << "[" << GetDims(*scope, var_name, true) << "]";
-        ss << "(" << GetLoD(*scope, var_name) << ")";
+        ss << "[" << GetDimsDebug(*scope, var_name, true) << "]";
+        ss << "(" << GetLoDDebug(*scope, var_name) << ")";
       }
     }
     if (i != input.second.size() - 1) {
@@ -305,8 +305,8 @@ std::string OperatorBase::DebugStringEx(const Scope* scope) const {
         }
         std::string dtype = GetDtype(*scope, output.second[i]);
         ss << ":" << dtype;
-        ss << "[" << GetDims(*scope, var_name, true) << "]";
-        ss << "(" << GetLoD(*scope, var_name) << ")";
+        ss << "[" << GetDimsDebug(*scope, var_name, true) << "]";
+        ss << "(" << GetLoDDebug(*scope, var_name) << ")";
      }
    }
    if (i != output.second.size() - 1) {
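
Usage note: the re-enabled tests above exercise details::InplacePass directly on an ir::Graph instead of calling the op's infer_inplace_ hook. The sketch below shows that pattern in isolation. It is illustrative only and not part of the patch: it assumes an operator named "single_op" registered with an inplace-inference rule exactly as in inplace_op_inference_test.cc, and RunInplacePassSketch is a hypothetical helper name.

#include <memory>

#include "paddle/fluid/framework/details/inplace_op_pass.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/program_desc.h"

namespace f = paddle::framework;

// Build a one-op program, lower it to an ir::Graph, run InplacePass, and
// observe whether the output variable node was renamed to reuse its input.
void RunInplacePassSketch() {
  f::ProgramDesc prog;
  auto* op = prog.MutableBlock(0)->AppendOp();
  op->SetType("single_op");  // assumes the test file's registration
  op->SetInput("X", {"a"});
  op->SetOutput("Out", {"out"});
  // An equal-sized output shape is one of the pre-requirements checked by
  // TryInplaceOpInputOutput before the rewrite fires; compare FakeSuccData
  // ({64, 32, ...}, same numel) with FakeNoInplaceData ({64, 31, ...}).
  prog.MutableBlock(0)->Var("a")->SetType(f::proto::VarType::LOD_TENSOR);
  prog.MutableBlock(0)->Var("a")->SetShape({32, 64});
  prog.MutableBlock(0)->Var("out");
  prog.MutableBlock(0)->Var("out")->SetShape({32, 64});

  std::unique_ptr<f::ir::Graph> g(new f::ir::Graph(prog));
  std::unique_ptr<f::details::InplacePass> pass(new f::details::InplacePass());
  pass->Apply(g.get());
  // As in TEST(InferInplace, SingleOpInplaceInToOut): after the pass the op
  // node's output carries the input's name ("a") when the in-place rewrite
  // applied, and keeps "out" otherwise.
}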