diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index c9d55fbf525a1a476ac469e8e57462169a7db2da..5736a5c4e232698085936303d1f23760649f8245 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -28,6 +28,38 @@ namespace paddle { namespace framework { namespace ir { +/* + * The graph is a Directed Acyclic Single Static Assignment Graph. + * + * In more detail, the following properties must hold: + * + * The graph shouldn't contain cycles. Each node is a black-box to the graph + * so the node itself could be a loop operator. + * + * Each Variable-type node has only one input (thus single static assignment). + * + * The output/input of operator is variable and the output/input of variable + * is operator. + * + * The following data hazards in Program are addressed in the Graph: + * + * Write-After-Read + * a = op1(x) + * x = op2(b) + * A control-dependency connection is created between op1 and op2 such that + * op1->op2, so as to ensure correct order. + * + * Write-After-Write + * x = op1(a) + * x = op2(b) + * A control-dependency connection is created between op1 and op2 such that + * op1->op2, so as to ensure correct order. + * + * Other properties currently hold, but are not enforced yet: + * + * Variable-type node (not control dep) with the same variable name share + * the same underlying VarDesc. 
+ */
 class Graph {
  public:
   explicit Graph(const ProgramDesc &program);
diff --git a/paddle/fluid/framework/ir/graph_test.cc b/paddle/fluid/framework/ir/graph_test.cc
index f9e6bdf3625bdced9d1a9195a979b0f46016d8bf..b1b8d1c586c98a327a8e5b4890ced00022155e6b 100644
--- a/paddle/fluid/framework/ir/graph_test.cc
+++ b/paddle/fluid/framework/ir/graph_test.cc
@@ -36,7 +36,7 @@ class SumOpMaker : public OpProtoAndCheckerMaker {
  public:
   void Make() {
     AddInput("X", "").AsDuplicable();
-    AddOutput("Out", "");
+    AddOutput("Out", "").AsDuplicable();
     AddComment("");
   }
 };
@@ -59,11 +59,28 @@ class SumOpVarTypeInference : public VarTypeInference {
     block->Var(out_var_name)->SetType(default_var_type);
   }
 };
+
+class DummyOpMaker : public OpProtoAndCheckerMaker {
+ public:
+  void Make() {
+    AddInput("X", "").AsDuplicable();
+    AddOutput("Out", "").AsDuplicable();
+    AddComment("");
+  }
+};
+
+class DummyOpVarTypeInference : public VarTypeInference {
+ public:
+  void operator()(const OpDesc &op_desc, BlockDesc *block) const override {}
+};
 }  // namespace framework
 }  // namespace paddle
 
 REGISTER_OPERATOR(sum, paddle::framework::NOP, paddle::framework::SumOpMaker,
                   paddle::framework::SumOpVarTypeInference);
+REGISTER_OPERATOR(dummy, paddle::framework::NOP,
+                  paddle::framework::DummyOpMaker,
+                  paddle::framework::DummyOpVarTypeInference);
 REGISTER_OPERATOR(sum_without_infer_var_type, paddle::framework::NOP,
                   paddle::framework::SumOpMaker);
 
@@ -110,5 +127,85 @@ TEST(GraphTest, Basic) {
   }
   ASSERT_EQ(nodes.size(), 5);
 }
+
+TEST(GraphTest, WriteAfterRead) {
+  // "sum" reads a, then "dummy" writes a: expect a sum->dummy control dep.
+  ProgramDesc prog;
+  auto *op = prog.MutableBlock(0)->AppendOp();
+  op->SetType("sum");
+  op->SetInput("X", {"a"});
+  op->SetOutput("Out", {"b"});
+  op->SetAttr("op_role", 1);
+
+  op = prog.MutableBlock(0)->AppendOp();
+  op->SetType("dummy");
+  op->SetInput("X", {"c"});
+  op->SetOutput("Out", {"a"});
+  op->SetAttr("op_role", 1);
+
+  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
+
+  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
+  ir::Node *control_dep1 = nullptr;
+  ir::Node *control_dep2 = nullptr;
+  for (ir::Node *n : g->Nodes()) {
+    if (n->Name() == "sum") {
+      // Check the size first so the [1] accesses below are known in range.
+      ASSERT_EQ(n->outputs.size(), 2);
+      ASSERT_EQ(n->outputs[0]->Name(), "b");
+      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
+      control_dep1 = n->outputs[1];
+    }
+    if (n->Name() == "dummy") {
+      ASSERT_EQ(n->inputs.size(), 2);
+      ASSERT_EQ(n->inputs[0]->Name(), "c");
+      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
+      control_dep2 = n->inputs[1];
+    }
+  }
+  ASSERT_EQ(control_dep1, control_dep2);
+}
+
+TEST(GraphTest, WriteAfterWrite) {
+  // "sum" and "dummy" both write b: expect a sum->dummy control dep.
+  ProgramDesc prog;
+  auto *op = prog.MutableBlock(0)->AppendOp();
+  op->SetType("sum");
+  op->SetInput("X", {"a"});
+  op->SetOutput("Out", {"b"});
+  op->SetAttr("op_role", 1);
+
+  op = prog.MutableBlock(0)->AppendOp();
+  op->SetType("dummy");
+  op->SetInput("X", {"c"});
+  op->SetOutput("Out", {"b"});
+  op->SetAttr("op_role", 1);
+
+  prog.MutableBlock(0)->Var("a")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(0)->Var("b")->SetType(proto::VarType::LOD_TENSOR);
+  prog.MutableBlock(0)->Var("c")->SetType(proto::VarType::LOD_TENSOR);
+
+  std::unique_ptr<ir::Graph> g(new ir::Graph(prog));
+  ir::Node *control_dep1 = nullptr;
+  ir::Node *control_dep2 = nullptr;
+  for (ir::Node *n : g->Nodes()) {
+    if (n->Name() == "sum") {
+      ASSERT_EQ(n->outputs.size(), 2);
+      ASSERT_EQ(n->outputs[0]->Name(), "b");
+      ASSERT_TRUE(ir::IsControlDepVar(*n->outputs[1]));
+      control_dep1 = n->outputs[1];
+    }
+    if (n->Name() == "dummy") {
+      ASSERT_EQ(n->inputs.size(), 2);
+      ASSERT_EQ(n->inputs[0]->Name(), "c");
+      ASSERT_TRUE(ir::IsControlDepVar(*n->inputs[1]));
+      control_dep2 = n->inputs[1];
+    }
+  }
+  // Compared after the loop so the result does not depend on visit order.
+  ASSERT_EQ(control_dep1, control_dep2);
+}
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/platform/profiler.cc
b/paddle/fluid/platform/profiler.cc index d0286719b9ea1aa671294f519051ac1e269c4e93..652a6ec7a4e2e823b28f39b449570cd375e88e18 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -270,12 +270,13 @@ struct EventItem { double min_time; double max_time; double ave_time; + float ratio; }; // Print results void PrintProfiler(const std::vector>& events_table, const std::string& sorted_domain, const size_t name_width, - const size_t data_width) { + const size_t data_width, double total) { // Output header information std::cout << "\n------------------------->" << " Profiling Report " @@ -300,7 +301,8 @@ void PrintProfiler(const std::vector>& events_table, std::cout << std::setw(name_width) << "Event" << std::setw(data_width) << "Calls" << std::setw(data_width) << "Total" << std::setw(data_width) << "Min." << std::setw(data_width) - << "Max." << std::setw(data_width) << "Ave." << std::endl; + << "Max." << std::setw(data_width) << "Ave." + << std::setw(data_width) << "Ratio." << std::endl; for (size_t i = 0; i < events_table.size(); ++i) { for (size_t j = 0; j < events_table[i].size(); ++j) { const EventItem& event_item = events_table[i][j]; @@ -309,7 +311,9 @@ void PrintProfiler(const std::vector>& events_table, << std::setw(data_width) << event_item.total_time << std::setw(data_width) << event_item.min_time << std::setw(data_width) << event_item.max_time - << std::setw(data_width) << event_item.ave_time << std::endl; + << std::setw(data_width) << event_item.ave_time + << std::setw(data_width) << event_item.total_time / total + << std::endl; } } std::cout << std::endl; @@ -359,6 +363,7 @@ void ParseEvents(const std::vector>& events, std::vector> events_table; size_t max_name_width = 0; + double total = 0.; // the total time for (size_t i = 0; i < events.size(); i++) { std::list pushed_events; std::vector event_items; @@ -379,6 +384,7 @@ void ParseEvents(const std::vector>& events, g_state == ProfilerState::kAll) ? 
rit->CudaElapsedMs(events[i][j]) : rit->CpuElapsedMs(events[i][j]); + total += event_time; std::string event_name = "thread" + std::to_string(rit->thread_id()) + "::" + rit->name(); @@ -387,7 +393,8 @@ void ParseEvents(const std::vector>& events, if (event_idx.find(event_name) == event_idx.end()) { event_idx[event_name] = event_items.size(); EventItem event_item = {event_name, 1, event_time, - event_time, event_time, event_time}; + event_time, event_time, event_time, + 0.}; event_items.push_back(event_item); } else { int index = event_idx[event_name]; @@ -431,7 +438,7 @@ void ParseEvents(const std::vector>& events, } // Print report - PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12); + PrintProfiler(events_table, sorted_domain, max_name_width + 4, 12, total); } void DisableProfiler(EventSortingKey sorted_key,