diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 2c62d4ed6b0e61d4a36b61cda40fa539285ccb14..0668ff43c8192f53ff7e05abaeb575e2b78b1de4 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -107,11 +107,11 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
 cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
 
 if(WITH_DISTRIBUTE)
-  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr)
+  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method sendrecvop_grpc cares grpc++_unsecure grpc_unsecure gpr graph_to_program_pass)
   set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
   set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
 else()
-  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method)
+  cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass)
 endif()
 
 if (NOT WIN32)
diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index da0955a9a000e0d0bff3fe9d0bc3bd25171be3d2..9300573d7fbc7648f7e30ac97dc387e7249da1ff 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -3,6 +3,7 @@ cc_library(graph SRCS graph.cc DEPS node)
 cc_library(graph_helper SRCS graph_helper.cc DEPS graph)
 cc_library(pass SRCS pass.cc DEPS graph node graph_helper)
 cc_library(graph_viz_pass SRCS graph_viz_pass.cc DEPS graph pass graph_helper)
+cc_library(graph_to_program_pass SRCS graph_to_program_pass.cc DEPS graph pass graph_helper)
 cc_library(graph_traits SRCS graph_traits.cc DEPS graph)
 cc_library(graph_pattern_detecter SRCS graph_pattern_detecter.cc DEPS graph graph_helper graph_traits)
 cc_library(fc_fuse_pass SRCS fc_fuse_pass.cc DEPS graph graph_pattern_detecter)
@@ -12,5 +13,6 @@ cc_library(infer_clean_graph_pass SRCS infer_clean_graph_pass.cc DEPS graph pass
 cc_test(pass_test SRCS pass_test.cc DEPS graph pass graph_helper)
 cc_test(graph_test SRCS graph_test.cc DEPS graph graph_helper op_registry)
 cc_test(graph_helper_test SRCS graph_helper_test.cc DEPS graph graph_helper op_registry)
+cc_test(graph_to_program_pass_test SRCS graph_to_program_pass_test.cc DEPS graph_to_program_pass)
 cc_test(test_graph_pattern_detecter SRCS graph_pattern_detecter_tester.cc DEPS graph_pattern_detecter)
 cc_test(test_fc_fuse_pass SRCS fc_fuse_pass_tester.cc DEPS fc_fuse_pass graph_pattern_detecter graph pass graph_traits framework_proto)
diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.cc b/paddle/fluid/framework/ir/graph_to_program_pass.cc
new file mode 100644
index 0000000000000000000000000000000000000000..414d8f79b15de091c62af5fe099ffae144156e4e
--- /dev/null
+++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc
@@ -0,0 +1,65 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "paddle/fluid/framework/ir/graph.h"
+#include "paddle/fluid/framework/ir/graph_helper.h"
+
+#include "paddle/fluid/framework/program_desc.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+std::unique_ptr<Graph> GraphToProgramPass::ApplyImpl(
+    std::unique_ptr<Graph> graph) const {
+  ProgramDesc& program = Get<ProgramDesc>("program");
+
+  std::unique_ptr<proto::ProgramDesc> program_pb(
+      new proto::ProgramDesc(*program.Proto()));
+
+  auto block = program_pb->mutable_blocks(kRootBlockIndex);
+  block->clear_vars();
+  std::unordered_set<std::string> visited_vars;
+  for (ir::Node* n : graph->Nodes()) {
+    if (n->NodeType() == ir::Node::Type::kVariable) {
+      if (n->Var() && visited_vars.count(n->Var()->Name()) == 0) {
+        visited_vars.insert(n->Var()->Name());
+        block->add_vars()->MergeFrom(*n->Var()->Proto());
+      }
+    }
+  }
+
+  block->clear_ops();
+  std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
+  for (ir::Node* n : nodes) {
+    if (!n->Op()) {
+      continue;
+    }
+    block->add_ops()->MergeFrom(*n->Op()->Proto());
+  }
+
+  program.CopyFrom(*program_pb);
+  return graph;
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+REGISTER_PASS(graph_to_program_pass, paddle::framework::ir::GraphToProgramPass);
diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.h b/paddle/fluid/framework/ir/graph_to_program_pass.h
new file mode 100644
index 0000000000000000000000000000000000000000..124ec5a8e771fb768b31fa2e9f5143db96154490
--- /dev/null
+++ b/paddle/fluid/framework/ir/graph_to_program_pass.h
@@ -0,0 +1,30 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/framework/ir/pass.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+class GraphToProgramPass : public Pass {
+ protected:
+  std::unique_ptr<Graph> ApplyImpl(std::unique_ptr<Graph> graph) const override;
+};
+
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
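The new pass emits every variable node once (deduplicating by name) and serializes the operator nodes in the order produced by TopologySortOperations, so any graph rewrite performed between construction and this pass is reflected in the resulting ProgramDesc. Below is a minimal round-trip sketch, mirroring the Compile() helper this patch adds to test_helper.h; it assumes the graph_to_program_pass library is linked in and registered (USE_PASS), and RoundTrip is an illustrative name, not part of the patch:

// Sketch: ProgramDesc -> ir::Graph -> (optimization passes) -> ProgramDesc.
#include <memory>
#include <utility>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/program_desc.h"

void RoundTrip(paddle::framework::ProgramDesc* program) {
  // Build the IR graph from the program's root block.
  std::unique_ptr<paddle::framework::ir::Graph> graph(
      new paddle::framework::ir::Graph(*program));
  // ... apply rewrites to `graph` here ...
  // Serialize the (possibly rewritten) graph back into `program`.
  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
      "graph_to_program_pass");
  pass->SetNotOwned<paddle::framework::ProgramDesc>("program", program);
  pass->Apply(std::move(graph));  // rewrites block 0's vars and ops in place
}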
diff --git a/paddle/fluid/framework/ir/graph_to_program_pass_test.cc b/paddle/fluid/framework/ir/graph_to_program_pass_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..88ad17a0c65137d62484ec340f48e1a76ff893fe
--- /dev/null
+++ b/paddle/fluid/framework/ir/graph_to_program_pass_test.cc
@@ -0,0 +1,110 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
+
+#include <string>
+#include <unordered_set>
+#include "gtest/gtest.h"
+#include "paddle/fluid/framework/program_desc.h"
+
+namespace paddle {
+namespace framework {
+namespace ir {
+
+void BuildNoCircleGraph(Graph* g) {
+  OpDesc op1;
+  op1.SetType("op1");
+  OpDesc op2;
+  op2.SetType("op2");
+  OpDesc op3;
+  op3.SetType("op3");
+  OpDesc op4;
+  op4.SetType("op4");
+  OpDesc op5;
+  op5.SetType("op5");
+  VarDesc var1("var1");
+  VarDesc var2("var2");
+  VarDesc var3("var3");
+  VarDesc var4("var4");
+
+  ir::Node* o1 = g->CreateOpNode(&op1);
+  ir::Node* o2 = g->CreateOpNode(&op2);
+  ir::Node* o3 = g->CreateOpNode(&op3);
+  ir::Node* o4 = g->CreateOpNode(&op4);
+  ir::Node* o5 = g->CreateOpNode(&op5);
+  ir::Node* v1 = g->CreateVarNode(&var1);
+  ir::Node* v2 = g->CreateVarNode(&var2);
+  ir::Node* v3 = g->CreateVarNode(&var3);
+  ir::Node* v4 = g->CreateVarNode(&var4);
+
+  // o1->v1->o2
+  o1->outputs.push_back(v1);
+  o2->inputs.push_back(v1);
+  v1->inputs.push_back(o1);
+  v1->outputs.push_back(o2);
+  // o2->v2->o3
+  // o2->v2->o4
+  o2->outputs.push_back(v2);
+  o3->inputs.push_back(v2);
+  o4->inputs.push_back(v2);
+  v2->outputs.push_back(o3);
+  v2->outputs.push_back(o4);
+  v2->inputs.push_back(o2);
+  // o2->v3->o5
+  o2->outputs.push_back(v3);
+  o5->inputs.push_back(v3);
+  v3->inputs.push_back(o2);
+  v3->outputs.push_back(o5);
+  // o3->v4->o5
+  o3->outputs.push_back(v4);
+  o5->inputs.push_back(v4);
+  v4->inputs.push_back(o3);
+  v4->outputs.push_back(o5);
+}
+
+TEST(GraphToProgramPass, Basic) {
+  ProgramDesc prog;
+  std::unique_ptr<Graph> g(new Graph(prog));
+  BuildNoCircleGraph(g.get());
+
+  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
+      "graph_to_program_pass");
+
+  ProgramDesc compiled_prog;
+  pass->SetNotOwned<ProgramDesc>("program", &compiled_prog);
+  pass->Apply(std::move(g));
+  std::vector<OpDesc*> ops = compiled_prog.Block(0).AllOps();
+  EXPECT_EQ(ops[0]->Type(), "op1");
+  EXPECT_EQ(ops[1]->Type(), "op2");
+  if (ops[2]->Type() == "op3") {
+    EXPECT_EQ(ops[3]->Type(), "op4");
+  } else if (ops[2]->Type() == "op4") {
+    EXPECT_EQ(ops[3]->Type(), "op3");
+  }
+  EXPECT_EQ(ops[4]->Type(), "op5");
+
+  std::unordered_set<std::string> vars;
+  for (VarDesc* v : compiled_prog.Block(0).AllVars()) {
+    vars.insert(v->Name());
+  }
+  EXPECT_TRUE(vars.find("var1") != vars.end());
+  EXPECT_TRUE(vars.find("var2") != vars.end());
+  EXPECT_TRUE(vars.find("var3") != vars.end());
+}
+}  // namespace ir
+}  // namespace framework
+}  // namespace paddle
+
+USE_PASS(graph_to_program_pass);
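For reference, BuildNoCircleGraph wires up the following DAG, which is why the test pins op1/op2 at the front and op5 at the back but accepts op3 and op4 in either order (they only share the input var2):

// Edges built by BuildNoCircleGraph:
//   o1 -> v1 -> o2
//   o2 -> v2 -> {o3, o4}
//   o2 -> v3 -> o5
//   o3 -> v4 -> o5
// op5 must follow op3 (it consumes v4); op4 has no outputs, so the
// assertion that op5 comes last additionally relies on the deterministic
// order produced by TopologySortOperations placing op4 before op5.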
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 122dc161b41246e5f08bd0ae8b763489e9ee22f9..59b6007284bd476d510d7fe2ef621fc239d17d99 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -132,7 +132,9 @@ OpDesc::OpDesc(const proto::OpDesc &desc, BlockDesc *block)
     std::string attr_name = attr.name();
     // The sub_block referred to by the BLOCK attr hasn't been added
     // to ProgramDesc class yet, we skip setting BLOCK attr here.
-    if (attr.type() != proto::AttrType::BLOCK) {
+    // TODO(paddle-dev): Need to fix this to copy the BLOCK/BLOCKS attrs as well.
+    if (attr.type() != proto::AttrType::BLOCK &&
+        attr.type() != proto::AttrType::BLOCKS) {
       attrs_[attr_name] = GetAttrValue(attr);
     }
   }
diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc
index 344c001a69b53c82967ee983783892a514c2490b..a63944eaee6132c1082947fddcad4e0d72e26df1 100644
--- a/paddle/fluid/framework/program_desc.cc
+++ b/paddle/fluid/framework/program_desc.cc
@@ -80,6 +80,12 @@ ProgramDesc::ProgramDesc(const proto::ProgramDesc &desc) {
   InitFromProto();
 }
 
+void ProgramDesc::CopyFrom(const proto::ProgramDesc &desc) {
+  blocks_.clear();
+  desc_ = desc;
+  InitFromProto();
+}
+
 ProgramDesc::ProgramDesc(const std::string &binary_str) {
   PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
                  "Fail to parse program_desc from binary string.");
@@ -111,10 +117,16 @@ void ProgramDesc::InitFromProto() {
 
 const std::vector<std::string> ProgramDesc::GetFeedTargetNames() {
   auto &global_block = Block(0);
+  // The order of feed_target_names must follow the index specified in `col`,
+  // since the order of the feed ops doesn't necessarily follow `col`.
   std::vector<std::string> feed_target_names;
   for (auto *op : global_block.AllOps()) {
     if (op->Type() == kFeedOpType) {
-      feed_target_names.insert(feed_target_names.begin(), op->Output("Out")[0]);
+      size_t col = boost::get<int>(op->GetAttr("col"));
+      if (col >= feed_target_names.size()) {
+        feed_target_names.resize(col + 1);
+      }
+      feed_target_names[col] = op->Output("Out")[0];
     }
   }
   return feed_target_names;
@@ -122,10 +134,16 @@ const std::vector<std::string> ProgramDesc::GetFeedTargetNames() {
 
 const std::vector<std::string> ProgramDesc::GetFetchTargetNames() {
   auto &global_block = Block(0);
+  // The order of fetch_target_names must follow the index specified in `col`,
+  // since the order of the fetch ops doesn't necessarily follow `col`.
   std::vector<std::string> fetch_target_names;
   for (auto *op : global_block.AllOps()) {
     if (op->Type() == kFetchOpType) {
-      fetch_target_names.push_back(op->Input("X")[0]);
+      size_t col = boost::get<int>(op->GetAttr("col"));
+      if (col >= fetch_target_names.size()) {
+        fetch_target_names.resize(col + 1);
+      }
+      fetch_target_names[col] = op->Input("X")[0];
     }
   }
   return fetch_target_names;
diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h
index f3afc85eb924e4b03b7597e043ffd4e267adc977..a0e81cade18c0ca5eb1b98fee8325ae2d917d1a2 100644
--- a/paddle/fluid/framework/program_desc.h
+++ b/paddle/fluid/framework/program_desc.h
@@ -53,6 +53,8 @@ class ProgramDesc {
 
   void Flush();
 
+  void CopyFrom(const proto::ProgramDesc &desc);
+
   proto::ProgramDesc *Proto();
 
   // The output variable of feed_op is referenced as feed_target.
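The col-based indexing matters here because the graph_to_program pass re-emits ops in topological order, so feed/fetch ops may no longer appear in the order of their columns. A hypothetical sketch of the effect (variable names are illustrative; it uses the existing BlockDesc::AppendOp / OpDesc::SetAttr APIs):

// Sketch: `col` defines the feed slot regardless of op order in the block.
paddle::framework::ProgramDesc program;
auto* block = program.MutableBlock(0);

auto* feed1 = block->AppendOp();
feed1->SetType("feed");
feed1->SetAttr("col", 1);             // second slot, appended first
feed1->SetOutput("Out", {"label"});

auto* feed0 = block->AppendOp();
feed0->SetType("feed");
feed0->SetAttr("col", 0);             // first slot, appended second
feed0->SetOutput("Out", {"image"});

// The old front-insertion logic depended on the textual op order; indexing
// by `col` reliably yields {"image", "label"} here.
auto feed_names = program.GetFeedTargetNames();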
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index ba7645aa02413f28a648f35e381da7824604a455..a4f6364ae5b7d832096c92e9c6d8b3e865713cff 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -10,7 +10,7 @@ set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor)
 
 # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
 cc_library(paddle_fluid_api
     SRCS io.cc
-    DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB})
+    DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} graph_to_program_pass)
 
 get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h
index 695790a37dce889e838462b401ca4e89f09271d5..94f0550df57e79fa68c135f5c9c4b7effe6ac156 100644
--- a/paddle/fluid/inference/tests/test_helper.h
+++ b/paddle/fluid/inference/tests/test_helper.h
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <map>
 #include <vector>
+#include "paddle/fluid/framework/ir/graph_to_program_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/io.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -135,6 +136,15 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
   return feed_target_shapes;
 }
 
+void Compile(paddle::framework::ProgramDesc* program) {
+  std::unique_ptr<paddle::framework::ir::Graph> g(
+      new paddle::framework::ir::Graph(*program));
+  auto pass = paddle::framework::ir::PassRegistry::Instance().Get(
+      "graph_to_program_pass");
+  pass->SetNotOwned<paddle::framework::ProgramDesc>("program", program);
+  pass->Apply(std::move(g));
+}
+
 template <typename Place, bool CreateVars = true, bool PrepareContext = false>
 void TestInference(const std::string& dirname,
                    const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
@@ -172,6 +182,8 @@ void TestInference(const std::string& dirname,
         paddle::platform::DeviceContextPool::Instance().Get(place));
     inference_program = InitProgram(&executor, scope, dirname, is_combined);
   }
+  Compile(inference_program.get());
+
   // Disable the profiler and print the timing information
   paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
                                     "load_program_profiler");
@@ -249,3 +261,5 @@ void TestInference(const std::string& dirname,
 
   delete scope;
 }
+
+USE_PASS(graph_to_program_pass);
diff --git a/paddle/fluid/operators/parallel_do_op.cc b/paddle/fluid/operators/parallel_do_op.cc
index eb09470f37eabb5524f774bc289fc68f5884c540..97c36a83fc5eff421725d05f66fca05f5169d1bb 100644
--- a/paddle/fluid/operators/parallel_do_op.cc
+++ b/paddle/fluid/operators/parallel_do_op.cc
@@ -355,6 +355,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
         grad->SetInput(framework::GradVarName(output_param), og_names);
       }
     }
+    grad->SetInput("Communicator", {"nccl_com__do_not_change_"});
     grad->SetAttrMap(this->Attrs());
     grad->SetBlockAttr(kParallelBlock, grad_block_[0]);
 
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
index 3951e7b8ca649b63eea4b311f6205a6c7d761804..a231bbfbc8d5712275c92b4d27580016825ea91b 100644
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
+++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_image_classification_train.py
@@ -125,8 +125,8 @@ opts = optimizer.minimize(avg_cost)
 batch_size = fluid.layers.create_tensor(dtype='int64')
 batch_acc = fluid.layers.accuracy(input=predict, label=label, total=batch_size)
 
-# fluid.memory_optimize(fluid.default_main_program(), level=0)
-fluid.release_memory(fluid.default_main_program())
+fluid.memory_optimize(fluid.default_main_program(), level=0)
+# fluid.release_memory(fluid.default_main_program())
 
 BATCH_SIZE = 16
 PASS_NUM = 1
diff --git a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
index 1ad51936b5b8f7c5149452d6033754a570c72654..e520c8965089263d1ba10a6057acda1a53cc34a9 100644
--- a/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
+++ b/python/paddle/fluid/tests/book_memory_optimization/test_memopt_machine_translation.py
@@ -92,8 +92,8 @@ def main():
     optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
     optimizer.minimize(avg_cost)
 
-    # fluid.memory_optimize(fluid.default_main_program())
-    fluid.release_memory(fluid.default_main_program())
+    fluid.memory_optimize(fluid.default_main_program())
+    # fluid.release_memory(fluid.default_main_program())
 
     # fix the order of training data
     train_data = paddle.batch(