From 9f252e0032af38b064ba049993d8b59ccc9bce3a Mon Sep 17 00:00:00 2001 From: Yan Chunwei Date: Wed, 14 Nov 2018 09:58:44 +0800 Subject: [PATCH] Combine Inference Analysis with IR (#13914) --- cmake/inference_lib.cmake | 6 +- cmake/tensorrt.cmake | 1 + paddle/fluid/framework/executor.cc | 1 + paddle/fluid/framework/ir/CMakeLists.txt | 4 +- .../framework/ir/attention_lstm_fuse_pass.cc | 8 +- paddle/fluid/framework/ir/graph.cc | 2 - paddle/fluid/framework/ir/graph.h | 20 +- .../framework/ir/graph_pattern_detector.h | 4 +- .../framework/ir/graph_to_program_pass.cc | 3 +- paddle/fluid/framework/ir/graph_traits.cc | 70 +++ paddle/fluid/framework/ir/graph_traits.h | 34 ++ paddle/fluid/framework/ir/node.cc | 1 - paddle/fluid/framework/ir/node.h | 19 +- paddle/fluid/framework/ir/pass.h | 1 + paddle/fluid/framework/naive_executor.cc | 68 ++- paddle/fluid/framework/naive_executor.h | 12 +- paddle/fluid/framework/naive_executor_test.cc | 2 +- paddle/fluid/framework/scope.cc | 82 ++- paddle/fluid/framework/scope.h | 11 +- paddle/fluid/inference/CMakeLists.txt | 8 +- .../fluid/inference/analysis/CMakeLists.txt | 43 +- .../fluid/inference/analysis/analysis_pass.h | 30 +- paddle/fluid/inference/analysis/analyzer.cc | 131 +---- paddle/fluid/inference/analysis/analyzer.h | 41 +- .../inference/analysis/analyzer_tester.cc | 16 +- paddle/fluid/inference/analysis/argument.h | 159 +++--- .../inference/analysis/data_flow_graph.cc | 496 ------------------ .../inference/analysis/data_flow_graph.h | 209 -------- .../analysis/data_flow_graph_tester.cc | 168 ------ .../analysis/data_flow_graph_to_fluid_pass.cc | 285 ---------- .../analysis/data_flow_graph_to_fluid_pass.h | 59 --- .../data_flow_graph_to_fluid_pass_tester.cc | 48 -- .../analysis/dfg_graphviz_draw_pass.cc | 59 --- .../analysis/dfg_graphviz_draw_pass.h | 78 --- .../analysis/dfg_graphviz_draw_pass_tester.cc | 54 -- paddle/fluid/inference/analysis/dot_tester.cc | 1 - .../analysis/fluid_to_data_flow_graph_pass.cc | 76 --- .../analysis/fluid_to_data_flow_graph_pass.h | 57 -- .../fluid_to_data_flow_graph_pass_tester.cc | 38 -- .../inference/analysis/fluid_to_ir_pass.cc | 60 --- .../inference/analysis/fluid_to_ir_pass.h | 128 ----- .../fluid/inference/analysis/graph_traits.cc | 15 - .../fluid/inference/analysis/graph_traits.h | 63 --- paddle/fluid/inference/analysis/helper.h | 10 +- .../inference/analysis/ir_pass_manager.cc | 70 ++- .../inference/analysis/ir_pass_manager.h | 19 +- .../analysis/ir_passes/CMakeLists.txt | 7 + .../subgraph_detector.cc} | 279 ++++++++-- .../analysis/ir_passes/subgraph_detector.h | 182 +++++++ .../ir_passes/tensorrt_subgraph_pass.cc | 220 ++++++++ .../tensorrt_subgraph_pass.h} | 31 +- .../inference/analysis/model_store_pass.cc | 67 --- paddle/fluid/inference/analysis/node.cc | 70 --- paddle/fluid/inference/analysis/node.h | 244 --------- .../fluid/inference/analysis/node_tester.cc | 55 -- .../fluid/inference/analysis/pass_manager.cc | 47 -- .../fluid/inference/analysis/pass_manager.h | 94 ---- .../inference/analysis/pass_manager_tester.cc | 54 -- .../inference/analysis/passes/CMakeLists.txt | 9 + .../passes/ir_analysis_compose_pass.cc | 83 +++ .../ir_analysis_compose_pass.h} | 39 +- .../analysis/passes/ir_analysis_pass.cc | 43 ++ .../ir_analysis_pass.h} | 23 +- .../analysis/passes/ir_graph_build_pass.cc | 73 +++ .../analysis/passes/ir_graph_build_pass.h | 46 ++ .../fluid/inference/analysis/passes/passes.cc | 34 ++ .../passes.h} | 30 +- .../inference/analysis/subgraph_splitter.h | 88 ---- .../analysis/subgraph_splitter_tester.cc | 92 ---- .../tensorrt_subgraph_node_mark_pass.cc | 80 --- .../tensorrt_subgraph_node_mark_pass.h | 60 --- ...tensorrt_subgraph_node_mark_pass_tester.cc | 50 -- .../analysis/tensorrt_subgraph_pass.cc | 36 -- .../analysis/tensorrt_subgraph_pass.h | 57 -- .../analysis/tensorrt_subgraph_pass_tester.cc | 73 --- paddle/fluid/inference/analysis/ut_helper.h | 25 - paddle/fluid/inference/api/CMakeLists.txt | 27 +- paddle/fluid/inference/api/README.md | 18 +- paddle/fluid/inference/api/analysis_config.cc | 103 ++++ .../fluid/inference/api/analysis_predictor.cc | 286 +++++++--- .../fluid/inference/api/analysis_predictor.h | 29 +- .../api/analysis_predictor_tester.cc | 127 ++++- paddle/fluid/inference/api/api.cc | 1 + .../fluid/inference/api/api_anakin_engine.h | 4 +- paddle/fluid/inference/api/api_impl_tester.cc | 9 +- .../api/api_tensorrt_subgraph_engine.cc | 188 ------- .../api_tensorrt_subgraph_engine_tester.cc | 92 ---- .../api/demo_ci/simple_on_word2vec.cc | 2 +- .../api/demo_ci/trt_mobilenet_demo.cc | 7 +- .../fluid/inference/api/demo_ci/vis_demo.cc | 12 +- .../inference/api/details/zero_copy_tensor.cc | 10 +- .../api/details/zero_copy_tensor_dummy.cc | 10 +- paddle/fluid/inference/api/helper.h | 65 +++ .../paddle_anakin_config.h} | 34 +- .../inference/api/paddle_analysis_config.h | 77 +++ paddle/fluid/inference/api/paddle_api.h | 220 ++++++++ .../inference/api/paddle_inference_api.h | 268 +--------- .../inference/api/paddle_pass_builder.cc | 68 +++ .../fluid/inference/api/paddle_pass_builder.h | 131 +++++ paddle/fluid/inference/tensorrt/engine.cc | 1 + .../fluid/inference/tests/api/CMakeLists.txt | 7 +- .../tests/api/analyzer_resnet50_tester.cc | 9 +- .../tests/api/analyzer_rnn1_tester.cc | 113 +--- .../analyzer_text_classification_tester.cc | 4 +- .../tests/api/analyzer_vis_tester.cc | 13 +- .../fluid/inference/tests/api/tester_helper.h | 45 +- .../inference/tests/api/trt_models_tester.cc | 106 ++-- paddle/fluid/operators/load_op.cc | 5 +- paddle/fluid/operators/mul_op.cc | 3 +- 109 files changed, 2722 insertions(+), 4433 deletions(-) delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph.cc delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph.h delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph_tester.cc delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h delete mode 100644 paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc delete mode 100644 paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc delete mode 100644 paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h delete mode 100644 paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc delete mode 100644 paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc delete mode 100644 paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h delete mode 100644 paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc delete mode 100644 paddle/fluid/inference/analysis/fluid_to_ir_pass.cc delete mode 100644 paddle/fluid/inference/analysis/fluid_to_ir_pass.h delete mode 100644 paddle/fluid/inference/analysis/graph_traits.cc delete mode 100644 paddle/fluid/inference/analysis/graph_traits.h create mode 100644 paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt rename paddle/fluid/inference/analysis/{subgraph_splitter.cc => ir_passes/subgraph_detector.cc} (54%) create mode 100644 paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h create mode 100644 paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc rename paddle/fluid/inference/analysis/{model_store_pass_tester.cc => ir_passes/tensorrt_subgraph_pass.h} (55%) delete mode 100644 paddle/fluid/inference/analysis/model_store_pass.cc delete mode 100644 paddle/fluid/inference/analysis/node.cc delete mode 100644 paddle/fluid/inference/analysis/node.h delete mode 100644 paddle/fluid/inference/analysis/node_tester.cc delete mode 100644 paddle/fluid/inference/analysis/pass_manager.cc delete mode 100644 paddle/fluid/inference/analysis/pass_manager.h delete mode 100644 paddle/fluid/inference/analysis/pass_manager_tester.cc create mode 100644 paddle/fluid/inference/analysis/passes/CMakeLists.txt create mode 100644 paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc rename paddle/fluid/inference/analysis/{model_store_pass.h => passes/ir_analysis_compose_pass.h} (53%) create mode 100644 paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc rename paddle/fluid/inference/analysis/{node_attr_flags.h => passes/ir_analysis_pass.h} (70%) create mode 100644 paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc create mode 100644 paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h create mode 100644 paddle/fluid/inference/analysis/passes/passes.cc rename paddle/fluid/inference/analysis/{fluid_to_ir_pass_tester.cc => passes/passes.h} (61%) delete mode 100644 paddle/fluid/inference/analysis/subgraph_splitter.h delete mode 100644 paddle/fluid/inference/analysis/subgraph_splitter_tester.cc delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h delete mode 100644 paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc create mode 100644 paddle/fluid/inference/api/analysis_config.cc delete mode 100644 paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc delete mode 100644 paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc rename paddle/fluid/inference/{analysis/analyzer_main.cc => api/paddle_anakin_config.h} (56%) create mode 100644 paddle/fluid/inference/api/paddle_analysis_config.h create mode 100644 paddle/fluid/inference/api/paddle_api.h create mode 100644 paddle/fluid/inference/api/paddle_pass_builder.cc create mode 100644 paddle/fluid/inference/api/paddle_pass_builder.h diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index efdb093a7b..3cc1e028e7 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -164,7 +164,7 @@ endif() set(module "inference") copy(inference_lib DEPS ${inference_deps} SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.* - ${src_dir}/${module}/api/paddle_inference_api.h + ${src_dir}/${module}/api/paddle_*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/paddle_inference_pass.h DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module} ) @@ -202,10 +202,10 @@ copy(third_party DEPS fluid_lib_dist DSTS ${FLUID_INFERENCE_INSTALL_DIR} ${FLUID_INFERENCE_INSTALL_DIR} ) -# only need libpaddle_fluid.so/a and paddle_inference_api.h for inference-only library +# only need libpaddle_fluid.so/a and paddle_*.h for inference-only library copy(inference_api_lib DEPS fluid_lib_dist SRCS ${FLUID_INSTALL_DIR}/paddle/fluid/inference/libpaddle_fluid.* - ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_inference_api.h + ${FLUID_INSTALL_DIR}/paddle/fluid/inference/paddle_*.h DSTS ${FLUID_INFERENCE_INSTALL_DIR}/paddle/lib ${FLUID_INFERENCE_INSTALL_DIR}/paddle/include ) diff --git a/cmake/tensorrt.cmake b/cmake/tensorrt.cmake index fa0e834a1d..3dc7171551 100644 --- a/cmake/tensorrt.cmake +++ b/cmake/tensorrt.cmake @@ -34,4 +34,5 @@ if(TENSORRT_FOUND) "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") include_directories(${TENSORRT_INCLUDE_DIR}) list(APPEND EXTERNAL_LIBS ${TENSORRT_LIBRARY}) + add_definitions(-DPADDLE_WITH_TENSORRT) endif() diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index fc6b325286..47a221a944 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -359,6 +359,7 @@ std::vector> Executor::Prepare( void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, bool create_local_scope, bool create_vars, bool keep_kids) { + PADDLE_ENFORCE_NOT_NULL(scope); Scope* local_scope = scope; if (create_vars) { if (create_local_scope) { diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index 4cf973253c..504f7e6d6c 100644 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -5,6 +5,7 @@ file(APPEND ${pass_file} "\#include \"paddle/fluid/framework/ir/pass.h\"\n") # Usage: pass_library(target inference) will append to paddle_inference_pass.h +unset(INFER_IR_PASSES CACHE) # clear the global variable function(pass_library TARGET DEST) set(options "") set(oneValueArgs "") @@ -15,10 +16,11 @@ function(pass_library TARGET DEST) if (${DEST} STREQUAL "base" OR ${DEST} STREQUAL "inference") message(STATUS "add pass ${TARGET} ${DEST}") file(APPEND ${pass_file} "USE_PASS(${TARGET});\n") - set(PASS_LIBRARY ${TARGET} ${PASS_LIBRARY} PARENT_SCOPE) + set(INFER_IR_PASSES ${INFER_IR_PASSES} ${TARGET} CACHE INTERNAL "") endif() endfunction() + cc_library(node SRCS node.cc DEPS proto_desc) cc_library(graph SRCS graph.cc DEPS node pretty_log) cc_library(graph_helper SRCS graph_helper.cc DEPS graph) diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc index 6b284b1c1a..8668007da1 100644 --- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc @@ -91,10 +91,10 @@ void FindWhileOp(Graph* graph) { #undef OP_SET_IN #undef OP_SET_OUT - auto* X = graph->RetriveNode(34); - auto* LSTMOUT = graph->RetriveNode(81); - auto* cell_init = graph->RetriveNode(6); - auto* hidden_init = graph->RetriveNode(8); + auto* X = graph->RetrieveNode(34); + auto* LSTMOUT = graph->RetrieveNode(81); + auto* cell_init = graph->RetrieveNode(6); + auto* hidden_init = graph->RetrieveNode(8); auto* lstm_op = graph->CreateOpNode(&op_desc); PrepareParameters(graph, param); diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index a2a8baa5e4..ae0e42ff5e 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -84,8 +84,6 @@ void CheckProgram(const ProgramDesc &program) { Graph::Graph(const ProgramDesc &program) : program_(program) { CheckProgram(program_); - // Make the nodes id start from 0. - Node::ResetId(); auto var_nodes = InitFromProgram(program_); ResolveHazard(var_nodes); } diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index 6384d89d2f..0c856f8e61 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -116,13 +116,17 @@ class Graph { // Create a normal variable with non-null VarDesc. ir::Node *CreateVarNode(VarDesc *var_desc) { PADDLE_ENFORCE(var_desc); - return AddNode(new ir::Node(var_desc)); + auto *x = AddNode(new ir::Node(var_desc)); + x->SetId(num_node_created_++); + return x; } // Create a normal runnable operator with OpDesc. ir::Node *CreateOpNode(OpDesc *op_desc) { PADDLE_ENFORCE(op_desc); - return AddNode(new ir::Node(op_desc)); + auto *x = AddNode(new ir::Node(op_desc)); + x->SetId(num_node_created_++); + return x; } // Create a control dependency var that connects 2 operations. The @@ -132,13 +136,17 @@ class Graph { // TODO(panyx0718): control var name should be really unique. const std::string name = string::Sprintf( "%s@%llu", ir::Node::kControlDepVarName, node_set_.size()); - return AddNode(new ir::Node(name, ir::Node::Type::kVariable)); + auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable)); + x->SetId(num_node_created_++); + return x; } // A more free style way of creating a graph node. Mostly use for test // or "copy" from another node. Avoid using it if possible. ir::Node *CreateEmptyNode(const std::string &name, ir::Node::Type type) { - return AddNode(new ir::Node(name, type)); + auto *x = AddNode(new ir::Node(name, type)); + x->SetId(num_node_created_++); + return x; } // Clear all node information of the graph and return the ownership of the @@ -160,7 +168,7 @@ class Graph { } // NOTE low performance, but simple and secure. - Node *RetriveNode(int id) { + Node *RetrieveNode(int id) { for (auto &node : nodes_) { if (node.second->id() == id) { return node.second.get(); @@ -169,6 +177,7 @@ class Graph { return nullptr; } + const ProgramDesc &program() const { return program_; } std::map> InitFromProgram( const ProgramDesc &program); @@ -190,6 +199,7 @@ class Graph { std::map> attr_dels_; std::map> nodes_; std::unordered_set node_set_; + size_t num_node_created_{0}; // help to generate a unique node id. }; bool IsControlDepVar(const ir::Node &var); diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 9e462ac671..1c5155df78 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -310,8 +310,8 @@ void GraphSafeRemoveNodes(Graph* graph, const std::unordered_set& nodes); // Some pre-defined patterns those can be reused in multiple passes. -// The related Fluid Layer or Op should be one pattern here for better reusage -// accross different fusion. +// The related Fluid Layer or Op should be one pattern here for better re-usage +// across different fusion. namespace patterns { struct KeyCounter { diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.cc b/paddle/fluid/framework/ir/graph_to_program_pass.cc index 414d8f79b1..36f3693326 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass.cc +++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc @@ -35,10 +35,11 @@ std::unique_ptr GraphToProgramPass::ApplyImpl( new proto::ProgramDesc(*program.Proto())); auto block = program_pb->mutable_blocks(kRootBlockIndex); + block->set_idx(kRootBlockIndex); block->clear_vars(); std::unordered_set visited_vars; for (ir::Node* n : graph->Nodes()) { - if (n->NodeType() == ir::Node::Type::kVariable) { + if (n->IsVar()) { if (n->Var() && visited_vars.count(n->Var()->Name()) == 0) { visited_vars.insert(n->Var()->Name()); block->add_vars()->MergeFrom(*n->Var()->Proto()); diff --git a/paddle/fluid/framework/ir/graph_traits.cc b/paddle/fluid/framework/ir/graph_traits.cc index 084a4ba2de..2ee12cc410 100644 --- a/paddle/fluid/framework/ir/graph_traits.cc +++ b/paddle/fluid/framework/ir/graph_traits.cc @@ -66,6 +66,76 @@ NodesDFSIterator &NodesDFSIterator::operator=(const NodesDFSIterator &other) { } Node *NodesDFSIterator::operator->() { return stack_.top(); } +inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) { + return node.inputs.size() == n; +} + +NodesTSIterator::NodesTSIterator(const std::vector &source) { + PADDLE_ENFORCE(!source.empty(), + "Start points of topological sorting should not be empty!"); + // CHECK all the inputs' in-degree is 0 + for (auto *node : source) { + PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0)); + } + + std::unordered_set visited; + std::unordered_set to_visit{source.begin(), source.end()}; + + std::vector inlink_visited; + while (!to_visit.empty()) { + std::vector queue(to_visit.begin(), to_visit.end()); + for (auto *p : queue) { + inlink_visited.clear(); + + std::copy_if(p->inputs.begin(), p->inputs.end(), + std::back_inserter(inlink_visited), + [&](Node *x) -> bool { return visited.count(x) != 0; }); + + if (inlink_visited.size() == p->inputs.size()) { + sorted_.push_back(p); + for (auto *_ : p->outputs) { + if (!visited.count(_)) { + to_visit.insert(_); + } + } + + to_visit.erase(p); + visited.insert(p); + } + } + } +} + +NodesTSIterator::NodesTSIterator(const NodesTSIterator &other) + : sorted_(other.sorted_), cursor_(other.cursor_) {} + +Node &NodesTSIterator::operator*() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return *sorted_[cursor_]; +} + +NodesTSIterator &NodesTSIterator::operator++() { + if (++cursor_ >= sorted_.size()) { + sorted_.clear(); + cursor_ = 0; + } + return *this; +} +NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) { + cursor_ = other.cursor_; + sorted_ = other.sorted_; + return *this; +} + +bool NodesTSIterator::operator==(const NodesTSIterator &other) { + return sorted_ == other.sorted_ && cursor_ == other.cursor_; +} + +Node *NodesTSIterator::operator->() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return sorted_[cursor_]; +} + } // namespace ir } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/ir/graph_traits.h b/paddle/fluid/framework/ir/graph_traits.h index f42bab20ed..f6772f9a37 100644 --- a/paddle/fluid/framework/ir/graph_traits.h +++ b/paddle/fluid/framework/ir/graph_traits.h @@ -62,6 +62,32 @@ struct NodesDFSIterator std::unordered_set visited_; }; +// Topological sorting iterator on nodes. +struct NodesTSIterator + : public std::iterator { + NodesTSIterator() = default; + NodesTSIterator(const std::vector &source); + NodesTSIterator(NodesTSIterator &&other) + : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { + other.cursor_ = 0; + } + NodesTSIterator(const NodesTSIterator &other); + + Node &operator*(); + NodesTSIterator &operator++(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesTSIterator &operator=(const NodesTSIterator &other); + bool operator==(const NodesTSIterator &other); + bool operator!=(const NodesTSIterator &other) { return !(*this == other); } + Node *operator->(); + + private: + std::vector sorted_; + size_t cursor_{0}; +}; + /* * GraphTraits contains some graph traversal algorithms. * @@ -76,6 +102,14 @@ struct GraphTraits { NodesDFSIterator()); } + static iterator_range TS(const Graph &g) { + auto start_points = ExtractStartPoints(g); + PADDLE_ENFORCE(!start_points.empty()); + NodesTSIterator x(start_points); + return iterator_range(NodesTSIterator(start_points), + NodesTSIterator()); + } + private: // The nodes those have no input will be treated as start points. static std::vector ExtractStartPoints(const Graph &g) { diff --git a/paddle/fluid/framework/ir/node.cc b/paddle/fluid/framework/ir/node.cc index 9277abe8c1..fe5d27bc4f 100644 --- a/paddle/fluid/framework/ir/node.cc +++ b/paddle/fluid/framework/ir/node.cc @@ -18,7 +18,6 @@ namespace paddle { namespace framework { namespace ir { constexpr char Node::kControlDepVarName[]; -int Node::count_ = 0; std::unique_ptr CreateNodeForTest(const std::string& name, Node::Type type) { diff --git a/paddle/fluid/framework/ir/node.h b/paddle/fluid/framework/ir/node.h index eedb375cf4..594bfc7363 100644 --- a/paddle/fluid/framework/ir/node.h +++ b/paddle/fluid/framework/ir/node.h @@ -115,37 +115,30 @@ class Node { int id_; private: + // ID can only set by a Graph. + void SetId(int id) { id_ = id; } + friend class Graph; friend std::unique_ptr CreateNodeForTest(const std::string& name, Node::Type type); explicit Node(const std::string& name, Type type) - : name_(name), - var_desc_(nullptr), - op_desc_(nullptr), - type_(type), - id_(count_++) {} + : name_(name), var_desc_(nullptr), op_desc_(nullptr), type_(type) {} explicit Node(VarDesc* var_desc) : name_(var_desc->Name()), var_desc_(new VarDesc(*var_desc)), op_desc_(nullptr), - type_(Type::kVariable), - id_(count_++) {} + type_(Type::kVariable) {} explicit Node(OpDesc* op_desc) : name_(op_desc->Type()), var_desc_(nullptr), op_desc_(new OpDesc(*op_desc, op_desc->Block())), - type_(Type::kOperation), - id_(count_++) {} + type_(Type::kOperation) {} Node() = delete; - static int count_; - // Please don't use this API or make this public. - static void ResetId() { count_ = 0; } - boost::any wrapper_; std::function wrapper_deleter_; std::type_index wrapper_type_ = std::type_index(typeid(void)); diff --git a/paddle/fluid/framework/ir/pass.h b/paddle/fluid/framework/ir/pass.h index 8ac8d7677e..e38c7ee192 100644 --- a/paddle/fluid/framework/ir/pass.h +++ b/paddle/fluid/framework/ir/pass.h @@ -93,6 +93,7 @@ class Pass { protected: virtual std::unique_ptr ApplyImpl(std::unique_ptr graph) const { LOG(FATAL) << "Calling virtual Pass not implemented."; + return graph; } private: diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index 8e660f97f0..c384456b64 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -57,60 +57,58 @@ static void InitializeVariable(Variable *var, proto::VarType::Type var_type) { } } -void NaiveExecutor::Prepare(Scope *parent_scope, - const ProgramDesc &program_desc, int block_id, - bool with_feed_fetch_ops) { - if (!parent_scope) { +void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc, + int block_id, bool with_feed_fetch_ops) { + if (!scope) { scope_ = new framework::Scope; } else { - scope_ = &parent_scope->NewScope(); + scope_ = scope; } - CreateVariables(program_desc, scope_, block_id); + + VLOG(3) << "NaiveExecutor init with scope " << scope; CreateOps(program_desc, block_id, with_feed_fetch_ops); } void NaiveExecutor::Run() { for (auto &op : ops_) { - VLOG(40) << "run " << op->Type(); + VLOG(3) << std::this_thread::get_id() << " run " << op->Type() + << " on scope " << scope_; op->Run(*scope_, place_); } } -void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope, - int block_id) { - PADDLE_ENFORCE(scope); +void NaiveExecutor::CreateVariables(const ProgramDesc &desc, int block_id, + bool persistable, Scope *scope) { + PADDLE_ENFORCE_NOT_NULL(scope); + auto &global_block = desc.Block(block_id); - const Scope *ancestor_scope = scope; - while (ancestor_scope->parent()) { - ancestor_scope = ancestor_scope->parent(); + const auto *anc = scope; + PADDLE_ENFORCE(anc->parent() != anc); + while (anc->parent()) { + anc = anc->parent(); } - if (ancestor_scope != scope) { - for (auto &var : global_block.AllVars()) { - if (var->Name() == framework::kEmptyVarName) { - continue; - } - // Create persistable vars in ancestor scope. - if (var->Persistable()) { - auto *ptr = const_cast(ancestor_scope)->Var(var->Name()); - InitializeVariable(ptr, var->GetType()); - VLOG(30) << "Create Variable " << var->Name() - << " global, which pointer is " << ptr; - } else { // Create temporary variables in local scope. - auto *ptr = scope->Var(var->Name()); + for (auto &var : global_block.AllVars()) { + if (var->Name() == framework::kEmptyVarName) { + continue; + } + + if (persistable == var->Persistable()) { + if (persistable) { + if (!anc->FindVar(var->Name())) { + auto *ptr = const_cast(anc)->Var(var->Name()); + VLOG(3) << scope << " Create persistable variable " << var->Name() + << ", which pointer is " << ptr; + InitializeVariable(ptr, var->GetType()); + } + } else { + auto *ptr = const_cast(scope)->Var(var->Name()); + VLOG(3) << scope << " Create variable " << var->Name() + << ", which pointer is " << ptr; InitializeVariable(ptr, var->GetType()); - VLOG(30) << "Create Variable " << var->Name() - << " locally, which pointer is " << ptr; } } - } else { - for (auto &var : global_block.AllVars()) { - auto *ptr = scope->Var(var->Name()); - InitializeVariable(ptr, var->GetType()); - VLOG(30) << "Create variable " << var->Name() << ", which pointer is " - << ptr; - } } } diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h index ddfa6e1f4d..5e673f6857 100644 --- a/paddle/fluid/framework/naive_executor.h +++ b/paddle/fluid/framework/naive_executor.h @@ -35,8 +35,14 @@ class NaiveExecutor { // Create child scope. // Create variables. // @with_feed_fetch_ops: whether to work with the feed and fetch operators. - void Prepare(Scope* parent_scope, const ProgramDesc& program_desc, - int block_id, bool with_feed_fetch_ops); + void Prepare(Scope* scope, const ProgramDesc& program_desc, int block_id, + bool with_feed_fetch_ops); + + // Create variables before head. + // Create parameters if persistable is ture, or create the temporary variables + // instead. + void CreateVariables(const ProgramDesc& desc, int block_id, bool persistable, + Scope* scope); // Run all the operators. void Run(); @@ -49,8 +55,6 @@ class NaiveExecutor { void CleanFeedFetchOps(); protected: - void CreateVariables(const ProgramDesc& desc, Scope* scope, int block_id); - void CreateOps(const ProgramDesc& desc, int block_id, bool with_feed_fetch_ops); diff --git a/paddle/fluid/framework/naive_executor_test.cc b/paddle/fluid/framework/naive_executor_test.cc index 6b9f79b9d3..c917630666 100644 --- a/paddle/fluid/framework/naive_executor_test.cc +++ b/paddle/fluid/framework/naive_executor_test.cc @@ -39,7 +39,7 @@ TEST(NaiveExecutor, Basic) { auto place = platform::CPUPlace(); NaiveExecutor exe(place); - exe.Prepare(nullptr, program, 0, false /*with feed fetch ops*/); + exe.Prepare(nullptr, program, 0, false); auto* a_tensor = exe.FindTensor("a"); auto* b_tensor = exe.FindTensor("b"); auto* c_tensor = exe.FindTensor("c"); diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 0c407f8c1d..bbeef15025 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -15,7 +15,9 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include // for unique_ptr +#include #include +#include #include "glog/logging.h" #include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/string/printf.h" @@ -36,6 +38,16 @@ DEFINE_double( "Memory size threshold (GB) when the garbage collector clear tensors." "Disabled when this value is less than 0"); +// When in inference scenario, the scopes will not be written by two threads in +// a mean time, but a scope may be read by multiple threads concurrently, and +// the mutex will cause serious performance issue. +// So the mutex is disabled when `ON_INFER`. +#ifdef ON_INFER +#define SCOPE_LOCK_GUARD +#else +#define SCOPE_LOCK_GUARD std::lock_guard lock(mutex_); +#endif + namespace paddle { namespace framework { @@ -49,18 +61,18 @@ int64_t GetEagerDeletionThreshold() { Scope::~Scope() { DropKids(); } Scope& Scope::NewScope() const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD kids_.push_back(new Scope(this)); return *kids_.back(); } Variable* Scope::Var(const std::string& name) { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD return VarInternal(name); } Variable* Scope::Var(std::string* name) { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD auto new_name = string::Sprintf("%p.%d", this, vars_.size()); if (name != nullptr) { *name = new_name; @@ -69,34 +81,34 @@ Variable* Scope::Var(std::string* name) { } Variable* Scope::FindVar(const std::string& name) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD return FindVarInternal(name); } Variable* Scope::FindLocalVar(const std::string& name) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD return FindVarLocally(name); } const Scope* Scope::FindScope(const Variable* var) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD return FindScopeInternal(var); } void Scope::DropKids() { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD for (Scope* s : kids_) delete s; kids_.clear(); } bool Scope::HasKid(const Scope* scope) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); return it != this->kids_.end(); } std::vector Scope::LocalVarNames() const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD std::vector known_vars; known_vars.reserve(this->vars_.size()); for (auto& p : vars_) { @@ -106,9 +118,10 @@ std::vector Scope::LocalVarNames() const { } void Scope::DeleteScope(Scope* scope) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD auto it = std::find(this->kids_.begin(), this->kids_.end(), scope); - PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope); + PADDLE_ENFORCE(it != this->kids_.end(), "%p Cannot find %p as kid scope", + this, scope); this->kids_.erase(it); // When making memory benchmark on Fluid, we have to delete scope sync. if (FLAGS_benchmark || FLAGS_eager_delete_scope) { @@ -119,7 +132,7 @@ void Scope::DeleteScope(Scope* scope) const { } void Scope::EraseVars(const std::vector& var_names) { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD std::set var_set(var_names.begin(), var_names.end()); for (auto it = vars_.begin(); it != vars_.end();) { if (var_set.find(it->first) != var_set.end()) { @@ -132,12 +145,12 @@ void Scope::EraseVars(const std::vector& var_names) { void Scope::Rename(const std::string& origin_name, const std::string& new_name) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD RenameInternal(origin_name, new_name); } std::string Scope::Rename(const std::string& origin_name) const { - std::lock_guard lock(mutex_); + SCOPE_LOCK_GUARD auto new_name = string::Sprintf("%p.%d", this, vars_.size()); RenameInternal(origin_name, new_name); return new_name; @@ -189,5 +202,46 @@ Variable* Scope::FindVarLocally(const std::string& name) const { return nullptr; } +std::string GenScopeTreeDebugInfo(Scope* root) { + std::stringstream os; + + if (!root) return ""; + + // level traversal + std::queue queue; + queue.push(root); + + std::vector scopes; + + while (!queue.empty()) { + auto* end = queue.back(); + Scope* q = nullptr; + while (q != end) { + q = queue.front(); + queue.pop(); + os << q << " "; + scopes.push_back(q); + + for (auto* c : q->kids()) { + queue.push(c); + } + } + // end of a level + os << "\n------------------------------------------\n"; + } + + os << "\nDetails:\n\n"; + + for (Scope* q : scopes) { + os << "====\n"; + os << q << ":\n"; + for (auto& var : q->LocalVarNames()) { + os << " - " << var << "\n"; + } + } + + return os.str(); +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 9462620e82..1901ffbe57 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -78,11 +78,11 @@ class Scope { /// Drop all kids scopes belonged to this scope. void DropKids(); - std::list& kids() const { return kids_; } - /// Find if a scope exists in the kid scopes bool HasKid(const Scope* scope) const; + const std::list& kids() const { return kids_; } + // enumerate all the variables current contains. std::vector LocalVarNames() const; @@ -118,12 +118,17 @@ class Scope { // Scope in `kids_` are owned by this class. mutable std::list kids_; - Scope const* parent_{nullptr}; + const Scope* parent_{nullptr}; DISABLE_COPY_AND_ASSIGN(Scope); private: mutable std::mutex mutex_; }; + +// Generate some debug string about the inherience structure of scope, quite +// naive. +std::string GenScopeTreeDebugInfo(Scope*); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index e5678cf607..022d91b465 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -27,13 +27,9 @@ set(SHARED_INFERENCE_SRCS io.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/api_impl.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/analysis_predictor.cc ${CMAKE_CURRENT_SOURCE_DIR}/api/details/zero_copy_tensor.cc) -if (WITH_GPU AND TENSORRT_FOUND) - set(STATIC_INFERENCE_APIS ${STATIC_INFERENCE_APIS} paddle_inference_tensorrt_subgraph_engine) - set(SHARED_INFERENCE_SRCS ${SHARED_INFERENCE_SRCS} ${CMAKE_CURRENT_SOURCE_DIR}/api/api_tensorrt_subgraph_engine.cc) -endif() # Create static library -cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array) +cc_library(paddle_fluid DEPS ${fluid_modules} ${STATIC_INFERENCE_APIS} zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder) if(NOT APPLE) # TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac. @@ -43,7 +39,7 @@ endif() # Create shared library cc_library(paddle_fluid_shared SHARED SRCS ${SHARED_INFERENCE_SRCS} - DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array) + DEPS ${fluid_modules} paddle_fluid_api reset_tensor_array analysis_config paddle_pass_builder) set_target_properties(paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid) if(NOT APPLE) diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 0354f9e6e9..eb89fc5e11 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -1,24 +1,25 @@ -cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass) -set(analysis_deps - framework_proto proto_desc ir_pass_manager graph pass paddle_fluid_api executor pretty_log) +unset(analysis_deps CACHE) +set(analysis_deps # analysis_deps can be extended accross the project + framework_proto proto_desc graph pass paddle_fluid_api executor pretty_log + ir_pass_manager + CACHE INTERNAL "") -cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc +add_subdirectory(ir_passes) +add_subdirectory(passes) + +cc_library(ir_pass_manager SRCS ir_pass_manager.cc DEPS graph pass ${INFER_IR_PASSES}) + +cc_library(argument SRCS argument.cc DEPS scope proto_desc) +cc_library(analysis_pass SRCS analysis_pass.cc DEPS proto_desc) + +cc_library(analysis SRCS analyzer.cc helper.cc - # passes - analysis_pass.cc - fluid_to_data_flow_graph_pass.cc - data_flow_graph_to_fluid_pass.cc - dfg_graphviz_draw_pass.cc - tensorrt_subgraph_pass.cc - tensorrt_subgraph_node_mark_pass.cc - fluid_to_ir_pass.cc - model_store_pass.cc - DEPS ${analysis_deps}) + analysis_pass + DEPS ${analysis_deps} + ) -cc_test(test_node SRCS node_tester.cc DEPS analysis) cc_test(test_dot SRCS dot_tester.cc DEPS analysis) -cc_binary(inference_analyzer SRCS analyzer_main.cc DEPS analysis paddle_fluid) function(inference_analysis_test TARGET) if(WITH_TESTING) @@ -34,13 +35,3 @@ function(inference_analysis_test TARGET) endfunction(inference_analysis_test) inference_analysis_test(test_analyzer SRCS analyzer_tester.cc EXTRA_DEPS paddle_inference_api) -inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc) -inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc) -inference_analysis_test(test_fluid_to_ir_pass SRCS fluid_to_ir_pass_tester.cc) -inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc) -inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc) -inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc) -inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc) -inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc) -inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc) -inference_analysis_test(test_model_store_pass SRCS model_store_pass_tester.cc) diff --git a/paddle/fluid/inference/analysis/analysis_pass.h b/paddle/fluid/inference/analysis/analysis_pass.h index 13805ea4ac..299f235a74 100644 --- a/paddle/fluid/inference/analysis/analysis_pass.h +++ b/paddle/fluid/inference/analysis/analysis_pass.h @@ -19,42 +19,36 @@ limitations under the License. */ #include #include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/inference/analysis/argument.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/analysis/node.h" namespace paddle { namespace inference { namespace analysis { +/* + * AnalysisPass is a pass used to control the IR passes. + */ class AnalysisPass { public: AnalysisPass() = default; virtual ~AnalysisPass() = default; - // Mutable Pass. - virtual bool Initialize(Argument *argument) { return false; } - // Readonly Pass. - virtual bool Initialize(const Argument &argument) { return false; } - // Virtual method overriden by subclasses to do any necessary clean up after - // all passes have run. - virtual bool Finalize() { return false; } - - // Create a debugger Pass that draw the DFG by graphviz toolkit. - virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; } - - // Run on a single DataFlowGraph. - virtual void Run(DataFlowGraph *x) = 0; + // Run on a single Graph. + void Run(Argument* argument) { RunImpl(argument); } // Human-readable short representation. virtual std::string repr() const = 0; // Human-readable long description. virtual std::string description() const { return "No DOC"; } -}; -// GraphPass processes on any GraphType. -class DataFlowGraphPass : public AnalysisPass {}; + protected: + // User should implement these. + virtual void RunImpl(Argument* argument) = 0; + + Argument* argument_{nullptr}; +}; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index d55303a51e..c8ed373ee7 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -15,138 +15,23 @@ #include "paddle/fluid/inference/analysis/analyzer.h" #include #include - -#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" -#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h" -#include "paddle/fluid/inference/analysis/model_store_pass.h" -#include "paddle/fluid/inference/analysis/pass_manager.h" -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" - -DEFINE_bool(IA_enable_tensorrt_subgraph_engine, false, - "Enable subgraph to TensorRT engine for acceleration"); - -DEFINE_bool(IA_enable_ir, false, "Turn on IR support"); - -DEFINE_string(IA_graphviz_log_root, "./", - "Graphviz debuger for data flow graphs."); - -DEFINE_string(IA_output_storage_path, "", "optimized model output path"); +#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h" +#include "paddle/fluid/inference/analysis/passes/passes.h" namespace paddle { namespace inference { namespace analysis { -class DfgPassManagerImpl final : public DfgPassManager { - public: - DfgPassManagerImpl() { - // TODO(Superjomn) set the key with pass reprs. - if (!FLAGS_IA_enable_ir) { - AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass); - } else { - AddPass("fluid-to-ir-pass", new FluidToIrPass); - } - TryAddTensorRtPass(); - AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass); - if (!FLAGS_IA_output_storage_path.empty()) { - AddPass("model-store-pass", new ModelStorePass); - } - } +Analyzer::Analyzer() {} - std::string repr() const override { return "dfg-pass-manager"; } - std::string description() const override { return "DFG pass manager."; } +void Analyzer::Run(Argument *argument) { RunIrAnalysis(argument); } - private: - void AddPass(const std::string& name, AnalysisPass* pass) { - VLOG(30) << "Adding pass " << name; - Register(name, pass); - AddGraphvizDebugerPass(pass); - } +void Analyzer::RunIrAnalysis(Argument *argument) { + std::vector passes({"ir_analysis_compose_pass"}); - void TryAddTensorRtPass() { - if (FLAGS_IA_enable_tensorrt_subgraph_engine) { - auto trt_teller = [&](const Node* node) { - std::unordered_set teller_set( - {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid", - "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad", - "elementwise_add", "dropout"}); - if (!node->IsFunction()) return false; - - const auto* func = static_cast(node); - if (teller_set.count(func->func_type())) { - return true; - } else { - return false; - } - }; - - AddPass("tensorrt-subgraph-marker", - new TensorRTSubgraphNodeMarkPass(trt_teller)); - AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller)); - } - } - - // Add the graphviz debuger pass if the parent pass has one. - void AddGraphvizDebugerPass(AnalysisPass* pass) { - auto* debuger_pass = pass->CreateGraphvizDebugerPass(); - if (debuger_pass) { - Register(debuger_pass->repr(), debuger_pass); - } + for (auto &pass : passes) { + PassRegistry::Global().Retreive(pass)->Run(argument); } -}; - -Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); } - -void Analyzer::Run(Argument* argument) { - std::vector passes; - passes.push_back("graph_viz_pass"); // add graphviz for debug. -#ifdef PADDLE_WITH_MKLDNN - if (use_mkldnn_) { - VLOG(30) << "Adding MKL-DNN placement pass"; - passes.push_back("mkldnn_placement_pass"); - } -#endif - // infer_clean_graph_pass should be the first default pass - // after mkldnn_placement_pass. - passes.push_back("infer_clean_graph_pass"); - passes.push_back("graph_viz_pass"); // add graphviz for debug. - for (auto& pass : ir_passes_) { - // skip mkldnn pass when use_mkldnn_ = false; - bool skip_pass = (!use_mkldnn_) && pass.find("mkldnn") != std::string::npos; - if (!disabled_ir_passes_.count(pass) && !skip_pass) { - passes.push_back(pass); - passes.push_back("graph_viz_pass"); // add graphviz for debug. - } - } - argument->Set(kFluidToIrPassesAttr, new std::vector(passes)); - - for (auto& x : data_) { - PADDLE_ENFORCE(x->Initialize(argument)); - x->RunAll(); - PADDLE_ENFORCE(x->Finalize()); - } -} - -Analyzer& Analyzer::IncludeAllIrPasses() { - ir_passes_ = all_ir_passes_; - return *this; -} - -Analyzer& Analyzer::DisableIrPasses(const std::vector& passes) { - disabled_ir_passes_.insert(passes.begin(), passes.end()); - return *this; -} - -Analyzer& Analyzer::IncludeIrPasses(const std::vector& passes) { - ir_passes_ = passes; - return *this; -} - -Analyzer& Analyzer::SetUseMkldnn(bool use_mkldnn) { - use_mkldnn_ = use_mkldnn; - return *this; } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 3af1d572df..b43e67f20f 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -40,56 +40,21 @@ limitations under the License. */ #include #include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/flags.h" -#include "paddle/fluid/inference/analysis/pass_manager.h" namespace paddle { namespace inference { namespace analysis { -class Analyzer : public OrderedRegistry { +class Analyzer final { public: - // Register all the pass-managers. Analyzer(); void Run(Argument* argument); - Analyzer& DisableIrPasses(const std::vector& passes); - Analyzer& IncludeIrPasses(const std::vector& passes); - Analyzer& IncludeAllIrPasses(); - Analyzer& SetUseMkldnn(bool use_mkldnn); - DISABLE_COPY_AND_ASSIGN(Analyzer); - private: - // All avaiable IR passes. - // The bigger fuse comes first, so that the small operators prefer to be - // merged in a larger fuse op. The small fusion will not break the pattern of - // larger fusion. - const std::vector all_ir_passes_{{ - // Manual update the passes here. - "attention_lstm_fuse_pass", // - "seqconv_eltadd_relu_fuse_pass", // - "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // - "mul_lstm_fuse_pass", // - "fc_gru_fuse_pass", // - "mul_gru_fuse_pass", // - "seq_concat_fc_fuse_pass", // - "fc_fuse_pass", // - "conv_bn_fuse_pass", // - "conv_eltwiseadd_bn_fuse_pass", // -#ifdef PADDLE_WITH_MKLDNN - "depthwise_conv_mkldnn_pass", // - "conv_bias_mkldnn_fuse_pass", // - "conv_relu_mkldnn_fuse_pass", // - "conv_elementwise_add_mkldnn_fuse_pass", // -#endif - }}; - - std::unordered_set disabled_ir_passes_; - // Ir passes to run - std::vector ir_passes_; - bool use_mkldnn_; + protected: + void RunIrAnalysis(Argument* argument); }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 5430e5c1ef..48fc5dda2a 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -27,21 +27,21 @@ namespace analysis { using namespace framework; // NOLINT TEST(Analyzer, analysis_without_tensorrt) { - FLAGS_IA_enable_tensorrt_subgraph_engine = false; Argument argument; - argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir)); + argument.SetModelDir(FLAGS_inference_model_dir); + argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); + Analyzer analyser; analyser.Run(&argument); } TEST(Analyzer, analysis_with_tensorrt) { - FLAGS_IA_enable_tensorrt_subgraph_engine = true; Argument argument; - argument.Set("minimum_subgraph_size", new int(0)); - argument.Set("max_batch_size", new int(3)); - argument.Set("workspace_size", new int(1 << 20)); - argument.Set("precision_mode", new std::string("FP32")); - argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir)); + argument.SetTensorRtMaxBatchSize(3); + argument.SetTensorRtWorkspaceSize(1 << 20); + argument.SetModelDir(FLAGS_inference_model_dir); + argument.SetIrAnalysisPasses({"infer_clean_graph_pass"}); + Analyzer analyser; analyser.Run(&argument); } diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index 9495e2435c..d7a2f3d1e3 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -24,13 +24,16 @@ #pragma once #include +#include +#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/variant.h" namespace paddle { namespace inference { namespace analysis { +using framework::ir::Graph; /* * The argument definition of both Pass and PassManagers. @@ -39,75 +42,99 @@ namespace analysis { */ struct Argument { Argument() = default; - explicit Argument(const std::string& fluid_model_dir) - : fluid_model_dir(new std::string(fluid_model_dir)) {} - // The directory of the trained model. - std::unique_ptr fluid_model_dir; - // The path of `__model__` and `param`, this is used when the file name of - // model and param is changed. - std::unique_ptr fluid_model_program_path; - std::unique_ptr fluid_model_param_path; - - // The graph that process by the Passes or PassManagers. - std::unique_ptr main_dfg; - - // The original program desc. - std::unique_ptr origin_program_desc; - - // The processed program desc. - std::unique_ptr transformed_program_desc; - - // The output storage path of ModelStorePass. - std::unique_ptr model_output_store_path; - - // Support for any other attributes. - template - void Set(const std::string& key, T* data) { - PADDLE_ENFORCE_NOT_NULL(data); - PADDLE_ENFORCE(!attrs_.count(key), "Duplicate set Argument's attr [%s]", - key); - attrs_[key] = data; - attr_deleters_[key] = [data, key]() { - VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; - VLOG(30) << "argument delete attr: " << key; - delete data; - }; - } - - bool Has(const std::string& name) const { return attrs_.count(name); } - - template - T* Release(const std::string& key) { - PADDLE_ENFORCE(attrs_.count(key)); - auto* res = boost::any_cast(attrs_.at(key)); - attrs_.erase(key); - attr_deleters_.erase(key); - return res; - } - - template - T& Get(const std::string& key) { - PADDLE_ENFORCE(Has(key)); - return *boost::any_cast(attrs_.at(key)); - } - - ~Argument() { - for (auto& item : attr_deleters_) { - item.second(); - } - } + explicit Argument(const std::string& model_dir) { SetModelDir(model_dir); } + + using unique_ptr_t = std::unique_ptr>; + using fusion_statis_t = std::unordered_map; + + bool Has(const std::string& key) const { return valid_fields_.count(key); } + +#define DECL_ARGUMENT_FIELD(field__, Field, type__) \ + public: \ + type__& field__() { \ + PADDLE_ENFORCE(Has(#field__)); \ + return field__##_; \ + } \ + void Set##Field(const type__& x) { \ + field__##_ = x; \ + valid_fields_.insert(#field__); \ + } \ + DECL_ARGUMENT_FIELD_VALID(field__); \ + type__* field__##_ptr() { return &field__##_; } \ + \ + private: \ + type__ field__##_; + +#define DECL_ARGUMENT_FIELD_VALID(field__) \ + bool field__##_valid() { return Has(#field__); } + +#define DECL_ARGUMENT_UNIQUE_FIELD(field__, Field, type__) \ + public: \ + type__& field__() { \ + PADDLE_ENFORCE_NOT_NULL(field__##_); \ + PADDLE_ENFORCE(Has(#field__)); \ + return *static_cast(field__##_.get()); \ + } \ + void Set##Field(type__* x) { \ + field__##_ = \ + unique_ptr_t(x, [](void* x) { delete static_cast(x); }); \ + valid_fields_.insert(#field__); \ + } \ + void Set##Field##NotOwned(type__* x) { \ + valid_fields_.insert(#field__); \ + field__##_ = unique_ptr_t(x, [](void* x) {}); \ + } \ + DECL_ARGUMENT_FIELD_VALID(field__); \ + type__* field__##_ptr() { \ + PADDLE_ENFORCE(Has(#field__)); \ + return static_cast(field__##_.get()); \ + } \ + type__* Release##Field() { \ + PADDLE_ENFORCE(Has(#field__)); \ + valid_fields_.erase(#field__); \ + return static_cast(field__##_.release()); \ + } \ + \ + private: \ + unique_ptr_t field__##_; + + // Model path + DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string); + // Model specified with program and parameters files. + DECL_ARGUMENT_FIELD(model_program_path, ModelProgramPath, std::string); + DECL_ARGUMENT_FIELD(model_params_path, ModelParamsPath, std::string); + + // The overall graph to work on. + DECL_ARGUMENT_UNIQUE_FIELD(main_graph, MainGraph, framework::ir::Graph); + // The overall Scope to work on. + DECL_ARGUMENT_UNIQUE_FIELD(scope, Scope, framework::Scope); + + DECL_ARGUMENT_UNIQUE_FIELD(main_program, MainProgram, framework::ProgramDesc); + + // The ir passes to perform in analysis phase. + DECL_ARGUMENT_FIELD(ir_analysis_passes, IrAnalysisPasses, + std::vector); + + DECL_ARGUMENT_FIELD(use_gpu, UseGPU, bool); + DECL_ARGUMENT_FIELD(use_tensorrt, UseTensorRT, bool); + DECL_ARGUMENT_FIELD(tensorrt_node_teller, TensorRtNodeTeller, + std::function); + DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int); + DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int); + + // The program transformed by IR analysis phase. + DECL_ARGUMENT_UNIQUE_FIELD(ir_analyzed_program, IrAnalyzedProgram, + framework::proto::ProgramDesc); + + DECL_ARGUMENT_FIELD(fusion_statis, FusionStatis, fusion_statis_t); private: - std::unordered_map attrs_; - std::unordered_map> attr_deleters_; + std::unordered_set valid_fields_; }; -#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) -#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \ - if (UNLIKELY(!(field__))) { \ - LOG(ERROR) << "field " << #field__ << " should be set."; \ - return false; \ - } +#define ARGUMENT_CHECK_FIELD(argument__, fieldname__) \ + PADDLE_ENFORCE(argument__->Has(#fieldname__), \ + "the argument field [%s] should be set", #fieldname__); } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc deleted file mode 100644 index 545017da07..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ /dev/null @@ -1,496 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/inference/analysis/dot.h" -#include "paddle/fluid/inference/analysis/node.h" - -namespace paddle { -namespace inference { -namespace analysis { -using ir_node_t = framework::ir::Node; -using ir_graph_t = framework::ir::Graph; - -// It is a better idea that the inputs and outputs of this graph is set manually -// before, but there must be a Pass that helps to prune the unnecessary ops that -// do not contribute to the given targets, so in this pass, analysis and get the -// inputs and outputs is OK. -void DataFlowGraph::Build() { - inputs_.clear(); - outputs_.clear(); - std::unordered_set ins; - std::unordered_set outs; - for (auto &node : nodes.nodes()) { - for (auto *in : node->inlinks) { - ins.insert(in); - } - for (auto *out : node->outlinks) { - outs.insert(out); - } - } - - // The nodes that in ins but not in outs is the graph's inputs - // similarly, the nodes that in outs but not in ins is the graphs' outputs - for (auto *in : ins) { - if (!outs.count(in)) { - inputs_.push_back(in); - } - } - for (auto *out : outs) { - if (!ins.count(out)) { - outputs_.push_back(out); - } - } - - Clean(); -} - -void DataFlowGraph::Build(const framework::proto::ProgramDesc &prog) { - // insert vars - // The `var2id` keeps a map from a variable's name to its Node-id, the Node-id - // will keep updating to its latest alias during the graph-building. - std::unordered_map var2id; - auto &main_block = prog.blocks(framework::kRootBlockIndex); - for (int i = 0; i < main_block.vars_size(); i++) { - const auto &var = main_block.vars(i); - auto *v = nodes.Create(Node::Type::kValue); - v->SetName(var.name()); - v->SetPbDesc(const_cast(static_cast(&var))); - v->SetPbMsg(var.SerializeAsString()); - var2id[var.name()] = v->id(); - } - - // The variables in a SSA can only write once, so if a variable is written - // multiple times(quite common in our ProgramDesc design), multiple alias - // Nodes of this variable will be created, and each will just write once. - - // An set that keep all the names of the variables(the original, not alias) - // that have been written(as outputs). Once an Op's output variable hit the - // set, it should create a new alias and update the global alias for this - // variable. And that make a Data Flow Graph a SSA. - std::unordered_set unique_written_vars; - for (int i = 0; i < main_block.ops_size(); i++) { - const auto &op = main_block.ops(i); - auto *o = nodes.Create(Node::Type::kFunction); - o->SetName(op.type()); - static_cast(o)->SetFuncType(op.type()); - // Link to the original protobuf message's memory, make it easier to - // generate from a data flow graph to fluid ProgramDesc. - o->SetPbDesc(const_cast(static_cast(&op))); - o->SetPbMsg(op.SerializeAsString()); - - // set inputs and outputs - for (int j = 0; j < op.inputs_size(); j++) { - auto &in_var = op.inputs(j); - for (int k = 0; k < in_var.arguments_size(); k++) { - auto *in = nodes.GetMutable(var2id.at(in_var.arguments(k))); - in->outlinks.push_back(o); - o->inlinks.push_back(in); - unique_written_vars.insert(in); - } - } - for (int j = 0; j < op.outputs_size(); j++) { - auto &out_var = op.outputs(j); - for (int k = 0; k < out_var.arguments_size(); k++) { - auto *out = nodes.GetMutable(var2id[out_var.arguments(k)]); - if (unique_written_vars.count(out)) { - // Loop found, for example, a = op(a), use SSA, change to a1 = op(a). - auto *out_alias = nodes.Create(Node::Type::kValue); - out_alias->SetName(out->name()); - out_alias->SetPbDesc(out->pb_desc()); - out_alias->SetPbMsg(out->pb_msg()); - var2id[out_alias->name()] = - out_alias->id(); // update variable's alias Node - VLOG(40) << "loop found in graph, create SSA alias node [" - << out_alias->repr() << "] for [" << out->repr() << "]"; - out = out_alias; - } - out->inlinks.push_back(o); - o->outlinks.push_back(out); - } - } - } - // Analysis and extract the inputs and outputs of this graph. - Build(); -} - -void DataFlowGraph::Build(const framework::ir::Graph &graph) { - // Create nodes - std::unordered_map ir_node_map; - for (auto *ir_node : graph.Nodes()) { - Node *x{nullptr}; - if (ir_node->IsOp()) { - PADDLE_ENFORCE(ir_node->Op()); - VLOG(40) << "get op " << ir_node << " " << ir_node->Name(); - x = nodes.Create(Node::Type::kFunction); - x->attr("ir_node").Pointer() = ir_node; - PADDLE_ENFORCE(ir_node->Op()->Proto()); - x->SetName(ir_node->Op()->Proto()->type()); - x->SetPbMsg(ir_node->Op()->Proto()->SerializeAsString()); - } else if (ir_node->IsVar()) { - // Not create a Node for IR ControlDepVar, considering Inference currently - // just used in single thread scenerio. - VLOG(40) << "get var " << ir_node->Name(); - x = nodes.Create(Node::Type::kValue); - x->attr("ir_node").Pointer() = ir_node; - x->SetName(ir_node->Name()); - // x->SetPbMsg(ir_node->Var()->Proto()->SerializeAsString()); - } else { - PADDLE_THROW("Failed to create an Node from IR, unknown type"); - } - ir_node_map.emplace(ir_node, x); - } - VLOG(40) << "finish creating Nodes"; - - VLOG(40) << "to create edge"; - // Create links - for (auto *ir_node : graph.Nodes()) { - auto it = ir_node_map.find(ir_node); - // Skip ControlDepVar. - if (it == ir_node_map.end()) continue; - auto *node = it->second; - for (auto *x : ir_node->inputs) { - if (!ir_node_map.count(x)) continue; - node->inlinks.push_back(ir_node_map.at(x)); - } - for (auto *x : ir_node->outputs) { - if (!ir_node_map.count(x)) continue; - node->outlinks.push_back(ir_node_map.at(x)); - } - } - - Build(); - PADDLE_ENFORCE(!inputs_.empty(), - "Can't deduce any inputs from the graph, Is the graph empty?"); - - ir_graph = &graph; - VLOG(30) << "finished build from IR"; -} - -void DataFlowGraph::Clean() { - for (auto &node : nodes.nodes()) { - std::unordered_set inlinks_set(node->inlinks.begin(), - node->inlinks.end()); - std::unordered_set outlinks_set(node->outlinks.begin(), - node->outlinks.end()); - if (inlinks_set.size() < node->inlinks.size()) { - node->inlinks.assign(inlinks_set.begin(), inlinks_set.end()); - } - if (outlinks_set.size() < node->outlinks.size()) { - node->outlinks.assign(outlinks_set.begin(), outlinks_set.end()); - } - } -} - -std::string DataFlowGraph::DotString() const { - Dot dot; - - // Add nodes - for (size_t i = 0; i < nodes.size(); i++) { - const Node &node = nodes.Get(i); - dot.AddNode(node.repr(), node.dot_attrs()); - } - - // Add edges - for (size_t i = 0; i < nodes.size(); i++) { - const Node &node = nodes.Get(i); - for (auto &in : node.inlinks) { - dot.AddEdge(in->repr(), node.repr(), {}); - } - } - return dot.Build(); -} - -std::string DataFlowGraph::HumanReadableInfo(bool show_values, - bool show_functions) const { - std::stringstream values, functions; - for (auto &n : nodes.nodes()) { - if (show_values && n->IsValue()) { - values << n->repr() << "\n"; - } - if (show_functions && n->IsFunction()) { - functions << n->repr() << "\n"; - } - } - return "Values:\n" + values.str() + "\n\n" + "Functions:\n" + functions.str(); -} - -// -// NodesBFSIterator -// - -GraphTraits::NodesBFSIterator::NodesBFSIterator( - const std::vector &source) - : queue_(source.begin(), source.end()) {} - -GraphTraits::NodesBFSIterator::NodesBFSIterator( - GraphTraits::NodesBFSIterator &&other) noexcept - : queue_(std::move(other.queue_)), - visited_(std::move(other.visited_)) {} - -GraphTraits::NodesBFSIterator::NodesBFSIterator( - const GraphTraits::NodesBFSIterator &other) - : queue_(other.queue_), visited_(other.visited_) {} - -Node &GraphTraits::NodesBFSIterator::operator*() { - PADDLE_ENFORCE(!queue_.empty()); - return *queue_.front(); -} - -Node *GraphTraits::NodesBFSIterator::operator->() { - PADDLE_ENFORCE(!queue_.empty()); - return queue_.front(); -} - -GraphTraits::NodesBFSIterator & -GraphTraits::NodesBFSIterator::operator=( - const GraphTraits::NodesBFSIterator &other) { - queue_ = other.queue_; - visited_ = other.visited_; - return *this; -} - -GraphTraits::NodesBFSIterator - &GraphTraits::NodesBFSIterator::operator++() { - PADDLE_ENFORCE(!queue_.empty()); - auto *cur = queue_.front(); - visited_.insert(cur); - queue_.pop_front(); - for (auto *output : cur->outlinks) { - if (!visited_.count(output)) { - queue_.push_back(output); - visited_.insert(output); - } - } - return *this; -} - -bool GraphTraits::NodesBFSIterator::operator==( - const GraphTraits::NodesBFSIterator &other) { - if (queue_.empty()) return other.queue_.empty(); - if ((!queue_.empty()) && (!other.queue_.empty())) { - return queue_.front() == other.queue_.front() && - visited_.size() == other.visited_.size(); - // equality of queue and - // visited. Just a light but week implementation. - } - return false; -} - -// -// NodesDFSIterator -// -GraphTraits::NodesDFSIterator::NodesDFSIterator( - const std::vector &source) { - for (auto *x : source) stack_.push(x); -} - -GraphTraits::NodesDFSIterator::NodesDFSIterator( - GraphTraits::NodesDFSIterator &&other) noexcept - : stack_(std::move(other.stack_)), - visited_(std::move(other.visited_)) {} - -GraphTraits::NodesDFSIterator::NodesDFSIterator( - const GraphTraits::NodesDFSIterator &other) - : stack_(other.stack_), visited_(other.visited_) {} - -Node &GraphTraits::NodesDFSIterator::operator*() { - PADDLE_ENFORCE(!stack_.empty()); - return *stack_.top(); -} - -GraphTraits::NodesDFSIterator - &GraphTraits::NodesDFSIterator::operator++() { - if (stack_.empty()) return *this; - visited_.insert(stack_.top()); - auto *cur = stack_.top(); - stack_.pop(); - for (auto *x : cur->outlinks) { - if (!visited_.count(x)) { - stack_.push(x); - visited_.insert(x); - } - } - return *this; -} -bool GraphTraits::NodesDFSIterator::operator==( - const GraphTraits::NodesDFSIterator &other) { - if (stack_.empty()) return other.stack_.empty(); - if ((!stack_.empty()) && (!other.stack_.empty())) { - return stack_.top() == other.stack_.top(); - } - return false; -} - -GraphTraits::NodesDFSIterator & -GraphTraits::NodesDFSIterator::operator=( - const GraphTraits::NodesDFSIterator &other) { - stack_ = other.stack_; - visited_ = other.visited_; - return *this; -} -Node *GraphTraits::NodesDFSIterator::operator->() { - return stack_.top(); -} - -inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) { - return node.inlinks.size() == n; -} - -GraphTraits::NodesTSIterator::NodesTSIterator( - const std::vector &source) { - PADDLE_ENFORCE(!source.empty(), - "Start points of topological sorting should not be empty!"); - // CHECK all the inputs' in-degree is 0 - for (auto *node : source) { - PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0)); - } - - std::unordered_set visited; - std::unordered_set to_visit{source.begin(), source.end()}; - - std::vector inlink_visited; - while (!to_visit.empty()) { - std::vector queue(to_visit.begin(), to_visit.end()); - for (auto *p : queue) { - if (p->deleted()) { - visited.insert(p); - to_visit.erase(p); - continue; - } - inlink_visited.clear(); - - std::copy_if(p->inlinks.begin(), p->inlinks.end(), - std::back_inserter(inlink_visited), - [&](Node *x) { return visited.count(x); }); - - if (inlink_visited.size() == p->inlinks.size()) { - sorted_.push_back(p); - for (auto *_ : p->outlinks) { - if (!visited.count(_)) { - to_visit.insert(_); - } - } - - to_visit.erase(p); - visited.insert(p); - } - } - } -} - -GraphTraits::NodesTSIterator::NodesTSIterator( - const paddle::inference::analysis::GraphTraits< - DataFlowGraph>::NodesTSIterator &other) - : sorted_(other.sorted_), cursor_(other.cursor_) {} - -Node &GraphTraits::NodesTSIterator::operator*() { - PADDLE_ENFORCE_LT(cursor_, sorted_.size()); - return *sorted_[cursor_]; -} - -paddle::inference::analysis::GraphTraits::NodesTSIterator - &GraphTraits::NodesTSIterator::operator++() { - if (++cursor_ >= sorted_.size()) { - sorted_.clear(); - cursor_ = 0; - } - return *this; -} -paddle::inference::analysis::GraphTraits::NodesTSIterator & -GraphTraits::NodesTSIterator::operator=( - const paddle::inference::analysis::GraphTraits< - DataFlowGraph>::NodesTSIterator &other) { - cursor_ = other.cursor_; - sorted_ = other.sorted_; - return *this; -} - -bool GraphTraits::NodesTSIterator::operator==( - const paddle::inference::analysis::GraphTraits< - DataFlowGraph>::NodesTSIterator &other) { - return sorted_ == other.sorted_ && cursor_ == other.cursor_; -} - -Node *GraphTraits::NodesTSIterator::operator->() { - PADDLE_ENFORCE_LT(cursor_, sorted_.size()); - return sorted_[cursor_]; -} - -std::pair, std::vector> -ExtractInputAndOutputOfSubGraph(std::vector &graph) { // NOLINT - std::unordered_set nodes(graph.begin(), graph.end()); - std::unordered_set inputs; - std::unordered_set outputs; - // Input a Value, check whether its inlink is in the subgraph. - auto inlink_in_subgraph = [&](Node *n) { - for (auto *in : n->inlinks) { - if (nodes.count(in)) return true; - } - return false; - }; - - for (auto &node : graph) { - for (auto *in : node->inlinks) { - // The Value that is written by nodes inside a sub-graph shouldn't be the - // input of the sub-graph. - if (!nodes.count(in) && in->type() == Node::Type::kValue && - !inlink_in_subgraph(in)) { - inputs.insert(in); - } - } - for (auto *out : node->outlinks) { - if (!nodes.count(out) && out->type() == Node::Type::kValue) { - outputs.insert(out); - } - } - } - return std::make_pair(std::vector(inputs.begin(), inputs.end()), - std::vector(outputs.begin(), outputs.end())); -} - -// Filter the Intermediate results of the subgraph node. -void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph) { - std::vector op_nodes; - for (auto &node : GraphTraits(*graph).nodes_in_TS()) { - if (node.type() == Node::Type::kValue || node.deleted()) { - continue; - } - op_nodes.push_back(&node); - } - size_t op_num = op_nodes.size(); - for (size_t i = 0; i < op_num; i++) { - if (op_nodes[i]->type() == Node::Type::kFunction) continue; - std::unordered_set follow_up_input_names; - for (size_t j = i + 1; j < op_num; j++) { - for (auto *in : op_nodes[j]->inlinks) { - follow_up_input_names.insert(in->name()); - } - } - std::vector filtered_subgraph_outlinks; - for (auto *out : op_nodes[i]->outlinks) { - if (follow_up_input_names.count(out->name())) { - filtered_subgraph_outlinks.push_back(out); - } else { - out->SetDeleted(); - } - } - // The filtered_subgraph_outlinks may be empty. - op_nodes[i]->outlinks = filtered_subgraph_outlinks; - } -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h deleted file mode 100644 index 437e097acd..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ /dev/null @@ -1,209 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * Data flow graph is an pass that build the basic graph. It contains a graph - * and the iterators that enable the iteration over the graph. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/ir/graph.h" -#include "paddle/fluid/inference/analysis/graph_traits.h" -#include "paddle/fluid/inference/analysis/node.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * DataFlowGraph - A container of Value and Function Nodes. - * - * This is the base graph for any other type of graphs, such as SSA or CFG. - */ -struct DataFlowGraph { - NodeMap nodes; - // inputs and outputs are deduced from the graph. - // Used to interact with IR. - const framework::ir::Graph *ir_graph{nullptr}; - - // Extract inputs and outputs of the graph. - void Build(); - - void Build(const framework::proto::ProgramDesc &prog); - - // Build a graph from ir::Graph. - void Build(const framework::ir::Graph &graph); - - // Get an attribute. - AnyAttr &Attr(const std::string &key) { return attrs_[key]; } - - // Output a DOT graph file for debug. - std::string DotString() const; - - std::string HumanReadableInfo(bool show_values = true, - bool show_functions = true) const; - - const std::vector &inputs() const { - PADDLE_ENFORCE(!inputs_.empty(), - "No inputs are deduced, need to Build() first."); - return inputs_; - } - const std::vector &outputs() const { - PADDLE_ENFORCE(!outputs_.empty(), - "No outputs are deduced, need to Build() first."); - return outputs_; - } - - private: - mutable std::vector inputs_; - mutable std::vector outputs_; - std::unordered_map attrs_; - - // Remove duplicate edges and so on. - void Clean(); -}; - -/* - * An graph trait help to traverse the graph using BFS. - * The BFS start from a graph's inputs, the graph should be fully-connected, so - * that the iterator can reach the end. - */ -template <> -struct GraphTraits { - // BFS iterator on nodes. - struct NodesBFSIterator - : public std::iterator { - NodesBFSIterator() = default; - explicit NodesBFSIterator(const std::vector &source); - NodesBFSIterator(NodesBFSIterator &&other) noexcept; - // NOTE Heavy to use. - NodesBFSIterator(const NodesBFSIterator &other); - - Node &operator*(); - NodesBFSIterator &operator++(); - Node *operator->(); - // TODO(Superjomn) current implementation just compare the first - // element, need to compare the graph and all the elements in the queue and - // set. - NodesBFSIterator &operator=(const NodesBFSIterator &other); - bool operator==(const NodesBFSIterator &other); - bool operator!=(const NodesBFSIterator &other) { return !(*this == other); } - - private: - std::deque queue_; - std::unordered_set visited_; - }; - - // DFS iterator on nodes. - struct NodesDFSIterator - : public std::iterator { - NodesDFSIterator() = default; - NodesDFSIterator(const std::vector &source); - NodesDFSIterator(NodesDFSIterator &&other) noexcept; - NodesDFSIterator(const NodesDFSIterator &other); - - Node &operator*(); - NodesDFSIterator &operator++(); - // TODO(Superjomn) current implementation just compare the first - // element, need to compare the graph and all the elements in the queue and - // set. - NodesDFSIterator &operator=(const NodesDFSIterator &other); - bool operator==(const NodesDFSIterator &other); - bool operator!=(const NodesDFSIterator &other) { return !(*this == other); } - Node *operator->(); - - private: - std::stack stack_; - std::unordered_set visited_; - }; - - // Topological sorting iterator on nodes. - struct NodesTSIterator - : public std::iterator { - NodesTSIterator() = default; - NodesTSIterator(const std::vector &source); - NodesTSIterator(NodesTSIterator &&other) - : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { - other.cursor_ = 0; - } - NodesTSIterator(const NodesTSIterator &other); - - Node &operator*(); - NodesTSIterator &operator++(); - // TODO(Superjomn) current implementation just compare the first - // element, need to compare the graph and all the elements in the queue and - // set. - NodesTSIterator &operator=(const NodesTSIterator &other); - bool operator==(const NodesTSIterator &other); - bool operator!=(const NodesTSIterator &other) { return !(*this == other); } - Node *operator->(); - - private: - std::vector sorted_; - size_t cursor_{0}; - }; - - explicit GraphTraits(const DataFlowGraph &graph) : graph_(graph) {} - - // default use BFS to visit the nodes. - iterator_range nodes() { - return iterator_range(nodes_bfs_begin(), nodes_bfs_end()); - } - iterator_range nodes_in_BFS() { - return iterator_range(nodes_bfs_begin(), nodes_bfs_end()); - } - iterator_range nodes_in_DFS() { - return iterator_range(nodes_dfs_begin(), nodes_dfs_end()); - } - iterator_range nodes_in_TS() { - return iterator_range(nodes_ts_begin(), nodes_ts_end()); - } - - private: - NodesBFSIterator nodes_bfs_begin() { - return NodesBFSIterator(graph_.inputs()); - } - NodesBFSIterator nodes_bfs_end() { return NodesBFSIterator(); } - - NodesDFSIterator nodes_dfs_begin() { - return NodesDFSIterator(graph_.inputs()); - } - NodesDFSIterator nodes_dfs_end() { return NodesDFSIterator(); } - - NodesTSIterator nodes_ts_begin() { return NodesTSIterator(graph_.inputs()); } - NodesTSIterator nodes_ts_end() { return NodesTSIterator(); } - - private: - const DataFlowGraph &graph_; -}; - -// Extract the inputs and outputs of a graph. The inputs and outputs of a -// sub-graph is the inputs nodes and output nodes that doesn't inside the -// sub-graph. -std::pair, std::vector> -ExtractInputAndOutputOfSubGraph(std::vector &graph); // NOLINT - -void FilterRedundantOutputOfSubGraph(DataFlowGraph *graph); -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_tester.cc deleted file mode 100644 index 50ce20621f..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph_tester.cc +++ /dev/null @@ -1,168 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/framework/op_proto_maker.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(DataFlowGraph, BFS) { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - auto dfg = ProgramDescToDFG(desc); - dfg.Build(); - - for (auto* in : dfg.inputs()) { - LOG(INFO) << "inputs: " << in->name() << " " - << static_cast(in->type()); - } - for (auto* out : dfg.outputs()) { - LOG(INFO) << "outputs: " << out->name() << " " - << static_cast(out->type()); - } - - size_t count = 0; - for (auto& node : GraphTraits(dfg).nodes()) { - LOG(INFO) << "visiting " << node.name(); - ++count; - } - ASSERT_EQ(count, dfg.nodes.size()); -} - -TEST(DataFlowGraph, DFS) { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - DataFlowGraph dfg; - dfg.Build(desc); - size_t count = 0; - for (auto& node : GraphTraits(dfg).nodes_in_DFS()) { - LOG(INFO) << "visiting " << node.name(); - ++count; - } - ASSERT_EQ(count, dfg.nodes.size()); -} - -// Topological sorting. -/* - * Graph topology - * inputs: 0, 1, 2 - * 0 -> 4 - * 0 -> 5 - * 1 -> 6 - * 2 -> 7 - * 4 -> 5 - * 4 -> 7 - * 4 -> 3 - * 7 -> 3 - */ -TEST(DataFlowGraph, TS) { - DataFlowGraph graph; - - for (int i = 0; i < 8; i++) { - auto* node = graph.nodes.Create(Node::Type::kValue); - node->SetName("node-" + std::to_string(i)); - } - - auto add_link = [&](int i, int j) { - Node* source = graph.nodes.GetMutable(i); - Node* target = graph.nodes.GetMutable(j); - target->inlinks.push_back(source); - source->outlinks.push_back(target); - }; - - add_link(0, 4); - add_link(0, 5); - add_link(1, 6); - add_link(2, 7); - add_link(4, 5); - add_link(4, 7); - add_link(4, 3); - add_link(7, 3); - graph.Build(); - - auto its = GraphTraits(graph).nodes_in_TS(); - std::vector sorted_ids; - for (auto it = its.begin(); it != its.end(); ++it) { - LOG(INFO) << it->name(); - sorted_ids.push_back(it->id()); - } - - // Assert a occurs prior to b in the sorted_ids. - auto assert_positive_sequence_pair = [&](int a, int b) { - auto a_offset = std::find(sorted_ids.begin(), sorted_ids.end(), a); - auto b_offset = std::find(sorted_ids.begin(), sorted_ids.end(), b); - ASSERT_LT(a_offset, b_offset); - }; - - assert_positive_sequence_pair(2, 7); - assert_positive_sequence_pair(7, 3); - assert_positive_sequence_pair(4, 3); - assert_positive_sequence_pair(0, 4); - assert_positive_sequence_pair(0, 5); - assert_positive_sequence_pair(1, 6); - assert_positive_sequence_pair(4, 5); - assert_positive_sequence_pair(4, 7); -} - -TEST(DataFlowGraph, Build_ProgramDesc) { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - DataFlowGraph graph; - graph.Build(desc); - ASSERT_EQ(graph.nodes.size(), 38UL); -} - -void SetOp(framework::ProgramDesc* prog, const std::string& type, - const std::vector& inputs, - const std::vector& outputs) { - auto* op = prog->MutableBlock(0)->AppendOp(); - op->SetType(type); - op->SetInput("Xs", inputs); - op->SetOutput("Xs", outputs); - op->SetAttr(framework::OpProtoAndCheckerMaker::OpRoleAttrName(), - static_cast(framework::OpRole::kForward)); -} - -TEST(DataFlowGraph, Build_IR_Graph) { - framework::ProgramDesc prog; - for (auto& v : std::vector({"a", "b", "c", "d", "e", "f"})) { - auto* var = prog.MutableBlock(0)->Var(v); - var->SetType(framework::proto::VarType::SELECTED_ROWS); - if (v == "c") { - var->SetPersistable(true); - } - } - - SetOp(&prog, "OP0", std::vector({"a"}), - std::vector({"b"})); - SetOp(&prog, "OP1", std::vector({"a"}), - std::vector({"c"})); - SetOp(&prog, "mul", std::vector({"b", "c"}), - std::vector({"d"})); - SetOp(&prog, "elementwise_add", std::vector({"d", "e"}), - std::vector({"f"})); - - DataFlowGraph graph; - - framework::ir::Graph ir_graph(prog); - - graph.Build(ir_graph); - - ASSERT_EQ(graph.nodes.size(), ir_graph.Nodes().size()); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc deleted file mode 100644 index dbe138514b..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" -#include -#include "paddle/fluid/framework/block_desc.h" -#include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/framework/op_desc.h" -#include "paddle/fluid/framework/proto_desc.h" -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/io.h" - -namespace paddle { -namespace inference { - -namespace analysis { - -using framework::proto::ProgramDesc; - -std::vector ExtractParameters( - const std::vector> &nodes); - -bool DataFlowGraphToFluidPass::Initialize(Argument *argument) { - ANALYSIS_ARGUMENT_CHECK_FIELD(argument) - ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc) - // The transformed_program_desc should inherit all the VarDesc and BlockDesc - // from the original program desc. The operators of the main block(the first - // block) should rewritten by data flow graph. - argument->transformed_program_desc.reset( - new ProgramDesc(*argument->origin_program_desc)); - argument->transformed_program_desc->mutable_blocks(framework::kRootBlockIndex) - ->clear_ops(); - desc_ = argument->transformed_program_desc.get(); - argument_ = argument; - return true; -} - -bool DataFlowGraphToFluidPass::Finalize() { return true; } - -void DataFlowGraphToFluidPass::Run(DataFlowGraph *graph) { - // FilterRedundantOutputOfSubGraph(graph); - for (auto &node : GraphTraits(*graph).nodes_in_TS()) { - if (node.deleted()) continue; - - switch (node.type()) { - case Node::Type::kFunction: { - AddFluidOp(&node); - } break; - case Node::Type::kFunctionBlock: { - AddEngineOp(&node); - } break; - default: - continue; - } - } - - if (argument_->Has(framework::ir::kParamScopeAttr)) { - LOG(WARNING) << "parameter changes in the scope takes effect"; - } - - PADDLE_ENFORCE(argument_->transformed_program_desc.get()); -} - -void DataFlowGraphToFluidPass::AddFluidOp(Node *node) { - PADDLE_ENFORCE(node); - PADDLE_ENFORCE(node->IsFunction()); - PADDLE_ENFORCE(node->pb_desc() || !node->pb_msg().empty(), - "node has invalid protobuf repr."); - - // currently only the main block is analyzed. - PADDLE_ENFORCE(desc_); - auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex); - auto *op = main_block->add_ops(); - - if (node->pb_desc()) { - auto *ori_op = static_cast(node->pb_desc()); - *op = - *ori_op; // copy the attributes, by default, these will not be changed - // by analysis phrase. - // The inputs and outputs of the existing ops are not changed by tensorrt - // subgraph pass. - // NOTE It might be changed by other passes in the long run. - } else { - op->ParseFromString(node->pb_msg()); - } -} - -void CreateTrtEngineOp(Node *node, Argument *argument, - framework::proto::BlockDesc *block) { - PADDLE_ENFORCE(argument->main_dfg.get()); - const DataFlowGraph &graph = *(argument->main_dfg); - static int counter{0}; - PADDLE_ENFORCE(node->IsFunctionBlock()); - framework::OpDesc desc; - auto *func = static_cast(node); - - // collect inputs - std::unordered_set input_names; - std::unordered_set input_names_with_id; - for (auto *x : func->inlinks) { - input_names.insert(x->name()); - input_names_with_id.insert(x->name() + std::to_string(x->id())); - } - desc.SetInput( - "Xs", std::vector(input_names.begin(), input_names.end())); - - std::unordered_set output_names; - std::unordered_set output_names_with_id; - for (auto *x : func->outlinks) { - output_names.insert(x->name()); - output_names_with_id.insert(x->name() + std::to_string(x->id())); - } - - desc.SetOutput( - "Ys", std::vector(output_names.begin(), output_names.end())); - desc.SetType("tensorrt_engine"); - - std::unordered_map output_name_map; - - // The following procedure is used to rename all the intermediate - // variables and the output variables of the subgraph. - // Why we do this? - // During the transition from fluid OP to tensorrt OP, we map - // the input and output Tensor(fluid data structure) of fluid OP - // to the correspondin ITensor (trt data structure) through the - // Tensor name. When we set up ITensor for an variable, we must - // ensure that it has not been set before. - // If there is variable in the fluid graph, which is not only the - // input of a OP, but also the output of a Op, there will be problems. - // So we have to rename the variable in the subgraph to make sure - // it is either an OP's input or an OP's output. - - auto subgraph_nodes = func->subgraph; - for (int index = 0; index < block->ops_size(); index++) { - framework::proto::OpDesc *op = block->mutable_ops(index); - auto correspond_node = subgraph_nodes[index]; - PADDLE_ENFORCE_EQ(correspond_node->name(), op->type()); - - std::unordered_map var2id; - for (auto *in_var : correspond_node->inlinks) { - var2id[in_var->name()] = in_var->id(); - } - // rename for the input variables of op inside subgraph - for (int i = 0; i < op->inputs_size(); i++) { - framework::proto::OpDesc_Var *in_var = op->mutable_inputs(i); - std::vector replaced_names; - for (int k = 0; k < in_var->arguments_size(); k++) { - std::string arg_value = in_var->arguments(k); - std::string arg_value_with_id = - arg_value + std::to_string(var2id[arg_value]); - if (input_names_with_id.count(arg_value_with_id)) { - replaced_names.push_back(arg_value); - } else { - replaced_names.push_back(arg_value_with_id); - } - } - in_var->clear_arguments(); - for (size_t k = 0; k < replaced_names.size(); k++) { - in_var->add_arguments(replaced_names[k]); - } - } - var2id.clear(); - for (auto out_var : correspond_node->outlinks) { - var2id[out_var->name()] = out_var->id(); - } - - // rename for the output variables of op inside subgraph - for (int i = 0; i < op->outputs_size(); i++) { - framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); - std::vector replaced_names; - for (int k = 0; k < out_var->arguments_size(); k++) { - std::string arg_value = out_var->arguments(k); - std::string arg_value_with_id = - arg_value + std::to_string(var2id[arg_value]); - if (output_names_with_id.count(arg_value_with_id)) { - output_name_map[arg_value] = arg_value_with_id; - } - replaced_names.push_back(arg_value_with_id); - } - out_var->clear_arguments(); - for (size_t k = 0; k < replaced_names.size(); k++) { - out_var->add_arguments(replaced_names[k]); - } - } - } - // When tensorrt engine runs at the end of the operation, - // output_mapping help us copy the data from the renamed ITensor - // to Tensor. - std::vector output_mapping; - for (auto name : output_names) { - PADDLE_ENFORCE(output_name_map.count(name) != 0); - output_mapping.push_back(output_name_map[name]); - } - - PADDLE_ENFORCE(!block->vars().empty(), "the block has no var-desc"); - // Set attrs - - SetAttr(desc.Proto(), "subgraph", block->SerializeAsString()); - SetAttr(desc.Proto(), "max_batch_size", argument->Get("max_batch_size")); - SetAttr(desc.Proto(), "workspace_size", argument->Get("workspace_size")); - SetAttr(desc.Proto(), "engine_uniq_key", "trt-" + std::to_string(counter++)); - SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes())); - SetAttr(desc.Proto(), "output_name_mapping", output_mapping); - node->SetPbMsg(desc.Proto()->SerializeAsString()); -} - -std::vector ExtractParameters( - const std::vector> &nodes) { - std::vector parameters; - for (const auto &node : nodes) { - if (!node->IsValue()) continue; - PADDLE_ENFORCE(!node->pb_msg().empty(), "pb_msg should be set first"); - framework::proto::VarDesc var; - var.ParseFromString(node->pb_msg()); - if (var.persistable()) { - parameters.push_back(var.name()); - } - } - return parameters; -} - -void DataFlowGraphToFluidPass::AddEngineOp(Node *node) { - // TODO(Superjomn) Here need to expose some arguments for default setting. - PADDLE_ENFORCE(node->IsFunctionBlock()); - auto *block_node = static_cast(node); - framework::proto::BlockDesc proto; - framework::BlockDesc block_desc(nullptr, &proto); - block_desc.Proto()->set_parent_idx(-1); - block_desc.Proto()->set_idx(0); - VLOG(40) << "origin variable size: " - << argument_->origin_program_desc->blocks(0).vars().size(); - VLOG(40) << "transformed variable size: " - << block_desc.Proto()->vars().size(); - // copy ops. - - for (auto *node : block_node->subgraph) { - auto *op = block_desc.AppendOp(); - PADDLE_ENFORCE(!node->pb_msg().empty()); - op->Proto()->ParseFromString(node->pb_msg()); - } - - *block_desc.Proto()->mutable_vars() = - argument_->origin_program_desc->blocks(0).vars(); - PADDLE_ENFORCE(!block_desc.Proto()->vars().empty()); - CreateTrtEngineOp(node, argument_, block_desc.Proto()); - auto *main_block = desc_->mutable_blocks(framework::kRootBlockIndex); - auto *op = main_block->add_ops(); - PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block"); - op->ParseFromString(node->pb_msg()); -} - -namespace { -class DFG_DebuggerPass : public DFG_GraphvizDrawPass { - public: - using Config = DFG_GraphvizDrawPass::Config; - explicit DFG_DebuggerPass(const Config &config) - : DFG_GraphvizDrawPass(config) {} - - std::string repr() const override { return "dfg-to-fluid-debuger-pass"; } - - bool Finalize() override { return true; } -}; -} // namespace - -AnalysisPass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { - return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( - FLAGS_IA_graphviz_log_root, - "data_flow_graph_to_fluid_graphviz_debugger")); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h deleted file mode 100644 index 891c7226e2..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -/* - * This file implements the transformation from fluid ProgramDesc to data flow - * graph. - */ - -#pragma once - -#include -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" - -namespace paddle { -namespace inference { - -namespace analysis { -class DataFlowGraphToFluidPass final : public DataFlowGraphPass { - public: - DataFlowGraphToFluidPass() = default; - - bool Initialize(Argument *argument) override; - bool Finalize() override; - - void Run(DataFlowGraph *graph) override; - - std::string repr() const override { return "DFG to fluid"; } - std::string description() const override { - return "Transform a DFG to a Fluid ProgramDesc"; - } - - AnalysisPass *CreateGraphvizDebugerPass() const override; - - protected: - // Add a Fluid Op into the ProgramDesc. - void AddFluidOp(Node *node); - // Add a EngineOp into the ProgramDesc. - void AddEngineOp(Node *node); - - private: - framework::proto::ProgramDesc *desc_; - Argument *argument_; -}; -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc deleted file mode 100644 index 4ef381db29..0000000000 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" - -#include -#include -#include -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" -#include "paddle/fluid/inference/io.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(DataFlowGraph, Test) { - Argument argument(FLAGS_inference_model_dir); - - FluidToDataFlowGraphPass pass0; - DataFlowGraphToFluidPass pass1; - ASSERT_TRUE(pass0.Initialize(&argument)); - ASSERT_TRUE(pass1.Initialize(&argument)); - - pass0.Run(argument.main_dfg.get()); - pass1.Run(argument.main_dfg.get()); - - pass0.Finalize(); - pass1.Finalize(); - - LOG(INFO) << argument.main_dfg->nodes.size(); -} - -}; // namespace analysis -}; // namespace inference -}; // namespace paddle diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc deleted file mode 100644 index 8888529a57..0000000000 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc +++ /dev/null @@ -1,59 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -int DFG_GraphvizDrawPass::counter_{0}; - -void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) { - auto content = Draw(graph); - auto dot_path = GenDotPath(); - std::ofstream file(dot_path); - file.write(content.c_str(), content.size()); - file.close(); - - auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png"; - std::string message; - VLOG(30) << "draw to " << png_path; - ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message); -} - -std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) { - Dot dot; - // Add nodes - for (size_t i = 0; i < graph->nodes.size(); i++) { - const Node &node = graph->nodes.Get(i); - if (config_.display_deleted_node || !node.deleted()) { - dot.AddNode(node.repr(), node.dot_attrs()); - } - } - // Add edges - for (size_t i = 0; i < graph->nodes.size(); i++) { - const Node &node = graph->nodes.Get(i); - if (!config_.display_deleted_node && node.deleted()) continue; - for (auto &out : node.outlinks) { - if (!config_.display_deleted_node && out->deleted()) continue; - dot.AddEdge(node.repr(), out->repr(), {}); - } - } - return dot.Build(); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h deleted file mode 100644 index e537bfc0e6..0000000000 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file create an DFG_GraphvizDrawPass which helps to draw a data flow - * graph's structure using graphviz. - */ - -#pragma once - -#include -#include -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/dot.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * Output a dot file and write to some place. - */ -class DFG_GraphvizDrawPass : public DataFlowGraphPass { - public: - struct Config { - Config(const std::string &dir, const std::string &id, - bool display_deleted_node = false) - : dir(dir), id(id), display_deleted_node(display_deleted_node) {} - - // The directory to store the .dot or .png files. - const std::string dir; - // The identifier for this dot file. - const std::string id; - // Whether to display deleted nodes, default false. - const bool display_deleted_node; - }; - - explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {} - - bool Initialize(Argument *argument) override { return true; } - void Run(DataFlowGraph *graph) override; - bool Finalize() override { return true; } - - std::string repr() const override { return "DFG graphviz drawer"; } - std::string description() const override { - return "Debug a DFG by draw with graphviz"; - } - - protected: - // A counter to add a number prefix to the debugger image output so that they - // will sort in the triggered order. - static int counter_; - - // Path of the dot file to output. - std::string GenDotPath() const { - return config_.dir + "/" + std::to_string(counter_++) + "-graph_" + - config_.id + ".dot"; - } - - virtual std::string Draw(DataFlowGraph *graph); - - Config config_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc deleted file mode 100644 index 928be79170..0000000000 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" - -#include -#include -#include -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(DFG_GraphvizDrawPass, dfg_graphviz_draw_pass_tester) { - Argument argument(FLAGS_inference_model_dir); - FluidToDataFlowGraphPass pass0; - ASSERT_TRUE(pass0.Initialize(&argument)); - pass0.Run(argument.main_dfg.get()); - - // auto dfg = ProgramDescToDFG(*argument.origin_program_desc); - - DFG_GraphvizDrawPass::Config config("./", "test"); - DFG_GraphvizDrawPass pass(config); - pass.Initialize(&argument); - pass.Run(argument.main_dfg.get()); - - // test content - std::ifstream file("./0-graph_test.dot"); - ASSERT_TRUE(file.is_open()); - - std::string line; - int no{0}; - while (std::getline(file, line)) { - no++; - } - // DFG is sensitive to ProgramDesc, be careful to change the existing models. - ASSERT_EQ(no, 83); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/dot_tester.cc b/paddle/fluid/inference/analysis/dot_tester.cc index 56ceb9bd5d..c785a312bf 100644 --- a/paddle/fluid/inference/analysis/dot_tester.cc +++ b/paddle/fluid/inference/analysis/dot_tester.cc @@ -16,7 +16,6 @@ #include #include -#include "paddle/fluid/inference/analysis/data_flow_graph.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc deleted file mode 100644 index 2b7d632c83..0000000000 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include -#include - -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -bool FluidToDataFlowGraphPass::Initialize(Argument *argument) { - ANALYSIS_ARGUMENT_CHECK_FIELD(argument); - if (argument->origin_program_desc) { - LOG(WARNING) << "argument's origin_program_desc is already set, might " - "duplicate called"; - } - if (!argument->fluid_model_program_path) { - ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir); - argument->fluid_model_program_path.reset( - new std::string(*argument->fluid_model_dir + "/__model__")); - } - ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path); - auto program = LoadProgramDesc(*argument->fluid_model_program_path); - argument->origin_program_desc.reset( - new framework::proto::ProgramDesc(program)); - - if (!argument->main_dfg) { - argument->main_dfg.reset(new DataFlowGraph); - } - desc_ = argument->origin_program_desc.get(); - return true; -} - -bool FluidToDataFlowGraphPass::Finalize() { return true; } - -void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { - PADDLE_ENFORCE(graph); - PADDLE_ENFORCE(desc_); - graph->Build(*desc_); -} - -namespace { -class DFG_DebuggerPass : public DFG_GraphvizDrawPass { - public: - using Config = DFG_GraphvizDrawPass::Config; - explicit DFG_DebuggerPass(const Config &config) - : DFG_GraphvizDrawPass(config) {} - std::string repr() const override { return "fluid-to-dfg-debuger-pass"; } - bool Finalize() override { return true; } -}; -} - -AnalysisPass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const { - return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( - FLAGS_IA_graphviz_log_root, "fluid-to-dfg-debuger")); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h deleted file mode 100644 index b9e262020e..0000000000 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -/* - * This file implements the transformation from data flow graph to fluid - * ProgramDesc. - */ - -#pragma once - -#include - -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * Transform a FluidDesc to a SSA. - */ -class FluidToDataFlowGraphPass final : public DataFlowGraphPass { - public: - FluidToDataFlowGraphPass() = default; - - bool Initialize(Argument *argument) override; - bool Finalize() override; - - void Run(DataFlowGraph *graph) override; - - std::string repr() const override { return "fluid-to-data-flow-graph"; } - std::string description() const override { - return "transform a fluid ProgramDesc to a data flow graph."; - } - - AnalysisPass *CreateGraphvizDebugerPass() const override; - - private: - framework::proto::ProgramDesc const *desc_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc deleted file mode 100644 index 267a0a84eb..0000000000 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" - -#include -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(FluidToDataFlowGraphPass, Test) { - FluidToDataFlowGraphPass pass; - Argument argument(FLAGS_inference_model_dir); - pass.Initialize(&argument); - pass.Run(argument.main_dfg.get()); - // Analysis is sensitive to ProgramDesc, careful to change the original model. - ASSERT_EQ(argument.main_dfg->nodes.size(), 38UL); - pass.Finalize(); - ASSERT_FALSE(argument.main_dfg->DotString().empty()); - EXPECT_FALSE(argument.main_dfg->inputs().empty()); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc b/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc deleted file mode 100644 index 9f52af670b..0000000000 --- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h" -#include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/inference/io.h" -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/place.h" - -namespace paddle { -namespace inference { -namespace analysis { - -void FluidToIrPass::EnableParamModify(const std::string &model_dir, - const std::string &prog_file, - const std::string ¶m_file) { - PADDLE_ENFORCE(argument_); - argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope); - // Load parameters. - VLOG(30) << "Loading parameters from " << model_dir; - LoadParams(&argument_->Get(framework::ir::kParamScopeAttr), - model_dir, prog_file, param_file); -} - -bool FluidToIrPass::LoadParams(framework::Scope *scope, const std::string &dir, - const std::string &prog_file, - const std::string ¶m_file) { - platform::CPUPlace place; - platform::CPUDeviceContext ctx(place); - framework::Executor executor(place); - PADDLE_ENFORCE(argument_->origin_program_desc.get()); - framework::ProgramDesc program(*argument_->origin_program_desc); - if ((!prog_file.empty()) && (!param_file.empty())) { - LOG(INFO) << "load single model file from " << prog_file; - Load(&executor, scope, prog_file, param_file); - } else if (!dir.empty()) { - LOG(INFO) << "load from dir " << dir; - Load(&executor, scope, dir); - } else { - LOG(ERROR) << "failed to load parameters"; - return false; - } - return true; -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h deleted file mode 100644 index c2599e218a..0000000000 --- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h +++ /dev/null @@ -1,128 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include - -#include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/flags.h" -#include "paddle/fluid/inference/analysis/ir_pass_manager.h" - -namespace paddle { -namespace inference { -namespace analysis { - -static const char kFluidToIrPassesAttr[] = "__fluid_to_ir_passes__"; - -class FluidToIrPass final : public DataFlowGraphPass { - public: - FluidToIrPass() = default; - - bool Initialize(Argument *argument) override { - ANALYSIS_ARGUMENT_CHECK_FIELD(argument); - PADDLE_ENFORCE(argument->Has(kFluidToIrPassesAttr), - "argument need the attr %s", kFluidToIrPassesAttr); - argument_ = argument; - if (argument->origin_program_desc) { - LOG(WARNING) << "argument's origin_program_desc is already set, might " - "duplicate called"; - } - // set fluid model program path - if (!argument->fluid_model_program_path) { - ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_dir); - argument->fluid_model_program_path.reset( - new std::string(*argument->fluid_model_dir + "/__model__")); - } - ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path); - // Load program. - auto program = LoadProgramDesc(*argument->fluid_model_program_path); - argument->origin_program_desc.reset( - new framework::proto::ProgramDesc(program)); - // Create main data flow graph. - if (!argument->main_dfg) { - argument->main_dfg.reset(new DataFlowGraph); - } - argument->Set("ir_program_desc", new ProgramDesc(program)); - - LOG(INFO) << "Loading parameters"; - // Load parameters to argument if needed. - if (argument->fluid_model_dir || (argument->fluid_model_program_path && - argument->fluid_model_param_path)) { -#define SAFE_GET(ATTR) std::string ATTR = argument->ATTR ? *argument->ATTR : ""; - SAFE_GET(fluid_model_dir); - SAFE_GET(fluid_model_program_path); - SAFE_GET(fluid_model_param_path); -#undef SAFE_GET - EnableParamModify(fluid_model_dir, fluid_model_program_path, - fluid_model_param_path); - } - - return true; - } - - bool Finalize() override { return true; } - - void Run(DataFlowGraph *graph) override { - // Call all the IR Passes - IRPassManager ir_passes(argument_->Get("ir_program_desc"), - nullptr); - // Pass the scope from analysis to IR if needed. - if (argument_->Has(framework::ir::kParamScopeAttr)) { - // Here the address is passed, attention that IR doesn't own the scope, so - // the real scope in analysis should live during the IR phase. - ir_passes.graph().Set( - framework::ir::kParamScopeAttr, - new framework::Scope *(&argument_->Get( - framework::ir::kParamScopeAttr))); - } - - if (FLAGS_IA_enable_ir) { - const auto &ir_passes_to_apply = - argument_->Get>(kFluidToIrPassesAttr); - ir_passes.Apply(ir_passes_to_apply); - } - - PADDLE_ENFORCE(argument_->main_dfg.get()); - argument_->main_dfg->Build(ir_passes.graph()); - // inherit the arguments from ir. - if (ir_passes.graph().Has(framework::ir::kFuseStatisAttr)) { - argument_->Set( - framework::ir::kFuseStatisAttr, - new std::unordered_map( - ir_passes.graph().Get>( - framework::ir::kFuseStatisAttr))); - } - } - - void EnableParamModify(const std::string &model_dir, - const std::string &prog_file, - const std::string ¶m_file); - - std::string repr() const override { return "fluid-to-ir-pass"; } - - private: - // Load parameters from a single file or from a directory. - bool LoadParams(framework::Scope *scope, const std::string &dir, - const std::string &prog_file, const std::string ¶m_file); - - private: - Argument *argument_{nullptr}; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/graph_traits.cc b/paddle/fluid/inference/analysis/graph_traits.cc deleted file mode 100644 index 2ea70a1d20..0000000000 --- a/paddle/fluid/inference/analysis/graph_traits.cc +++ /dev/null @@ -1,15 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/graph_traits.h" diff --git a/paddle/fluid/inference/analysis/graph_traits.h b/paddle/fluid/inference/analysis/graph_traits.h deleted file mode 100644 index aed2b1e8e2..0000000000 --- a/paddle/fluid/inference/analysis/graph_traits.h +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file defines the GraphTraits template class that should be specified - * by classes that want to be iteratable by generic graph iterators. - * - * This file also defines the marker class Inverse that is used to iterate over - * graphs in a graph defined, inverse ordering... - */ - -#pragma once - -#include "paddle/fluid/inference/analysis/helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * This class should be specialized by different graph types... - * That's why the base class is empty. - */ -template -struct GraphTraits { - // using NodesBFSIterator = xxx - - // NodesBFSIterator nodes_begin(); - // NodesBFSIterator nodes_end(); -}; - -/* - * Inverse - This class is used as a marker class to tell the graph iterator to - * iterate in a graph defined Inverse order. - */ -template -struct Inverse { - const GraphType &graph; - - explicit Inverse(const GraphType &graph) : graph(graph) {} -}; - -/* - * Provide a partial specialization of GraphTraits so that the inverse of an - * inverse turns into the original graph. - */ -template -struct GraphTraits>> : GraphTraits {}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 5151e2b69a..5511a0481e 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -101,20 +101,20 @@ class OrderedRegistry { public: T *Register(const std::string &name, T *x) { PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); - dic_[name] = data_.size(); - data_.emplace_back(std::unique_ptr(x)); - return data_.back().get(); + dic_[name] = elements_.size(); + elements_.emplace_back(std::unique_ptr(x)); + return elements_.back().get(); } T *Lookup(const std::string &name) { auto it = dic_.find(name); if (it == dic_.end()) return nullptr; - return data_[it->second].get(); + return elements_[it->second].get(); } protected: std::unordered_map dic_; - std::vector> data_; + std::vector> elements_; }; template diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index e76708baf4..fce5e1cac9 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -18,6 +18,8 @@ #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/analysis/argument.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" #include "paddle/fluid/string/pretty_log.h" namespace paddle { @@ -27,21 +29,33 @@ using string::PrettyLogEndl; using string::PrettyLog; using string::Style; -IRPassManager::IRPassManager(const ProgramDesc &program, - framework::Scope *scope) - : program_(program) { - graph_.reset(new framework::ir::Graph(program)); - if (scope) - graph_->Set(framework::ir::kParamScopeAttr, new framework::Scope *(scope)); +IRPassManager::IRPassManager(Argument *argument) { + ARGUMENT_CHECK_FIELD(argument, main_program); + graph_ = std::unique_ptr(new Graph(argument->main_program())); + if (argument->Has("scope")) { + graph_->Set(framework::ir::kParamScopeAttr, + new framework::Scope *( + const_cast(&argument->scope()))); + } + + ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes); + CreatePasses(argument, argument->ir_analysis_passes()); } -void IRPassManager::Apply(const std::vector &passes) { - // Apply all the passes +void IRPassManager::CreatePasses(Argument *argument, + const std::vector &passes) { std::string pre_pass; int pass_num = 0; for (const std::string &pass_name : passes) { - PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass_name); auto pass = framework::ir::PassRegistry::Instance().Get(pass_name); + + // Set some pass attributes. + if (pass_name == "ir_analysis_pass") { + pass->Set("tensorrt_node_teller", + new SubgraphDetector::NodeInsideSubgraphTeller( + argument->tensorrt_node_teller())); + } + if (pass_name == "graph_viz_pass") { std::string dot_file_path = std::to_string(pass_num) + "_ir_" + (pre_pass.empty() ? "origin" : pre_pass) + @@ -49,11 +63,47 @@ void IRPassManager::Apply(const std::vector &passes) { pass->Set("graph_viz_path", new std::string(std::move(dot_file_path))); pass_num++; } - graph_ = pass->Apply(std::move(graph_)); + + if (pass_name == "tensorrt_subgraph_pass") { + PADDLE_ENFORCE(argument->tensorrt_node_teller_valid()); + pass->SetNotOwned("tensorrt_node_teller", + argument->tensorrt_node_teller_ptr()); + pass->Set("workspace_size", new int(argument->tensorrt_workspace_size())); + pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size())); + } + + // graph_ = pass->Apply(std::move(graph_)); pre_pass = pass_name; + + passes_.emplace_back(std::move(pass)); } } +std::unique_ptr IRPassManager::Apply(std::unique_ptr graph) { + if (passes_.empty()) { + return graph; + } + PADDLE_ENFORCE(graph.get()); + // Apply all the passes + for (const auto &pass : passes_) { + PrettyLogEndl(Style::H2(), "--- Running IR pass [%s]", pass->Type()); + graph = pass->Apply(std::move(graph)); + } + return std::move(graph); +} + +framework::proto::ProgramDesc IRPassManager::AcquireProgram( + std::unique_ptr *graph, const ProgramDesc &program) const { + auto pass = + framework::ir::PassRegistry::Instance().Get("graph_to_program_pass"); + + ProgramDesc desc(program); + pass->SetNotOwned("program", &desc); + auto *the_graph = graph->release(); + *graph = pass->Apply(std::unique_ptr(the_graph)); + return *desc.Proto(); +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.h b/paddle/fluid/inference/analysis/ir_pass_manager.h index bb230283b7..983a582649 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.h +++ b/paddle/fluid/inference/analysis/ir_pass_manager.h @@ -20,27 +20,38 @@ * for inference. */ +#pragma once + +#include +#include #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/analysis/argument.h" namespace paddle { namespace inference { namespace analysis { using framework::ProgramDesc; +using framework::ir::Graph; class IRPassManager final { public: - IRPassManager(const ProgramDesc &program, framework::Scope *scope); + explicit IRPassManager(Argument *argument); + + std::unique_ptr Apply(std::unique_ptr graph); - void Apply(const std::vector &passes); + framework::proto::ProgramDesc AcquireProgram( + std::unique_ptr *graph, const ProgramDesc &program) const; framework::ir::Graph &graph() const { return *graph_; } private: - std::unique_ptr graph_; - ProgramDesc program_; + void CreatePasses(Argument *argument, const std::vector &passes); + + std::unique_ptr graph_; + std::vector> passes_; }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt new file mode 100644 index 0000000000..c71cff889e --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/CMakeLists.txt @@ -0,0 +1,7 @@ +cc_library(subgraph_detector SRCS subgraph_detector.cc DEPS proto_desc) +cc_library(tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass.cc DEPS subgraph_detector) +set(analysis_deps ${analysis_deps} + subgraph_detector tensorrt_subgraph_pass + CACHE INTERNAL "") + +set(INFER_IR_PASSES ${INFER_IR_PASSES} tensorrt_subgraph_pass CACHE INTERNAL "") diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc similarity index 54% rename from paddle/fluid/inference/analysis/subgraph_splitter.cc rename to paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc index 3688ea15d9..e903ec54cc 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter.cc +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc @@ -12,46 +12,110 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/inference/analysis/subgraph_splitter.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" +#include +#include +#include "paddle/fluid/framework/ir/graph_helper.h" +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/node.h" namespace paddle { namespace inference { namespace analysis { -const char *SubGraphSplitter::kMarkerAttrName = - "_sub_graph_splitter_inside_sub_graph"; +using framework::ir::Node; + +std::pair, std::vector> +ExtractInputAndOutputOfSubGraph(std::vector &graph) { // NOLINT + std::unordered_set nodes(graph.begin(), graph.end()); + std::unordered_set inputs; + std::unordered_set outputs; + // Input a Value, check whether its inlink is in the subgraph. + auto inlink_in_subgraph = [&](Node *n) { + for (auto *in : n->inputs) { + if (nodes.count(in)) return true; + } + return false; + }; + + for (auto &node : graph) { + for (auto *in : node->inputs) { + // The Value that is written by nodes inside a sub-graph shouldn't be the + // input of the sub-graph. + if (!nodes.count(in) && in->IsVar() && !inlink_in_subgraph(in)) { + inputs.insert(in); + } + } + for (auto *out : node->outputs) { + if (!nodes.count(out) && out->IsVar()) { + outputs.insert(out); + } + } + } + return std::make_pair(std::vector(inputs.begin(), inputs.end()), + std::vector(outputs.begin(), outputs.end())); +} + +// Filter the Intermediate results of the subgraph node. +void FilterRedundantOutputOfSubGraph(Graph *graph) { + std::vector op_nodes; + for (auto &node : TopologicalSort(*graph)) { + if (node.IsVar() || Agent(&node).deleted()) { + continue; + } + op_nodes.push_back(&node); + } + size_t op_num = op_nodes.size(); + for (size_t i = 0; i < op_num; i++) { + if (op_nodes[i]->IsOp()) continue; + std::unordered_set follow_up_input_names; + for (size_t j = i + 1; j < op_num; j++) { + for (auto *in : op_nodes[j]->inputs) { + follow_up_input_names.insert(in->Name()); + } + } + std::vector filtered_subgraph_outlinks; + for (auto *out : op_nodes[i]->outputs) { + if (follow_up_input_names.count(out->Name())) { + filtered_subgraph_outlinks.push_back(out); + } else { + Agent(out).set_deleted(true); + } + } + // The filtered_subgraph_outlinks may be empty. + op_nodes[i]->outputs = filtered_subgraph_outlinks; + } +} -std::vector> SubGraphSplitter::operator()() { +std::vector> SubgraphDetector::operator()() { MarkNodesInsideSubGraph(); return ExtractSubGraphs(); } // Mark the output variables inside a subgraph with the func. -inline void MarkOutLinksInSubGraph(const Function *func) { - for (auto *var : func->outlinks) { - var->attr(SubGraphSplitter::kMarkerAttrName).Bool() = true; +inline void MarkOutLinksInSubGraph(const Node *func) { + for (auto *var : func->outputs) { + Agent(var).set_marked(true); } } -void SubGraphSplitter::MarkNodesInsideSubGraph() { - for (auto &node : GraphTraits(*graph_).nodes()) { +void SubgraphDetector::MarkNodesInsideSubGraph() { + for (auto &node : framework::ir::GraphTraits::DFS(*graph_)) { if (node_inside_subgraph_teller_(&node)) { - node.attr(kMarkerAttrName).Bool() = true; - if (node.type() == Node::Type::kFunction) { + Agent(&node).set_marked(true); + if (node.IsOp()) { // If a function is inside the sub-graph, mark all the output variables // to be inside too, so that two marked functions will be inside a same // sub-graph, lets take a example: A_function->var->B_function, if // A_function is marked, var should also be marked, so that B_function // will be in the same sub-graph with A_function if B_function is // marked. - MarkOutLinksInSubGraph(static_cast(&node)); + MarkOutLinksInSubGraph(&node); } } } } -const char *kUnionFindParent = "_sub_graph_splitter_union_find_parent_"; - // Use the Union Find(UF) algorithm to find fully connected sub-graphs, if node // a's output is node b, that is a and b is in the same sub-graph. The UF // algorithm will group them to the same cluster. @@ -60,8 +124,8 @@ using node_map_t = std::unordered_map; int UnionFindGetAncestor(const node_map_t &node_map, size_t id) { int tmp = id; do { - tmp = node_map.at(tmp)->attr(kUnionFindParent).Int32(); - } while (node_map.at(tmp)->attr(kUnionFindParent).Int32() != tmp); + tmp = Agent(node_map.at(tmp)).union_find_parent(); + } while (Agent(node_map.at(tmp)).union_find_parent() != tmp); return tmp; } // Make this two node share the same ancestor. @@ -69,9 +133,9 @@ int UnionFindGetAncestor(const node_map_t &node_map, size_t id) { void UnionFindCombine(const node_map_t &node_map, size_t a, size_t b) { int a_ancestor = UnionFindGetAncestor(node_map, a); int b_ancestor = UnionFindGetAncestor(node_map, b); - node_map.at(b_ancestor)->attr(kUnionFindParent).Int32() = a_ancestor; - node_map.at(a)->attr(kUnionFindParent).Int32() = a_ancestor; - node_map.at(b)->attr(kUnionFindParent).Int32() = a_ancestor; + Agent(node_map.at(b_ancestor)).set_union_find_parent(a_ancestor); + Agent(node_map.at(a)).set_union_find_parent(a_ancestor); + Agent(node_map.at(b)).set_union_find_parent(a_ancestor); } // This is a simple representation of a graph. @@ -195,16 +259,21 @@ void FlexibleDFS(const std::vector &source, bool reverse, } } -std::vector> SubGraphSplitter::ExtractSubGraphs() { +std::vector> SubgraphDetector::ExtractSubGraphs() { // Run the Extract algorithm to find all subgraphs. std::vector marked_nodes; // We use brief_node_map to represent the original graph in order to avoid // changing the original graph. std::unordered_map brief_node_map; - for (auto &node : GraphTraits(*graph_).nodes_in_TS()) { + std::unordered_set valid_node_ids; + for (auto *node : graph_->Nodes()) { + valid_node_ids.insert(node->id()); + } + + for (auto &node : framework::ir::GraphTraits::TS(*graph_)) { brief_node_map[node.id()] = new BriefNode(&node); - if (node.attr(kMarkerAttrName).Bool()) { + if (Agent(&node).marked()) { marked_nodes.push_back(&node); } } @@ -213,26 +282,34 @@ std::vector> SubGraphSplitter::ExtractSubGraphs() { node_map_t node_map; // id to ptr for (auto *n : marked_nodes) { // n's parent == n.id means it is the ancestor - n->attr(kUnionFindParent).Int32() = n->id(); + Agent(n).set_union_find_parent(n->id()); node_map[n->id()] = n; } // create breif node map for (auto &itr : brief_node_map) { - for (Node *node : itr.second->node->inlinks) { - itr.second->inlinks.push_back(brief_node_map[node->id()]); + for (Node *node : itr.second->node->inputs) { + if (!valid_node_ids.count(node->id())) { + LOG(INFO) << "invalid node id " << node->id(); + continue; + } + itr.second->inlinks.push_back(brief_node_map.at(node->id())); } - for (Node *node : itr.second->node->outlinks) { - itr.second->outlinks.push_back(brief_node_map[node->id()]); + for (Node *node : itr.second->node->outputs) { + if (!valid_node_ids.count(node->id())) { + LOG(INFO) << "invalid node id " << node->id(); + continue; + } + itr.second->outlinks.push_back(brief_node_map.at(node->id())); } } for (auto &itr : brief_node_map) { BriefNode *brief_node = itr.second; - if (!brief_node->node->attr(kMarkerAttrName).Bool()) { - VLOG(40) << brief_node->node->id() << " node not a trt candicate."; + if (!Agent(brief_node->node).marked()) { + VLOG(4) << brief_node->node->id() << " node not a trt candidate."; continue; } @@ -254,7 +331,7 @@ std::vector> SubGraphSplitter::ExtractSubGraphs() { std::unordered_set contract_nodes; for (auto *out : brief_node->outlinks) { // must be an trt candidate - if (!out->node->attr(kMarkerAttrName).Bool()) continue; + if (!Agent(out->node).marked()) continue; // get all dst input nodes except src. std::vector source_nodes; for (auto *n : out->inlinks) { @@ -289,9 +366,8 @@ std::vector> SubGraphSplitter::ExtractSubGraphs() { std::unordered_map> clusters; for (auto *n : marked_nodes) { - if (n->type() == Node::Type::kFunction) { - clusters[UnionFindGetAncestor(node_map, - n->attr(kUnionFindParent).Int32())] + if (n->IsOp()) { + clusters[UnionFindGetAncestor(node_map, Agent(n).union_find_parent())] .push_back(n); } } @@ -304,28 +380,59 @@ std::vector> SubGraphSplitter::ExtractSubGraphs() { return result; } -void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } +void SubGraphFuser::operator()() { ReplaceNodesWithSubGraphs(); } + +void RemoveIntermediateOutputInSubgraph(const std::vector &subgraph, + Graph *graph, + std::vector *outputs) { + std::unordered_set subgraph_set(subgraph.begin(), subgraph.end()); + std::unordered_set valid_output; + + for (auto *output : *outputs) { + int num_used = 0; + for (auto *node : output->outputs) { + if (!subgraph_set.count(node)) ++num_used; + if (num_used > 0) valid_output.insert(output); + } + } + + outputs->assign(valid_output.begin(), valid_output.end()); +} + +void DetachDeletedNodes(framework::ir::Graph *graph) { + std::unordered_set nodes; + for (auto *node : graph->Nodes()) { + if (Agent(node).deleted()) { + node->inputs.clear(); + node->outputs.clear(); + } + } +} -void SubGraphFuse::ReplaceNodesWithSubGraphs() { - auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); +void SubGraphFuser::ReplaceNodesWithSubGraphs() { + auto subgraphs = SubgraphDetector(graph_, node_inside_subgraph_teller_)(); for (auto &subgraph : subgraphs) { - if (subgraph.size() <= argument_->Get("minimum_subgraph_size")) - continue; + if (subgraph.size() <= min_subgraph_size_) continue; + LOG(INFO) << "detect a subgraph size " << subgraph.size(); std::unordered_set subgraph_uniq(subgraph.begin(), subgraph.end()); // replace this sub-graph with the first node. Two steps: 1. Create a Block // Node that contains this subgraph 2. Mark the nodes inside the sub-graph // as deleted. 3. Replace the deleted node with the new Block Node. - auto *block_node = static_cast( - graph_->nodes.Create(Node::Type::kFunctionBlock)); + framework::OpDesc empty_desc; + empty_desc.SetType("tensorrt_engine"); + auto *block_node = graph_->CreateOpNode(&empty_desc); + Agent(block_node).set_subgraph({}); auto io = ExtractInputAndOutputOfSubGraph(subgraph); - block_node->inlinks = std::move(io.first); - block_node->outlinks = std::move(io.second); + block_node->inputs = std::move(io.first); + block_node->outputs = std::move(io.second); + + RemoveIntermediateOutputInSubgraph(subgraph, graph_, &block_node->outputs); for (auto *node : subgraph) { // TODO(Superjomn) need a unified mechanism to treat deleted node in each // pass. - node->SetDeleted(); - block_node->subgraph.push_back(node); + Agent(node).set_deleted(true); + Agent(block_node).subgraph()->push_back(node); } // Change all the sub-graph's inputs and outputs corresponding inlink and @@ -339,16 +446,92 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() { std::unordered_set uniq(nodes.begin(), nodes.end()); nodes.assign(uniq.begin(), uniq.end()); }; - for (auto *i : block_node->inlinks) { - inlink_or_outlink_cleaner(i->outlinks); + for (auto *i : block_node->inputs) { + inlink_or_outlink_cleaner(i->outputs); } - for (auto *&o : block_node->outlinks) { - inlink_or_outlink_cleaner(o->inlinks); + for (auto *&o : block_node->outputs) { + inlink_or_outlink_cleaner(o->inputs); } } + // DetachDeletedNodes(graph_); FilterRedundantOutputOfSubGraph(graph_); } +inline bool CheckNodeIndegreeEquals(const Node &node, size_t n) { + return node.inputs.size() == n; +} + +NodesTSIterator::NodesTSIterator(const std::vector &source) { + PADDLE_ENFORCE(!source.empty(), + "Start points of topological sorting should not be empty!"); + // CHECK all the inputs' in-degree is 0 + for (auto *node : source) { + PADDLE_ENFORCE(CheckNodeIndegreeEquals(*node, 0)); + } + + std::unordered_set visited; + std::unordered_set to_visit{source.begin(), source.end()}; + + std::vector inlink_visited; + while (!to_visit.empty()) { + std::vector queue(to_visit.begin(), to_visit.end()); + for (auto *p : queue) { + if (Agent(p).deleted()) { + visited.insert(p); + to_visit.erase(p); + } + + inlink_visited.clear(); + + std::copy_if(p->inputs.begin(), p->inputs.end(), + std::back_inserter(inlink_visited), + [&](Node *x) -> bool { return visited.count(x) != 0; }); + + if (inlink_visited.size() == p->inputs.size()) { + sorted_.push_back(p); + for (auto *_ : p->outputs) { + if (!visited.count(_)) { + to_visit.insert(_); + } + } + + to_visit.erase(p); + visited.insert(p); + } + } + } +} + +NodesTSIterator::NodesTSIterator(const NodesTSIterator &other) + : sorted_(other.sorted_), cursor_(other.cursor_) {} + +Node &NodesTSIterator::operator*() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return *sorted_[cursor_]; +} + +NodesTSIterator &NodesTSIterator::operator++() { + if (++cursor_ >= sorted_.size()) { + sorted_.clear(); + cursor_ = 0; + } + return *this; +} +NodesTSIterator &NodesTSIterator::operator=(const NodesTSIterator &other) { + cursor_ = other.cursor_; + sorted_ = other.sorted_; + return *this; +} + +bool NodesTSIterator::operator==(const NodesTSIterator &other) { + return sorted_ == other.sorted_ && cursor_ == other.cursor_; +} + +Node *NodesTSIterator::operator->() { + PADDLE_ENFORCE_LT(cursor_, sorted_.size()); + return sorted_[cursor_]; +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h new file mode 100644 index 0000000000..ea88edd042 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h @@ -0,0 +1,182 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the the class to partition a graph. + */ + +#pragma once + +#include +#include "paddle/fluid/framework/ir/graph.h" +#include "paddle/fluid/framework/ir/graph_traits.h" +#include "paddle/fluid/framework/ir/node.h" +#include "paddle/fluid/inference/analysis/argument.h" +#include "paddle/fluid/inference/analysis/helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::ir::Graph; + +const char kIsFunctionNode[] = "__is_function_node__"; +const char kFunctionNodeSubGraph[] = "__function_node_sub_graph__"; +const char kSubgraphSplitterMarkerAttrName[] = + "_sub_graph_splitter_inside_sub_graph"; + +/* + * Detect the nodes in a sub-graph that meet some conditions. This class doesn't + * modify the graph. + */ +class SubgraphDetector { + public: + // Tell whether a node is inside a sub-graph. + using NodeInsideSubgraphTeller = + std::function; + + SubgraphDetector(Graph *graph, const NodeInsideSubgraphTeller &teller) + : graph_(graph), node_inside_subgraph_teller_(teller) {} + + std::vector> operator()(); + + protected: + // Mark the nodes inside the accepted sub-graph using + // node_inside_subgraph_teller. + void MarkNodesInsideSubGraph(); + + // Merge the marked nodes into sub-graphs and return the sub-graphs. + std::vector> ExtractSubGraphs(); + + private: + Graph *graph_; + NodeInsideSubgraphTeller node_inside_subgraph_teller_; +}; + +/* + * SubGraphFuser - Replace some nodes with the sub-graph node they are inside. + * To some extent, the TensorRT engine is just a fusion op for a model. + */ +class SubGraphFuser { + public: + using NodeInsideSubgraphTeller = SubgraphDetector::NodeInsideSubgraphTeller; + + SubGraphFuser(Graph *graph, const NodeInsideSubgraphTeller &teller, + int min_subgraph_size) + : graph_(graph), + node_inside_subgraph_teller_(teller), + min_subgraph_size_{min_subgraph_size} {} + + // The main method which run all the logic. + void operator()(); + + protected: + // Remove the nodes inside sub-graphs and replace with the SubGraphNode. + void ReplaceNodesWithSubGraphs(); + + private: + Graph *graph_; + NodeInsideSubgraphTeller node_inside_subgraph_teller_; + int min_subgraph_size_; +}; + +struct NodeWrapper { + bool deleted{false}; + bool marked{false}; + int union_find_parent{-1}; + std::vector subgraph; +}; + +/* + * ir::Node agent for subgraph detector. + */ +struct Agent { + explicit Agent(framework::ir::Node *x) : x_(x) {} + + NodeWrapper &wrapper() { + if (!x_->IsWrappedBy()) { + x_->WrappedBy(new NodeWrapper); + } + return x_->template Wrapper(); + } + + bool deleted() { return wrapper().deleted; } + void set_deleted(bool x) { wrapper().deleted = x; } + + bool marked() { return wrapper().marked; } + void set_marked(bool x) { wrapper().marked = x; } + + void set_subgraph(const std::vector &x) { + wrapper().subgraph = x; + } + + int union_find_parent() { return wrapper().union_find_parent; } + void set_union_find_parent(int v) { wrapper().union_find_parent = v; } + + std::vector *subgraph() { return &wrapper().subgraph; } + std::vector &inputs() { return x_->inputs; } + std::vector &outputs() { return x_->outputs; } + + private: + framework::ir::Node *x_; +}; + +// Topological sorting iterator on nodes. +struct NodesTSIterator + : public std::iterator { + NodesTSIterator() = default; + explicit NodesTSIterator(const std::vector &source); + NodesTSIterator(NodesTSIterator &&other) + : sorted_(std::move(other.sorted_)), cursor_(other.cursor_) { + other.cursor_ = 0; + } + NodesTSIterator(const NodesTSIterator &other); + + framework::ir::Node &operator*(); + NodesTSIterator &operator++(); + // TODO(Superjomn) current implementation just compare the first + // element, need to compare the graph and all the elements in the queue and + // set. + NodesTSIterator &operator=(const NodesTSIterator &other); + bool operator==(const NodesTSIterator &other); + bool operator!=(const NodesTSIterator &other) { return !(*this == other); } + framework::ir::Node *operator->(); + + private: + std::vector sorted_; + size_t cursor_{0}; +}; + +// The nodes those have no input will be treated as start points. +static std::vector ExtractStartPoints(const Graph &g) { + std::vector result; + for (auto *node : g.Nodes()) { + if (node->inputs.empty()) { + result.push_back(node); + } + } + return result; +} + +static iterator_range TopologicalSort(const Graph &g) { + auto start_points = ExtractStartPoints(g); + PADDLE_ENFORCE(!start_points.empty()); + NodesTSIterator x(start_points); + return iterator_range(NodesTSIterator(start_points), + NodesTSIterator()); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc new file mode 100644 index 0000000000..f27347b9d1 --- /dev/null +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -0,0 +1,220 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h" +#include +#include +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::ir::Node; + +std::vector ExtractParameters( + const std::unordered_set &nodes); + +std::unique_ptr analysis::TensorRtSubgraphPass::ApplyImpl( + + std::unique_ptr graph) const { + framework::ir::FusePassBase::Init("tensorrt_subgraph_pass", graph.get()); + + auto teller = + Get("tensorrt_node_teller"); + + SubGraphFuser fuser(graph.get(), teller, 2 /*min subgraph size*/); + fuser(); + + for (auto *node : graph->Nodes()) { + if (node->IsOp() && !Agent(node).subgraph()->empty()) { + CreateTensorRTOp(node, graph.get()); + + std::unordered_set nodes2remove( + Agent(node).subgraph()->begin(), Agent(node).subgraph()->end()); + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + } + } + + std::unordered_set nodes2remove; + for (auto *node : graph->Nodes()) { + if (node->IsOp() && Agent(node).deleted()) { + nodes2remove.insert(node); + } + } + framework::ir::GraphSafeRemoveNodes(graph.get(), nodes2remove); + + return graph; +} + +void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, + Graph *graph) const { + auto *op_desc = node->Op(); + static int counter{0}; + auto &subgraph = *Agent(node).subgraph(); + PADDLE_ENFORCE(!subgraph.empty()); + + // An fake block desc. + framework::proto::BlockDesc block_proto; + framework::BlockDesc block_desc(nullptr, &block_proto); + block_desc.Proto()->set_parent_idx(-1); + block_desc.Proto()->set_idx(0); + for (auto *node : subgraph) { + auto *op = block_desc.AppendOp(); + *op->Proto() = *node->Op()->Proto(); + } + + // collect inputs + std::unordered_set input_names; + std::unordered_set input_names_with_id; + for (auto *x : node->inputs) { + input_names.insert(x->Name()); + input_names_with_id.insert(x->Name() + std::to_string(x->id())); + } + op_desc->SetInput( + "Xs", std::vector(input_names.begin(), input_names.end())); + + std::unordered_set output_names; + std::unordered_set output_names_with_id; + for (auto *x : node->outputs) { + output_names.insert(x->Name()); + output_names_with_id.insert(x->Name() + std::to_string(x->id())); + } + + op_desc->SetOutput( + "Ys", std::vector(output_names.begin(), output_names.end())); + op_desc->SetType("tensorrt_engine"); + + std::unordered_map output_name_map; + + // The following procedure is used to rename all the intermediate + // variables and the output variables of the subgraph. + // Why we do this? + // During the transition from fluid OP to tensorrt OP, we map + // the input and output Tensor(fluid data structure) of fluid OP + // to the corresponding ITensor (trt data structure) through the + // Tensor name. When we set up ITensor for an variable, we must + // ensure that it has not been set before. + // If there is variable in the fluid graph, which is not only the + // input of a OP, but also the output of a Op, there will be problems. + // So we have to rename the variable in the subgraph to make sure + // it is either an OP's input or an OP's output. + + auto &subgraph_nodes = *Agent(node).subgraph(); + for (int index = 0; index < block_desc.OpSize(); index++) { + framework::proto::OpDesc *op = block_desc.Op(index)->Proto(); + auto correspond_node = subgraph_nodes[index]; + PADDLE_ENFORCE_EQ(correspond_node->Name(), op->type()); + + std::unordered_map var2id; + for (auto *in_var : correspond_node->inputs) { + var2id[in_var->Name()] = in_var->id(); + } + // rename for the input variables of op inside subgraph + for (int i = 0; i < op->inputs_size(); i++) { + // one input + auto *in_var = op->mutable_inputs(i); + std::vector replaced_names; + for (int k = 0; k < in_var->arguments_size(); k++) { // all the arguments + std::string arg_value = in_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (input_names_with_id.count(arg_value_with_id)) { + replaced_names.push_back(arg_value); + } else { + replaced_names.push_back(arg_value_with_id); + } + } + in_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + in_var->add_arguments(replaced_names[k]); + } + } + var2id.clear(); + for (auto out_var : correspond_node->outputs) { + var2id[out_var->Name()] = out_var->id(); + } + + // rename for the output variables of op inside subgraph + for (int i = 0; i < op->outputs_size(); i++) { + framework::proto::OpDesc_Var *out_var = op->mutable_outputs(i); + std::vector replaced_names; + for (int k = 0; k < out_var->arguments_size(); k++) { + std::string arg_value = out_var->arguments(k); + std::string arg_value_with_id = + arg_value + std::to_string(var2id[arg_value]); + if (output_names_with_id.count(arg_value_with_id)) { + output_name_map[arg_value] = arg_value_with_id; + } + replaced_names.push_back(arg_value_with_id); + } + out_var->clear_arguments(); + for (size_t k = 0; k < replaced_names.size(); k++) { + out_var->add_arguments(replaced_names[k]); + } + } + } + + // When tensorrt engine runs at the end of the operation, + // output_mapping help us copy the data from the renamed ITensor + // to Tensor. + std::vector output_mapping; + for (auto name : output_names) { + // LOG(INFO) << name << " " << output_name_map.size(); + PADDLE_ENFORCE(output_name_map.count(name) != 0); + output_mapping.push_back(output_name_map[name]); + } + + *block_desc.Proto()->mutable_vars() = + const_cast(&graph->program()) + ->Proto() + ->blocks(0) + .vars(); + PADDLE_ENFORCE(!block_desc.Proto()->vars().empty(), + "the block has no var-desc"); + PADDLE_ENFORCE(!output_mapping.empty()); + // Set attrs + SetAttr(op_desc->Proto(), "subgraph", + block_desc.Proto()->SerializeAsString()); + SetAttr(op_desc->Proto(), "max_batch_size", Get("max_batch_size")); + SetAttr(op_desc->Proto(), "workspace_size", Get("workspace_size")); + SetAttr(op_desc->Proto(), "engine_uniq_key", + "trt-" + std::to_string(counter++)); + SetAttr(op_desc->Proto(), "parameters", ExtractParameters(graph->Nodes())); + SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); +} + +std::vector ExtractParameters( + const std::unordered_set &nodes) { + std::vector parameters; + for (const auto &node : nodes) { + if (!node->IsVar()) continue; + if (node->Var()->Persistable()) { + parameters.push_back(node->Name()); + } + } + return parameters; +} + +} // namespace analysis +} // namespace inference +} // namespace paddle + +REGISTER_PASS(tensorrt_subgraph_pass, + paddle::inference::analysis::TensorRtSubgraphPass) + .RequirePassAttr("tensorrt_node_teller") + .RequirePassAttr("max_batch_size") + .RequirePassAttr("workspace_size"); diff --git a/paddle/fluid/inference/analysis/model_store_pass_tester.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h similarity index 55% rename from paddle/fluid/inference/analysis/model_store_pass_tester.cc rename to paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h index d6493fc25e..502353b95f 100644 --- a/paddle/fluid/inference/analysis/model_store_pass_tester.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h @@ -12,31 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/model_store_pass.h" - -#include -#include -#include "paddle/fluid/inference/analysis/analyzer.h" +#pragma once +#include +#include "paddle/fluid/framework/ir/pass.h" namespace paddle { namespace inference { namespace analysis { -DEFINE_string(inference_model_dir, "", "Model path"); - -TEST(DFG_StorePass, test) { - Analyzer analyzer; - Argument argument(FLAGS_inference_model_dir); - argument.model_output_store_path.reset( - new std::string("./_dfg_store_pass_tmp")); - // disable storage in alalyzer - FLAGS_IA_output_storage_path = ""; - analyzer.Run(&argument); +class TensorRtSubgraphPass : public framework::ir::FusePassBase { + public: + std::unique_ptr ApplyImpl( + std::unique_ptr graph) const override; - ModelStorePass pass; - pass.Initialize(&argument); - pass.Run(argument.main_dfg.get()); -} + private: + void CreateTensorRTOp(framework::ir::Node *x, + framework::ir::Graph *graph) const; + void CleanIntermediateOutputs(framework::ir::Node *node); +}; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/model_store_pass.cc b/paddle/fluid/inference/analysis/model_store_pass.cc deleted file mode 100644 index 4f40a7a1ad..0000000000 --- a/paddle/fluid/inference/analysis/model_store_pass.cc +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include - -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/analysis/argument.h" -#include "paddle/fluid/inference/analysis/model_store_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -void ModelStorePass::Run(DataFlowGraph *x) { - if (!argument_->fluid_model_param_path) { - PADDLE_ENFORCE_NOT_NULL(argument_->fluid_model_dir); - argument_->fluid_model_param_path.reset( - new std::string(*argument_->fluid_model_dir + "param")); - } - PADDLE_ENFORCE_NOT_NULL(argument_->model_output_store_path); - // Directly copy param file to destination. - std::stringstream ss; - // NOTE these commands only works on linux. - ss << "mkdir -p " << *argument_->model_output_store_path; - VLOG(30) << "run command: " << ss.str(); - PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0); - ss.str(""); - - ss << "cp " << *argument_->fluid_model_dir << "/*" - << " " << *argument_->model_output_store_path; - VLOG(30) << "run command: " << ss.str(); - PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0); - - // Store program - PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc, - "program desc is not transformed, should call " - "DataFlowGraphToFluidPass first."); - VLOG(30) << "store analyzed program to " - << *argument_->model_output_store_path; - const std::string program_output_path = - *argument_->model_output_store_path + "/__model__"; - std::ofstream file(program_output_path, std::ios::binary); - PADDLE_ENFORCE(file.is_open(), "failed to open %s to write.", - program_output_path); - const std::string serialized_message = - argument_->transformed_program_desc->SerializeAsString(); - file.write(serialized_message.c_str(), serialized_message.size()); -} - -bool ModelStorePass::Finalize() { return true; } - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/node.cc b/paddle/fluid/inference/analysis/node.cc deleted file mode 100644 index 3339b5044d..0000000000 --- a/paddle/fluid/inference/analysis/node.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/node.h" -#include "glog/logging.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace inference { -namespace analysis { - -std::vector Value::dot_attrs() const { - return std::vector({Dot::Attr("style", "filled,rounded"), - Dot::Attr("shape", "box"), - Dot::Attr("fillcolor", "red")}); -} - -std::vector Function::dot_attrs() const { - return std::vector({Dot::Attr("style", "filled,rounded"), - Dot::Attr("shape", "diamond"), - Dot::Attr("fillcolor", "yellow")}); -} - -Node *NodeMap::Create(Node::Type type) { - switch (type) { - case Node::Type::kFunction: - nodes_.emplace_back(new Function); - break; - case Node::Type::kValue: - nodes_.emplace_back(new Value); - break; - case Node::Type::kFunctionBlock: - nodes_.emplace_back(new FunctionBlock); - break; - default: - PADDLE_THROW("Not supported node type."); - } - nodes_.back()->id_ = size() - 1; - return nodes_.back().get(); -} - -Node *NodeMap::GetMutable(size_t id) { - PADDLE_ENFORCE_GT(size(), id); - return nodes_[id].get(); -} - -const Node &NodeMap::Get(size_t id) const { - PADDLE_ENFORCE_GT(size(), id); - return *nodes_[id].get(); -} - -void NodeMap::Delete(size_t id) { - PADDLE_ENFORCE_LT(id, size()); - nodes_[id]->SetDeleted(); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/node.h b/paddle/fluid/inference/analysis/node.h deleted file mode 100644 index af34156bc2..0000000000 --- a/paddle/fluid/inference/analysis/node.h +++ /dev/null @@ -1,244 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file defines the Node class and its subclasses. A Node is the basis - * analysis element in a computation graph. - * There are basically two kinds of nodes, the function node and value node. - */ -#pragma once - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/inference/analysis/device.h" -#include "paddle/fluid/inference/analysis/dot.h" -#include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/platform/variant.h" - -namespace paddle { -namespace inference { -namespace analysis { - -class NodeMap; - -// A helper class to maintain the status from Pass. -struct AnyAttr { - using any_t = - boost::variant; - // NOTE T should be a primary type or a struct combined by several primary - // types. - // NOTE the STL containers should not use here. - // Some usages - // Attr attr; - // attr.Bool() = true; - bool &Bool() { return As(); } - float &Float() { return As(); } - int32_t &Int32() { return As(); } - int64_t &Int64() { return As(); } - void *&Pointer() { return As(); } - std::string &String() { return As(); } - - template - T &As() { - if (type_index_ == typeid(AnyAttr)) { - type_index_ = typeid(T); - any_data_ = T(); - } else { - PADDLE_ENFORCE(type_index_ == typeid(T), "fetch error type"); - } - return boost::get(any_data_); - } - - private: - any_t any_data_; - std::type_index type_index_{typeid(AnyAttr)}; -}; - -/* - * Node Representation. - * - * This is a very important class for analysis. It is the base class of all - * nodes computed by a program that may be used as operands to other nodes. - * Node is the super class of other important classes such as Function and - * Value, some nodes can have a name. - */ -class Node { - public: - // Node type. NOTE the new node types should add here. - enum class Type { kNone = -1, kFunction, kValue, kFunctionBlock }; - - Node() = default; - - // Cast to a subclass type, Function for example. - template - Subclass &As() { - return *dynamic_cast(this); - } - - // Formatted representation of this Node. - virtual std::string repr() const { - return name() + "(" + std::to_string(id()) + ")"; - } - - // DOT node representation. One Node type can customize its own node - // representation. - virtual std::vector dot_attrs() const { - return std::vector({Dot::Attr("style", "filled")}); - } - - // Get an additional attribute and convert it to T data type. NOTE this will - // silently create a new attribute if not exists. - AnyAttr &attr(const std::string &name) const { return attrs_[name]; } - - int id() const { return id_; } - - // The Protobuf description is set/get with a void* to decouple Node interface - // from a specific kind of Protobuf message. - void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; } - void *pb_desc() const { return attr("pb_desc").Pointer(); } - - void SetPbMsg(const std::string &s) { attr("pb_msg").String() = s; } - const std::string &pb_msg() const { return attr("pb_msg").String(); } - - void SetDeleted() { deleted_ = true; } - bool deleted() const { return deleted_; } - - void SetName(const std::string &name) { name_ = name; } - const std::string &name() const { return name_; } - - void SetType(Type type) { type_ = type; } - Type type() const { return type_; } - - // Input links. - std::vector inlinks; - // Output links. - std::vector outlinks; - - // Type checks. - bool IsFunction() const { return type_ == Node::Type::kFunction; } - bool IsValue() const { return type_ == Node::Type::kValue; } - bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; } - - virtual ~Node() {} - - friend class NodeMap; - - PADDLE_DISALLOW_COPY_AND_ASSIGN(Node); - - protected: - // The id number not the name is a node's unique identifier in the computation - // graph. - int id_{-1}; - std::string name_; - Type type_{Type::kNone}; - // Mark this node is deleted by some pass. - bool deleted_{false}; - mutable std::unordered_map attrs_; -}; - -class Function; -/* - * Value represents a value node, it has some attributes including dims, data - * type and so on. - */ -class Value : public Node { - public: - enum class DataType { kInt32, kInt64, kFloat32, kFloat64 }; - using Dims = std::vector; - - void SetDataType(DataType data_type) { data_type_ = data_type; } - DataType data_type() const { return data_type_; } - - void SetDims(const Dims &dims) { dims_ = dims; } - const Dims &dims() const { return dims_; } - - Device device() const { return device_; } - void SetDevice(Device device) { device_ = device; } - - std::vector dot_attrs() const override; - - PADDLE_DISALLOW_COPY_AND_ASSIGN(Value); - - protected: - Value() { SetType(Node::Type::kValue); } - friend class NodeMap; - - private: - DataType data_type_; - Dims dims_; - Device device_; -}; - -/* - * Function represents any kind of executable concepts that takes several Values - * as input, and outputs several Values. - */ -class Function : public Node { - public: - std::vector dot_attrs() const override; - - // Get the operator's type from Desc. - const std::string &func_type() const { return func_type_; } - // Set the operator's type. - void SetFuncType(const std::string &func_type) { func_type_ = func_type; } - - PADDLE_DISALLOW_COPY_AND_ASSIGN(Function); - - protected: - std::string func_type_; - Function() { SetType(Node::Type::kFunction); } - friend class NodeMap; -}; - -/* - * FunctionBlock is a Node that contains a sub-graph multiple Node. - */ -struct FunctionBlock : public Node { - std::string repr() const override { return "block-" + std::to_string(id()); } - std::vector subgraph; - - protected: - FunctionBlock() { SetType(Node::Type::kFunctionBlock); } - friend class NodeMap; -}; - -class NodeMap { - public: - // Create a new node with type. - Node *Create(Node::Type type); - - // Get a node by its id. - Node *GetMutable(size_t id); - - const Node &Get(size_t id) const; - - void Delete(size_t id); - - const std::vector> &nodes() const { return nodes_; } - - size_t size() const { return nodes_.size(); } - - private: - std::vector> nodes_; - std::unordered_map map_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/node_tester.cc b/paddle/fluid/inference/analysis/node_tester.cc deleted file mode 100644 index 9207c15373..0000000000 --- a/paddle/fluid/inference/analysis/node_tester.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/node.h" - -#include - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(NodeAttr, bool) { - AnyAttr x; - x.Bool() = true; - ASSERT_EQ(x.Bool(), true); -} - -TEST(NodeAttr, int32) { - AnyAttr x; - x.Int32() = 32; - ASSERT_EQ(x.Int32(), 32); -} - -TEST(NodeAttr, string) { - AnyAttr x; - x.String() = "Hello"; - ASSERT_EQ(x.String(), "Hello"); -} - -TEST(Node, Attr) { - // Node is an abstract class, use Value instead for they share the same Attr - // logic. - NodeMap nodes; - auto* node = nodes.Create(Node::Type::kValue); - node->attr("v0").Int32() = 2008; - ASSERT_EQ(node->attr("v0").Int32(), 2008); - - node->attr("str").String() = "hello world"; - ASSERT_EQ(node->attr("str").String(), "hello world"); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager.cc b/paddle/fluid/inference/analysis/pass_manager.cc deleted file mode 100644 index ce390ee831..0000000000 --- a/paddle/fluid/inference/analysis/pass_manager.cc +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/pass_manager.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" -#include "paddle/fluid/string/pretty_log.h" - -namespace paddle { -namespace inference { -namespace analysis { - -bool PassManager::Initialize(Argument* argument) { - argument_ = argument; - for (auto& pass : data_) { - VLOG(30) << "Initializing pass [" << pass->repr() << "]"; - if (!pass->Initialize(argument)) { - LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]"; - return false; - } - } - return true; -} - -void DfgPassManager::RunAll() { - PADDLE_ENFORCE(argument_); - VLOG(30) << "Total " << data_.size() << " Analysys passes"; - for (auto& pass : data_) { - string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]", - pass->repr()); - pass->Run(argument_->main_dfg.get()); - } -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager.h b/paddle/fluid/inference/analysis/pass_manager.h deleted file mode 100644 index 412747c4fc..0000000000 --- a/paddle/fluid/inference/analysis/pass_manager.h +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file defines the logic of pass management. The analysis for inference is - * a pipeline of Passes, a PassManager is a agency that helps to manage the - * executation of the Passes. - * - * There are two modes of Passes, the first one is called NodePass and takes - * an Node as input and output; the second one is called DFGPass and takes a - * DFG(Data Flow Graph) as input and output. It is hard to put all the passes in - * the same pipeline, there are two kinds of PassManagers, both takes a DFG as - * input and output a DFG, but the Passes inside are different: - * - * 1. NodePassManager: the passes inside are all NodePasses, it can have - * different graph trivial algorithm, for example, DFS_NodePassManager will - * trigger the passes in depth first order; - * 2. DfgPassManager: the passes inside are all DfgPasses. - */ - -#pragma once - -#include -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/analysis_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * PassManager is the base class for all pass managers, a pass manager has - * several Pass-es registered, and execute them in the linear order. - */ -class PassManager : public OrderedRegistry { - public: - PassManager() = default; - // Call all the passes' Initialize methods. The desc and data_flow_graph are - // globally shared, so pass them as the arguemnts for all the pass managers. - virtual bool Initialize(const Argument& argument) { return false; } - - virtual bool Initialize(Argument* argument); - - // Call all the passes' Finalize methods. - virtual bool Finalize() { - for (auto& pass : data_) { - if (!pass->Finalize()) { - LOG(ERROR) << "Failed to finalize pass [" << pass->repr() << "]"; - return false; - } - } - return true; - } - - // Run all the passes. - virtual void RunAll() = 0; - - // Short identifier. - virtual std::string repr() const = 0; - // Long description. - virtual std::string description() const = 0; - - virtual ~PassManager() = default; - - protected: - Argument* argument_{nullptr}; -}; - -/* - * A pass manager that process a DFG. - */ -class DfgPassManager : public PassManager { - public: - DfgPassManager() = default; - - void RunAll() override; - - virtual ~DfgPassManager() = default; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager_tester.cc b/paddle/fluid/inference/analysis/pass_manager_tester.cc deleted file mode 100644 index 72b0fbf7e5..0000000000 --- a/paddle/fluid/inference/analysis/pass_manager_tester.cc +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include - -#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" -#include "paddle/fluid/inference/analysis/pass_manager.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -class TestDfgPassManager final : public DfgPassManager { - public: - TestDfgPassManager() = default; - virtual ~TestDfgPassManager() = default; - // Short identifier. - std::string repr() const override { return "test-pass-manager"; } - // Long description. - std::string description() const override { return "test doc"; } -}; - -TEST(PassManager, DFG_pass_manager) { - TestDfgPassManager manager; - DFG_GraphvizDrawPass::Config config("./", "dfg.dot"); - - manager.Register("fluid-to-flow-graph", new FluidToDataFlowGraphPass); - manager.Register("graphviz", new DFG_GraphvizDrawPass(config)); - manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass); - - Argument argument(FLAGS_inference_model_dir); - - ASSERT_TRUE(&argument); - ASSERT_TRUE(manager.Initialize(&argument)); - manager.RunAll(); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt new file mode 100644 index 0000000000..a30c27b118 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -0,0 +1,9 @@ +cc_library(ir_graph_build_pass SRCS ir_graph_build_pass.cc DEPS analysis_pass argument ir_pass_manager) +cc_library(ir_analysis_pass SRCS ir_analysis_pass.cc DEPS analysis_pass argument ir_pass_manager) +cc_library(analysis_passes SRCS passes.cc DEPS ir_graph_build_pass ir_analysis_pass) + +set(analysis_deps ${analysis_deps} + ir_graph_build_pass + ir_analysis_pass + analysis_passes + CACHE INTERNAL "") diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc new file mode 100644 index 0000000000..dc4d0906c4 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h" +#include +#include +#include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/ir/pass.h" +#include "paddle/fluid/inference/analysis/ir_pass_manager.h" +#include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h" +#include "paddle/fluid/string/pretty_log.h" + +namespace paddle { +namespace inference { +namespace analysis { + +void IrAnalysisComposePass::RunImpl(Argument *argument) { + ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes); + if (argument->use_tensorrt_valid() && argument->use_tensorrt()) { + InitTensorRTAttrs(argument); + } + ApplyIrPasses(argument); + CollectFusionStatis(argument); +} + +std::string IrAnalysisComposePass::repr() const { + return "ir-analysis-compose-pass"; +} + +void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) { + if (argument->use_tensorrt_valid() && argument->use_tensorrt()) { + LOG(INFO) << "Initing TensorRT pass"; + argument->SetTensorRtNodeTeller([](const framework::ir::Node *node) { + std::unordered_set teller_set( + {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid", + "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad", + "elementwise_add", "dropout"}); + if (!node->IsOp()) return false; + + if (teller_set.count(node->Op()->Type())) { + return true; + } else { + return false; + } + }); + } +} + +void IrAnalysisComposePass::ApplyIrPasses(Argument *argument) { + std::vector passes({ + "ir_graph_build_pass", "ir_analysis_pass", + }); + for (const auto &pass : passes) { + VLOG(2) << "Run pass " << pass; + auto *the_pass = PassRegistry::Global().Retreive(pass); + the_pass->Run(argument); + } +} + +void IrAnalysisComposePass::CollectFusionStatis(Argument *argument) { + if (!argument->main_graph().Has(framework::ir::kFuseStatisAttr)) { + LOG(INFO) << "argument has no fuse statis"; + return; + } + argument->SetFusionStatis( + argument->main_graph().Get( + framework::ir::kFuseStatisAttr)); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/model_store_pass.h b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h similarity index 53% rename from paddle/fluid/inference/analysis/model_store_pass.h rename to paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h index f14b49e09c..53e2ebb003 100644 --- a/paddle/fluid/inference/analysis/model_store_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.h @@ -12,42 +12,35 @@ // See the License for the specific language governing permissions and // limitations under the License. -/* - * This file defines ModelStorePass, which store the runtime DFG to a Paddle - * model in the disk, and that model can be reloaded for prediction. - */ - #pragma once + #include +#include #include "paddle/fluid/inference/analysis/analysis_pass.h" +#include "paddle/fluid/inference/analysis/passes/passes.h" namespace paddle { namespace inference { namespace analysis { -class ModelStorePass : public DataFlowGraphPass { +/* + * The analysis pass to run a list of IR passes (like a function call). + * Currently, it should be the first pass of analysis phase. + */ +class IrAnalysisComposePass : public AnalysisPass { public: - bool Initialize(Argument* argument) override { - if (!argument) { - LOG(ERROR) << "invalid argument"; - return false; - } - argument_ = argument; - return true; - } + void RunImpl(Argument* argument) override; + std::string repr() const override; - void Run(DataFlowGraph* x) override; + private: + void InitTensorRTAttrs(Argument* argument); - std::string repr() const override { return "DFG-store-pass"; } - std::string description() const override { - return R"DD(This file defines ModelStorePass, which store the runtime DFG to a Paddle - model in the disk, and that model can be reloaded for prediction again.)DD"; - } + void ApplyIrPasses(Argument* argument); - bool Finalize() override; + void CollectFusionStatis(Argument* argument); - private: - Argument* argument_{nullptr}; + // Assign a Scope for IR passes to modify the weights. + void AssignScopeToModify(Argument* argument); }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc new file mode 100644 index 0000000000..e327bd39f0 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc @@ -0,0 +1,43 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" +#include "paddle/fluid/inference/analysis/ir_pass_manager.h" + +namespace paddle { +namespace inference { +namespace analysis { + +void IrAnalysisPass::RunImpl(Argument* argument) { + ARGUMENT_CHECK_FIELD(argument, ir_analysis_passes); + ARGUMENT_CHECK_FIELD(argument, main_program); + ARGUMENT_CHECK_FIELD(argument, scope); + + auto* the_graph = argument->ReleaseMainGraph(); + auto graph = std::unique_ptr(the_graph); + + // Apply passes. + IRPassManager the_ir_manager(argument); + graph = the_ir_manager.Apply(std::move(graph)); + PADDLE_ENFORCE_GT(graph->Nodes().size(), 0); + argument->SetIrAnalyzedProgram(new framework::proto::ProgramDesc( + the_ir_manager.AcquireProgram(&graph, argument->main_program()))); + argument->SetMainGraph(graph.release()); +} + +std::string IrAnalysisPass::repr() const { return "ir-analysis-pass"; } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/node_attr_flags.h b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h similarity index 70% rename from paddle/fluid/inference/analysis/node_attr_flags.h rename to paddle/fluid/inference/analysis/passes/ir_analysis_pass.h index a3f70e5419..d8a7449807 100644 --- a/paddle/fluid/inference/analysis/node_attr_flags.h +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h @@ -12,20 +12,25 @@ // See the License for the specific language governing permissions and // limitations under the License. -/* - * This file contains all the flags that declared in Node::Attr. - * - * The Node::Attr is designed to share information between different passes, one - * can get other's attributes in a Node by the flags in this file. - */ #pragma once + +#include +#include "paddle/fluid/inference/analysis/analysis_pass.h" + namespace paddle { namespace inference { namespace analysis { -#define DECLARE_NODE_ATTR(flag__) const char ATTR_##flag__[] = #flag__; - -DECLARE_NODE_ATTR(supported_by_tensorrt) // bool +/* + * Perform IR analysis passes. + * + * It is used to fuse some + */ +class IrAnalysisPass : public AnalysisPass { + public: + void RunImpl(Argument* argument) override; + std::string repr() const override; +}; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc new file mode 100644 index 0000000000..a30fef08b5 --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" +#include +#include +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/enforce.h" + +namespace paddle { +namespace inference { + +extern void ReadBinaryFile(const std::string &filename, std::string *contents); + +namespace analysis { + +void IrGraphBuildPass::RunImpl(Argument *argument) { + if (!argument->scope_valid()) { + argument->SetScope(new framework::Scope); + } + + if (argument->model_dir_valid()) { + auto program = LoadModel(argument->model_dir(), argument->scope_ptr()); + argument->SetMainProgram(program.release()); + } else if (argument->model_program_path_valid() && + argument->model_params_path_valid()) { + auto program = + LoadModel(argument->model_program_path(), argument->model_params_path(), + argument->scope_ptr()); + argument->SetMainProgram(program.release()); + } else { + PADDLE_THROW( + "either model_dir or (program path and parameter path) should be set."); + } + + auto graph = std::unique_ptr(new Graph(argument->main_program())); + argument->SetMainGraph(graph.release()); + argument->main_graph().Set(framework::ir::kParamScopeAttr, + new framework::Scope *(argument->scope_ptr())); +} + +std::unique_ptr IrGraphBuildPass::LoadModel( + const std::string &path, framework::Scope *scope) { + platform::CPUPlace place; + framework::Executor exe(place); + return Load(&exe, scope, path); +} + +std::unique_ptr IrGraphBuildPass::LoadModel( + const std::string &program_path, const std::string ¶ms_path, + framework::Scope *scope) { + platform::CPUPlace place; + framework::Executor exe(place); + return Load(&exe, scope, program_path, params_path); +} + +std::string IrGraphBuildPass::repr() const { return "ir-graph-build-pass"; } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h new file mode 100644 index 0000000000..3291e4f6ad --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h @@ -0,0 +1,46 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Load program and parameter to memory from the disk. + */ +class IrGraphBuildPass : public AnalysisPass { + public: + void RunImpl(Argument *argument) override; + + std::string repr() const override; + + private: + std::unique_ptr LoadModel(const std::string &path, + framework::Scope *scope); + std::unique_ptr LoadModel( + const std::string &program_path, const std::string ¶ms_path, + framework::Scope *scope); + + std::string model_binary_str_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc new file mode 100644 index 0000000000..2ef515f45f --- /dev/null +++ b/paddle/fluid/inference/analysis/passes/passes.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/passes/passes.h" +#include "paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc" +#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" +#include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { +PassRegistry::PassRegistry() { + passes_.emplace("ir_analysis_pass", + std::unique_ptr(new IrAnalysisPass)); + passes_.emplace("ir_graph_build_pass", + std::unique_ptr(new IrGraphBuildPass)); + passes_.emplace("ir_analysis_compose_pass", + std::unique_ptr(new IrAnalysisComposePass)); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc b/paddle/fluid/inference/analysis/passes/passes.h similarity index 61% rename from paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc rename to paddle/fluid/inference/analysis/passes/passes.h index 367c25805d..ea07e0dcbd 100644 --- a/paddle/fluid/inference/analysis/fluid_to_ir_pass_tester.cc +++ b/paddle/fluid/inference/analysis/passes/passes.h @@ -12,24 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/fluid_to_ir_pass.h" +#pragma once -#include -#include "paddle/fluid/inference/analysis/ut_helper.h" -#include "paddle/fluid/inference/api/paddle_inference_pass.h" +#include +#include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { namespace inference { namespace analysis { -TEST(FluidToIrPass, Test) { - FluidToIrPass pass; - Argument argument(FLAGS_inference_model_dir); - argument.Set(kFluidToIrPassesAttr, - new std::vector({"infer_clean_graph_pass"})); - pass.Initialize(&argument); - pass.Run(argument.main_dfg.get()); -} +struct PassRegistry { + PassRegistry(); + + AnalysisPass* Retreive(const std::string& pass_type) { + return passes_[pass_type].get(); + } + + static PassRegistry& Global() { + static auto* x = new PassRegistry; + return *x; + } + + private: + std::unordered_map> passes_; +}; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.h b/paddle/fluid/inference/analysis/subgraph_splitter.h deleted file mode 100644 index 76e4fda024..0000000000 --- a/paddle/fluid/inference/analysis/subgraph_splitter.h +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -/* - * This file defines the the class to partition a graph. - */ - -#pragma once - -#include - -#include "paddle/fluid/inference/analysis/argument.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/inference/analysis/node.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * Detect the nodes in a sub-graph that meet some conditions. This class doesn't - * modify the graph. - */ -class SubGraphSplitter { - public: - static const char *kMarkerAttrName; - // Tell whether a node is inside a sub-graph. - using NodeInsideSubgraphTeller = std::function; - - SubGraphSplitter(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller) - : graph_(graph), node_inside_subgraph_teller_(teller) {} - - std::vector> operator()(); - - protected: - // Mark the nodes inside the accepted sub-graph using - // node_inside_subgraph_teller. - void MarkNodesInsideSubGraph(); - - // Merge the marked nodes into sub-graphs and return the sub-graphs. - std::vector> ExtractSubGraphs(); - - private: - DataFlowGraph *graph_; - NodeInsideSubgraphTeller node_inside_subgraph_teller_; -}; - -/* - * SubGraphFuse - Replace some nodes with the sub-graph node they are inside. To - * some extent, the TensorRT engine is just a fusion op for a model. - */ -class SubGraphFuse { - public: - using NodeInsideSubgraphTeller = SubGraphSplitter::NodeInsideSubgraphTeller; - - SubGraphFuse(DataFlowGraph *graph, const NodeInsideSubgraphTeller &teller, - Argument *argument) - : graph_(graph), - node_inside_subgraph_teller_(teller), - argument_(argument) {} - - // The main method which run all the logic. - void operator()(); - - protected: - // Remove the nodes inside sub-graphs and replace with the SubGraphNode. - void ReplaceNodesWithSubGraphs(); - - private: - DataFlowGraph *graph_; - NodeInsideSubgraphTeller node_inside_subgraph_teller_; - Argument *argument_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc deleted file mode 100644 index e1dc89fab5..0000000000 --- a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc +++ /dev/null @@ -1,92 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/subgraph_splitter.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) { - if (node->type() != Node::Type::kFunction) return false; - const auto* func = static_cast(node); - if (func->func_type() == "elementwise_add" || func->func_type() == "relu" || - func->func_type() == "conv2d" || func->func_type() == "mul" || - func->func_type() == "sigmoid" || func->func_type() == "softmax") { - LOG(INFO) << "sub-graph marked " << node->repr(); - return true; - } - return false; -}; - -TEST(SubGraphSplitter, Split) { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - auto dfg = ProgramDescToDFG(desc); - LOG(INFO) << "spliter\n" << dfg.DotString(); - - ASSERT_GT(dfg.nodes.size(), 5UL); - - auto subgraphs = SubGraphSplitter(&dfg, teller)(); - - // Check the number of the marked nodes. - int marked_nodes = 0; - for (auto& node : dfg.nodes.nodes()) { - if (node->IsFunction() && - node->attr(SubGraphSplitter::kMarkerAttrName).Bool()) { - ++marked_nodes; - } - } - EXPECT_EQ(marked_nodes, 6); - - // For human debug. - for (auto& subgraph : subgraphs) { - LOG(INFO) << "subgraph size " << subgraph.size(); - for (auto* node : subgraph) { - LOG(INFO) << "node " << node->repr(); - } - } - - ASSERT_EQ(subgraphs.size(), 1UL); - // The last sub-graph has 5 Functions. - ASSERT_EQ(subgraphs.back().size(), 6UL); -} - -TEST(SubGraphSplitter, Fuse) { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - auto dfg = ProgramDescToDFG(desc); - Argument argument; - argument.Set("minimum_subgraph_size", new int(3)); - - size_t count0 = dfg.nodes.size(); - - SubGraphFuse fuse(&dfg, teller, &argument); - fuse(); - - int count1 = 0; - for (auto& node : dfg.nodes.nodes()) { - if (node->deleted()) { - LOG(INFO) << "deleted " << node->repr(); - } - count1 += node->deleted(); - } - - // At least one nodes should be deleted. - ASSERT_EQ(dfg.nodes.size(), count0 + 1); // added a new FunctionBlock - ASSERT_EQ(11, count1); -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc deleted file mode 100644 index 174c8513f9..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/analysis/node_attr_flags.h" -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" - -namespace paddle { -namespace inference { -namespace analysis { - -void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) { - for (auto &node : graph->nodes.nodes()) { - node->attr(ATTR_supported_by_tensorrt).Bool() = teller_(node.get()); - } -} - -class DfgDebuggerPass : public DFG_GraphvizDrawPass { - public: - explicit DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) - : DFG_GraphvizDrawPass(config) {} - - std::string repr() const override { - return "tensorrt-subgraph-node-mark-debugger"; - } - - bool Finalize() override { return true; } - - protected: - std::string Draw(DataFlowGraph *graph) override { - Dot dot; - // Add nodes - for (size_t i = 0; i < graph->nodes.size(); i++) { - const Node &node = graph->nodes.Get(i); - if (config_.display_deleted_node || !node.deleted()) { - auto dot_attr = node.dot_attrs(); - if (node.attr(ATTR_supported_by_tensorrt).Bool()) { - dot_attr.assign( - {Dot::Attr{"color", "green"}, Dot::Attr{"style", "filled"}}); - } - dot.AddNode(node.repr(), dot_attr); - } - } - // Add edges - for (size_t i = 0; i < graph->nodes.size(); i++) { - const Node &node = graph->nodes.Get(i); - if (!config_.display_deleted_node && node.deleted()) continue; - for (auto &in : node.inlinks) { - if (!config_.display_deleted_node && in->deleted()) continue; - dot.AddEdge(in->repr(), node.repr(), {}); - } - } - return dot.Build(); - } -}; - -AnalysisPass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const { - DFG_GraphvizDrawPass::Config config(FLAGS_IA_graphviz_log_root, - "tensorrt_marked_node"); - return new DfgDebuggerPass(config); -} -bool TensorRTSubgraphNodeMarkPass::Finalize() { return true; } - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h deleted file mode 100644 index c881a54c24..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* - * This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops - * that supported by TensorRT engine. - */ - -#pragma once - -#include -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/subgraph_splitter.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * Mark the operators that TensorRT engine supports. - */ -class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { - public: - using teller_t = SubGraphSplitter::NodeInsideSubgraphTeller; - - explicit TensorRTSubgraphNodeMarkPass(const teller_t& teller) - : teller_(teller) {} - - bool Initialize(Argument* argument) override { return true; } - - // This class get a sub-graph as input and determine whether to transform this - // sub-graph into TensorRT. - void Run(DataFlowGraph* graph) override; - - std::string repr() const override { return "tensorrt-sub-subgraph-mark"; } - std::string description() const override { - return "tensorrt sub-graph mark pass"; - } - - AnalysisPass* CreateGraphvizDebugerPass() const override; - bool Finalize() override; - - private: - teller_t teller_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc deleted file mode 100644 index c1d932878e..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" - -#include -#include "paddle/fluid/inference/analysis/node_attr_flags.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TEST(TensorRTSubgraphNodeMarkPass, test) { - // init - FluidToDataFlowGraphPass pass; - Argument argument(FLAGS_inference_model_dir); - ASSERT_TRUE(pass.Initialize(&argument)); - pass.Run(argument.main_dfg.get()); - - TensorRTSubgraphNodeMarkPass::teller_t teller = [](const Node* node) { - return node->IsFunction() && - static_cast(node)->func_type() == "mul"; - }; - TensorRTSubgraphNodeMarkPass pass1(teller); - ASSERT_TRUE(pass1.Initialize(&argument)); - pass1.Run(argument.main_dfg.get()); - - int counter{0}; - for (auto& node : argument.main_dfg->nodes.nodes()) { - counter += node->attr(ATTR_supported_by_tensorrt).Bool(); - } - ASSERT_EQ(counter, 2); - LOG(INFO) << counter << " nodes marked"; -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc deleted file mode 100644 index 3aa65f223a..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" -#include "paddle/fluid/inference/analysis/subgraph_splitter.h" - -namespace paddle { -namespace inference { -namespace analysis { - -TensorRTSubGraphPass::TensorRTSubGraphPass( - const TensorRTSubGraphPass::NodeInsideSubgraphTeller &teller) - : node_inside_subgraph_teller_(teller) {} - -void TensorRTSubGraphPass::Run(DataFlowGraph *graph) { - SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)(); - VLOG(40) << "debug info " - << graph->HumanReadableInfo(false /*show_values*/, - true /*show_functions*/); -} - -} // namespace analysis -} // namespace inference - -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h deleted file mode 100644 index 3545da9109..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include "paddle/fluid/inference/analysis/analysis_pass.h" -#include "paddle/fluid/inference/analysis/node.h" -#include "paddle/fluid/inference/analysis/subgraph_splitter.h" - -namespace paddle { -namespace inference { -namespace analysis { - -/* - * Parse the graph and replace TensorRT supported nodes with SubGraphNode - */ -class TensorRTSubGraphPass : public DataFlowGraphPass { - public: - // Tell whether to transform a sub-graph into TensorRT. - using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller; - - explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller); - - bool Initialize(Argument* argument) override { - argument_ = argument; - return true; - } - - // This class get a sub-graph as input and determine whether to transform this - // sub-graph into TensorRT. - void Run(DataFlowGraph* graph) override; - - bool Finalize() override { return true; } - - std::string repr() const override { return "tensorrt-sub-graph"; } - std::string description() const override { return "tensorrt sub graph pass"; } - - private: - NodeInsideSubgraphTeller node_inside_subgraph_teller_; - Argument* argument_; -}; - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc deleted file mode 100644 index 9748e24b06..0000000000 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc +++ /dev/null @@ -1,73 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" - -#include -#include -#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" - -namespace paddle { -namespace inference { -namespace analysis { - -DEFINE_string(dot_dir, "./", ""); - -TEST(TensorRTSubGraphPass, main) { - std::unordered_set teller_set( - {"elementwise_add", "mul", "sigmoid"}); - SubGraphSplitter::NodeInsideSubgraphTeller teller = [&](const Node* node) { - if (node->type() != Node::Type::kFunction) return false; - const auto* func = static_cast(node); - if (teller_set.count(func->func_type())) return true; - return false; - }; - - Argument argument(FLAGS_inference_model_dir); - argument.Set("minimum_subgraph_size", new int(0)); - argument.Set("max_batch_size", new int(3)); - argument.Set("workspace_size", new int(1 << 20)); - argument.Set("precision_mode", new std::string("FP32")); - - DFG_GraphvizDrawPass::Config config{FLAGS_dot_dir, "origin"}; - DFG_GraphvizDrawPass::Config config1{FLAGS_dot_dir, "fusion"}; - - DFG_GraphvizDrawPass dfg_pass(config); - DFG_GraphvizDrawPass dfg_pass1(config1); - FluidToDataFlowGraphPass pass0; - TensorRTSubGraphPass trt_pass(std::move(teller)); - - dfg_pass.Initialize(&argument); - dfg_pass1.Initialize(&argument); - pass0.Initialize(&argument); - trt_pass.Initialize(&argument); - - argument.main_dfg.reset(new DataFlowGraph); - pass0.Run(argument.main_dfg.get()); - dfg_pass.Run(argument.main_dfg.get()); - trt_pass.Run(argument.main_dfg.get()); - dfg_pass1.Run(argument.main_dfg.get()); - - // Check the TRT op's block desc - for (auto& node : argument.main_dfg->nodes.nodes()) { - if (node->IsFunctionBlock()) { - LOG(INFO) << "get function block"; - } - } -} - -} // namespace analysis -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h index 1073a6f686..d599099a80 100644 --- a/paddle/fluid/inference/analysis/ut_helper.h +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -18,8 +18,6 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/helper.h" namespace paddle { @@ -32,29 +30,6 @@ namespace analysis { DEFINE_string(inference_model_dir, "", "inference test model dir"); -static DataFlowGraph ProgramDescToDFG( - const framework::proto::ProgramDesc& desc) { - DataFlowGraph graph; - FluidToDataFlowGraphPass pass; - Argument argument; - argument.fluid_model_dir.reset(new std::string(FLAGS_inference_model_dir)); - argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc)); - pass.Initialize(&argument); - pass.Run(&graph); - pass.Finalize(); - return graph; -} - -class DFG_Tester : public ::testing::Test { - protected: - void SetUp() override { - auto desc = LoadProgramDesc(FLAGS_inference_model_dir + "/__model__"); - argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc)); - } - - Argument argument; -}; - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index fd05c96777..82f74a269a 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -17,17 +17,22 @@ if(APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") endif(APPLE) -set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor ${GLOB_PASS_LIB}) + +set(inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor analysis_predictor ${GLOB_PASS_LIB}) if(WITH_GPU AND TENSORRT_FOUND) - set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine analysis_predictor) + set(inference_deps ${inference_deps} tensorrt_engine tensorrt_converter) endif() cc_library(reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope) -cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope) -cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor) +cc_library(analysis_config SRCS analysis_config.cc DEPS lod_tensor paddle_pass_builder) +cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc) +cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope paddle_pass_builder reset_tensor_array analysis_config analysis_config paddle_pass_builder) +cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor reset_tensor_array analysis_config paddle_pass_builder) cc_library(zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api) cc_library(zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api) + + cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api) @@ -40,20 +45,10 @@ endif() cc_test(test_analysis_predictor SRCS analysis_predictor_tester.cc DEPS analysis_predictor ${inference_deps} ARGS --dirname=${WORD2VEC_MODEL_DIR}) -if(WITH_GPU AND TENSORRT_FOUND) -cc_library(paddle_inference_tensorrt_subgraph_engine - SRCS api_tensorrt_subgraph_engine.cc - DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api tensorrt_converter zero_copy_tensor_dummy) - if(WITH_TESTING) - inference_base_test(test_api_tensorrt_subgraph_engine SRCS api_tensorrt_subgraph_engine_tester.cc DEPS ${inference_deps} - ARGS --dirname=${WORD2VEC_MODEL_DIR}) - endif() -endif() - if (WITH_ANAKIN AND WITH_MKL) # only needed in CI # compile the libinference_anakin_api.a and anakin.so. - cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml scope zero_copy_tensor_dummy) - cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber scope) + cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml zero_copy_tensor_dummy) + cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber zero_copy_tensor_dummy) function(anakin_target target_name) target_compile_options(${target_name} BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) endfunction() diff --git a/paddle/fluid/inference/api/README.md b/paddle/fluid/inference/api/README.md index 20969fac6c..a2d685d723 100644 --- a/paddle/fluid/inference/api/README.md +++ b/paddle/fluid/inference/api/README.md @@ -2,25 +2,15 @@ Paddle inference offers the APIs in `C` and `C++` languages. -One can easily deploy a model trained by Paddle following the steps as below: +You can easily deploy a model trained by Paddle following the steps as below: 1. Optimize the native model; 2. Write some codes for deployment. +## The APIs -Let's explain the steps in detail. - -## Optimize the native Fluid Model - -The native model that get from the training phase needs to be optimized for that. - -- Clean the noise such as the cost operators that do not need inference; -- Prune unnecessary computation fork that has nothing to do with the output; -- Remove extraneous variables; -- Memory reuse for native Fluid executor; -- Translate the model storage format to some third-party engine's, so that the inference API can utilize the engine for acceleration; - -We have an official tool to do the optimization, call `paddle_inference_optimize --help` for more information. +All the released APIs are located in the `paddle_inference_api.h` header file. +The stable APIs are wrapped by `namespace paddle`, the unstable APIs are protected by `namespace paddle::contrib`. ## Write some codes diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc new file mode 100644 index 0000000000..5ccd2dc5ab --- /dev/null +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -0,0 +1,103 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle_pass_builder.h" // NOLINT + +namespace paddle { + +PassStrategy *contrib::AnalysisConfig::pass_builder() const { + PADDLE_ENFORCE( + pass_builder_.get(), + "Should call constructor first, that will init the pass_builder_."); + return pass_builder_.get(); +} + +contrib::AnalysisConfig::AnalysisConfig(bool use_gpu) { + this->use_gpu = use_gpu; + if (use_gpu) { + pass_builder_.reset(new GpuPassStrategy); + } else { + pass_builder_.reset(new CpuPassStrategy); + } +} + +contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { + // fields from Config + model_dir = other.model_dir; + // fields from NativeConfig + use_gpu = other.use_gpu; + device = other.device; + fraction_of_gpu_memory = other.fraction_of_gpu_memory; + prog_file = other.prog_file; + param_file = other.param_file; + specify_input_name = other.specify_input_name; + // fields from this. + enable_ir_optim = other.enable_ir_optim; + use_feed_fetch_ops = other.use_feed_fetch_ops; + use_tensorrt_ = other.use_tensorrt_; + tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; + tensorrt_workspace_size_ = other.tensorrt_workspace_size_; + + if (use_gpu) { + pass_builder_.reset(new GpuPassStrategy( + *static_cast(other.pass_builder()))); + } else { + pass_builder_.reset(new CpuPassStrategy( + *static_cast(other.pass_builder()))); + } +} + +contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) { + // fields from Config + model_dir = other.model_dir; + // fields from NativeConfig + use_gpu = other.use_gpu; + device = other.device; + fraction_of_gpu_memory = other.fraction_of_gpu_memory; + prog_file = other.prog_file; + param_file = other.param_file; + specify_input_name = other.specify_input_name; + // fields from this. + enable_ir_optim = other.enable_ir_optim; + use_feed_fetch_ops = other.use_feed_fetch_ops; + use_tensorrt_ = other.use_tensorrt_; + tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_; + tensorrt_workspace_size_ = other.tensorrt_workspace_size_; + pass_builder_ = std::move(other.pass_builder_); +} + +void contrib::AnalysisConfig::EnableMKLDNN() { +#ifdef PADDLE_WITH_MKLDNN + pass_builder()->EnableMKLDNN(); + use_mkldnn_ = true; +#else + LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN"; + use_mkldnn_ = false; +#endif +} + +void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size, + int max_batch_size) { + use_tensorrt_ = true; + tensorrt_workspace_size_ = workspace_size; + tensorrt_max_batchsize_ = max_batch_size; + // Append after the infer_clean pass. + pass_builder()->InsertPass(1, "tensorrt_subgraph_pass"); +} + +} // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index dd295854a8..7407a1ba2f 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -13,10 +13,13 @@ // limitations under the License. #include "paddle/fluid/inference/api/analysis_predictor.h" +#include +#include #include #include #include #include "paddle/fluid/framework/feed_fetch_method.h" +#include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/naive_executor.h" @@ -24,6 +27,9 @@ #include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h" +#if PADDLE_WITH_TENSORRT +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#endif #include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/profiler.h" @@ -35,6 +41,17 @@ namespace paddle { using contrib::AnalysisConfig; +namespace { +bool IsPersistable(const framework::VarDesc *var) { + if (var->Persistable() && + var->GetType() != framework::proto::VarType::FEED_MINIBATCH && + var->GetType() != framework::proto::VarType::FETCH_LIST) { + return true; + } + return false; +} +} // namespace + bool AnalysisPredictor::Init( const std::shared_ptr &parent_scope, const std::shared_ptr &program) { @@ -52,36 +69,93 @@ bool AnalysisPredictor::Init( // no matter with or without MKLDNN paddle::platform::SetNumThreads(FLAGS_paddle_num_threads); - if (config_.use_gpu) { - place_ = paddle::platform::CUDAPlace(config_.device); - LOG(WARNING) << "ir optimize only supports CPU currently, enable_ir_optim " - "is turned false."; - config_.enable_ir_optim = false; - } else { - place_ = paddle::platform::CPUPlace(); + if (!PrepareScope(parent_scope)) { + return false; + } + if (!CreateExecutor()) { + return false; + } + if (!PrepareProgram(program)) { + return false; + } + + // Prepare executor, create local variables. + if (!PrepareExecutor()) { + return true; } + + // Get the feed_target_names and fetch_target_names + PrepareFeedFetch(); + + return true; +} + +bool AnalysisPredictor::PrepareScope( + const std::shared_ptr &parent_scope) { if (parent_scope) { + PADDLE_ENFORCE_NOT_NULL( + parent_scope, + "Both program and parent_scope should be set in Clone mode."); scope_ = parent_scope; - sub_scope_ = &(parent_scope->NewScope()); + status_is_cloned_ = true; } else { paddle::framework::InitDevices(false); scope_.reset(new paddle::framework::Scope()); + status_is_cloned_ = false; } - - executor_.reset(new paddle::framework::NaiveExecutor(place_)); - + sub_scope_ = &scope_->NewScope(); + return true; +} +bool AnalysisPredictor::PrepareProgram( + const std::shared_ptr &program) { if (!program) { if (!LoadProgramDesc()) return false; - OptimizeInferenceProgram(); + + // Optimize the program, and load parameters and modify them in the + // scope_. + // This will change the scope_ address. + if (config_.enable_ir_optim) { + status_ir_optim_enabled_ = true; + OptimizeInferenceProgram(); + } else { + // If the parent_scope is passed, we assert that the persistable variables + // are already created, so just create the no persistable variables. + + // If not cloned, the parameters should be loaded + // OptimizeInferenceProgram. + // So in both cases, just the local variables are needed to load, not the + // parematers. + executor_->CreateVariables(*inference_program_, 0, true, sub_scope_); + + // Load parameters + LOG(INFO) << "load parameters "; + LoadParameters(); + } } else { + // If the program is passed from external, no need to optimize it, this + // logic is used in the clone scenario. inference_program_ = program; } - executor_->Prepare(scope_.get(), *inference_program_, 0, + executor_->CreateVariables(*inference_program_, 0, false, sub_scope_); + + return true; +} +bool AnalysisPredictor::CreateExecutor() { + if (config_.use_gpu) { + status_use_gpu_ = true; + place_ = paddle::platform::CUDAPlace(config_.device); + } else { + place_ = paddle::platform::CPUPlace(); + } + executor_.reset(new paddle::framework::NaiveExecutor(place_)); + return true; +} +bool AnalysisPredictor::PrepareExecutor() { + executor_->Prepare(sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops); - // Get the feed_target_names and fetch_target_names - PrepareFeedFetch(); + PADDLE_ENFORCE_NOT_NULL(sub_scope_); return true; } @@ -206,54 +280,40 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, return true; } +// NOTE All the members in AnalysisConfig should be copied to Argument. void AnalysisPredictor::OptimizeInferenceProgram() { - LOG(INFO) << "optimize begin"; - FLAGS_IA_enable_ir = config_.enable_ir_optim; - FLAGS_IA_enable_tensorrt_subgraph_engine = false; - FLAGS_IA_output_storage_path = ""; // Don't output the model. + status_program_optimized_ = true; + + argument_.SetUseGPU(config_.use_gpu); // Analyze inference_program if (!config_.model_dir.empty()) { - argument_.fluid_model_dir.reset(new std::string(config_.model_dir)); + argument_.SetModelDir(config_.model_dir); } else { PADDLE_ENFORCE( !config_.param_file.empty(), "Either model_dir or (param_file, prog_file) should be set."); PADDLE_ENFORCE(!config_.prog_file.empty()); - argument_.fluid_model_program_path.reset( - new std::string(config_.prog_file)); - argument_.fluid_model_param_path.reset(new std::string(config_.param_file)); + argument_.SetModelProgramPath(config_.prog_file); + argument_.SetModelParamsPath(config_.param_file); } - argument_.origin_program_desc.reset( - new ProgramDesc(*inference_program_->Proto())); - - switch (config_.ir_mode) { - case contrib::AnalysisConfig::IrPassMode::kExclude: - Analyzer() - .IncludeAllIrPasses() - .SetUseMkldnn(config_._use_mkldnn) - .DisableIrPasses(config_.ir_passes) - .Run(&argument_); - break; - case contrib::AnalysisConfig::IrPassMode::kInclude: - Analyzer() - .SetUseMkldnn(config_._use_mkldnn) - .IncludeIrPasses(config_.ir_passes) - .Run(&argument_); - break; - default: - LOG(ERROR) << "Only kExclude and kInclude modes are supoorted yet."; + if (config_.use_gpu && config_.use_tensorrt_) { + argument_.SetUseTensorRT(true); + argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_); + argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); } - CHECK(argument_.transformed_program_desc); - VLOG(50) << "to prepare executor"; + auto passes = config_.pass_builder()->AllPasses(); + if (!config_.enable_ir_optim) passes.clear(); + argument_.SetIrAnalysisPasses(passes); + argument_.SetScopeNotOwned(const_cast(scope_.get())); + Analyzer().Run(&argument_); + + PADDLE_ENFORCE(argument_.scope_valid()); + VLOG(5) << "to prepare executor"; + ARGUMENT_CHECK_FIELD((&argument_), ir_analyzed_program); inference_program_.reset( - new framework::ProgramDesc(*argument_.transformed_program_desc)); - if (argument_.Has(framework::ir::kParamScopeAttr)) { - // Update scope. - scope_.reset( - argument_.Release(framework::ir::kParamScopeAttr)); - } + new framework::ProgramDesc(argument_.ir_analyzed_program())); LOG(INFO) << "== optimize end =="; } @@ -283,10 +343,12 @@ std::unique_ptr CreatePaddlePredictor< if (!dynamic_cast(predictor.get())->Init(nullptr)) { return nullptr; } - return predictor; + return std::move(predictor); } void AnalysisPredictor::PrepareFeedFetch() { + PADDLE_ENFORCE_NOT_NULL(sub_scope_); + CreateFeedFetchVar(sub_scope_); for (auto *op : inference_program_->Block(0).AllOps()) { if (op->Type() == "feed") { int idx = boost::get(op->GetAttr("col")); @@ -305,6 +367,14 @@ void AnalysisPredictor::PrepareFeedFetch() { } } +void AnalysisPredictor::CreateFeedFetchVar(framework::Scope *scope) { + PADDLE_ENFORCE_NOT_NULL(scope); + auto *var = scope->Var("feed"); + var->GetMutable(); + var = scope->Var("fetch"); + var->GetMutable(); +} + std::unique_ptr AnalysisPredictor::GetInputTensor( const std::string &name) { PADDLE_ENFORCE(executor_->scope()->FindVar(name), "no name called %s", name); @@ -335,27 +405,98 @@ bool AnalysisPredictor::ZeroCopyRun() { bool AnalysisPredictor::LoadProgramDesc() { // Initialize the inference program - std::unique_ptr tmp_exe( - new framework::Executor(platform::CPUPlace())); + std::string filename; if (!config_.model_dir.empty()) { - // Parameters are saved in separate files sited in - // the specified `dirname`. - inference_program_ = paddle::inference::Load( - static_cast(tmp_exe.get()), scope_.get(), - config_.model_dir); + filename = config_.model_dir + "/__model__"; } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { // All parameters are saved in a single file. // The file names should be consistent with that used // in Python API `fluid.io.save_inference_model`. - inference_program_ = paddle::inference::Load( - static_cast(tmp_exe.get()), scope_.get(), - config_.prog_file, config_.param_file); + filename = config_.prog_file; } else { + if (config_.model_dir.empty() && config_.prog_file.empty()) { + LOG(ERROR) + << "Either model_dir or (prog_file, param_file) should be set."; + return false; + } LOG(ERROR) << string::Sprintf( "not valid model path '%s' or program path '%s'.", config_.model_dir, config_.param_file); return false; } + + std::string pb_content; + // Read binary + std::ifstream fin(filename, std::ios::in | std::ios::binary); + PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", filename); + fin.seekg(0, std::ios::end); + + pb_content.resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(pb_content.at(0)), pb_content.size()); + fin.close(); + + // Create ProgramDesc + framework::proto::ProgramDesc proto; + proto.ParseFromString(pb_content); + inference_program_.reset(new framework::ProgramDesc(proto)); + return true; +} + +bool AnalysisPredictor::LoadParameters() { + PADDLE_ENFORCE_NOT_NULL(inference_program_.get(), + "The inference program should be loaded first."); + const auto &global_block = inference_program_->MutableBlock(0); + + // create a temporary program to load parameters. + + std::unique_ptr load_program( + new framework::ProgramDesc()); + framework::BlockDesc *load_block = load_program->MutableBlock(0); + std::vector params; + + for (auto *var : global_block->AllVars()) { + if (IsPersistable(var)) { + VLOG(3) << "persistable variable's name: " << var->Name(); + + framework::VarDesc *new_var = load_block->Var(var->Name()); + new_var->SetShape(var->GetShape()); + new_var->SetDataType(var->GetDataType()); + new_var->SetType(var->GetType()); + new_var->SetLoDLevel(var->GetLoDLevel()); + new_var->SetPersistable(true); + + if (!config_.param_file.empty()) { + params.push_back(new_var->Name()); + } else { + // append_op + framework::OpDesc *op = load_block->AppendOp(); + op->SetType("load"); + op->SetOutput("Out", {new_var->Name()}); + op->SetAttr("file_path", {config_.model_dir + "/" + new_var->Name()}); + op->CheckAttrs(); + } + } + } + + if (!config_.param_file.empty()) { + // sort paramlist to have consistent ordering + std::sort(params.begin(), params.end()); + // append just the load_combine op + framework::OpDesc *op = load_block->AppendOp(); + op->SetType("load_combine"); + op->SetOutput("Out", params); + op->SetAttr("file_path", {config_.param_file}); + op->CheckAttrs(); + } + + // Use NaiveExecutor to Load parameters. + platform::CPUPlace place; + framework::NaiveExecutor e(place); + e.Prepare(scope_.get(), *load_program, 0, false); + e.Run(); + VLOG(3) << "get " << scope_->LocalVarNames().size() << " vars after load"; + return true; } @@ -385,3 +526,26 @@ std::unique_ptr CreatePaddlePredictor( } } // namespace paddle + +#if PADDLE_WITH_TENSORRT +USE_TRT_CONVERTER(elementwise_add_weight); +USE_TRT_CONVERTER(elementwise_add_tensor); +USE_TRT_CONVERTER(elementwise_sub_tensor); +USE_TRT_CONVERTER(elementwise_div_tensor); +USE_TRT_CONVERTER(elementwise_mul_tensor); +USE_TRT_CONVERTER(elementwise_max_tensor); +USE_TRT_CONVERTER(elementwise_min_tensor); +USE_TRT_CONVERTER(elementwise_pow_tensor); +USE_TRT_CONVERTER(mul); +USE_TRT_CONVERTER(conv2d); +USE_TRT_CONVERTER(relu); +USE_TRT_CONVERTER(sigmoid); +USE_TRT_CONVERTER(tanh); +USE_TRT_CONVERTER(fc); +USE_TRT_CONVERTER(pool2d); +USE_TRT_CONVERTER(softmax); +USE_TRT_CONVERTER(batch_norm); +USE_TRT_CONVERTER(concat); +USE_TRT_CONVERTER(dropout); +USE_TRT_CONVERTER(pad); +#endif diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index a9f4cce6df..cf81b7db73 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -23,7 +23,10 @@ #include "paddle/fluid/inference/api/details/reset_tensor_array.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/string/printf.h" - +#ifdef PADDLE_WITH_TESTING +#include +#include +#endif namespace paddle { using inference::analysis::Argument; @@ -54,6 +57,7 @@ class AnalysisPredictor : public PaddlePredictor { bool ZeroCopyRun() override; + void CreateFeedFetchVar(framework::Scope *scope); void PrepareFeedFetch(); void OptimizeInferenceProgram(); @@ -62,11 +66,17 @@ class AnalysisPredictor : public PaddlePredictor { std::unique_ptr Clone() override; - framework::Scope *scope() { return executor_->scope(); } + framework::Scope *scope() { return scope_.get(); } framework::ProgramDesc &program() { return *inference_program_; } protected: + bool PrepareProgram(const std::shared_ptr &program); + bool PrepareScope(const std::shared_ptr &parent_scope); + bool CreateExecutor(); + bool PrepareExecutor(); + bool LoadProgramDesc(); + bool LoadParameters(); bool SetFeed(const std::vector &input_datas, framework::Scope *scope); @@ -77,6 +87,14 @@ class AnalysisPredictor : public PaddlePredictor { PaddleTensor *output_data); ~AnalysisPredictor(); +// Some more detailed tests, they are made the friends of the predictor, so that +// the all the details can be tested. +#if PADDLE_WITH_TESTING + FRIEND_TEST(AnalysisPredictor, analysis_off); + FRIEND_TEST(AnalysisPredictor, analysis_on); + FRIEND_TEST(AnalysisPredictor, with_gpu); +#endif + private: contrib::AnalysisConfig config_; Argument argument_; @@ -92,6 +110,13 @@ class AnalysisPredictor : public PaddlePredictor { // concurrency problems, so cache them. std::vector feed_tensors_; details::TensorArrayBatchCleaner tensor_array_batch_cleaner_; + + private: + // Some status here that help to determine the status inside the predictor. + bool status_program_optimized_{false}; + bool status_is_cloned_{false}; + bool status_use_gpu_{false}; + bool status_ir_optim_enabled_{false}; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index f75c45f3a0..1e6f75e364 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -12,16 +12,85 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/inference/api/analysis_predictor.h" #include #include +#include +#include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" DEFINE_string(dirname, "", "dirname to tests."); namespace paddle { -namespace inference { using contrib::AnalysisConfig; +TEST(AnalysisPredictor, analysis_off) { + AnalysisConfig config(false); + config.model_dir = FLAGS_dirname; + config.enable_ir_optim = false; + + auto _predictor = CreatePaddlePredictor(config); + auto* predictor = static_cast(_predictor.get()); + + // Without analysis, the scope_ and sub_scope_ are created by predictor + // itself. + ASSERT_TRUE(predictor->scope_); + ASSERT_TRUE(predictor->sub_scope_); + ASSERT_EQ(predictor->scope_->parent(), nullptr); + ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get()); + // ir is turned off, so program shouldn't be optimized. + ASSERT_FALSE(predictor->status_program_optimized_); + LOG(INFO) << "scope parameters " << predictor->scope_->LocalVarNames().size(); + + // 2. Dummy Input Data + int64_t data[4] = {1, 2, 3, 4}; + PaddleTensor tensor; + tensor.shape = std::vector({4, 1}); + tensor.data.Reset(data, sizeof(data)); + tensor.dtype = PaddleDType::INT64; + + std::vector inputs(4, tensor); + std::vector outputs; + ASSERT_TRUE(predictor->Run(inputs, &outputs)); +} + +TEST(AnalysisPredictor, analysis_on) { + AnalysisConfig config(false); + config.model_dir = FLAGS_dirname; + config.enable_ir_optim = true; + + auto _predictor = CreatePaddlePredictor(config); + auto* predictor = static_cast(_predictor.get()); + + ASSERT_TRUE(predictor->scope_); + ASSERT_TRUE(predictor->sub_scope_); + ASSERT_EQ(predictor->scope_->parent(), nullptr); + ASSERT_EQ(predictor->sub_scope_->parent(), predictor->scope_.get()); + // ir is turned on, so program should be optimized. + ASSERT_TRUE(predictor->status_program_optimized_); + // 2. Dummy Input Data + int64_t data[4] = {1, 2, 3, 4}; + PaddleTensor tensor; + tensor.shape = std::vector({4, 1}); + tensor.data.Reset(data, sizeof(data)); + tensor.dtype = PaddleDType::INT64; + + std::vector inputs(4, tensor); + std::vector outputs; + ASSERT_TRUE(predictor->Run(inputs, &outputs)); + + for (auto& output : outputs) { + LOG(INFO) << inference::DescribeTensor(output); + } + + // compare with NativePredictor + auto naive_predictor = CreatePaddlePredictor(config); + std::vector naive_outputs; + ASSERT_TRUE(naive_predictor->Run(inputs, &naive_outputs)); + ASSERT_EQ(naive_outputs.size(), 1UL); + inference::CompareTensor(outputs.front(), naive_outputs.front()); +} + TEST(AnalysisPredictor, ZeroCopy) { AnalysisConfig config; config.model_dir = FLAGS_dirname; @@ -61,5 +130,59 @@ TEST(AnalysisPredictor, ZeroCopy) { LOG(INFO) << "output_data: " << out_data; } -} // namespace inference +TEST(AnalysisPredictor, Clone) { + AnalysisConfig config; + config.model_dir = FLAGS_dirname; + config.use_feed_fetch_ops = true; + config.enable_ir_optim = true; + + std::vector> predictors; + predictors.emplace_back(CreatePaddlePredictor(config)); + + LOG(INFO) << "************** to clone ************************"; + const int num_threads = 3; + for (int i = 1; i < num_threads; i++) { + predictors.emplace_back(predictors.front()->Clone()); + } + + auto* root_scope = + static_cast(predictors[0].get())->scope(); + ASSERT_FALSE(root_scope->kids().empty()); + LOG(INFO) << "***** scope ******\n" + << framework::GenScopeTreeDebugInfo(root_scope); + + // 2. Dummy Input Data + int64_t data[4] = {1, 2, 3, 4}; + PaddleTensor tensor; + tensor.shape = std::vector({4, 1}); + tensor.data.Reset(data, sizeof(data)); + tensor.dtype = PaddleDType::INT64; + + std::vector inputs(4, tensor); + std::vector outputs; + predictors[0]->Run(inputs, &outputs); + + LOG(INFO) << "Run with single thread"; + for (int i = 0; i < num_threads; i++) { + LOG(INFO) << "run predictor " << i; + ASSERT_TRUE(predictors[i]->Run(inputs, &outputs)); + } + + LOG(INFO) << "Run with multiple threads"; + std::vector threads; + for (int i = 0; i < num_threads; i++) { + threads.emplace_back([&predictors, &inputs, i] { + LOG(INFO) << "thread #" << i << " running"; + std::vector outputs; + for (int j = 0; j < 10; j++) { + ASSERT_TRUE(predictors[i]->Run(inputs, &outputs)); + } + }); + } + + for (auto& t : threads) { + t.join(); + } +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 20fab8078f..9be059c73e 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index 04536ea3a5..6a8b81cc57 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -19,11 +19,13 @@ limitations under the License. */ #pragma once +#define WITH_ANAKIN + #include #include "framework/core/net/net.h" #include "framework/graph/graph.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/paddle_anakin_config.h" #include "saber/core/shape.h" #include "saber/saber_types.h" diff --git a/paddle/fluid/inference/api/api_impl_tester.cc b/paddle/fluid/inference/api/api_impl_tester.cc index 5152b8670d..014bdc6a37 100644 --- a/paddle/fluid/inference/api/api_impl_tester.cc +++ b/paddle/fluid/inference/api/api_impl_tester.cc @@ -292,7 +292,14 @@ TEST(inference_api_native, image_classification_gpu) { // TEST(inference_api_native, image_classification_gpu_threads) { // MainThreadsImageClassification(true /*use_gpu*/); // } - #endif +TEST(PassBuilder, Delete) { + contrib::AnalysisConfig config(false); + config.pass_builder()->DeletePass("attention_lstm_fuse_pass"); + const auto& passes = config.pass_builder()->AllPasses(); + auto it = std::find(passes.begin(), passes.end(), "attention_lstm_fuse_pass"); + ASSERT_EQ(it, passes.end()); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc deleted file mode 100644 index 94b3933497..0000000000 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/api/api_impl.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/operators/tensorrt_engine_op.h" - -namespace paddle { - -using inference::analysis::Argument; -using inference::Singleton; -using inference::analysis::Analyzer; -using framework::proto::ProgramDesc; -using paddle::contrib::MixedRTConfig; - -class TensorRTSubgraphPredictor : public NativePaddlePredictor { - public: - explicit TensorRTSubgraphPredictor(const MixedRTConfig& config) - : NativePaddlePredictor(config), config_(config) {} - - bool Init(const std::shared_ptr& parent_scope) { - FLAGS_IA_enable_tensorrt_subgraph_engine = true; - VLOG(30) << "Predictor::init()"; - if (config_.use_gpu) { - place_ = paddle::platform::CUDAPlace(config_.device); - } else { - place_ = paddle::platform::CPUPlace(); - } - if (parent_scope) { - scope_ = parent_scope; - sub_scope_ = &(parent_scope->NewScope()); - } else { - paddle::framework::InitDevices(false); - scope_.reset(new paddle::framework::Scope()); - } - - executor_.reset(new paddle::framework::Executor(place_)); - - // Initialize the inference program - if (!config_.model_dir.empty()) { - // Parameters are saved in separate files sited in - // the specified `dirname`. - inference_program_ = paddle::inference::Load( - executor_.get(), scope_.get(), config_.model_dir); - } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { - // All parameters are saved in a single file. - // The file names should be consistent with that used - // in Python API `fluid.io.save_inference_model`. - inference_program_ = paddle::inference::Load( - executor_.get(), scope_.get(), config_.prog_file, config_.param_file); - } else { - LOG(ERROR) << "fail to load inference model."; - return false; - } - - OptimizeInferenceProgram(); - ctx_ = executor_->Prepare(*inference_program_, 0); - - VLOG(50) << "to create variables"; - executor_->CreateVariables(*inference_program_, - sub_scope_ ? sub_scope_ : scope_.get(), 0); - // Get the feed_target_names and fetch_target_names - PrepareFeedFetch(); - return true; - } - - bool Run(const std::vector& inputs, - std::vector* output_data, - int batch_size = -1) override { - PADDLE_ENFORCE_GT(batch_size, 0, - "TensorRT engine needs the argument batch_size set"); - FLAGS_tensorrt_engine_batch_size = batch_size; - return NativePaddlePredictor::Run(inputs, output_data, batch_size); - } - - void OptimizeInferenceProgram() { - // Analyze inference_program - Argument argument; - - argument.Set("minimum_subgraph_size", - new int(config_.minimum_subgraph_size)); - argument.Set("max_batch_size", new int(config_.max_batch_size)); - argument.Set("workspace_size", new int(config_.workspace_size)); - argument.Set("precision_mode", - new std::string(config_.precision_mode)); - - if (!config_.model_dir.empty()) { - argument.fluid_model_dir.reset(new std::string(config_.model_dir)); - } else { - PADDLE_ENFORCE( - !config_.param_file.empty(), - "Either model_dir or (param_file, prog_file) should be set."); - PADDLE_ENFORCE(!config_.prog_file.empty()); - argument.fluid_model_program_path.reset( - new std::string(config_.prog_file)); - argument.fluid_model_param_path.reset( - new std::string(config_.param_file)); - } - argument.origin_program_desc.reset( - new ProgramDesc(*inference_program_->Proto())); - Singleton::Global().Run(&argument); - CHECK(argument.transformed_program_desc); - VLOG(50) << "transformed program:\n" - << argument.transformed_program_desc->SerializeAsString(); - VLOG(50) << "to prepare executor"; - inference_program_.reset( - new framework::ProgramDesc(*argument.transformed_program_desc)); - } - - private: - MixedRTConfig config_; -}; - -template <> -std::unique_ptr -CreatePaddlePredictor( - const MixedRTConfig& config) { - VLOG(30) << "create TensorRTSubgraphPredictor"; - if (config.use_gpu) { - // 1. GPU memeroy - PADDLE_ENFORCE_GT( - config.fraction_of_gpu_memory, 0.f, - "fraction_of_gpu_memory in the config should be set to range (0., 1.]"); - PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); - std::vector flags; - if (config.fraction_of_gpu_memory >= 0.0f || - config.fraction_of_gpu_memory <= 0.95f) { - flags.push_back("dummpy"); - std::string flag = "--fraction_of_gpu_memory_to_use=" + - std::to_string(config.fraction_of_gpu_memory); - flags.push_back(flag); - VLOG(30) << "set flag: " << flag; - framework::InitGflags(flags); - } - } - - std::unique_ptr predictor( - new TensorRTSubgraphPredictor(config)); - if (!dynamic_cast(predictor.get()) - ->Init(nullptr)) { - return nullptr; - } - return std::move(predictor); -} - -template <> -std::unique_ptr CreatePaddlePredictor( - const MixedRTConfig& config) { - return CreatePaddlePredictor(config); -} - -} // namespace paddle - -USE_TRT_CONVERTER(elementwise_add_weight); -USE_TRT_CONVERTER(elementwise_add_tensor); -USE_TRT_CONVERTER(elementwise_sub_tensor); -USE_TRT_CONVERTER(elementwise_div_tensor); -USE_TRT_CONVERTER(elementwise_mul_tensor); -USE_TRT_CONVERTER(elementwise_max_tensor); -USE_TRT_CONVERTER(elementwise_min_tensor); -USE_TRT_CONVERTER(elementwise_pow_tensor); -USE_TRT_CONVERTER(mul); -USE_TRT_CONVERTER(conv2d); -USE_TRT_CONVERTER(relu); -USE_TRT_CONVERTER(sigmoid); -USE_TRT_CONVERTER(tanh); -USE_TRT_CONVERTER(fc); -USE_TRT_CONVERTER(pool2d); -USE_TRT_CONVERTER(softmax); -USE_TRT_CONVERTER(batch_norm); -USE_TRT_CONVERTER(concat); -USE_TRT_CONVERTER(dropout); -USE_TRT_CONVERTER(pad); diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc deleted file mode 100644 index 89c9a65cb0..0000000000 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc +++ /dev/null @@ -1,92 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include "paddle/fluid/inference/analysis/analyzer.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" - -namespace paddle { - -using contrib::MixedRTConfig; - -DEFINE_string(dirname, "", "Directory of the inference model."); - -void CompareTensorRTWithFluid(bool enable_tensorrt) { - FLAGS_IA_enable_tensorrt_subgraph_engine = enable_tensorrt; - - //# 1. Create PaddlePredictor with a config. - NativeConfig config0; - config0.model_dir = FLAGS_dirname; - config0.use_gpu = true; - config0.fraction_of_gpu_memory = 0.3; - config0.device = 0; - - MixedRTConfig config1; - config1.model_dir = FLAGS_dirname; - config1.use_gpu = true; - config1.fraction_of_gpu_memory = 0.3; - config1.device = 0; - config1.max_batch_size = 10; - - auto predictor0 = CreatePaddlePredictor(config0); - auto predictor1 = CreatePaddlePredictor(config1); - - for (int batch_id = 0; batch_id < 1; batch_id++) { - //# 2. Prepare input. - std::vector data(20); - for (int i = 0; i < 20; i++) data[i] = i; - - PaddleTensor tensor; - tensor.shape = std::vector({10, 1}); - tensor.data = PaddleBuf(data.data(), data.size() * sizeof(int64_t)); - tensor.dtype = PaddleDType::INT64; - - // For simplicity, we set all the slots with the same data. - std::vector slots(4, tensor); - - //# 3. Run - std::vector outputs0; - std::vector outputs1; - CHECK(predictor0->Run(slots, &outputs0)); - CHECK(predictor1->Run(slots, &outputs1, 10)); - - //# 4. Get output. - ASSERT_EQ(outputs0.size(), 1UL); - ASSERT_EQ(outputs1.size(), 1UL); - - const size_t num_elements = outputs0.front().data.length() / sizeof(float); - const size_t num_elements1 = outputs1.front().data.length() / sizeof(float); - EXPECT_EQ(num_elements, num_elements1); - - auto *data0 = static_cast(outputs0.front().data.data()); - auto *data1 = static_cast(outputs1.front().data.data()); - - ASSERT_GT(num_elements, 0UL); - for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) { - EXPECT_NEAR(data0[i], data1[i], 1e-3); - } - } -} - -TEST(paddle_inference_api_tensorrt_subgraph_engine, without_tensorrt) { - CompareTensorRTWithFluid(false); -} - -TEST(paddle_inference_api_tensorrt_subgraph_engine, with_tensorrt) { - CompareTensorRTWithFluid(true); -} - -} // namespace paddle diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 5446fd4d42..6ae5198dab 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -23,7 +23,7 @@ limitations under the License. */ #include #include //NOLINT -#include "paddle/include/paddle_inference_api.h" +#include "utils.h" DEFINE_string(dirname, "", "Directory of the inference model."); DEFINE_bool(use_gpu, false, "Whether use gpu."); diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index 6460514f3f..72d20bc59e 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -36,14 +36,13 @@ namespace demo { */ void Main() { std::unique_ptr predictor; - paddle::contrib::MixedRTConfig config; + paddle::contrib::AnalysisConfig config(true); config.param_file = FLAGS_modeldir + "/__params__"; config.prog_file = FLAGS_modeldir + "/__model__"; - config.use_gpu = true; config.device = 0; - config.max_batch_size = 1; + config.EnableTensorRtEngine(); config.fraction_of_gpu_memory = 0.1; // set by yourself - predictor = CreatePaddlePredictor(config); + predictor = CreatePaddlePredictor(config); VLOG(30) << "begin to process data"; // Just a single batch of data. diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index d747f85580..bc8891455d 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -17,7 +17,7 @@ limitations under the License. */ */ #include -#include // use glog instead of CHECK to avoid importing other paddle header files. +#include #include "utils.h" // NOLINT #ifdef PADDLE_WITH_CUDA @@ -40,20 +40,17 @@ using contrib::AnalysisConfig; */ void Main(bool use_gpu) { std::unique_ptr predictor, analysis_predictor; - AnalysisConfig config; + AnalysisConfig config(use_gpu); config.param_file = FLAGS_modeldir + "/__params__"; config.prog_file = FLAGS_modeldir + "/__model__"; - config.use_gpu = use_gpu; config.device = 0; if (FLAGS_use_gpu) { config.fraction_of_gpu_memory = 0.1; // set by yourself } - VLOG(30) << "init predictor"; predictor = CreatePaddlePredictor(config); - analysis_predictor = CreatePaddlePredictor(config); + analysis_predictor = CreatePaddlePredictor(config); - VLOG(30) << "begin to process data"; // Just a single batch of data. std::string line; std::ifstream file(FLAGS_data); @@ -68,13 +65,10 @@ void Main(bool use_gpu) { PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); input.dtype = PaddleDType::FLOAT32; - VLOG(30) << "run executor"; std::vector output, analysis_output; predictor->Run({input}, &output, 1); - VLOG(30) << "output.size " << output.size(); auto& tensor = output.front(); - VLOG(30) << "output: " << SummaryTensor(tensor); // compare with reference result CheckOutput(FLAGS_refer, tensor); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index 14698f6dfc..0f540699b8 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -51,7 +51,7 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) { } template -T *ZeroCopyTensor::data(PaddlePlace *place, int *size) { +T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const { auto *tensor = static_cast(FindTensor()); auto *res = tensor->data(); @@ -67,8 +67,10 @@ T *ZeroCopyTensor::data(PaddlePlace *place, int *size) { return res; } -template float *ZeroCopyTensor::data(PaddlePlace *place, int *size); -template int64_t *ZeroCopyTensor::data(PaddlePlace *place, int *size); +template float *ZeroCopyTensor::data(PaddlePlace *place, + int *size) const; +template int64_t *ZeroCopyTensor::data(PaddlePlace *place, + int *size) const; template float *ZeroCopyTensor::mutable_data(PaddlePlace place); template int64_t *ZeroCopyTensor::mutable_data(PaddlePlace place); @@ -84,7 +86,7 @@ void *ZeroCopyTensor::FindTensor() const { return tensor; } -std::vector ZeroCopyTensor::shape() { +std::vector ZeroCopyTensor::shape() const { auto *tensor = static_cast(FindTensor()); PADDLE_ENFORCE(tensor, "not found tensor called %s in the scope", name_); return framework::vectorize(tensor->dims()); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc b/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc index 2d5b561d80..12071e09f8 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor_dummy.cc @@ -24,18 +24,20 @@ T *ZeroCopyTensor::mutable_data(PaddlePlace place) { } template -T *ZeroCopyTensor::data(PaddlePlace *place, int *size) { +T *ZeroCopyTensor::data(PaddlePlace *place, int *size) const { return nullptr; } -template float *ZeroCopyTensor::data(PaddlePlace *place, int *size); -template int64_t *ZeroCopyTensor::data(PaddlePlace *place, int *size); +template float *ZeroCopyTensor::data(PaddlePlace *place, + int *size) const; +template int64_t *ZeroCopyTensor::data(PaddlePlace *place, + int *size) const; template float *ZeroCopyTensor::mutable_data(PaddlePlace place); template int64_t *ZeroCopyTensor::mutable_data(PaddlePlace place); void *ZeroCopyTensor::FindTensor() const { return nullptr; } -std::vector ZeroCopyTensor::shape() { return {}; } +std::vector ZeroCopyTensor::shape() const { return {}; } void ZeroCopyTensor::SetLoD(const std::vector> &x) {} diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index af21c0095c..252960d89e 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -125,6 +125,51 @@ static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor, return size; } +static bool CompareTensor(const PaddleTensor &a, const PaddleTensor &b) { + if (a.dtype != b.dtype) { + LOG(ERROR) << "dtype not match"; + return false; + } + + if (a.lod.size() != b.lod.size()) { + LOG(ERROR) << "lod not match"; + return false; + } + for (size_t i = 0; i < a.lod.size(); i++) { + if (a.lod[i].size() != b.lod[i].size()) { + LOG(ERROR) << "lod not match"; + return false; + } + for (size_t j = 0; j < a.lod[i].size(); j++) { + if (a.lod[i][j] != b.lod[i][j]) { + LOG(ERROR) << "lod not match"; + return false; + } + } + } + + if (a.shape.size() != b.shape.size()) { + LOG(INFO) << "shape not match"; + return false; + } + for (size_t i = 0; i < a.shape.size(); i++) { + if (a.shape[i] != b.shape[i]) { + LOG(ERROR) << "shape not match"; + return false; + } + } + + auto *adata = static_cast(a.data.data()); + auto *bdata = static_cast(b.data.data()); + for (int i = 0; i < VecReduceToInt(a.shape); i++) { + if (adata[i] != bdata[i]) { + LOG(ERROR) << "data not match"; + return false; + } + } + return true; +} + static std::string DescribeTensor(const PaddleTensor &tensor) { std::stringstream os; os << "Tensor [" << tensor.name << "]\n"; @@ -157,6 +202,26 @@ static std::string DescribeTensor(const PaddleTensor &tensor) { return os.str(); } +static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) { + std::stringstream os; + os << "Tensor [" << tensor.name() << "]\n"; + + os << " - shape: " << to_string(tensor.shape()) << '\n'; + os << " - lod: "; + for (auto &l : tensor.lod()) { + os << to_string(l) << "; "; + } + os << "\n"; + os << " - data: "; + PaddlePlace place; + int size; + const auto *data = tensor.data(&place, &size); + for (int i = 0; i < size; i++) { + os << data[i] << " "; + } + return os.str(); +} + static void PrintTime(int batch_size, int repeat, int num_threads, int tid, double latency, int epoch = 1) { LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat diff --git a/paddle/fluid/inference/analysis/analyzer_main.cc b/paddle/fluid/inference/api/paddle_anakin_config.h similarity index 56% rename from paddle/fluid/inference/analysis/analyzer_main.cc rename to paddle/fluid/inference/api/paddle_anakin_config.h index 5e1fe3eb79..0e91c2624b 100644 --- a/paddle/fluid/inference/analysis/analyzer_main.cc +++ b/paddle/fluid/inference/api/paddle_anakin_config.h @@ -11,23 +11,25 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#pragma once -/* - * This file implements analysizer -- an executation help to analyze and - * optimize trained model. - */ -#include "paddle/fluid/inference/analysis/analyzer.h" -#include -#include +#include +#include +#include +#include -int main(int argc, char** argv) { - google::ParseCommandLineFlags(&argc, &argv, true); - using paddle::inference::analysis::Analyzer; - using paddle::inference::analysis::Argument; +#include "paddle_api.h" // NOLINT - Argument argument; - Analyzer analyzer; - analyzer.Run(&argument); +namespace paddle { +namespace contrib { +// Configurations for Anakin engine. +struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; + int device; + std::string model_file; + int max_batch_size{-1}; + TargetType target_type; +}; - return 0; -} +} // namespace contrib +} // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h new file mode 100644 index 0000000000..82c04e9f3f --- /dev/null +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -0,0 +1,77 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +// Here we include some header files with relative paths, for that in deploy, +// the abstract path of this header file will be changed. +#include "paddle_api.h" // NOLINT +#include "paddle_pass_builder.h" // NOLINT + +namespace paddle { + +class AnalysisPredictor; +// == +// +// ----------------------------------------------------------------------------------- +// NOTE: The following APIs are not mature yet, we are still working on them. +namespace contrib { + +// NOTE WIP, not stable yet. +struct AnalysisConfig : public NativeConfig { + explicit AnalysisConfig(bool use_gpu = false); + explicit AnalysisConfig(const AnalysisConfig& other); + explicit AnalysisConfig(AnalysisConfig&& other); + + // Determine whether to perform graph optimization. + bool enable_ir_optim = true; + + // Get a pass builder for customize the passes in IR analysis phase. + PassStrategy* pass_builder() const; + + // NOT stable yet. + bool use_feed_fetch_ops{true}; + + void EnableTensorRtEngine(int workspace_size = 1 << 20, + int max_batch_size = 1); + // NOTE this is just for internal development, please not use it. + // NOT stable yet. + void EnableMKLDNN(); + bool use_mkldnn() const { return use_mkldnn_; } + + friend class ::paddle::AnalysisPredictor; + + protected: + bool use_tensorrt_{false}; + bool use_mkldnn_{false}; + int tensorrt_workspace_size_; + int tensorrt_max_batchsize_; + std::unique_ptr pass_builder_; +}; + +// Configurations for Anakin engine. +struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; + int device; + std::string model_file; + int max_batch_size{-1}; + TargetType target_type; +}; + +} // namespace contrib +} // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h new file mode 100644 index 0000000000..0a2a2a1a23 --- /dev/null +++ b/paddle/fluid/inference/api/paddle_api.h @@ -0,0 +1,220 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include +#include + +namespace paddle { + +// Data type. +enum PaddleDType { + FLOAT32, + INT64, + // TODO(Superjomn) support more data types if needed. +}; + +/* + * Memory menage for PaddleTensor. + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf + * should be reused for better performance. + * + * For user allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying + * the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external + * memory. + * ATTENTION, for user allocated memory, deallocation should be done by users + * externally after the program finished. The PaddleBuf won't do any allocation + * or deallocation. + * + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION + * if the allocated memory is larger than `length`, nothing will done. + */ +class PaddleBuf { + public: + // PaddleBuf allocate memory internally, and manage it. + explicit PaddleBuf(size_t length) + : data_(new char[length]), length_(length), memory_owned_(true) {} + // Set external memory, the PaddleBuf won't manage it. + PaddleBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_{false} {} + // Copy only available when memory is managed externally. + explicit PaddleBuf(const PaddleBuf&); + + // Resize the memory. + void Resize(size_t length); + // Reset to external memory, with address and length set. + void Reset(void* data, size_t length); + // Tell whether the buffer is empty. + bool empty() const { return length_ == 0; } + // Get the memory address. + void* data() const { return data_; } + // Get the memory length. + size_t length() const { return length_; } + + ~PaddleBuf() { Free(); } + PaddleBuf& operator=(const PaddleBuf&); + PaddleBuf& operator=(PaddleBuf&&); + PaddleBuf() = default; + PaddleBuf(PaddleBuf&& other); + + private: + void Free(); + void* data_{nullptr}; // pointer to the data memory. + size_t length_{0}; // number of memory bytes. + bool memory_owned_{true}; +}; + +// Basic input and output data structure for PaddlePredictor. +struct PaddleTensor { + PaddleTensor() = default; + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; + std::vector> lod; // Tensor+LoD equals LoDTensor +}; + +enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; +// Tensor without copy, currently only supports AnalysisPredictor. +class ZeroCopyTensor { + public: + void Reshape(const std::vector& shape); + + // Get the memory in CPU or GPU with specific data type, should Reshape first + // to tell the data size. + // Once can directly call this data to feed the data. + // This is for write the input tensor. + template + T* mutable_data(PaddlePlace place); + // Get the memory directly, will return the place and memory size by pointer. + // This is for reading the output tensor. + template + T* data(PaddlePlace* place, int* size) const; + + std::vector shape() const; + + void SetLoD(const std::vector>& x); + std::vector> lod() const; + const std::string& name() const { return name_; } + + protected: + explicit ZeroCopyTensor(void* scope) : scope_{scope} {} + void SetName(const std::string& name) { name_ = name; } + void* FindTensor() const; + + private: + std::string name_; + bool input_or_output_; + friend class AnalysisPredictor; + void* scope_{nullptr}; +}; + +/* + * A simple Inference API for Paddle. + */ +class PaddlePredictor { + public: + struct Config; + PaddlePredictor() = default; + PaddlePredictor(const PaddlePredictor&) = delete; + PaddlePredictor& operator=(const PaddlePredictor&) = delete; + + // Predict an record. + // The caller should be responsible for allocating and releasing the memory of + // `inputs`. `inputs` should be available until Run returns. Caller should be + // responsible for the output tensor's buffer, either allocated or passed from + // outside. + virtual bool Run(const std::vector& inputs, + std::vector* output_data, + int batch_size = -1) = 0; + + // Zero copy input and output optimization. + // Get the input or output tensors, and operate on their memory directly, + // without copy. + virtual std::unique_ptr GetInputTensor( + const std::string& name) { + return nullptr; + } + virtual std::unique_ptr GetOutputTensor( + const std::string& name) { + return nullptr; + } + virtual bool ZeroCopyRun() { return false; } + + // Clone a predictor that share the model weights, the Cloned predictor should + // be thread-safe. + virtual std::unique_ptr Clone() = 0; + + // Destroy the Predictor. + virtual ~PaddlePredictor() = default; + + // The common configs for all the predictors. + struct Config { + std::string model_dir; // path to the model directory. + }; +}; + +struct NativeConfig : public PaddlePredictor::Config { + // GPU related fields. + bool use_gpu{false}; + int device{0}; + float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed. + + // Specify the exact path of program and parameter files. + std::string prog_file; + std::string param_file; + + // Specify the variable's name of each input if input tensors don't follow the + // `feeds` and `fetches` of the phase `save_inference_model`. + bool specify_input_name{false}; +}; + +// A factory to help create different predictors. +// +// Usage: +// +// NativeConfig config; +// ... // change the configs. +// auto native_predictor = CreatePaddlePredictor(config); +// +// FOR EXTENSION DEVELOPER: +// Different predictors are designated by config type. Similar configs can be +// merged, but there shouldn't be a huge config containing different fields for +// more than one kind of predictors. +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +// NOTE The following APIs are too trivial, we will discard it in the following +// versions. +enum class PaddleEngineKind { + kNative = 0, // Use the native Fluid facility. + kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. + kAnalysis, // More optimization. + kAnakin // Use Anakin for inference, not mature yet. +}; + +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +int PaddleDtypeSize(PaddleDType dtype); + +} // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index a755ccb93b..92fb51d647 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -26,265 +26,9 @@ limitations under the License. */ #include #include -namespace paddle { - -// Data type. -enum PaddleDType { - FLOAT32, - INT64, - // TODO(Superjomn) support more data types if needed. -}; - -/* - * Memory menage for PaddleTensor. - * The PaddleBuf holds a buffer for data input or output. The memory can be - * allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf - * should be reused for better performance. - * - * For user allocated memory, the following API can be used: - * - PaddleBuf(void* data, size_t length) to set an external memory by - * specifying - * the memory address and length. - * - Reset(void* data, size_t length) to reset the PaddleBuf with an external - * memory. - * ATTENTION, for user allocated memory, deallocation should be done by users - * externally after the program finished. The PaddleBuf won't do any allocation - * or deallocation. - * - * To have the PaddleBuf allocate and manage the memory: - * - PaddleBuf(size_t length) will allocate a memory of size `length`. - * - Resize(size_t length) resize the memory to no less than `length`, ATTENTION - * if the allocated memory is larger than `length`, nothing will done. - */ -class PaddleBuf { - public: - // PaddleBuf allocate memory internally, and manage it. - explicit PaddleBuf(size_t length) - : data_(new char[length]), length_(length), memory_owned_(true) {} - // Set external memory, the PaddleBuf won't manage it. - PaddleBuf(void* data, size_t length) - : data_(data), length_(length), memory_owned_{false} {} - // Copy only available when memory is managed externally. - explicit PaddleBuf(const PaddleBuf&); - - // Resize the memory. - void Resize(size_t length); - // Reset to external memory, with address and length set. - void Reset(void* data, size_t length); - // Tell whether the buffer is empty. - bool empty() const { return length_ == 0; } - // Get the memory address. - void* data() const { return data_; } - // Get the memory length. - size_t length() const { return length_; } - - ~PaddleBuf() { Free(); } - PaddleBuf& operator=(const PaddleBuf&); - PaddleBuf& operator=(PaddleBuf&&); - PaddleBuf() = default; - PaddleBuf(PaddleBuf&& other); - - private: - void Free(); - void* data_{nullptr}; // pointer to the data memory. - size_t length_{0}; // number of memory bytes. - bool memory_owned_{true}; -}; - -// Basic input and output data structure for PaddlePredictor. -struct PaddleTensor { - PaddleTensor() = default; - std::string name; // variable name. - std::vector shape; - PaddleBuf data; // blob of data. - PaddleDType dtype; - std::vector> lod; // Tensor+LoD equals LoDTensor -}; - -enum class PaddlePlace { kUNK = -1, kCPU, kGPU }; -// Tensor without copy, currently only supports AnalysisPredictor. -class ZeroCopyTensor { - public: - void Reshape(const std::vector& shape); - - // Get the memory in CPU or GPU with specific data type, should Reshape first - // to tell the data size. - // Once can directly call this data to feed the data. - // This is for write the input tensor. - template - T* mutable_data(PaddlePlace place); - // Get the memory directly, will return the place and memory size by pointer. - // This is for reading the output tensor. - template - T* data(PaddlePlace* place, int* size); - - std::vector shape(); - - void SetLoD(const std::vector>& x); - std::vector> lod() const; - - protected: - explicit ZeroCopyTensor(void* scope) : scope_{scope} {} - void SetName(const std::string& name) { name_ = name; } - void* FindTensor() const; - - private: - std::string name_; - bool input_or_output_; - friend class AnalysisPredictor; - void* scope_{nullptr}; -}; - -/* - * A simple Inference API for Paddle. - */ -class PaddlePredictor { - public: - struct Config; - PaddlePredictor() = default; - PaddlePredictor(const PaddlePredictor&) = delete; - PaddlePredictor& operator=(const PaddlePredictor&) = delete; - - // Predict an record. - // The caller should be responsible for allocating and releasing the memory of - // `inputs`. `inputs` should be available until Run returns. Caller should be - // responsible for the output tensor's buffer, either allocated or passed from - // outside. - virtual bool Run(const std::vector& inputs, - std::vector* output_data, - int batch_size = -1) = 0; - - // Zero copy input and output optimization. - // Get the input or output tensors, and operate on their memory directly, - // without copy. - virtual std::unique_ptr GetInputTensor( - const std::string& name) { - return nullptr; - } - virtual std::unique_ptr GetOutputTensor( - const std::string& name) { - return nullptr; - } - virtual bool ZeroCopyRun() { return false; } - - // Clone a predictor that share the model weights, the Cloned predictor should - // be thread-safe. - virtual std::unique_ptr Clone() = 0; - - // Destroy the Predictor. - virtual ~PaddlePredictor() = default; - - // The common configs for all the predictors. - struct Config { - std::string model_dir; // path to the model directory. - }; -}; - -struct NativeConfig : public PaddlePredictor::Config { - // GPU related fields. - bool use_gpu{false}; - int device{0}; - float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed. - - // Specify the exact path of program and parameter files. - std::string prog_file; - std::string param_file; - - // Specify the variable's name of each input if input tensors don't follow the - // `feeds` and `fetches` of the phase `save_inference_model`. - bool specify_input_name{false}; -}; - -// A factory to help create different predictors. -// -// Usage: -// -// NativeConfig config; -// ... // change the configs. -// auto native_predictor = CreatePaddlePredictor(config); -// -// FOR EXTENSION DEVELOPER: -// Different predictors are designated by config type. Similar configs can be -// merged, but there shouldn't be a huge config containing different fields for -// more than one kind of predictors. -template -std::unique_ptr CreatePaddlePredictor(const ConfigT& config); - -// NOTE The following APIs are too trivial, we will discard it in the following -// versions. -enum class PaddleEngineKind { - kNative = 0, // Use the native Fluid facility. - kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. - kAnalysis, // More optimization. - kAnakin // Use Anakin for inference, not mature yet. -}; - -template -std::unique_ptr CreatePaddlePredictor(const ConfigT& config); - -// == -// -// ----------------------------------------------------------------------------------- -// NOTE: The following APIs are not mature yet, we are still working on them. - -namespace contrib { - -// Accelerate GPU computation with TensorRT engine. -struct MixedRTConfig : public NativeConfig { - // Determine whether a subgraph will be executed by TRT. - int min_subgraph_size{1}; - // While TensorRT allows an engine optimized for a given max batch size - // to run at any smaller size, the performance for those smaller - // sizes may not be as well-optimized. Therefore, Max batch is best - // equivalent to the runtime batch size. - int max_batch_size{1}; - // For workspace_size, refer it from here: - // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting - int workspace_size{1 << 30}; - // We transform the Ops that can be converted into TRT layer in the model, - // and aggregate these Ops into subgraphs for TRT execution. - // We set this variable to control the minimum number of nodes in the - // subgraph, 3 as default value. - int minimum_subgraph_size = 3; - // Reserved configuration - // We just support "FP32" now, "FP16" and "INT8" will be supported. - std::string precision_mode = "FP32"; -}; - -// NOTE WIP, not stable yet. -struct AnalysisConfig : public NativeConfig { - enum class IrPassMode { - kSystem, // Use system default passes, not customize. - kInclude, // Specify the passes in `ir_passes`. - kExclude // Specify the disabled passes in `ir_passes`. - }; - - // Determine whether to perform graph optimization. - bool enable_ir_optim = true; - // Manually determine the IR passes to run. - IrPassMode ir_mode{IrPassMode::kExclude}; - // passes to be excluded/included - std::vector ir_passes{"embedding_fc_lstm_fuse_pass"}; - - // NOT stable yet. - bool use_feed_fetch_ops{true}; - - // NOTE this is just for internal development, please not use it. - // NOT stable yet. - bool _use_mkldnn{false}; -}; - -// Configurations for Anakin engine. -struct AnakinConfig : public PaddlePredictor::Config { - enum TargetType { NVGPU = 0, X86 }; - int device; - std::string model_file; - int max_batch_size{-1}; - TargetType target_type; -}; - -} // namespace contrib - -int PaddleDtypeSize(PaddleDType dtype); - -} // namespace paddle +#include "paddle_api.h" // NOLINT +#ifndef WITH_ANAKIN +#include "paddle_analysis_config.h" // NOLINT +#else +#include "paddle_anakin_config.h" // NOLINT +#endif diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc new file mode 100644 index 0000000000..bc3ce72f08 --- /dev/null +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/api/paddle_pass_builder.h" +#include + +namespace paddle { + +void PaddlePassBuilder::AppendPass(const std::string &pass_type) { + passes_.push_back(pass_type); +} + +void PaddlePassBuilder::TurnOnDebug() { + std::vector passes; + auto it = std::begin(passes_); + while (it != std::end(passes_)) { + if (*it != "graph_viz_pass") { + it = passes_.insert(it + 1, "graph_viz_pass"); + } else { + ++it; + } + } +} + +std::string PaddlePassBuilder::DebugString() { + std::stringstream ss; + ss << "Passes to apply:\n"; + for (auto &pass : passes_) { + ss << " - " << pass << '\n'; + } + return ss.str(); +} + +void PaddlePassBuilder::DeletePass(const std::string &pass_type) { + auto it = std::begin(passes_); + while (it != std::end(passes_)) { + if (*it == pass_type) { + it = passes_.erase(it); + } else { + ++it; + } + } +} + +void PaddlePassBuilder::InsertPass(size_t idx, const std::string &pass_type) { + passes_.insert(std::begin(passes_) + idx, pass_type); +} + +void PaddlePassBuilder::DeletePass(size_t idx) { + passes_.erase(std::begin(passes_) + idx); +} + +void GpuPassStrategy::EnableMKLDNN() { + LOG(ERROR) << "GPU not support MKLDNN yet"; +} + +} // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h new file mode 100644 index 0000000000..8aad5c5984 --- /dev/null +++ b/paddle/fluid/inference/api/paddle_pass_builder.h @@ -0,0 +1,131 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace paddle { +/* + * This is a pass builder based on string. It is part of inference API. + */ +class PaddlePassBuilder { + public: + explicit PaddlePassBuilder(const std::vector &passes) + : passes_(passes) {} + + void AppendPass(const std::string &pass_type); + + void InsertPass(size_t idx, const std::string &pass_type); + + // Delete the `idx`-th pass. + void DeletePass(size_t idx); + + // Delete all the passes that has type `pass_type`. + void DeletePass(const std::string &pass_type); + + // Visualize the computation graph after each pass by generating a DOT + // language file, one can draw them with the Graphviz toolkit. + void TurnOnDebug(); + + // Human-readible information. + std::string DebugString(); + + const std::vector &AllPasses() const { return passes_; } + + protected: + std::vector passes_; +}; + +/* + * Pass strategy to help control the IR passes. + */ +class PassStrategy : public PaddlePassBuilder { + public: + explicit PassStrategy(const std::vector &passes) + : PaddlePassBuilder(passes) {} + + // The MKLDNN control exists in both CPU and GPU mode, because there can be + // still some CPU kernels running in CPU mode. + virtual void EnableMKLDNN() = 0; + + virtual ~PassStrategy() = default; +}; + +/* + * The CPU passes controller, it is used in AnalysisPredictor with CPU mode. + */ +class CpuPassStrategy : public PassStrategy { + public: + CpuPassStrategy() : PassStrategy({}) { + // NOTE the large fusions should be located in the front, so that they will + // not be damaged by smaller ones. + passes_.assign({ + "infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "seqconv_eltadd_relu_fuse_pass", // + // "embedding_fc_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "fc_gru_fuse_pass", // + "mul_gru_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_eltwiseadd_bn_fuse_pass", // + }); + } + + virtual ~CpuPassStrategy() = default; + + virtual void EnableMKLDNN() override { +// TODO(Superjomn) Consider the way to mix CPU with GPU. +#ifdef PADDLE_WITH_MKLDNN + passes_.insert(passes_.begin(), "mkldnn_placement_pass"); + + for (auto &pass : + std::vector({"depthwise_conv_mkldnn_pass", // + "conv_bias_mkldnn_fuse_pass", // + "conv_relu_mkldnn_fuse_pass", // + "conv_elementwise_add_mkldnn_fuse_pass"})) { + passes_.push_back(pass); + } +#endif + } + + CpuPassStrategy(const CpuPassStrategy &other) : PassStrategy(other.passes_) {} +}; + +/* + * The GPU passes strategy, it is used in + */ +class GpuPassStrategy : public PassStrategy { + public: + GpuPassStrategy() : PassStrategy({}) { + passes_.assign({ + "infer_clean_graph_pass", "conv_bn_fuse_pass", + }); + } + + GpuPassStrategy(const GpuPassStrategy &other) + : PassStrategy(other.AllPasses()) {} + + virtual void EnableMKLDNN() override; + + virtual ~GpuPassStrategy() = default; +}; + +} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 9e0f958447..8adc3baca6 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -61,6 +61,7 @@ TensorRTEngine::~TensorRTEngine() { } void TensorRTEngine::FreezeNetwork() { + VLOG(3) << "TRT to freeze network"; freshDeviceId(); PADDLE_ENFORCE(infer_builder_ != nullptr, "Call InitNetwork first to initialize network."); diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 5287cd51cd..fc3e44ffd7 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -108,7 +108,8 @@ if(WITH_GPU AND TENSORRT_FOUND) if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR}) inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz") endif() - cc_test(test_trt_models SRCS trt_models_tester.cc - ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models - DEPS paddle_inference_tensorrt_subgraph_engine SERIAL) + + inference_analysis_test(test_trt_models SRCS trt_models_tester.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor + ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL) endif() diff --git a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc index e5c8dfd22a..5c92096d9d 100644 --- a/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc @@ -37,7 +37,10 @@ void SetInput(std::vector> *inputs) { void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - cfg._use_mkldnn = use_mkldnn; + + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector outputs; std::vector> input_slots_all; @@ -65,7 +68,9 @@ TEST(Analyzer_resnet50, fuse_statis) { void compare(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - cfg._use_mkldnn = use_mkldnn; + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector> input_slots_all; SetInput(&input_slots_all); diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index e0416ff953..612ae121b2 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -210,7 +210,6 @@ void SetConfig(AnalysisConfig *cfg) { cfg->device = 0; cfg->specify_input_name = true; cfg->enable_ir_optim = true; - cfg->ir_passes.clear(); // Do not exclude any pass. } void SetInput(std::vector> *inputs) { @@ -226,13 +225,15 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_rnn1, profile) { - contrib::AnalysisConfig cfg; + contrib::AnalysisConfig cfg(false); SetConfig(&cfg); - cfg.use_gpu = false; + cfg.fraction_of_gpu_memory = 0.1; + cfg.pass_builder()->TurnOnDebug(); std::vector outputs; std::vector> input_slots_all; SetInput(&input_slots_all); + LOG(INFO) << "to test prediction"; TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); } @@ -274,31 +275,6 @@ TEST(Analyzer_rnn1, multi_thread) { TestPrediction(cfg, input_slots_all, &outputs, 4 /* multi_thread */); } -bool CompareTensors(const framework::Scope &a_scope, - const framework::Scope &b_scope, - const std::vector &tensors) { - for (auto &x : tensors) { - auto *a_var = a_scope.FindVar(x); - auto *b_var = b_scope.FindVar(x); - if (a_var && b_var) { - if (a_var->Type() == typeid(framework::LoDTensor) || - a_var->Type() == typeid(framework::Tensor)) { - LOG(INFO) << "comparing tensor " << x; - auto &a_t = a_var->Get(); - auto &b_t = b_var->Get(); - if (!inference::CompareTensor(a_t, b_t)) { - LOG(ERROR) << string::Sprintf("tensor %s not match in two scopes", x); - } - } else { - LOG(INFO) << "skip no tensor " << x; - } - } else { - LOG(INFO) << "skip tensor " << x; - } - } - return true; -} - // Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing // on the complex RNN1 model. TEST(Analyzer_rnn1, ZeroCopy) { @@ -307,7 +283,6 @@ TEST(Analyzer_rnn1, ZeroCopy) { config.use_feed_fetch_ops = false; PaddlePlace place; - int output_size{0}; auto predictor = CreatePaddlePredictor(config); @@ -353,86 +328,22 @@ TEST(Analyzer_rnn1, ZeroCopy) { Timer timer; double total_time{0}; - double native_total_time{0}; - double analysis_total_time{0.}; - for (int i = 0; i < FLAGS_repeat; i++) { timer.tic(); predictor->ZeroCopyRun(); total_time += timer.toc(); } + LOG(INFO) << "ZeroCopy output: " << DescribeZeroCopyTensor(*output_tensor); - auto *output_data = output_tensor->data(&place, &output_size); - ASSERT_GT(output_size, 0); // more than one output! - - for (int i = 0; i < FLAGS_repeat; i++) { - // Run native predictor. - timer.tic(); - ASSERT_TRUE(native_predictor->Run(native_inputs.front(), &native_outputs)); - native_total_time += timer.toc(); - } - - for (int i = 0; i < FLAGS_repeat; i++) { - timer.tic(); - ASSERT_TRUE( - analysis_predictor->Run(native_inputs.front(), &analysis_outputs)); - analysis_total_time += timer.toc(); - } - - if (!FLAGS_with_precision_check) { - return; - } - int native_output_size = VecReduceToInt(native_outputs.front().shape); - - EXPECT_EQ(native_output_size, output_size); + ASSERT_TRUE(native_predictor->Run(native_inputs.front(), &native_outputs)); + LOG(INFO) << "native output " << DescribeTensor(native_outputs.front()); - // Compare tensors between analysis and zerocopy - auto *p0 = static_cast(predictor.get()); - auto *p1 = static_cast(analysis_predictor.get()); - auto *p2 = static_cast(native_predictor.get()); - - std::vector tensor_names; - for (auto &var_desc : p0->program().Block(0).AllVars()) { - tensor_names.push_back(var_desc->Name()); - } - - LOG(INFO) << "Comparing tensors"; - ASSERT_TRUE( - CompareTensors(*p0->scope(), *p1->scope(), {"final_output.tmp_1"})); - ASSERT_TRUE( - CompareTensors(*p0->scope(), *p2->scope(), {"final_output.tmp_1"})); - - LOG(INFO) << "output1 " << inference::LoDTensorSummary( - p0->scope() - ->FindVar("final_output.tmp_1") - ->Get()); - LOG(INFO) << "output2 " << inference::LoDTensorSummary( - p1->scope() - ->FindVar("final_output.tmp_1") - ->Get()); - LOG(INFO) << "output3 " << inference::LoDTensorSummary( - p2->scope() - ->FindVar("final_output.tmp_1") - ->Get()); - - for (int i = 0; i < output_size; i++) { - LOG(INFO) << output_data[i] << " " - << static_cast(native_outputs.front().data.data())[i] - << " " - << static_cast(analysis_outputs.front().data.data())[i]; - EXPECT_NEAR(output_data[i], - static_cast(native_outputs.front().data.data())[i], - 1e-3); + int output_size{0}; + auto *zero_copy_data = output_tensor->data(&place, &output_size); + auto *native_data = static_cast(native_outputs.front().data.data()); + for (size_t i = 0; i < output_size / sizeof(float); i++) { + EXPECT_NEAR(zero_copy_data[i], native_data[i], 1e-3); } - - LOG(INFO) << "batch_size: " << FLAGS_batch_size; - - LOG(INFO) << "zero average time: " - << total_time / (FLAGS_repeat * FLAGS_batch_size); - LOG(INFO) << "analysis average time: " - << analysis_total_time / (FLAGS_repeat * FLAGS_batch_size); - LOG(INFO) << "native average time: " - << native_total_time / (FLAGS_repeat * FLAGS_batch_size); } TEST(Analyzer_rnn1, ZeroCopyMultiThread) { diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc index ca19475bda..05bffede47 100644 --- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc @@ -108,9 +108,7 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) { AnalysisConfig cfg; SetConfig(&cfg); // Enable embedding_fc_lstm_fuse_pass (disabled by default) - auto it = std::find(cfg.ir_passes.begin(), cfg.ir_passes.end(), - "embedding_fc_lstm_fuse_pass"); - if (it != cfg.ir_passes.end()) cfg.ir_passes.erase(it); + cfg.pass_builder()->InsertPass(2, "embedding_fc_lstm_fuse_pass"); std::vector> input_slots_all; SetInput(&input_slots_all); diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index b2cd49af9a..8fafd25b78 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -58,7 +58,10 @@ void SetConfig(AnalysisConfig *cfg) { cfg->enable_ir_optim = true; cfg->specify_input_name = true; // TODO(TJ): fix fusion gru - cfg->ir_passes.push_back("fc_gru_fuse_pass"); + cfg->pass_builder()->DeletePass("fc_gru_fuse_pass"); +#ifdef PADDLE_WITH_MKLDNN + cfg->EnableMKLDNN(); +#endif } void SetInput(std::vector> *inputs) { @@ -84,7 +87,9 @@ void SetInput(std::vector> *inputs) { void profile(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - cfg._use_mkldnn = use_mkldnn; + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector outputs; std::vector> input_slots_all; @@ -125,7 +130,9 @@ TEST(Analyzer_vis, fuse_statis) { void compare(bool use_mkldnn = false) { AnalysisConfig cfg; SetConfig(&cfg); - cfg._use_mkldnn = use_mkldnn; + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } std::vector> input_slots_all; SetInput(&input_slots_all); diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 8c5888d8da..ab4ab20b58 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -20,6 +20,7 @@ #include // NOLINT #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/api/analysis_predictor.h" @@ -88,22 +89,25 @@ size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); } std::unordered_map GetFuseStatis(PaddlePredictor *predictor, int *num_ops) { + std::unordered_map res; auto *analysis_predictor = static_cast(predictor); - auto &fuse_statis = analysis_predictor->analysis_argument() - .Get>( - framework::ir::kFuseStatisAttr); - for (auto &item : fuse_statis) { + auto *fusion_status = + analysis_predictor->analysis_argument().fusion_statis_ptr(); + if (!fusion_status) { + return res; + } + for (auto &item : *fusion_status) { LOG(INFO) << "fused " << item.first << " " << item.second; } int num = 0; for (auto &node : - analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { - if (node->IsFunction()) { + analysis_predictor->analysis_argument().main_graph().Nodes()) { + if (node->IsOp()) { ++num; } } *num_ops = num; - return fuse_statis; + return *fusion_status; } void SetFakeImageInput(std::vector> *inputs, @@ -161,11 +165,12 @@ void TestMultiThreadPrediction( int num_times = FLAGS_repeat; std::vector threads; std::vector> predictors; - // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled - // because AttentionLSTM's hard code nodeid will be damanged. - for (int tid = 0; tid < num_threads; ++tid) { - predictors.emplace_back(CreateTestPredictor(config, use_analysis)); + predictors.emplace_back(CreateTestPredictor(config, use_analysis)); + for (int tid = 1; tid < num_threads; ++tid) { + predictors.emplace_back(predictors.front()->Clone()); } + + size_t total_time{0}; for (int tid = 0; tid < num_threads; ++tid) { threads.emplace_back([&, tid]() { #ifdef PADDLE_WITH_MKLDNN @@ -173,17 +178,21 @@ void TestMultiThreadPrediction( #endif // Each thread should have local inputs and outputs. // The inputs of each thread are all the same. - std::vector> inputs_tid = inputs; std::vector outputs_tid; + auto &predictor = predictors[tid]; + LOG(INFO) << "running thread " << tid; Timer timer; timer.tic(); for (int i = 0; i < num_times; i++) { - for (size_t j = 0; j < inputs_tid.size(); j++) { - predictors[tid]->Run(inputs_tid[j], &outputs_tid); + for (const auto &input : inputs) { + ASSERT_TRUE(predictor->Run(input, &outputs_tid)); } } - PrintTime(batch_size, num_times, num_threads, tid, - timer.toc() / num_times, inputs_tid.size()); + + auto time = timer.toc(); + total_time += time; + PrintTime(batch_size, num_times, num_threads, tid, time / num_times, + inputs.size()); }); } for (int i = 0; i < num_threads; ++i) { @@ -196,7 +205,7 @@ void TestPrediction(const AnalysisConfig &config, std::vector *outputs, int num_threads, bool use_analysis = FLAGS_use_analysis) { LOG(INFO) << "use_analysis: " << use_analysis - << ", use_mkldnn: " << config._use_mkldnn; + << ", use_mkldnn: " << config.use_mkldnn(); if (num_threads == 1) { TestOneThreadPrediction(config, inputs, outputs, use_analysis); } else { @@ -208,7 +217,7 @@ void TestPrediction(const AnalysisConfig &config, void CompareNativeAndAnalysis( const AnalysisConfig &config, const std::vector> &inputs) { - LOG(INFO) << "use_mkldnn: " << config._use_mkldnn; + LOG(INFO) << "use_mkldnn: " << config.use_mkldnn(); std::vector native_outputs, analysis_outputs; TestOneThreadPrediction(config, inputs, &native_outputs, false); TestOneThreadPrediction(config, inputs, &analysis_outputs, true); diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index 75840a9c43..71423154f8 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -16,10 +16,13 @@ #include #include #include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/paddle_inference_pass.h" +#include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { -using paddle::contrib::MixedRTConfig; +using paddle::contrib::AnalysisConfig; DEFINE_string(dirname, "", "Directory of the inference model."); @@ -27,33 +30,24 @@ NativeConfig GetConfigNative() { NativeConfig config; config.model_dir = FLAGS_dirname; // LOG(INFO) << "dirname " << config.model_dir; - config.fraction_of_gpu_memory = 0.45; + config.fraction_of_gpu_memory = 0.15; config.use_gpu = true; config.device = 0; return config; } -MixedRTConfig GetConfigTRT() { - MixedRTConfig config; - config.model_dir = FLAGS_dirname; - config.use_gpu = true; - config.fraction_of_gpu_memory = 0.2; - config.device = 0; - config.max_batch_size = 3; - return config; +void PrepareTRTConfig(AnalysisConfig *config) { + config->model_dir = FLAGS_dirname + "/" + "mobilenet"; + config->fraction_of_gpu_memory = 0.15; + config->EnableTensorRtEngine(1 << 10, 5); + config->pass_builder()->DeletePass("conv_bn_fuse_pass"); + config->pass_builder()->DeletePass("fc_fuse_pass"); + config->pass_builder()->TurnOnDebug(); } -void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { - NativeConfig config0 = GetConfigNative(); - config0.model_dir = model_dirname; - - MixedRTConfig config1 = GetConfigTRT(); - config1.model_dir = model_dirname; - config1.max_batch_size = batch_size; - - auto predictor0 = CreatePaddlePredictor(config0); - auto predictor1 = CreatePaddlePredictor(config1); - // Prepare inputs +void PrepareInputs(std::vector *tensors, int batch_size) { + PADDLE_ENFORCE_EQ(tensors->size(), 1UL); + auto &tensor = tensors->front(); int height = 224; int width = 224; float *data = new float[batch_size * 3 * height * width]; @@ -61,25 +55,34 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { data[0] = 1.0f; // Prepare inputs - PaddleTensor tensor; tensor.name = "input_0"; tensor.shape = std::vector({batch_size, 3, height, width}); tensor.data = PaddleBuf(static_cast(data), sizeof(float) * (batch_size * 3 * height * width)); tensor.dtype = PaddleDType::FLOAT32; - std::vector paddle_tensor_feeds(1, tensor); +} + +void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { + auto config0 = GetConfigNative(); + config0.model_dir = model_dirname; + + AnalysisConfig config1(true); + PrepareTRTConfig(&config1); + config1.model_dir = model_dirname; + + auto predictor0 = CreatePaddlePredictor(config0); + auto predictor1 = CreatePaddlePredictor(config1); + + // Prepare inputs + std::vector paddle_tensor_feeds(1); + PrepareInputs(&paddle_tensor_feeds, batch_size); // Prepare outputs std::vector outputs0; std::vector outputs1; CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0)); - CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size)); - // Get output. - ASSERT_EQ(outputs0.size(), 1UL); - ASSERT_EQ(outputs1.size(), 1UL); - const size_t num_elements = outputs0.front().data.length() / sizeof(float); const size_t num_elements1 = outputs1.front().data.length() / sizeof(float); EXPECT_EQ(num_elements, num_elements1); @@ -94,15 +97,52 @@ void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) { } TEST(trt_models_test, mobilenet) { - CompareTensorRTWithFluid(1, FLAGS_dirname + "/mobilenet"); + CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "mobilenet"); } - TEST(trt_models_test, resnet50) { - CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnet50"); + CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnet50"); } - TEST(trt_models_test, resnext50) { - CompareTensorRTWithFluid(1, FLAGS_dirname + "/resnext50"); + CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnext50"); +} + +TEST(trt_models_test, raw_gpu) { + std::string model_dir = FLAGS_dirname + "/" + "mobilenet"; + auto config0 = GetConfigNative(); + config0.model_dir = model_dir; + int batch_size = 2; + + AnalysisConfig config1(true); + config1.fraction_of_gpu_memory = 0.1; + config1.enable_ir_optim = true; + config1.model_dir = model_dir; + + auto predictor0 = CreatePaddlePredictor(config0); + auto predictor1 = CreatePaddlePredictor(config1); + + // Prepare inputs + std::vector paddle_tensor_feeds(1); + PrepareInputs(&paddle_tensor_feeds, batch_size); + + // Prepare outputs + std::vector outputs0; + std::vector outputs1; + CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0)); + CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size)); + + const size_t num_elements = outputs0.front().data.length() / sizeof(float); + const size_t num_elements1 = outputs1.front().data.length() / sizeof(float); + EXPECT_EQ(num_elements, num_elements1); + + auto *data0 = static_cast(outputs0.front().data.data()); + auto *data1 = static_cast(outputs1.front().data.data()); + + ASSERT_GT(num_elements, 0UL); + for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) { + EXPECT_NEAR(data0[i], data1[i], 1e-3); + } } } // namespace paddle + +USE_PASS(tensorrt_subgraph_pass); diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index 51219504ff..df1edc5c2e 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -40,8 +40,9 @@ class LoadOp : public framework::OperatorBase { auto out_var_name = Output("Out"); auto *out_var = scope.FindVar(out_var_name); - PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", - out_var_name); + PADDLE_ENFORCE(out_var != nullptr, + "Output variable %s cannot be found in scope %p", + out_var_name, &scope); if (out_var->IsType()) { LoadLodTensor(fin, place, out_var); diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index 08f2949d4a..7e434c293c 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -56,7 +56,8 @@ class MulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(x_mat_dims[1], y_mat_dims[0], "First matrix's width must be equal with second matrix's " - "height. %s, %s"); + "height. %s, %s", + x_mat_dims[1], y_mat_dims[0]); std::vector output_dims; output_dims.reserve( static_cast(x_num_col_dims + y_dims.size() - y_num_col_dims)); -- GitLab