diff --git a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md index 84987ea5daee9abd0fe2fe71bdfde62ea3388ab5..99f8bee5ca1519ccf5d7c35ad2a64da4a8841ada 100644 --- a/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md +++ b/doc/fluid/new_docs/user_guides/howto/debug/visualdl.md @@ -104,6 +104,7 @@ visualDL --logdir=scratch_log --port=8080 # 访问 http://127.0.0.1:8080 ``` +如果出现`TypeError: __init__() got an unexpected keyword argument 'file'`, 是因为protobuf不是3.5以上,运行`pip install --upgrade protobuf`就能解决。 如果在虚拟环境下仍然遇到安装问题,请尝试以下方法。 diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 70e5b97770a6c581c6a9c0145b03c42b83f14471..c2694144d708161a3bed214ceca745505656456f 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -43,6 +43,7 @@ paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None)) +paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None) paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None) paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None) diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index 0d69dfa79aa26940f8f56f84b35ffed34f29f703..9512fd056e73836cdc34a9e409ab2d7809a6aff6 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -11,6 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" #include #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index f2e18a461fd221252e4a10262a13bc8e942f5988..b625a617a26cf9d34abe9931eaded9bc2797cf08 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -6,6 +6,7 @@ cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits analyzer.cc helper.cc # passes + analysis_pass.cc fluid_to_data_flow_graph_pass.cc data_flow_graph_to_fluid_pass.cc dfg_graphviz_draw_pass.cc @@ -105,6 +106,6 @@ if (NOT EXISTS ${TEXT_CLASSIFICATION_INSTALL_DIR} AND WITH_TESTING AND WITH_INFE inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} "text-classification-Senta.tar.gz") endif() -inference_analysis_test(test_text_classification SRCS test_text_classification.cc +inference_analysis_test(test_text_classification SRCS analyzer_text_classification_tester.cc EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta) diff --git a/paddle/fluid/inference/analysis/pass.cc b/paddle/fluid/inference/analysis/analysis_pass.cc similarity index 91% rename from paddle/fluid/inference/analysis/pass.cc rename to paddle/fluid/inference/analysis/analysis_pass.cc index 121b72c0a0aa9a0c568b04f7ee9a5bc5c1d6f5f8..9be9f755b9ed7273d842f8c0e2046f0ca0ce2247 100644 --- a/paddle/fluid/inference/analysis/pass.cc +++ b/paddle/fluid/inference/analysis/analysis_pass.cc @@ -12,4 +12,4 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" diff --git a/paddle/fluid/inference/analysis/pass.h b/paddle/fluid/inference/analysis/analysis_pass.h similarity index 59% rename from paddle/fluid/inference/analysis/pass.h rename to paddle/fluid/inference/analysis/analysis_pass.h index 7719c6f5ff3c940948c7bdbcb25513cdf430281b..b6edb5529ace2ad5bd1b35bfbee1f7a744457cc3 100644 --- a/paddle/fluid/inference/analysis/pass.h +++ b/paddle/fluid/inference/analysis/analysis_pass.h @@ -28,10 +28,10 @@ namespace paddle { namespace inference { namespace analysis { -class Pass { +class AnalysisPass { public: - Pass() = default; - virtual ~Pass() = default; + AnalysisPass() = default; + virtual ~AnalysisPass() = default; // Mutable Pass. virtual bool Initialize(Argument *argument) { return false; } // Readonly Pass. @@ -42,23 +42,16 @@ class Pass { virtual bool Finalize() { return false; } // Get a Pass appropriate to print the Node this pass operates on. - virtual Pass *CreatePrinterPass(std::ostream &os, - const std::string &banner) const { + virtual AnalysisPass *CreatePrinterPass(std::ostream &os, + const std::string &banner) const { return nullptr; } // Create a debugger Pass that draw the DFG by graphviz toolkit. - virtual Pass *CreateGraphvizDebugerPass() const { return nullptr; } + virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; } - virtual void Run() { LOG(FATAL) << "not valid"; } - // Run on a single Node. - virtual void Run(Node *x) { LOG(FATAL) << "not valid"; } - // Run on a single Function. - virtual void Run(Function *x) { LOG(FATAL) << "not valid"; } - // Run on a single FunctionBlock. - virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; } // Run on a single DataFlowGraph. - virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; } + virtual void Run(DataFlowGraph *x) = 0; // Human-readable short representation. virtual std::string repr() const = 0; @@ -66,29 +59,8 @@ class Pass { virtual std::string description() const { return "No DOC"; } }; -// NodePass process on any Node types. -class NodePass : public Pass { - public: - virtual void Run(Node *node) = 0; -}; - -// NodePass process on any Function node types. -class FunctionPass : public Pass { - public: - virtual void Run(Function *node) = 0; -}; - -// NodePass process on any FunctionBlock node types. -class FunctionBlockPass : public Pass { - public: - virtual void Run(FunctionBlock *node) = 0; -}; - // GraphPass processes on any GraphType. -class DataFlowGraphPass : public Pass { - public: - virtual void Run(DataFlowGraph *graph) = 0; -}; +class DataFlowGraphPass : public AnalysisPass {}; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index 1fd884435d173800563ea37809003ed3aee16c7c..6dc39cae0522efd48c2e2921611adebd6937ddf7 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/inference/analysis/analyzer.h" #include #include + #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" @@ -58,7 +59,7 @@ class DfgPassManagerImpl final : public DfgPassManager { std::string description() const override { return "DFG pass manager."; } private: - void AddPass(const std::string& name, Pass* pass) { + void AddPass(const std::string& name, AnalysisPass* pass) { VLOG(3) << "Adding pass " << name; Register(name, pass); AddGraphvizDebugerPass(pass); @@ -87,7 +88,7 @@ class DfgPassManagerImpl final : public DfgPassManager { } // Add the graphviz debuger pass if the parent pass has one. - void AddGraphvizDebugerPass(Pass* pass) { + void AddGraphvizDebugerPass(AnalysisPass* pass) { auto* debuger_pass = pass->CreateGraphvizDebugerPass(); if (debuger_pass) { Register(debuger_pass->repr(), debuger_pass); diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 3fdd2b9ec7537c891a04efb3ca9a1d45075ffa5e..abc3021e7ec3f0f970d786b782ad17510b8bdbd8 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -36,8 +36,11 @@ limitations under the License. */ */ #include +#include +#include + +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/flags.h" -#include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/pass_manager.h" namespace paddle { diff --git a/paddle/fluid/inference/analysis/test_text_classification.cc b/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc similarity index 94% rename from paddle/fluid/inference/analysis/test_text_classification.cc rename to paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc index 2913824f62301795aea967c22021b2af11f343c1..265e814acd594d6185251cbaa4d6880bb9ee7405 100644 --- a/paddle/fluid/inference/analysis/test_text_classification.cc +++ b/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/inference/analysis/analyzer.h" #include #include // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. #include #include "paddle/fluid/framework/ir/pass.h" -#include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" +#include "paddle/fluid/inference/api/paddle_inference_pass.h" #include "paddle/fluid/inference/api/timer.h" DEFINE_string(infer_model, "", "Directory of the inference model."); @@ -100,10 +101,3 @@ void Main(int batch_size) { TEST(text_classification, basic) { Main(FLAGS_batch_size); } } // namespace paddle - -USE_PASS(fc_fuse_pass); -USE_PASS(seq_concat_fc_fuse_pass); -USE_PASS(fc_lstm_fuse_pass); -USE_PASS(graph_viz_pass); -USE_PASS(infer_clean_graph_pass); -USE_PASS(attention_lstm_fuse_pass); diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc index 80c85555e722433f3657e880520b3fe459f6ce1a..8579845d51e80d73d220465d25b70944f5ad9bf2 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc @@ -263,7 +263,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass { }; } // namespace -Pass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { +AnalysisPass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( FLAGS_IA_graphviz_log_root, "data_flow_graph_to_fluid_graphviz_debugger")); diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h index 0c9a8a0b7cae17bf2eaa714348ea1c9b5e43611b..891c7226e245fa3b92892785362c186185a61f62 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h @@ -21,8 +21,8 @@ #include #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/inference/analysis/pass.h" namespace paddle { namespace inference { @@ -42,7 +42,7 @@ class DataFlowGraphToFluidPass final : public DataFlowGraphPass { return "Transform a DFG to a Fluid ProgramDesc"; } - Pass *CreateGraphvizDebugerPass() const override; + AnalysisPass *CreateGraphvizDebugerPass() const override; protected: // Add a Fluid Op into the ProgramDesc. diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h index 17445ab4407a159ca11345bc9a9226b3ad0044f0..e537bfc0e64d4ff46b3d61499a1a0298ed83533f 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h @@ -21,8 +21,8 @@ limitations under the License. */ #include #include +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/dot.h" -#include "paddle/fluid/inference/analysis/pass.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc index 51bd0ac42d455f68ac5d70f0ce9703dfad6070d4..2b7d632c839e735ca03c6e17b94307b40cc13374 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -66,7 +66,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass { }; } -Pass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const { +AnalysisPass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const { return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( FLAGS_IA_graphviz_log_root, "fluid-to-dfg-debuger")); } diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h index fb948bf2242abcbc1e841fd3b8457e63358782c5..b9e262020e9522e167b998d57e2be2ac19b48447 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h @@ -22,8 +22,8 @@ #include #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" -#include "paddle/fluid/inference/analysis/pass.h" namespace paddle { namespace inference { @@ -46,7 +46,7 @@ class FluidToDataFlowGraphPass final : public DataFlowGraphPass { return "transform a fluid ProgramDesc to a data flow graph."; } - Pass *CreateGraphvizDebugerPass() const override; + AnalysisPass *CreateGraphvizDebugerPass() const override; private: framework::proto::ProgramDesc const *desc_; diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h index 3086085710d6e850ed27e82d2323690dfdd3ef19..c2599e218a2306f9353b843b7ea3f18aeacb008e 100644 --- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.h +++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.h @@ -14,15 +14,17 @@ #pragma once +#include +#include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/flags.h" #include "paddle/fluid/inference/analysis/ir_pass_manager.h" -#include "paddle/fluid/inference/analysis/pass.h" namespace paddle { namespace inference { namespace analysis { -using namespace framework; static const char kFluidToIrPassesAttr[] = "__fluid_to_ir_passes__"; @@ -48,7 +50,8 @@ class FluidToIrPass final : public DataFlowGraphPass { ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path); // Load program. auto program = LoadProgramDesc(*argument->fluid_model_program_path); - argument->origin_program_desc.reset(new proto::ProgramDesc(program)); + argument->origin_program_desc.reset( + new framework::proto::ProgramDesc(program)); // Create main data flow graph. if (!argument->main_dfg) { argument->main_dfg.reset(new DataFlowGraph); @@ -78,12 +81,13 @@ class FluidToIrPass final : public DataFlowGraphPass { IRPassManager ir_passes(argument_->Get("ir_program_desc"), nullptr); // Pass the scope from analysis to IR if needed. - if (argument_->Has(ir::kParamScopeAttr)) { + if (argument_->Has(framework::ir::kParamScopeAttr)) { // Here the address is passed, attention that IR doesn't own the scope, so // the real scope in analysis should live during the IR phase. ir_passes.graph().Set( - ir::kParamScopeAttr, - new Scope *(&argument_->Get(ir::kParamScopeAttr))); + framework::ir::kParamScopeAttr, + new framework::Scope *(&argument_->Get( + framework::ir::kParamScopeAttr))); } if (FLAGS_IA_enable_ir) { @@ -95,12 +99,12 @@ class FluidToIrPass final : public DataFlowGraphPass { PADDLE_ENFORCE(argument_->main_dfg.get()); argument_->main_dfg->Build(ir_passes.graph()); // inherit the arguments from ir. - if (ir_passes.graph().Has(ir::kFuseStatisAttr)) { + if (ir_passes.graph().Has(framework::ir::kFuseStatisAttr)) { argument_->Set( - ir::kFuseStatisAttr, + framework::ir::kFuseStatisAttr, new std::unordered_map( ir_passes.graph().Get>( - ir::kFuseStatisAttr))); + framework::ir::kFuseStatisAttr))); } } @@ -112,7 +116,7 @@ class FluidToIrPass final : public DataFlowGraphPass { private: // Load parameters from a single file or from a directory. - bool LoadParams(Scope *scope, const std::string &dir, + bool LoadParams(framework::Scope *scope, const std::string &dir, const std::string &prog_file, const std::string ¶m_file); private: diff --git a/paddle/fluid/inference/analysis/model_store_pass.h b/paddle/fluid/inference/analysis/model_store_pass.h index 3a2869e30bd80cfd0756f8e21acb414656620eaa..f14b49e09c2f8e79c6fc4accdbf17f4f7a9bb1a3 100644 --- a/paddle/fluid/inference/analysis/model_store_pass.h +++ b/paddle/fluid/inference/analysis/model_store_pass.h @@ -19,7 +19,7 @@ #pragma once #include -#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/analysis/pass_manager.cc b/paddle/fluid/inference/analysis/pass_manager.cc index ff5ec94265a4f05c1294ad6c8ac5f86c249b84b6..759b2b96a1944c060ac98b6865b58ba2f6369607 100644 --- a/paddle/fluid/inference/analysis/pass_manager.cc +++ b/paddle/fluid/inference/analysis/pass_manager.cc @@ -40,17 +40,6 @@ void DfgPassManager::RunAll() { } } -void NodePassManager::RunAll() { - PADDLE_ENFORCE(argument_); - PADDLE_ENFORCE(argument_->main_dfg.get()); - auto trait = GraphTraits(*argument_->main_dfg).nodes_in_DFS(); - for (auto& node : trait) { - for (auto& pass : data_) { - pass->Run(&node); - } - } -} - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager.h b/paddle/fluid/inference/analysis/pass_manager.h index 81a17e0287a5aef8a328e43380ee3691f5a32379..412747c4fcce73303703f586f7a04edf4cc5ee76 100644 --- a/paddle/fluid/inference/analysis/pass_manager.h +++ b/paddle/fluid/inference/analysis/pass_manager.h @@ -33,7 +33,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { namespace inference { @@ -43,7 +43,7 @@ namespace analysis { * PassManager is the base class for all pass managers, a pass manager has * several Pass-es registered, and execute them in the linear order. */ -class PassManager : public OrderedRegistry { +class PassManager : public OrderedRegistry { public: PassManager() = default; // Call all the passes' Initialize methods. The desc and data_flow_graph are @@ -89,18 +89,6 @@ class DfgPassManager : public PassManager { virtual ~DfgPassManager() = default; }; -/* - * A pass manager that process a Node each time. - */ -class NodePassManager : public PassManager { - public: - NodePassManager() = default; - - void RunAll() override; - - virtual ~NodePassManager() = default; -}; - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager_tester.cc b/paddle/fluid/inference/analysis/pass_manager_tester.cc index 13423e4837e12a96e7a5dfc9ca3f59bf8b14746a..72b0fbf7e571ec97a0ea093d01449c1d5ddb9b91 100644 --- a/paddle/fluid/inference/analysis/pass_manager_tester.cc +++ b/paddle/fluid/inference/analysis/pass_manager_tester.cc @@ -34,28 +34,6 @@ class TestDfgPassManager final : public DfgPassManager { std::string description() const override { return "test doc"; } }; -class TestNodePassManager final : public NodePassManager { - public: - virtual ~TestNodePassManager() = default; - - std::string repr() const override { return "test-node-pass-manager"; } - std::string description() const override { return "test doc"; } -}; - -class TestNodePass final : public NodePass { - public: - virtual ~TestNodePass() = default; - - bool Initialize(Argument* argument) override { return true; } - - void Run(Node* node) override { - LOG(INFO) << "- Processing node " << node->repr(); - } - - std::string repr() const override { return "test-node"; } - std::string description() const override { return "some doc"; } -}; - TEST(PassManager, DFG_pass_manager) { TestDfgPassManager manager; DFG_GraphvizDrawPass::Config config("./", "dfg.dot"); @@ -71,19 +49,6 @@ TEST(PassManager, DFG_pass_manager) { manager.RunAll(); } -TEST(PassManager, Node_pass_manager) { - Argument argument(FLAGS_inference_model_dir); - // Pre-process: initialize the DFG with the ProgramDesc first. - FluidToDataFlowGraphPass pass0; - pass0.Initialize(&argument); - pass0.Run(argument.main_dfg.get()); - - TestNodePassManager manager; - manager.Register("test-node-pass", new TestNodePass); - ASSERT_TRUE(manager.Initialize(&argument)); - manager.RunAll(); -} - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc index 9f51fafe0b2a66f9d062a6b751fe7a3bc662ce7c..174c8513f92cf869419f04cab5a54af65e9673b8 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc @@ -68,7 +68,7 @@ class DfgDebuggerPass : public DFG_GraphvizDrawPass { } }; -Pass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const { +AnalysisPass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const { DFG_GraphvizDrawPass::Config config(FLAGS_IA_graphviz_log_root, "tensorrt_marked_node"); return new DfgDebuggerPass(config); diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h index c558a6ebbde371071c7330a14cc986bf764d1773..c881a54c240538b68abdcb9060db69de3bf2b8bb 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h @@ -20,7 +20,7 @@ #pragma once #include -#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/subgraph_splitter.h" namespace paddle { @@ -48,7 +48,7 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { return "tensorrt sub-graph mark pass"; } - Pass* CreateGraphvizDebugerPass() const override; + AnalysisPass* CreateGraphvizDebugerPass() const override; bool Finalize() override; private: diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h index c6741a92095d33d261a4e1667c87a8ca02e51a9f..219e3f5470f627e81005aabf94f9c72c33fd2eed 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/node.h" -#include "paddle/fluid/inference/analysis/pass.h" #include "paddle/fluid/inference/analysis/subgraph_splitter.h" namespace paddle { diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt index ea00bf364951b0a4304b380df492d00e84451136..6b8278a0395c9ae71e32337d9735409de7ba0c96 100644 --- a/paddle/fluid/inference/api/CMakeLists.txt +++ b/paddle/fluid/inference/api/CMakeLists.txt @@ -44,19 +44,7 @@ function(inference_api_test TARGET_NAME) endfunction(inference_api_test) cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor) -cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api - analysis - ir_pass_manager - pass - fc_fuse_pass - fc_lstm_fuse_pass - seq_concat_fc_fuse_pass - graph_viz_pass - infer_clean_graph_pass - graph_pattern_detector - infer_clean_graph_pass - attention_lstm_fuse_pass - ) +cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis) cc_test(test_paddle_inference_api SRCS api_tester.cc diff --git a/paddle/fluid/operators/fusion_gru_op.cc b/paddle/fluid/operators/fusion_gru_op.cc index 582c75872ab2818cdf834f9a46278db1d6f91d54..916f84cb4a78c3721cb67bd3cf8d3759a8eaf1bf 100644 --- a/paddle/fluid/operators/fusion_gru_op.cc +++ b/paddle/fluid/operators/fusion_gru_op.cc @@ -30,14 +30,7 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { "Input(WeightX) of GRU should not be null."); PADDLE_ENFORCE(ctx->HasInput("WeightH"), "Input(WeightH) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("XX"), "Output(XX) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), - "Output(ReorderedH0) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), - "Output(BatchedInput) of GRU should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"), - "Output(BatchedOut) of GRU should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Hidden"), "Output(Hidden) of GRU should not be null."); @@ -80,15 +73,20 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const { } framework::DDim out_dims({x_dims[0], frame_size}); ctx->SetOutputDim("Hidden", out_dims); - ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); - ctx->SetOutputDim("BatchedOut", out_dims); ctx->ShareLoD("X", "Hidden"); - int xx_width; if (ctx->Attrs().Get("use_seq")) { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; + PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), + "Output(ReorderedH0) of GRU should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), + "Output(BatchedInput) of GRU should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"), + "Output(BatchedOut) of GRU should not be null."); + ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); + ctx->SetOutputDim("BatchedOut", out_dims); } ctx->SetOutputDim("XX", {x_dims[0], xx_width}); ctx->ShareLoD("X", "XX"); diff --git a/paddle/fluid/operators/fusion_lstm_op.cc b/paddle/fluid/operators/fusion_lstm_op.cc index 104e160e2d7069ec247cc51e927ce8824f1b69e8..ef23ab3f981786d33567619ad0194d21f31bdc8e 100644 --- a/paddle/fluid/operators/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fusion_lstm_op.cc @@ -38,16 +38,6 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "Output(Hidden) of LSTM should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Cell"), "Output(Cell) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), - "Output(BatchedInput) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"), - "Output(BatchedHidden) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"), - "Output(BatchedCell) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), - "Output(ReorderedH0) of LSTM should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"), - "Output(ReorderedC0) of LSTM should not be null."); auto x_dims = ctx->GetInputDim("X"); PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2."); @@ -88,28 +78,36 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2."); PADDLE_ENFORCE_EQ(b_dims[0], 1, "The first dimension of Input(Bias) should be 1."); - - auto use_peepholes = ctx->Attrs().Get("use_peepholes"); - PADDLE_ENFORCE_EQ(b_dims[1], (use_peepholes ? 7 : 4) * frame_size, - "The second dimension of Input(Bias) should be " - "7 * %d if enable peepholes connection or" - "4 * %d if disable peepholes", - frame_size, frame_size); + PADDLE_ENFORCE_EQ( + b_dims[1], (ctx->Attrs().Get("use_peepholes") ? 7 : 4) * frame_size, + "The second dimension of Input(Bias) should be " + "7 * %d if enable peepholes connection or" + "4 * %d if disable peepholes", + frame_size, frame_size); framework::DDim out_dims({x_dims[0], frame_size}); ctx->SetOutputDim("Hidden", out_dims); ctx->SetOutputDim("Cell", out_dims); - ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); - ctx->SetOutputDim("BatchedHidden", out_dims); - ctx->SetOutputDim("BatchedCell", out_dims); ctx->ShareLoD("X", "Hidden"); ctx->ShareLoD("X", "Cell"); - int xx_width; if (ctx->Attrs().Get("use_seq")) { xx_width = wx_dims[1]; } else { xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1]; + PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"), + "Output(BatchedInput) of LSTM should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"), + "Output(BatchedHidden) of LSTM should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"), + "Output(BatchedCell) of LSTM should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"), + "Output(ReorderedH0) of LSTM should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"), + "Output(ReorderedC0) of LSTM should not be null."); + ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]}); + ctx->SetOutputDim("BatchedHidden", out_dims); + ctx->SetOutputDim("BatchedCell", out_dims); } ctx->SetOutputDim("XX", {x_dims[0], xx_width}); ctx->ShareLoD("X", "XX"); @@ -232,18 +230,18 @@ class FuisonLSTMKernel : public framework::OpKernel { act_cand = act_functor(act_cand_str); \ } -#define INIT_BASE_INPUT_OUTPUT \ - auto* x = ctx.Input("X"); \ - auto* h0 = ctx.Input("H0"); \ - auto* c0 = ctx.Input("C0"); \ - auto* wx = ctx.Input("WeightX"); \ - auto* wh = ctx.Input("WeightH"); \ - auto* bias = ctx.Input("Bias"); \ - auto* xx = ctx.Output("XX"); \ - auto* hidden_out = ctx.Output("Hidden"); \ - auto* cell_out = ctx.Output("Cell"); \ - bool use_peepholes = ctx.Attr("use_peepholes"); \ - bool is_reverse = ctx.Attr("is_reverse"); +#define INIT_BASE_INPUT_OUTPUT \ + auto* x = ctx.Input("X"); \ + auto* h0 = ctx.Input("H0"); \ + auto* c0 = ctx.Input("C0"); \ + auto* wx = ctx.Input("WeightX"); \ + auto* wh = ctx.Input("WeightH"); \ + auto* bias = ctx.Input("Bias"); \ + auto* xx = ctx.Output("XX"); \ + auto* hidden_out = ctx.Output("Hidden"); \ + auto* cell_out = ctx.Output("Cell"); \ + bool is_reverse = ctx.Attr("is_reverse"); \ + bool use_peepholes = ctx.Attr("use_peepholes"); #define INIT_BASE_SIZES \ auto x_dims = x->dims(); /* T x M*/ \ @@ -254,172 +252,183 @@ class FuisonLSTMKernel : public framework::OpKernel { const int D3 = D * 3; \ const int D4 = wh_dims[1]; +#define INIT_BASE_INPUT_DATAS \ + const T* x_data = x->data(); \ + const T* wx_data = wx->data(); \ + const T* wh_data = wh->data(); \ + /* diagonal weight*/ \ + const T* wc_data = bias->data() + D4; \ + /* for peephole only*/ \ + Tensor checked_cell; \ + T* checked_cell_data = nullptr; \ + auto place = ctx.GetPlace(); \ + if (use_peepholes) { \ + /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/ \ + checked_cell_data = checked_cell.mutable_data({2, D}, place); \ + } + +/// Compute LSTM +#define GEMM_WH_ADDON(bs, prev, out) \ + blas.GEMM(CblasNoTrans, CblasNoTrans, bs, D4, D, static_cast(1), prev, D, \ + wh_data, D4, static_cast(1), out, D4) + +// gates: W_ch, W_ih, W_fh, W_oh +#define GET_Ct(ct_1, gates, ct) \ + /* C_t = C_t-1 * fgated + cand_gated * igated*/ \ + act_cand(D, gates, gates); \ + blas.VMUL(D, gates, gates + D, gates + D); \ + blas.VMUL(D, ct_1, gates + D2, gates + D2); \ + blas.VADD(D, gates + D, gates + D2, ct) + +#define GET_Ht(ct, gates, ht) \ + /* H_t = act_cell(C_t) * ogated */ \ + act_cell(D, ct, gates + D2); \ + blas.VMUL(D, gates + D2, gates + D3, ht) + +#define GET_Ct_NOH0C0(gates, ct) \ + /* C_t = igated * cgated*/ \ + act_gate(D, gates + D, gates + D); \ + act_cand(D, gates, gates); \ + blas.VMUL(D, gates, gates + D, ct) + +#define COMPUTE_CtHt_NOH0C0(gates, ct, ht) \ + GET_Ct_NOH0C0(gates, ct); \ + act_gate(D, gates + D3, gates + D3); \ + GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt_PEEPHOLE_NOH0C0(gates, ct, ht) \ + GET_Ct_NOH0C0(gates, ct); \ + /* get outgated, put W_oc * C_t on igated */ \ + blas.VMUL(D, wc_data + D2, ct, gates + D); \ + blas.VADD(D, gates + D, gates + D3, gates + D3); \ + act_gate(D, gates + D3, gates + D3); \ + GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt(gates, ct_1, ct, ht) \ + act_gate(D3, gates + D, gates + D); \ + GET_Ct(ct_1, gates, ct); \ + GET_Ht(ct, gates, ht) + +#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht) \ + /* get fgated and igated*/ \ + blas.VMUL(D, wc_data, ct_1, checked_cell_data); \ + blas.VMUL(D, wc_data + D, ct_1, checked_cell_data + D); \ + blas.VADD(D2, checked_cell_data, gates + D, gates + D); \ + act_gate(D2, gates + D, gates + D); \ + GET_Ct(ct_1, gates, ct); \ + /* get ogated*/ \ + blas.VMUL(D, wc_data + D2, ct, gates + D); \ + blas.VADD(D, gates + D, gates + D3, gates + D3); \ + act_gate(D, gates + D3, gates + D3); \ + GET_Ht(ct, gates, ht) + void SeqCompute(const framework::ExecutionContext& ctx) const { using DeviceContext = paddle::platform::CPUDeviceContext; INIT_BASE_INPUT_OUTPUT INIT_BASE_SIZES INIT_VEC_FUNC + INIT_BASE_INPUT_DATAS auto x_lod = x->lod(); const int total_T = x_dims[0]; - const int N = x_lod[0].size() - 1; // batch size - - const T* x_data = x->data(); + const int N = x_lod[0].size() - 1; const T* h0_data = h0 ? h0->data() : nullptr; const T* c0_data = c0 ? c0->data() : nullptr; - const T* bias_data = bias->data(); - const T* wc_data = bias_data + D4; // w_ic, w_fc, w_oc - const T* wx_data = wx->data(); - const T* wh_data = wh->data(); - - T* xx_data = xx->mutable_data(ctx.GetPlace()); - T* hidden_out_data = hidden_out->mutable_data(ctx.GetPlace()); - T* cell_out_data = cell_out->mutable_data(ctx.GetPlace()); - - // use local variable - framework::DDim check_dims({3, D}); - Tensor checked_cell; // w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct - auto checked_cell_data = - checked_cell.mutable_data(check_dims, ctx.GetPlace()); - + T* xx_data = xx->mutable_data(place); + T* h_out_data = hidden_out->mutable_data(place); + T* c_out_data = cell_out->mutable_data(place); auto blas = math::GetBlas(ctx); math::FCCompute(blas, total_T, D4, M, x_data, wx_data, xx_data, bias->data()); + int xx_offset = D4; int gate_offset = D; if (is_reverse) { const int offset = (total_T - 1) * D; xx_data = xx_data + offset * 4; - hidden_out_data = hidden_out_data + offset; - cell_out_data = cell_out_data + offset; + h_out_data = h_out_data + offset; + c_out_data = c_out_data + offset; xx_offset = -D4; gate_offset = -D; } - auto move_step = [&]() { - xx_data = xx_data + xx_offset; - hidden_out_data = hidden_out_data + gate_offset; - cell_out_data = cell_out_data + gate_offset; - }; - - for (int i = 0; i < N; ++i) { - int bid = is_reverse ? N - 1 - i : i; - int seq_len = x_lod[0][bid + 1] - x_lod[0][bid]; - const T* prev_c_data = nullptr; - const T* prev_h_data = nullptr; - - int tstart = 0; - if (h0_data) { - prev_h_data = h0_data + bid * D; - prev_c_data = c0_data + bid * D; - } else { - // If step == 0 and there is no initialized hidden state, that is to say - // the H0 is zeros. Then W_h * H_t-1 can be skipped - - // ~C_t - act_cand(D, xx_data, xx_data); - if (use_peepholes) { - // I_t, F_t - act_gate(D2, xx_data + D, xx_data + D); - } else { - // I_t, F_t, O_t - act_gate(D3, xx_data + D, xx_data + D); - } - // C_t = I_t * ~C_t - blas.VMUL(D, xx_data, xx_data + D, cell_out_data); - - if (use_peepholes) { - // + W_oc * C_t for peephole connection - blas.VMUL(D, wc_data + D2, cell_out_data, checked_cell_data + D2); - blas.VADD(D, xx_data + D3, checked_cell_data + D2, xx_data + D3); - // O_t - act_gate(D, xx_data + D3, xx_data + D3); - } - - // hidden out= act_state(cellout) * outgate - act_cell(D, cell_out_data, xx_data + D2); - // H_t = O_t * act_state(C_t) - blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data); - - // prev - prev_h_data = hidden_out_data; - prev_c_data = cell_out_data; - - tstart = 1; - move_step(); - } - - for (int step = tstart; step < seq_len; ++step) { - // + W_h * H_t-1 - blas.GEMM(CblasNoTrans, CblasNoTrans, 1, D4, D, static_cast(1), - prev_h_data, D, wh_data, D4, static_cast(1), xx_data, D4); +#define MOVE_ONE_STEP \ + prev_h_data = h_out_data; \ + prev_c_data = c_out_data; \ + xx_data = xx_data + xx_offset; \ + h_out_data = h_out_data + gate_offset; \ + c_out_data = c_out_data + gate_offset + +#define PROCESS_H0C0_DEFINES \ + int bid = is_reverse ? N - 1 - i : i; \ + int seq_len = x_lod[0][bid + 1] - x_lod[0][bid]; \ + const T* prev_c_data = nullptr; \ + const T* prev_h_data = nullptr; \ + int tstart = 0 + +#define PROCESS_H0C0_PEEPHOLE \ + PROCESS_H0C0_DEFINES; \ + if (h0_data) { \ + prev_h_data = h0_data + bid * D; \ + prev_c_data = c0_data + bid * D; \ + } else { \ + COMPUTE_CtHt_PEEPHOLE_NOH0C0(xx_data, c_out_data, h_out_data); \ + MOVE_ONE_STEP; \ + tstart = 1; \ + } - // ~C_t - act_cand(D, xx_data, xx_data); +#define PROCESS_H0C0 \ + PROCESS_H0C0_DEFINES; \ + if (h0_data) { \ + prev_h_data = h0_data + bid * D; \ + prev_c_data = c0_data + bid * D; \ + } else { \ + COMPUTE_CtHt_NOH0C0(xx_data, c_out_data, h_out_data); \ + MOVE_ONE_STEP; \ + tstart = 1; \ + } - if (use_peepholes) { - // + W_ic|W_fc * C_t-1 for peephole connection - blas.VMUL(D, wc_data, prev_c_data, checked_cell_data); - blas.VMUL(D, wc_data + D, prev_c_data, checked_cell_data + D); - blas.VADD(D2, xx_data + D, checked_cell_data, xx_data + D); - // I_t, F_t - act_gate(D2, xx_data + D, xx_data + D); - } else { - // I_t, F_t, O_t - act_gate(D3, xx_data + D, xx_data + D); + if (use_peepholes) { + for (int i = 0; i < N; ++i) { + PROCESS_H0C0_PEEPHOLE + for (int step = tstart; step < seq_len; ++step) { + GEMM_WH_ADDON(1, prev_h_data, xx_data); + COMPUTE_CtHt_PEEPHOLE(xx_data, prev_c_data, c_out_data, h_out_data); + MOVE_ONE_STEP; } - - // F_t * C_t-1 - blas.VMUL(D, xx_data + D2, prev_c_data, xx_data + D2); - // I_t * ~C_t - blas.VMUL(D, xx_data, xx_data + D, xx_data + D); - // C_t = F_t * C_t-1 + I_t * ~C_t - blas.VADD(D, xx_data + D, xx_data + D2, cell_out_data); - - if (use_peepholes) { - // + W_oc * C_t for peephole connection - blas.VMUL(D, wc_data + D2, cell_out_data, checked_cell_data + D2); - blas.VADD(D, xx_data + D3, checked_cell_data + D2, xx_data + D3); - // O_t - act_gate(D, xx_data + D3, xx_data + D3); + } + } else { + for (int i = 0; i < N; ++i) { + PROCESS_H0C0 + for (int step = tstart; step < seq_len; ++step) { + GEMM_WH_ADDON(1, prev_h_data, xx_data); + COMPUTE_CtHt(xx_data, prev_c_data, c_out_data, h_out_data); + MOVE_ONE_STEP; } - - // hidden out= act_state(cellout) * outgate - act_cell(D, cell_out_data, xx_data + D2); - // H_t = O_t * act_state(C_t) - blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data); - - // prev - prev_h_data = hidden_out_data; - prev_c_data = cell_out_data; - - move_step(); - } // for each step in batch - } // for each batch + } + } +#undef PROCESS_H0C0_DEFINES +#undef PROCESS_H0C0_PEEPHOLE +#undef PROCESS_H0C0 +#undef MOVE_ONE_STEP } void BatchCompute(const framework::ExecutionContext& ctx) const { using DeviceContext = platform::CPUDeviceContext; INIT_BASE_INPUT_OUTPUT - if (x->lod()[0].size() == 2) { // batch size == 1 + if (x->lod()[0].size() == 2) { SeqCompute(ctx); return; } INIT_BASE_SIZES INIT_VEC_FUNC + INIT_BASE_INPUT_DATAS auto* reordered_h0 = ctx.Output("ReorderedH0"); auto* reordered_c0 = ctx.Output("ReorderedC0"); auto* batched_input = ctx.Output("BatchedInput"); auto* batched_c_out = ctx.Output("BatchedCell"); auto* batched_h_out = ctx.Output("BatchedHidden"); - - const T* x_data = x->data(); - const T* wx_data = wx->data(); - const T* wh_data = wh->data(); - const T* bias_data = bias->data(); - const T* wc_data = bias_data + D4; // w_ic, w_fc, w_oc - auto place = ctx.GetPlace(); T* xx_data = xx->mutable_data(place); T* batched_input_data = batched_input->mutable_data(place); T* batched_c_out_data = batched_c_out->mutable_data(place); @@ -427,12 +436,6 @@ class FuisonLSTMKernel : public framework::OpKernel { hidden_out->mutable_data(place); cell_out->mutable_data(place); - // use local variable - framework::DDim check_dims({3, D}); - Tensor checked_cell; // w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct - auto checked_cell_data = - checked_cell.mutable_data(check_dims, ctx.GetPlace()); - math::LoDTensor2BatchFunctor to_batch; auto& dev_ctx = ctx.template device_context(); auto blas = math::GetBlas(dev_ctx); @@ -454,27 +457,17 @@ class FuisonLSTMKernel : public framework::OpKernel { reordered_h0->Resize({max_bs, D}); reordered_c0->Resize({max_bs, D}); - T* prev_batch_h_data = nullptr; - T* prev_batch_c_data = nullptr; - T* cur_batch_in_data = batched_input_data; - T* cur_batch_h_out_data = batched_h_out_data; - T* cur_batch_c_out_data = batched_c_out_data; - - auto move_step = [&](int bs) { - cur_batch_in_data += bs * D4; - cur_batch_c_out_data += bs * D; - cur_batch_h_out_data += bs * D; - }; - int tstart = 0; + T* prev_h_data = nullptr; + T* prev_c_data = nullptr; if (h0) { // reorder h0, c0 T* reordered_h0_data = reordered_h0->mutable_data(place); T* reordered_c0_data = reordered_c0->mutable_data(place); const T* h0_data = h0->data(); const T* c0_data = c0->data(); - prev_batch_h_data = reordered_h0_data; - prev_batch_c_data = reordered_c0_data; + prev_h_data = reordered_h0_data; + prev_c_data = reordered_c0_data; size_t sz = sizeof(T) * D; for (int i = 0; i < max_bs; ++i) { std::memcpy(reordered_h0_data, h0_data + seq_order[i] * D, sz); @@ -483,123 +476,80 @@ class FuisonLSTMKernel : public framework::OpKernel { reordered_c0_data += D; } } else { - // Compute with no H0/C0 - T* cur_in_data = cur_batch_in_data; - T* cur_c_out_data = cur_batch_c_out_data; - T* cur_h_out_data = cur_batch_h_out_data; - - // If step == 0 and there is no initialized hidden state, that is to say - // the H0 is zeros. Then W_h * H_t-1 can be skiped - - for (int i = 0; i < max_bs; ++i) { // iterate each data in 1st batch - // ~C_t - act_cand(D, cur_in_data, cur_in_data); - - if (use_peepholes) { - // I_t, F_t - act_gate(D2, cur_in_data + D, cur_in_data + D); - } else { - // I_t, F_t, O_t - act_gate(D3, cur_in_data + D, cur_in_data + D); - } - - // C_t = I_t * ~C_t - blas.VMUL(D, cur_in_data, cur_in_data + D, cur_c_out_data); - + // compute without h0, c0 + T* cur_in_data = batched_input_data; + T* cur_h_out_data = batched_h_out_data; + T* cur_c_out_data = batched_c_out_data; + for (int i = 0; i < max_bs; ++i) { + GET_Ct_NOH0C0(cur_in_data, cur_c_out_data); if (use_peepholes) { - // + W_oc * C_t for peephole connection - blas.VMUL(D, wc_data + D2, cur_c_out_data, checked_cell_data + D2); - blas.VADD(D, cur_in_data + D3, checked_cell_data + D2, - cur_in_data + D3); - // O_t - act_gate(D, cur_in_data + D3, cur_in_data + D3); + blas.VMUL(D, wc_data + D2, cur_c_out_data, cur_in_data + D); + blas.VADD(D, cur_in_data + D, cur_in_data + D3, cur_in_data + D3); } - - // hidden out= act_state(cellout) * outgate - act_cell(D, cur_c_out_data, cur_in_data + D2); - // H_t = O_t * act_state(C_t) - blas.VMUL(D, cur_in_data + D2, cur_in_data + D3, cur_h_out_data); - - // move to next data in the same batch + act_gate(D, cur_in_data + D3, cur_in_data + D3); + GET_Ht(cur_c_out_data, cur_in_data, cur_h_out_data); cur_in_data += D4; cur_c_out_data += D; cur_h_out_data += D; } - - // move to data for next timestep - prev_batch_h_data = cur_batch_h_out_data; - prev_batch_c_data = cur_batch_c_out_data; - move_step(max_bs); tstart = 1; + prev_h_data = batched_h_out_data; + prev_c_data = batched_c_out_data; } - const auto& batch_starts = batched_lod[0]; const int max_seq_len = batch_starts.size() - 1; - for (int step = tstart; step < max_seq_len; ++step) { - const int cur_bs = batch_starts[step + 1] - batch_starts[step]; - // + W_h * H_t-1 - blas.GEMM(CblasNoTrans, CblasNoTrans, cur_bs, D4, D, static_cast(1), - prev_batch_h_data, D, wh_data, D4, static_cast(1), - cur_batch_in_data, D4); - - T* cur_in_data = cur_batch_in_data; - T* cur_c_out_data = cur_batch_c_out_data; - T* cur_h_out_data = cur_batch_h_out_data; - T* prev_c_data = prev_batch_c_data; // NULL if no C0 in step0 - T* prev_h_data = prev_batch_h_data; // NULL if no H0 in step0 - auto next_data_in_batch = [&]() { - cur_in_data += D4; - cur_c_out_data += D; - cur_h_out_data += D; - prev_c_data = prev_c_data ? prev_c_data + D : nullptr; - prev_h_data = prev_h_data ? prev_h_data + D : nullptr; - }; - - for (int i = 0; i < cur_bs; ++i) { // iterate each data in same batch - // ~C_t - act_cand(D, cur_in_data, cur_in_data); - - if (use_peepholes) { - // + W_ic|W_fc * C_t-1 for peephole connection - blas.VMUL(D, wc_data, prev_c_data, checked_cell_data); - blas.VMUL(D, wc_data + D, prev_c_data, checked_cell_data + D); - blas.VADD(D2, cur_in_data + D, checked_cell_data, cur_in_data + D); - // I_t, F_t - act_gate(D2, cur_in_data + D, cur_in_data + D); - } else { - // I_t, F_t, O_t - act_gate(D3, cur_in_data + D, cur_in_data + D); + const int offset = tstart * max_bs * D; + batched_input_data = batched_input_data + offset * 4; + batched_h_out_data = batched_h_out_data + offset; + batched_c_out_data = batched_c_out_data + offset; + +#define DEFINE_CUR \ + T* cur_in_data = batched_input_data; \ + T* cur_prev_c_data = prev_c_data; \ + T* cur_c_out_data = batched_c_out_data; \ + T* cur_h_out_data = batched_h_out_data + +#define MOVE_ONE_BATCH \ + cur_in_data += D4; \ + cur_prev_c_data += D; \ + cur_c_out_data += D; \ + cur_h_out_data += D + +#define MOVE_ONE_STEP \ + prev_c_data = batched_c_out_data; \ + prev_h_data = batched_h_out_data; \ + batched_c_out_data = cur_c_out_data; \ + batched_h_out_data = cur_h_out_data; \ + batched_input_data = cur_in_data + + if (use_peepholes) { + for (int step = tstart; step < max_seq_len; ++step) { + const int cur_bs = batch_starts[step + 1] - batch_starts[step]; + GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); + DEFINE_CUR; + for (int i = 0; i < cur_bs; ++i) { + COMPUTE_CtHt_PEEPHOLE(cur_in_data, cur_prev_c_data, cur_c_out_data, + cur_h_out_data); + MOVE_ONE_BATCH; } - - // F_t * C_t-1 - blas.VMUL(D, cur_in_data + D2, prev_c_data, cur_in_data + D2); - // I_t * ~C_t - blas.VMUL(D, cur_in_data, cur_in_data + D, cur_in_data + D); - // C_t = F_t * C_t-1 + I_t * ~C_t - blas.VADD(D, cur_in_data + D, cur_in_data + D2, cur_c_out_data); - - if (use_peepholes) { - // + W_oc * C_t for peephole connection - blas.VMUL(D, wc_data + D2, cur_c_out_data, checked_cell_data + D2); - blas.VADD(D, cur_in_data + D3, checked_cell_data + D2, - cur_in_data + D3); - // O_t - act_gate(D, cur_in_data + D3, cur_in_data + D3); + MOVE_ONE_STEP; + } + } else { + for (int step = tstart; step < max_seq_len; ++step) { + const int cur_bs = batch_starts[step + 1] - batch_starts[step]; + GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data); + DEFINE_CUR; + for (int i = 0; i < cur_bs; ++i) { + COMPUTE_CtHt(cur_in_data, cur_prev_c_data, cur_c_out_data, + cur_h_out_data); + MOVE_ONE_BATCH; } - - // hidden out= act_state(cellout) * outgate - act_cell(D, cur_c_out_data, cur_in_data + D2); - // H_t = O_t * act_state(C_t) - blas.VMUL(D, cur_in_data + D2, cur_in_data + D3, cur_h_out_data); - - // move to next data in same batch - next_data_in_batch(); + MOVE_ONE_STEP; } - // move to data for next timestep - prev_batch_h_data = cur_batch_h_out_data; - prev_batch_c_data = cur_batch_c_out_data; - move_step(cur_bs); } +#undef MOVE_ONE_STEP +#undef MOVE_ONE_BATCH +#undef DEFINE_CUR math::Batch2LoDTensorFunctor to_seq; batched_h_out->set_lod(batched_lod); @@ -615,6 +565,16 @@ class FuisonLSTMKernel : public framework::OpKernel { BatchCompute(ctx); } } + +#undef COMPUTE_CtHt_PEEPHOLE +#undef COMPUTE_CtHt +#undef GET_Ct_NOH0C0 +#undef COMPUTE_CtHt_NOH0C0 +#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0 +#undef GET_Ht +#undef GET_Ct +#undef GEMM_WH_ADDON +#undef INIT_BASE_INPUT_DATAS #undef INIT_BASE_SIZES #undef INIT_BASE_INPUT_OUTPUT #undef INIT_VEC_FUNC diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 4fbfa6354ab45fed4839227a2a4be8fe147e5fd9..6a3ad2151081504fda2a3818c5f99ad47039d91d 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -121,6 +121,12 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root, if (nullptr == dso_handle) { LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " (" << dlerror() << ")"; + if (dlPath.find("nccl") != std::string::npos) { + std::cout + << "You may need to install 'nccl2' from NVIDIA official website: " + << "https://developer.nvidia.com/nccl/nccl-download" + << "before install PaddlePaddle" << std::endl; + } dlPath = dso_name; dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags); } diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 2f12411659fab34dc5c2c8fe4b16eba508a05c5c..7481888174b5134a720ecfcaa4996b50f87ca8a2 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -500,7 +500,7 @@ EOF EOF if [[ ${WITH_GPU} == "ON" ]]; then - NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.1.2-1+cuda${CUDA_MAJOR} libnccl-dev=2.1.2-1+cuda${CUDA_MAJOR} &&" + NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} &&" else NCCL_DEPS="" fi diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index 920dbf3b4ebb0bc3d98c9ea986d7d039deed4a4c..19fc229e6fa84792f58aeeb00be09eb2401b19c7 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -104,7 +104,7 @@ def batch_images_from_tar(data_file, pickle.dump( output, open('%s/batch_%d' % (out_path, file_id), 'wb'), - protocol=pickle.HIGHEST_PROTOCOL) + protocol=2) file_id += 1 data = [] labels = [] @@ -113,9 +113,7 @@ def batch_images_from_tar(data_file, output['label'] = labels output['data'] = data pickle.dump( - output, - open('%s/batch_%d' % (out_path, file_id), 'wb'), - protocol=pickle.HIGHEST_PROTOCOL) + output, open('%s/batch_%d' % (out_path, file_id), 'wb'), protocol=2) with open(meta_file, 'a') as meta: for file in os.listdir(out_path): diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index f6017a455df7e8bd197ef2563a759f843b5e7c73..e1368a3392a9cab3e82eff0a73eb225a52aa03bf 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -47,14 +47,14 @@ def train_program(): loss = fluid.layers.square_error_cost(input=y_predict, label=y) avg_loss = fluid.layers.mean(loss) - return avg_loss + return [avg_loss, y_predict] def optimizer_func(): return fluid.optimizer.SGD(learning_rate=0.001) -def train(use_cuda, train_program, params_dirname): +def train(use_cuda, train_program, params_dirname, inference_model_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() trainer = fluid.Trainer( @@ -74,6 +74,8 @@ def train(use_cuda, train_program, params_dirname): ''' if params_dirname is not None: trainer.save_params(params_dirname) + trainer.save_inference_model(inference_model_dirname, + ['x'], [1]) trainer.stop() trainer.train( @@ -99,15 +101,55 @@ def infer(use_cuda, inference_program, params_dirname=None): print("infer results: ", results[0]) +def infer_by_saved_model(use_cuda, save_dirname=None): + if save_dirname is None: + return + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + + inference_scope = fluid.core.Scope() + with fluid.scope_guard(inference_scope): + # Use fluid.io.load_inference_model to obtain the inference program desc, + # the feed_target_names (the names of variables that will be feeded + # data using feed operators), and the fetch_targets (variables that + # we want to obtain data from using fetch operators). + [inference_program, feed_target_names, + fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) + + # The input's dimension should be 2-D and the second dim is 13 + # The input data should be >= 0 + batch_size = 10 + + test_reader = paddle.batch( + paddle.dataset.uci_housing.test(), batch_size=batch_size) + + test_data = next(test_reader()) + test_feat = numpy.array( + [data[0] for data in test_data]).astype("float32") + test_label = numpy.array( + [data[1] for data in test_data]).astype("float32") + + assert feed_target_names[0] == 'x' + results = exe.run(inference_program, + feed={feed_target_names[0]: numpy.array(test_feat)}, + fetch_list=fetch_targets) + print("infer shape: ", results[0].shape) + print("infer results: ", results[0]) + print("ground truth: ", test_label) + + def main(use_cuda): if use_cuda and not fluid.core.is_compiled_with_cuda(): return # Directory for saving the trained model - params_dirname = "fit_a_line.inference.model" + params_dirname = "fit_a_line.model" + inference_model_dirname = "fit_a_line.inference_model" - train(use_cuda, train_program, params_dirname) + train(use_cuda, train_program, params_dirname, inference_model_dirname) infer(use_cuda, inference_program, params_dirname) + infer_by_saved_model(use_cuda, inference_model_dirname) class TestFitALine(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index 58875a1dd19fd91f6f2bed928397ee7f73302dff..c0f5da5a1ae43847dff6348ea5f3e3bfd5e89ab9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -55,6 +55,7 @@ class TestDistRunnerBase(object): pserver_prog = t.get_pserver_program(args.current_endpoint) startup_prog = t.get_startup_program(args.current_endpoint, pserver_prog) + place = fluid.CPUPlace() exe = fluid.Executor(place) exe.run(startup_prog) @@ -147,6 +148,8 @@ def runtime_main(test_class): import paddle.compat as cpt +import socket +from contextlib import closing class TestDistBase(unittest.TestCase): @@ -156,13 +159,19 @@ class TestDistBase(unittest.TestCase): def setUp(self): self._trainers = 2 self._pservers = 2 - self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124" + self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % ( + self._find_free_port(), self._find_free_port()) self._python_interp = "python" self._sync_mode = True self._mem_opt = False self._use_reduce = False self._setup_config() + def _find_free_port(self): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(('', 0)) + return s.getsockname()[1] + def start_pserver(self, model_file, check_error_log): ps0_ep, ps1_ep = self._ps_endpoints.split(",") ps_cmd = "%s %s --role pserver --endpoints %s --trainer_id 0 --current_endpoint %s --trainers %d --is_dist" diff --git a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py index 4767e9433ea74d5da83867d646f2a63c9a092668..de0c86f96db958eebd7e74346bec244f0c804ed9 100644 --- a/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py @@ -53,12 +53,11 @@ class TestFusionLSTMOp(OpTest): self.M = 8 self.D = 16 self.has_initial_state = False + self.use_peepholes = False self.is_reverse = False self.act_gate = 'sigmoid' self.act_cell = 'tanh' self.act_cand = 'tanh' - self.use_peepholes = False - self.use_seq = False self.set_conf() T = sum(self.lod[0]) @@ -108,7 +107,6 @@ class TestFusionLSTMOp(OpTest): } self.attrs = { 'use_peepholes': self.use_peepholes, - 'use_seq': self.use_seq, 'is_reverse': self.is_reverse, 'gate_activation': self.act_gate, 'cell_activation': self.act_cell, @@ -178,50 +176,18 @@ class TestFusionLSTMOpPeepholesReverse(TestFusionLSTMOp): self.is_reverse = True -class TestFusionLSTMOpPoopholesBS1(TestFusionLSTMOp): +class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp): def set_conf(self): self.use_peepholes = True - self.lod = [[3]] - self.D = 16 - - -class TestFusionLSTMOpSeqInit(TestFusionLSTMOp): - def set_conf(self): - self.use_seq = True - self.has_initial_state = True - - -class TestFusionLSTMOpSeqReverse(TestFusionLSTMOp): - def set_conf(self): - self.use_seq = True - self.is_reverse = True - - -class TestFusionLSTMOpSeqInitReverse(TestFusionLSTMOp): - def set_conf(self): - self.use_seq = True self.has_initial_state = True self.is_reverse = True -class TestFusionLSTMOpSeqPeepholes(TestFusionLSTMOp): +class TestFusionLSTMOpPeepholesBS1(TestFusionLSTMOp): def set_conf(self): - self.use_seq = True self.use_peepholes = True - - -class TestFusionLSTMOpSeqPeepholesInit(TestFusionLSTMOp): - def set_conf(self): - self.use_seq = True - self.use_peepholes = True - self.has_initial_state = True - - -class TestFusionLSTMOpSeqPeepholesReverse(TestFusionLSTMOp): - def set_conf(self): - self.use_seq = True - self.use_peepholes = True - self.is_reverse = True + self.lod = [[2]] + self.D = 8 if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index 372ef748b2e704fd3858c382e048e51448ed3bd5..a49c5d9b43ae1bffa7cb57764db497f68030b151 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -85,6 +85,7 @@ class TestFetchOp(unittest.TestCase): assert not math.isnan(np.sum(ret[i])) and \ not math.isinf(np.sum(ret[i])) + @unittest.skip(reason="CI timeout") def test_fetch_op(self): tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16) tst_reader_iter = tst_reader() @@ -139,6 +140,7 @@ class TestFeedParallel(unittest.TestCase): if batch_id == 2: break + @unittest.skip(reason="CI timeout") def test_feed_op(self): os.environ['CPU_NUM'] = str(4) if core.is_compiled_with_cuda(): diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index d094647afe1900809fc32cae93f777765f72c675..30cdfe4ad2c9892184862b70ff49417ce5a08516 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -431,6 +431,28 @@ class Trainer(object): exe = executor.Executor(self.place) io.save_persistables(exe, dirname=param_path) + def save_inference_model(self, param_path, feeded_var_names, + target_var_indexes): + """ + Save model for cpp inference into :code:`param_path`. + + Args: + param_path(str): The path to save parameters. + feeded_var_names(list(str)): The name of the vars that you + need to feed in before run program. + target_var_indexes(list(int)): the index of target var that + you need to return in trainer.train_func. + Returns: + None + """ + with self._prog_and_scope_guard(): + exe = executor.Executor(self.place) + target_vars = [ + self.train_func_outputs[index] for index in target_var_indexes + ] + io.save_inference_model(param_path, feeded_var_names, target_vars, + exe) + @contextlib.contextmanager def _prog_and_scope_guard(self): with framework.program_guard(