diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt index 45bbb4b23765099f235f5aa99989d6f1e063b683..153216abb41a9402eddab8881c61cb012d2410b6 100644 --- a/paddle/contrib/inference/CMakeLists.txt +++ b/paddle/contrib/inference/CMakeLists.txt @@ -18,7 +18,7 @@ if(APPLE) endif(APPLE) -set(inference_deps paddle_inference_api paddle_fluid_api) +set(inference_deps paddle_inference_api paddle_fluid_api paddle_inference_tensorrt_subgraph_engine) function(inference_api_test TARGET_NAME) if (WITH_TESTING) @@ -50,6 +50,14 @@ cc_test(test_paddle_inference_api inference_api_test(test_paddle_inference_api_impl ARGS test_word2vec test_image_classification) +if(WITH_GPU AND TENSORRT_FOUND) +cc_library(paddle_inference_tensorrt_subgraph_engine + SRCS paddle_inference_api_tensorrt_subgraph_engine.cc + DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api) + +inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec) +endif() + if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's, # so anakin library will not be merged to our official inference library. 
To use anakin prediction API, one need to diff --git a/paddle/contrib/inference/demo/CMakeLists.txt b/paddle/contrib/inference/demo/CMakeLists.txt index 566c7d1a0784f12aaeb9398f6d911ffa2b69e8b2..ecece6fe3471ad7b89c84c3e2b67af4ae9eb3c36 100644 --- a/paddle/contrib/inference/demo/CMakeLists.txt +++ b/paddle/contrib/inference/demo/CMakeLists.txt @@ -15,6 +15,11 @@ inference_api_test(simple_on_word2vec ARGS test_word2vec) +option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF) +if(NOT WITH_INFERENCE_DEMO) + return() +endif() + set(DEMO_INSTALL_DIR "${PADDLE_BINARY_DIR}/inference_demo") set(URL_ROOT http://paddlemodels.bj.bcebos.com/inference-vis-demos%2F) diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h index 238d8c772ec875948701a1d8381e051ebeb7c2f5..b8ba2d14a5c161d491d838888ea14b776f769f23 100644 --- a/paddle/contrib/inference/paddle_inference_api.h +++ b/paddle/contrib/inference/paddle_inference_api.h @@ -73,12 +73,12 @@ struct PaddleTensor { }; enum class PaddleEngineKind { - kNative = 0, // Use the native Fluid facility. - kAnakin, // Use Anakin for inference. + kNative = 0, // Use the native Fluid facility. + kAnakin, // Use Anakin for inference. + kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. // TODO(Superjomn) support following engines latter. // kTensorRT, // Use TensorRT for inference. // kAutoMixedAnakin, // Automatically mix Fluid with Anakin. - // kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. }; /* @@ -130,6 +130,11 @@ struct AnakinConfig : public PaddlePredictor::Config { int max_batch_size{-1}; }; +struct TensorRTConfig : public NativeConfig { + // Determine whether a subgraph will be executed by TRT. + int min_subgraph_size{1}; +}; + // A factory to help create different predictors. 
// // FOR EXTENSION DEVELOPER: diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc index d9129a704bc289ce1d416474537fc9234a07e5b8..b1e5b875981e0142f6970cf6864b7b598743654b 100644 --- a/paddle/contrib/inference/paddle_inference_api_impl.cc +++ b/paddle/contrib/inference/paddle_inference_api_impl.cc @@ -89,6 +89,7 @@ bool NativePaddlePredictor::Init( LOG(ERROR) << "fail to load inference model."; return false; } + ctx_ = executor_->Prepare(*inference_program_, 0); executor_->CreateVariables( *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); @@ -119,6 +120,7 @@ bool NativePaddlePredictor::Run(const std::vector &inputs, return false; } for (size_t i = 0; i < feed_target_names_.size(); ++i) { + VLOG(4) << "setting " << i << "-th target"; feed_targets[feed_target_names_[i]] = &feeds[i]; } // get fetch variable @@ -130,14 +132,16 @@ bool NativePaddlePredictor::Run(const std::vector &inputs, } // Run the inference program // if share variables, we need not create variables + VLOG(4) << "Run prepared context"; executor_->RunPreparedContext( ctx_.get(), sub_scope_ != nullptr ? 
sub_scope_ : scope_.get(), &feed_targets, &fetch_targets, false /* don't create variable eatch time */); + VLOG(4) << "Finish prepared context"; if (!GetFetch(fetchs, output_data)) { - LOG(ERROR) << "fail to get fetchs"; + LOG(ERROR) << "fail to get fetches"; return false; } VLOG(3) << "predict cost: " << timer.toc() << "ms"; diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h index 86d1db7bcc7567e104cd20c9f767ed4513f611f5..ba266b608da342fb71faf05d02ddf74330e21e98 100644 --- a/paddle/contrib/inference/paddle_inference_api_impl.h +++ b/paddle/contrib/inference/paddle_inference_api_impl.h @@ -44,7 +44,7 @@ class NativePaddlePredictor : public PaddlePredictor { ~NativePaddlePredictor() override; - private: + protected: bool SetFeed(const std::vector &input_datas, std::vector *feeds); bool GetFetch(const std::vector &fetchs, diff --git a/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc b/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..a11396cee91a758e86af2efd9e58b9da68442590 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/contrib/inference/paddle_inference_api.h" +#include "paddle/contrib/inference/paddle_inference_api_impl.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/utils/singleton.h" + +namespace paddle { + +using inference::analysis::Argument; +using inference::Singleton; +using inference::analysis::Analyzer; +using framework::proto::ProgramDesc; + +class TensorRTSubgraphPredictor : public NativePaddlePredictor { + public: + explicit TensorRTSubgraphPredictor(const TensorRTConfig& config) + : NativePaddlePredictor(config), config_(config) {} + + bool Init(const std::shared_ptr& parent_scope) { + VLOG(3) << "Predictor::init()"; + + if (config_.use_gpu) { + place_ = paddle::platform::CUDAPlace(config_.device); + } else { + place_ = paddle::platform::CPUPlace(); + } + if (parent_scope) { + scope_ = parent_scope; + sub_scope_ = &(parent_scope->NewScope()); + } else { + paddle::framework::InitDevices(false); + scope_.reset(new paddle::framework::Scope()); + } + + executor_.reset(new paddle::framework::Executor(place_)); + + // Initialize the inference program + if (!config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.model_dir); + } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. 
+ inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.prog_file, config_.param_file); + } else { + LOG(ERROR) << "fail to load inference model."; + return false; + } + + // Analyze inference_program + Argument argument; + argument.origin_program_desc.reset( + new ProgramDesc(*inference_program_->Proto())); + Singleton::Global().Run(&argument); + CHECK(argument.transformed_program_desc); + VLOG(5) << "transformed program:\n" + << argument.transformed_program_desc->SerializeAsString(); + VLOG(5) << "to prepare executor"; + *inference_program_->Proto() = *argument.transformed_program_desc; + ctx_ = executor_->Prepare(*inference_program_, 0); + + VLOG(5) << "to create variables"; + executor_->CreateVariables( + *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); + + // Get the feed_target_names and fetch_target_names + feed_target_names_ = inference_program_->GetFeedTargetNames(); + fetch_target_names_ = inference_program_->GetFetchTargetNames(); + return true; + } + + private: + TensorRTConfig config_; +}; + +template <> +std::unique_ptr +CreatePaddlePredictor( + const TensorRTConfig& config) { + VLOG(3) << "create TensorRTSubgraphPredictor"; + if (config.use_gpu) { + // 1. 
GPU memory + PADDLE_ENFORCE_GT( + config.fraction_of_gpu_memory, + 0.f, + "fraction_of_gpu_memory in the config should be set to range (0., 1.]"); + PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); + std::vector flags; + if (config.fraction_of_gpu_memory >= 0.0f || + config.fraction_of_gpu_memory <= 0.95f) { + flags.push_back("dummpy"); + std::string flag = "--fraction_of_gpu_memory_to_use=" + + std::to_string(config.fraction_of_gpu_memory); + flags.push_back(flag); + VLOG(3) << "set flag: " << flag; + framework::InitGflags(flags); + } + } + + std::unique_ptr predictor( + new TensorRTSubgraphPredictor(config)); + if (!dynamic_cast(predictor.get()) + ->Init(nullptr)) { + return nullptr; + } + return std::move(predictor); +} + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc b/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc new file mode 100644 index 0000000000000000000000000000000000000000..b100630dbe412ca811f1a8f2b8191356f5ebec2f --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include "paddle/contrib/inference/paddle_inference_api.h" + +namespace paddle { + +DEFINE_string(dirname, "", "Directory of the inference model."); + +void Main(bool use_gpu) { + //# 1. Create PaddlePredictor with a config. + TensorRTConfig config; + config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + config.use_gpu = use_gpu; + config.fraction_of_gpu_memory = 0.15; + config.device = 0; + auto predictor = + CreatePaddlePredictor(config); + + for (int batch_id = 0; batch_id < 3; batch_id++) { + //# 2. Prepare input. + int64_t data[4] = {1, 2, 3, 4}; + + PaddleTensor tensor{.name = "", + .shape = std::vector({4, 1}), + .data = PaddleBuf(data, sizeof(data)), + .dtype = PaddleDType::INT64}; + + // For simplicity, we set all the slots with the same data. + std::vector slots(4, tensor); + + //# 3. Run + std::vector outputs; + CHECK(predictor->Run(slots, &outputs)); + + //# 4. Get output. + ASSERT_EQ(outputs.size(), 1UL); + LOG(INFO) << "output buffer size: " << outputs.front().data.length(); + const size_t num_elements = outputs.front().data.length() / sizeof(float); + // The outputs' buffers are in CPU memory. 
+ for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + LOG(INFO) << static_cast(outputs.front().data.data())[i]; + } + } +} + +TEST(paddle_inference_api_tensorrt_subgraph_engine, main) { Main(true); } + +} // namespace paddle \ No newline at end of file diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 122ee1dab35b8c7d42392a983b5b15b7c1be7869..c1329b06d7e9bcd6604fed14cefa305339c5c4b8 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -713,6 +713,10 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType( t = &var->Get(); } else if (var->IsType()) { t = &(var->Get().value()); + } else if (var->IsType()) { + const LoDTensorArray& arr = var->Get(); + PADDLE_ENFORCE(arr.size() > 0); + t = &(arr[0]); } if (t != nullptr) { int tmp = static_cast(ToDataType(t->type())); diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 2bb2c8135d8c317388e1a0d711589a390c7e8924..33b0e3b12709fb888ad36d0d86bc77a0f3f0b1f1 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -1,10 +1,12 @@ -set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc data_flow_graph_to_fluid_pass.cc - tensorrt_subgraph_pass.cc dfg_graphviz_draw_pass.cc - DEPS framework_proto) + tensorrt_subgraph_pass.cc + tensorrt_subgraph_node_mark_pass.cc + analyzer.cc + helper.cc + DEPS framework_proto proto_desc) cc_test(test_node SRCS node_tester.cc DEPS analysis) cc_test(test_dot SRCS dot_tester.cc DEPS analysis) @@ -28,5 +30,7 @@ inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_ inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc) 
inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc) inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc) -#inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc) +inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc) inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc) +inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc) +inference_analysis_test(test_analyzer SRCS analyzer_tester.cc) diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc new file mode 100644 index 0000000000000000000000000000000000000000..5d85530969c5bec1c84d5f5b0d2626431a9e1c63 --- /dev/null +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false, + "Enable subgraph to TensorRT engine for acceleration"); + +DEFINE_string(inference_analysis_graphviz_log_root, "./", + "Graphviz debuger for data flow graphs."); + +class DfgPassManagerImpl final : public DfgPassManager { + public: + DfgPassManagerImpl() { + // TODO(Superjomn) set the key with pass reprs. + AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass); + if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) { + auto trt_teller = [](const Node* node) { + if (!node->IsFunction()) return false; + return static_cast(node)->func_type() == "mul"; + }; + AddPass("tensorrt-subgraph-marker", + new TensorRTSubgraphNodeMarkPass(trt_teller)); + AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller)); + } + AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass); + } + + std::string repr() const override { return "dfg-pass-manager"; } + std::string description() const override { return "DFG pass manager."; } + + private: + void AddPass(const std::string& name, Pass* pass) { + LOG(INFO) << "Adding pass " << name; + Register(name, pass); + AddGraphvizDebugerPass(pass); + } + + // Add the graphviz debuger pass if the parent pass has one. 
+ void AddGraphvizDebugerPass(Pass* pass) { + auto* debuger_pass = pass->CreateGraphvizDebugerPass(); + if (debuger_pass) { + LOG(INFO) << " - register debug pass [" << debuger_pass->repr() << "]"; + Register(debuger_pass->repr(), debuger_pass); + } + } +}; + +Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); } + +void Analyzer::Run(Argument* argument) { + for (auto& x : data_) { + PADDLE_ENFORCE(x->Initialize(argument)); + x->RunAll(); + PADDLE_ENFORCE(x->Finalize()); + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle \ No newline at end of file diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h new file mode 100644 index 0000000000000000000000000000000000000000..f290a3777d5be2ef64667d8c17ec59adddc3ef1b --- /dev/null +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file contains Analyzer, a class that is exposed as a library that analyzes + * and optimizes + * Fluid ProgramDesc for inference. Similar to LLVM, it has multiple flags to + * control whether + * a process is applied on the program. 
+ * + * The processes are called Passes in analysis, the Passes are placed in a + * pipeline, the first + * Pass is the FluidToDataFlowGraphPass which transforms a Fluid ProgramDesc to + * a data flow + * graph, the last Pass is DataFlowGraphToFluidPass which transforms a data flow + * graph to a + * Fluid ProgramDesc. The passes in the middle of the pipeline can be any Passes + * which take a + * node or data flow graph as input. + * + * The Analyzer can be used in two methods, the first is an executable file which + * can be used to + * pre-process the inference model and can be controlled by passing different + * command flags; + * the other way is to compose inside the inference API as a runtime pre-process + * phase in the + * inference service. + */ + +#include +#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" + +namespace paddle { +namespace inference { +namespace analysis { + +// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this +// flag if not available. +DECLARE_bool(inference_analysis_enable_tensorrt_subgraph_engine); +DECLARE_string(inference_analysis_graphviz_log_root); + +class Analyzer : public OrderedRegistry { + public: + // Register all the pass-managers. + Analyzer(); + + void Run(Argument* argument); + + DISABLE_COPY_AND_ASSIGN(Analyzer); +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..d7c1a72932a39f878add2bb884e280b91d3c38c0 --- /dev/null +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, main) { + Analyzer analyser; + analyser.Run(&argument); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index f7f4e03968a723df1718bd3752bdd1c3430d02be..6d316f20bff7a68754b0afec6463bd5d7579227f 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -41,6 +41,9 @@ struct Argument { // The original program desc. std::unique_ptr origin_program_desc; + + // The processed program desc. 
+ std::unique_ptr transformed_program_desc; }; #define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc index c30a7c26cecbe67f0ca73223e06b2095584aca94..d09bf3ed161703b0cf273522921e157c7360a0bc 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -20,7 +20,7 @@ namespace paddle { namespace inference { namespace analysis { -// It is a better idea that the inputs and outputs of this graph is set manully +// It is a better idea that the inputs and outputs of this graph is set manually // before, but there must be a Pass that helps to prune the unnecessary ops that // do not contribute to the given targets, so in this pass, analysis and get the // inputs and outputs is OK. @@ -50,6 +50,25 @@ void DataFlowGraph::Build() { outputs.push_back(out); } } + + Clean(); +} + +void DataFlowGraph::Clean() { + for (auto &node : nodes.nodes()) { + std::unordered_set inlinks_set(node->inlinks.begin(), + node->inlinks.end()); + std::unordered_set outlinks_set(node->outlinks.begin(), + node->outlinks.end()); + if (inlinks_set.size() < node->inlinks.size()) { + LOG(INFO) << "Clean: node " << node->repr() << " prune duplicate inputs"; + node->inlinks.assign(inlinks_set.begin(), inlinks_set.end()); + } + if (outlinks_set.size() < node->outlinks.size()) { + LOG(INFO) << "Clean: node " << node->repr() << " prune duplicate inputs"; + node->outlinks.assign(outlinks_set.begin(), outlinks_set.end()); + } + } } std::string DataFlowGraph::DotString() const { diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index 913e344d371ddf3ea05a53c216e5b3bea8f11c7b..30c60661f3492034248e164a70a682bae3819d23 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -47,6 +47,10 @@ 
struct DataFlowGraph { // Output a DOT graph file for debug. std::string DotString() const; + + private: + // Remove duplicate edges and so on. + void Clean(); }; /* @@ -133,17 +137,24 @@ struct GraphTraits { // Extract the inputs and outputs of a graph. The inputs and outputs of a // sub-graph is the inputs nodes and output nodes that doesn't inside the // sub-graph. -std::pair< - std::vector, - std::vector< - Node *>> static ExtractInputAndOutputOfSubGraph(std::vector - &graph) { +static std::pair, std::vector> +ExtractInputAndOutputOfSubGraph(std::vector &graph) { std::unordered_set nodes(graph.begin(), graph.end()); std::unordered_set inputs; std::unordered_set outputs; + // Input a Value, check whether its inlink is in the subgraph. + auto inlink_in_subgraph = [&](Node *n) { + for (auto *in : n->inlinks) { + if (nodes.count(in)) return true; + } + return false; + }; for (auto &node : graph) { for (auto *in : node->inlinks) { - if (!nodes.count(in) && in->type() == Node::Type::kValue) { + // The Value that is written by nodes inside a sub-graph shouldn't be the + // input of the sub-graph. + if (!nodes.count(in) && in->type() == Node::Type::kValue && + !inlink_in_subgraph(in)) { inputs.insert(in); } } diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc index f7d4cca2132d11eb89eee5a71ed0a3cc7381e1ff..e74efd17b834db1d0314c8b7082f3e9c15d6eda3 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc @@ -13,21 +13,34 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/proto_desc.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" namespace paddle { namespace inference { namespace analysis { +using framework::proto::ProgramDesc; + +std::vector ExtractParameters( + const std::vector>& nodes); + bool DataFlowGraphToFluidPass::Initialize(Argument* argument) { ANALYSIS_ARGUMENT_CHECK_FIELD(argument) ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc) - desc_ = argument->origin_program_desc.get(); - // Here some logic from program_desc.cc and will not add new interfaces into - // framework::ProgramDesc class, use some UT to assure the correctness. - auto* block = desc_->mutable_blocks()->Add(); - block->set_idx(framework::kRootBlockIndex); - block->set_parent_idx(framework::kNoneBlockIndex); + PADDLE_ENFORCE(!argument->transformed_program_desc); + // The transformed_program_desc should inherit all the VarDesc and BlockDesc + // from the original program desc. The operators of the main block(the first + // block) should be rewritten by data flow graph. 
+ argument->transformed_program_desc.reset( + new ProgramDesc(*argument->origin_program_desc)); + argument->transformed_program_desc->mutable_blocks(framework::kRootBlockIndex) + ->clear_ops(); + desc_ = argument->transformed_program_desc.get(); + argument_ = argument; return true; } @@ -37,14 +50,17 @@ void DataFlowGraphToFluidPass::Run(DataFlowGraph* graph) { auto traits = GraphTraits(graph); for (auto it = traits.nodes().begin(); it != traits.nodes().end(); ++it) { if (it->deleted()) continue; + switch (it->type()) { - case Node::Type::kFunction: - LOG(INFO) << "add function " << it->name(); + case Node::Type::kFunction: { + LOG(INFO) << "add function " << it->repr(); AddFluidOp(&(*it)); - break; - case Node::Type::kFunctionBlock: + } break; + case Node::Type::kFunctionBlock: { + LOG(INFO) << "add engine op " << it->repr() << " , " + << static_cast(&(*it))->subgraph.size(); AddEngineOp(&(*it)); - break; + } break; default: continue; } @@ -52,12 +68,10 @@ void DataFlowGraphToFluidPass::Run(DataFlowGraph* graph) { } void DataFlowGraphToFluidPass::AddFluidOp(Node* node) { - LOG(INFO) << "processing func " << node->name(); auto* ori_op = static_cast(node->pb_desc()); // currently only the main block is analyzed. auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex); auto* op = main_block->add_ops(); - LOG(INFO) << "to copy the op"; *op = *ori_op; // copy the attributes, by default, these will not be changed // by analysis phrase. // The inputs and outputs of the existing ops are not changed by tensorrt @@ -65,11 +79,89 @@ void DataFlowGraphToFluidPass::AddFluidOp(Node* node) { // NOTE It might be changed by other passes in the long run. 
} +void CreateTrtEngineOp(Node* node, const DataFlowGraph& graph, + const framework::proto::BlockDesc& block) { + static int counter{0}; + PADDLE_ENFORCE(node->IsFunctionBlock()); + framework::OpDesc desc; + auto* func = static_cast(node); + + // collect inputs + std::vector io; + for (auto* x : func->inlinks) { + io.push_back(x->name()); + } + desc.SetInput("Xs", io); + + // collect outputs + io.clear(); + for (auto* x : func->outlinks) { + io.push_back(x->name()); + } + desc.SetOutput("Ys", io); + + desc.SetType("tensorrt_engine"); + // Set attrs + SetAttr(desc.Proto(), "subgraph", block.SerializeAsString()); + SetAttr(desc.Proto(), "engine_unique_key", + "trt-" + std::to_string(counter++)); + SetAttr(desc.Proto(), "max_batch", 100); // TODO(Superjomn) add config latter + SetAttr(desc.Proto(), "max_workspace", + 1024); // TODO(Superjomn) add config latter + SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes())); + node->SetPbMsg(desc.Proto()->SerializeAsString()); +} + +std::vector ExtractParameters( + const std::vector>& nodes) { + std::vector parameters; + for (const auto& node : nodes) { + if (!node->IsValue()) continue; + PADDLE_ENFORCE(!node->pb_msg().empty(), "pb_msg should be set first"); + framework::proto::VarDesc var; + var.ParseFromString(node->pb_msg()); + if (var.persistable()) { + parameters.push_back(var.name()); + } + } + return parameters; +} + void DataFlowGraphToFluidPass::AddEngineOp(Node* node) { - // auto* ori_op = static_cast(node->extra_info()); - // auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex); - // auto* op = main_block->add_ops(); // TODO(Superjomn) Here need to expose some arguments for default setting. + PADDLE_ENFORCE(node->IsFunctionBlock()); + auto* block_node = static_cast(node); + framework::proto::BlockDesc proto; + framework::BlockDesc block_desc(nullptr, &proto); + // copy ops. 
+ for (auto* node : block_node->subgraph) { + auto* op = block_desc.AppendOp(); + PADDLE_ENFORCE(!node->pb_msg().empty()); + op->Proto()->ParseFromString(node->pb_msg()); + } + CreateTrtEngineOp(node, *argument_->main_dfg, *block_desc.Proto()); + auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex); + auto* op = main_block->add_ops(); + PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block"); + op->ParseFromString(node->pb_msg()); +} + +namespace { +class DFG_DebuggerPass : public DFG_GraphvizDrawPass { + public: + using Config = DFG_GraphvizDrawPass::Config; + DFG_DebuggerPass(const Config& config) : DFG_GraphvizDrawPass(config) {} + + std::string repr() const override { return "dfg-to-fluid-debuger-pass"; } + + bool Finalize() override { return true; } +}; +} + +Pass* DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { + return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( + FLAGS_inference_analysis_graphviz_log_root, + "data_flow_graph_to_fluid_graphviz_debugger")); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h index cbb05f622cc29c99c57e649b1c57cf3e54541191..1726e056ed37e2e5fbe2042851ca9bd188806bac 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h @@ -40,10 +40,7 @@ class DataFlowGraphToFluidPass final : public DataFlowGraphPass { return "Transform a DFG to a Fluid ProgramDesc"; } - Pass *CreatePrinterPass(std::ostream &os, - const std::string &banner) const override { - return nullptr; - } + Pass *CreateGraphvizDebugerPass() const override; protected: // Add a Fluid Op into the ProgramDesc. 
@@ -53,6 +50,7 @@ class DataFlowGraphToFluidPass final : public DataFlowGraphPass { private: framework::proto::ProgramDesc *desc_; + Argument *argument_; }; } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc index afffb3feb0c515faa554d0d4919c442ca4515294..a6f85484756417e103cbb60bcb664e8b800b9f28 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc @@ -18,12 +18,19 @@ namespace paddle { namespace inference { namespace analysis { +int DFG_GraphvizDrawPass::counter_{0}; + void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) { auto content = Draw(graph); - std::ofstream file(GenDotPath()); + auto dot_path = GenDotPath(); + std::ofstream file(dot_path); file.write(content.c_str(), content.size()); file.close(); - LOG(INFO) << "draw dot to " << GenDotPath(); + + auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png"; + std::string message; + LOG(INFO) << "draw to " << png_path; + ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message); } std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) { @@ -41,9 +48,7 @@ std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) { if (!config_.display_deleted_node && node.deleted()) continue; for (auto &in : node.inlinks) { if (!config_.display_deleted_node && in->deleted()) continue; - for (auto &in : node.inlinks) { - dot.AddEdge(in->repr(), node.repr(), {}); - } + dot.AddEdge(in->repr(), node.repr(), {}); } } return dot.Build(); diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h index 93ebff59ae9691394858f32c822a5e70f3345581..b064782586f6243353eda67ac8db040509716b20 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h @@ -50,20 
+50,25 @@ class DFG_GraphvizDrawPass : public DataFlowGraphPass { bool Initialize(Argument *argument) override { return true; } void Run(DataFlowGraph *graph) override; - bool Finalize() override { return Pass::Finalize(); } + bool Finalize() override { return true; } std::string repr() const override { return "DFG graphviz drawer"; } std::string description() const override { return "Debug a DFG by draw with graphviz"; } - private: + protected: + // A counter to add a number prefix to the debugger image output so that they + // will sort in the triggered order. + static int counter_; + // Path of the dot file to output. std::string GenDotPath() const { - return config_.dir + "/" + "graph_" + config_.id + ".dot"; + return config_.dir + "/" + std::to_string(counter_++) + "-graph_" + + config_.id + ".dot"; } - std::string Draw(DataFlowGraph *graph); + virtual std::string Draw(DataFlowGraph *graph); Config config_; }; diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc index f4b5c5fd2201cc9ff56d7ee8d8921376c2c9c59e..162455b9c4e06b7fbb4bdede30444faf6a8a1509 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc @@ -31,7 +31,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { pass.Run(&dfg); // test content - std::ifstream file("./graph_test.dot"); + std::ifstream file("./0-graph_test.dot"); ASSERT_TRUE(file.is_open()); std::string line; @@ -40,7 +40,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { no++; } // DFG is sensitive to ProgramDesc, be careful to change the existing models. 
- ASSERT_EQ(no, 112); + ASSERT_EQ(no, 82); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc index 5f62eef52876ac68dfab00348f422a46de123cfe..5d7eb43b7cbd7bc45b5f0c940bf80ad72348e1b9 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -15,6 +15,8 @@ limitations under the License. */ #include #include +#include "analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" namespace paddle { @@ -33,7 +35,7 @@ bool FluidToDataFlowGraphPass::Initialize(Argument *argument) { return true; } -bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); } +bool FluidToDataFlowGraphPass::Finalize() { return true; } void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { PADDLE_ENFORCE(graph); @@ -46,6 +48,7 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { auto *v = graph->nodes.Create(Node::Type::kValue); v->SetName(var.name()); v->SetPbDesc(const_cast(static_cast(&var))); + v->SetPbMsg(var.SerializeAsString()); var2id[var.name()] = v->id(); } for (int i = 0; i < main_block.ops_size(); i++) { @@ -56,6 +59,8 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { // Link to the original protobuf message's memory, make it easier to // generate from a data flow graph to fluid ProgramDesc. o->SetPbDesc(const_cast(static_cast(&op))); + o->SetPbMsg(op.SerializeAsString()); + // set inputs and outputs // TODO(Superjomn) make sure the InputNames is the real variable name. 
for (int j = 0; j < op.inputs_size(); j++) { @@ -79,9 +84,19 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { graph->Build(); } -Pass *FluidToDataFlowGraphPass::CreatePrinterPass( - std::ostream &os, const std::string &banner) const { - return nullptr; +namespace { +class DFG_DebuggerPass : public DFG_GraphvizDrawPass { + public: + using Config = DFG_GraphvizDrawPass::Config; + DFG_DebuggerPass(const Config &config) : DFG_GraphvizDrawPass(config) {} + std::string repr() const override { return "fluid-to-dfg-debuger-pass"; } + bool Finalize() override { return true; } +}; +} + +Pass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const { + return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( + FLAGS_inference_analysis_graphviz_log_root, "fluid-to-dfg-debuger")); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h index 176faf0220cc98bf2c0384af75125d4bc493e753..da8463b63bd0bb1633bfcb9d7d41a884ddd632c7 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h @@ -46,8 +46,7 @@ class FluidToDataFlowGraphPass final : public DataFlowGraphPass { return "transform a fluid ProgramDesc to a data flow graph."; } - Pass *CreatePrinterPass(std::ostream &os, - const std::string &banner) const override; + Pass *CreateGraphvizDebugerPass() const override; private: framework::proto::ProgramDesc const *desc_; diff --git a/paddle/fluid/inference/analysis/helper.cc b/paddle/fluid/inference/analysis/helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..ca40c01fc57dbcc2ca16770a1b7d798de8b5625b --- /dev/null +++ b/paddle/fluid/inference/analysis/helper.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/framework/framework.pb.h" + +namespace paddle { +namespace inference { +namespace analysis { + +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const std::string &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::STRING); + attr->set_s(data); +} +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const int &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::INT); + attr->set_i(data); +} +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const int64_t &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::LONG); + attr->set_l(data); +} +template <> +void SetAttr>(framework::proto::OpDesc *op, + const std::string &name, + const std::vector &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::STRINGS); + for (const auto &s : data) { + attr->add_strings(s.c_str()); + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 
f0039e113159fdcc0cc1c209a8bc899bc82984c1..fff1621d3f1bb31cfa04110d1f3cf5dbfe927331 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -14,10 +14,12 @@ limitations under the License. */ #pragma once +#include #include #include #include +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/enforce.h" @@ -26,6 +28,10 @@ namespace paddle { namespace inference { namespace analysis { +template +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const T &data); + template int AccuDims(Vec &&vec, int size) { int res = 1; @@ -93,7 +99,7 @@ template class OrderedRegistry { public: T *Register(const std::string &name, T *x) { - PADDLE_ENFORCE(!dic_.count(name)); + PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); dic_[name] = data_.size(); data_.emplace_back(std::unique_ptr(x)); return data_.back().get(); @@ -117,6 +123,20 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) { return *var->GetMutable(); } +static void ExecShellCommand(const std::string &cmd, std::string *message) { + char buffer[128]; + std::shared_ptr pipe(popen(cmd.c_str(), "r"), pclose); + if (!pipe) { + LOG(ERROR) << "error running command: " << cmd; + return; + } + while (!feof(pipe.get())) { + if (fgets(buffer, 128, pipe.get()) != nullptr) { + *message += buffer; + } + } +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/node.cc b/paddle/fluid/inference/analysis/node.cc index 3339b5044df0cf91d00aa9ddad310d4bf263bc3c..d9d265d225bb77a3f5f83cbd0b8b1c670fb34a31 100644 --- a/paddle/fluid/inference/analysis/node.cc +++ b/paddle/fluid/inference/analysis/node.cc @@ -20,6 +20,17 @@ namespace paddle { namespace inference { namespace analysis { +template <> +std::string &NodeAttr::As() { + if (data_.empty()) { + type_hash_ = 
typeid(std::string).hash_code(); + } + PADDLE_ENFORCE_EQ(type_hash_, typeid(std::string).hash_code()); + return data_; +} + +std::string &NodeAttr::String() { return As(); } + std::vector Value::dot_attrs() const { return std::vector({Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"), diff --git a/paddle/fluid/inference/analysis/node.h b/paddle/fluid/inference/analysis/node.h index 8c2e6d88b9605d9923d002f73b60cd92b5e551b7..8ecd1ae730e6ec6775f4a22fdc5dec0e8ca8e2d1 100644 --- a/paddle/fluid/inference/analysis/node.h +++ b/paddle/fluid/inference/analysis/node.h @@ -35,6 +35,44 @@ namespace analysis { class NodeMap; +// A helper class to maintain the status from Pass. +struct NodeAttr { + // NOTE T should be a primary type or a struct combined by several primary + // types. + // NOTE the STL containers should not use here. + // Some usages + // Attr attr; + // attr.Bool() = true; + + bool &Bool() { return As(); } + float &Float() { return As(); } + int32_t &Int32() { return As(); } + int64_t &Int64() { return As(); } + void *&Pointer() { return As(); } + std::string &String(); + + private: + template + T &As() { + // init storage in the first usage. + if (data_.empty()) { + VLOG(4) << "resize data to " << sizeof(T); + type_hash_ = typeid(T).hash_code(); + data_.resize(sizeof(T)); + } + PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), + "type not matched, origin is %s, want %s", + DataTypeNamer::Global().repr(type_hash_), + DataTypeNamer::Global().repr()); + PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); + return *reinterpret_cast(&data_[0]); + } + + private: + std::string data_; + size_t type_hash_{std::numeric_limits::max()}; +}; + /* * Node Representation. * @@ -50,8 +88,6 @@ class Node { Node() = default; - struct Attr; - // Cast to a subclass type, Function for example. template Subclass &As() { @@ -71,7 +107,7 @@ class Node { // Get an additional attribute and convert it to T data type. 
NOTE this will // silently create a new attribute if not exists. - Attr &attr(const std::string &name) const { return attrs_[name]; } + NodeAttr &attr(const std::string &name) const { return attrs_[name]; } int id() const { return id_; } @@ -80,6 +116,9 @@ class Node { void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; } void *pb_desc() const { return attr("pb_desc").Pointer(); } + void SetPbMsg(const std::string &s) { attr("pb_msg").String() = s; } + const std::string &pb_msg() const { return attr("pb_msg").String(); } + void SetDeleted() { deleted_ = true; } bool deleted() const { return deleted_; } @@ -94,43 +133,6 @@ class Node { // Output links. std::vector outlinks; - // A helper class to maintain the status from Pass. - struct Attr { - // NOTE T should be a primary type or a struct combined by several primary - // types. - // NOTE the STL containers should not use here. - // Some usages - // Attr attr; - // attr.Bool() = true; - - bool &Bool() { return As(); } - float &Float() { return As(); } - int32_t &Int32() { return As(); } - int64_t &Int64() { return As(); } - void *&Pointer() { return As(); } - - private: - template - T &As() { - // init storage in the first usage. - if (data_.empty()) { - VLOG(4) << "resize data to " << sizeof(T); - type_hash_ = typeid(T).hash_code(); - data_.resize(sizeof(T)); - } - PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), - "type not matched, origin is %s, want %s", - DataTypeNamer::Global().repr(type_hash_), - DataTypeNamer::Global().repr()); - PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); - return *reinterpret_cast(&data_[0]); - } - - private: - std::string data_; - size_t type_hash_{std::numeric_limits::max()}; - }; - // Type checks. bool IsFunction() const { return type_ == Node::Type::kFunction; } bool IsValue() const { return type_ == Node::Type::kValue; } @@ -150,7 +152,7 @@ class Node { Type type_{Type::kNone}; // Mark this node is deleted by some pass. 
bool deleted_{false}; - mutable std::unordered_map attrs_; + mutable std::unordered_map attrs_; }; class Function; @@ -213,6 +215,10 @@ class Function : public Node { struct FunctionBlock : public Node { std::string repr() const override { return "block-" + std::to_string(id()); } std::vector subgraph; + + protected: + FunctionBlock() { SetType(Node::Type::kFunctionBlock); } + friend class NodeMap; }; class NodeMap { @@ -227,7 +233,7 @@ class NodeMap { void Delete(size_t id); - const std::vector> &nodes() { return nodes_; } + const std::vector> &nodes() const { return nodes_; } size_t size() const { return nodes_.size(); } diff --git a/paddle/fluid/inference/analysis/node_attr_flags.h b/paddle/fluid/inference/analysis/node_attr_flags.h new file mode 100644 index 0000000000000000000000000000000000000000..a3f70e5419a66969e8fb20152a8a8ace39316f57 --- /dev/null +++ b/paddle/fluid/inference/analysis/node_attr_flags.h @@ -0,0 +1,32 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file contains all the flags that declared in Node::Attr. + * + * The Node::Attr is designed to share information between different passes, one + * can get other's attributes in a Node by the flags in this file. 
+ */ +#pragma once +namespace paddle { +namespace inference { +namespace analysis { + +#define DECLARE_NODE_ATTR(flag__) const char ATTR_##flag__[] = #flag__; + +DECLARE_NODE_ATTR(supported_by_tensorrt) // bool + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass.h b/paddle/fluid/inference/analysis/pass.h index 65632b749177add9dcb297bffad1e85f68a80b02..25c566ebfa41abe3a247bc6c6e5583c8620a6abb 100644 --- a/paddle/fluid/inference/analysis/pass.h +++ b/paddle/fluid/inference/analysis/pass.h @@ -60,6 +60,9 @@ class Pass { return nullptr; } + // Create a debugger Pass that draw the DFG by graphviz toolkit. + virtual Pass *CreateGraphvizDebugerPass() const { return nullptr; } + // Run on a single Node. virtual void Run(Node *x) { LOG(FATAL) << "not valid"; } // Run on a single Function. diff --git a/paddle/fluid/inference/analysis/pass_manager.cc b/paddle/fluid/inference/analysis/pass_manager.cc index b17c0e0d724ebeea7b84bf63024cd141891a78b4..b428bb22b1f0c5c1a47fc4c46c9070c1ace4a228 100644 --- a/paddle/fluid/inference/analysis/pass_manager.cc +++ b/paddle/fluid/inference/analysis/pass_manager.cc @@ -19,6 +19,18 @@ namespace paddle { namespace inference { namespace analysis { +bool PassManager::Initialize(Argument* argument) { + argument_ = argument; + for (auto& pass : data_) { + LOG(INFO) << "Initializing pass " << pass->repr(); + if (!pass->Initialize(argument)) { + LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]"; + return false; + } + } + return true; +} + void DfgPassManager::RunAll() { PADDLE_ENFORCE(argument_); for (auto& pass : data_) { diff --git a/paddle/fluid/inference/analysis/pass_manager.h b/paddle/fluid/inference/analysis/pass_manager.h index 7841c4b9d08001264af9f3a248a96814d1c273c4..81a17e0287a5aef8a328e43380ee3691f5a32379 100644 --- a/paddle/fluid/inference/analysis/pass_manager.h +++ b/paddle/fluid/inference/analysis/pass_manager.h @@ -50,17 +50,7 @@ class 
PassManager : public OrderedRegistry { // globally shared, so pass them as the arguemnts for all the pass managers. virtual bool Initialize(const Argument& argument) { return false; } - virtual bool Initialize(Argument* argument) { - argument_ = argument; - for (auto& pass : data_) { - LOG(INFO) << "Initializing pass " << pass->repr(); - if (!pass->Initialize(argument)) { - LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]"; - return false; - } - } - return true; - } + virtual bool Initialize(Argument* argument); // Call all the passes' Finalize methods. virtual bool Finalize() { diff --git a/paddle/fluid/inference/analysis/pass_manager_tester.cc b/paddle/fluid/inference/analysis/pass_manager_tester.cc index 7af6a199514636224f0b8303abea7d398400d278..6caba8f04237e014c5ddf1a3a077bcbadb0ddb71 100644 --- a/paddle/fluid/inference/analysis/pass_manager_tester.cc +++ b/paddle/fluid/inference/analysis/pass_manager_tester.cc @@ -64,6 +64,7 @@ TEST_F(DFG_Tester, DFG_pass_manager) { manager.Register("graphviz", new DFG_GraphvizDrawPass(config)); manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass); + ASSERT_TRUE(&argument); ASSERT_TRUE(manager.Initialize(&argument)); manager.RunAll(); } diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc index 43ccac96c84e987ad1f494af3e314c810fc1ffe3..389f9e1a9148a4daf0e5b751cce5cb6325252a4e 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter.cc +++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc @@ -119,10 +119,12 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } void SubGraphFuse::ReplaceNodesWithSubGraphs() { auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); for (auto &subgraph : subgraphs) { + std::unordered_set subgraph_uniq(subgraph.begin(), subgraph.end()); // replace this sub-graph with the first node. Two steps: 1. Create a Block // Node that contains this subgraph 2. 
Mark the nodes inside the sub-graph // as deleted. 3. Replace the deleted node with the new Block Node. - auto *block_node = graph_->nodes.Create(Node::Type::kFunctionBlock); + auto *block_node = static_cast( + graph_->nodes.Create(Node::Type::kFunctionBlock)); auto io = ExtractInputAndOutputOfSubGraph(subgraph); block_node->inlinks = std::move(io.first); block_node->outlinks = std::move(io.second); @@ -130,21 +132,25 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() { // TODO(Superjomn) need a unified mechanism to treat deleted node in each // pass. node->SetDeleted(); + block_node->subgraph.push_back(node); } - std::unordered_map - delelte_node_map; // deleted node to BlockNode - for (auto *n : block_node->inlinks) { - n->inlinks.clear(); - } - for (auto *n : block_node->outlinks) { - n->outlinks.clear(); - } - for (auto *n : block_node->inlinks) { - n->outlinks.push_back(block_node); + // Change all the sub-graph's inputs and outputs corresponding inlink and + // outlink to this sub-graph node. + auto inlink_or_outlink_cleaner = [&](std::vector &nodes) { + for (auto *&n : nodes) { + if (subgraph_uniq.count(n)) { + n = block_node; + } + } + std::unordered_set uniq(nodes.begin(), nodes.end()); + nodes.assign(uniq.begin(), uniq.end()); + }; + for (auto *i : block_node->inlinks) { + inlink_or_outlink_cleaner(i->outlinks); } - for (auto *n : block_node->outlinks) { - n->inlinks.push_back(n); + for (auto *&o : block_node->outlinks) { + inlink_or_outlink_cleaner(o->inlinks); } } } diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..5ad092a9ed201e5e6ab7770bcfd9ddf871779c12 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/node_attr_flags.h" + +namespace paddle { +namespace inference { +namespace analysis { + +void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) { + for (auto &node : graph->nodes.nodes()) { + node->attr(ATTR_supported_by_tensorrt).Bool() = teller_(node.get()); + } +} + +class DfgDebuggerPass : public DFG_GraphvizDrawPass { + public: + DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) + : DFG_GraphvizDrawPass(config) {} + + std::string repr() const override { + return "tensorrt-subgraph-node-mark-debugger"; + } + + bool Finalize() override { return true; } + + protected: + std::string Draw(DataFlowGraph *graph) override { + Dot dot; + // Add nodes + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if (config_.display_deleted_node || !node.deleted()) { + auto dot_attr = node.dot_attrs(); + if (node.attr(ATTR_supported_by_tensorrt).Bool()) { + dot_attr.assign( + {Dot::Attr{"color", "green"}, Dot::Attr{"style", "filled"}}); + } + dot.AddNode(node.repr(), dot_attr); + } + } + // Add edges + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if 
(!config_.display_deleted_node && node.deleted()) continue; + for (auto &in : node.inlinks) { + if (!config_.display_deleted_node && in->deleted()) continue; + dot.AddEdge(in->repr(), node.repr(), {}); + } + } + return dot.Build(); + } +}; + +Pass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const { + DFG_GraphvizDrawPass::Config config( + FLAGS_inference_analysis_graphviz_log_root, "tensorrt_marked_node"); + return new DfgDebuggerPass(config); +} +bool TensorRTSubgraphNodeMarkPass::Finalize() { return true; } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..6cfac55d3b7b501e8ccc141cb7309f1428478672 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h @@ -0,0 +1,53 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops + * that supported by TensorRT engine. + */ +#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Mark the operators that TensorRT engine supports. 
+ */ +class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { + public: + using teller_t = SubGraphSplitter::NodeInsideSubgraphTeller; + + TensorRTSubgraphNodeMarkPass(const teller_t& teller) : teller_(teller) {} + + bool Initialize(Argument* argument) override { return true; } + + // This class get a sub-graph as input and determine whether to transform this + // sub-graph into TensorRT. + void Run(DataFlowGraph* graph) override; + + std::string repr() const { return "tensorrt-sub-subgraph-mark"; } + std::string description() const { return "tensorrt sub-graph mark pass"; } + + Pass* CreateGraphvizDebugerPass() const override; + bool Finalize() override; + + private: + teller_t teller_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..a6c15e848b99ca318f4583e3d4b88345fe8e5ebc --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" + +#include +#include "paddle/fluid/inference/analysis/node_attr_flags.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, tensorrt_subgraph_node_mark_pass) { + // init + FluidToDataFlowGraphPass pass; + ASSERT_TRUE(pass.Initialize(&argument)); + argument.main_dfg.reset(new DataFlowGraph); + pass.Run(argument.main_dfg.get()); + + TensorRTSubgraphNodeMarkPass::teller_t teller = [](const Node* node) { + return node->IsFunction() && + static_cast(node)->func_type() == "mul"; + }; + TensorRTSubgraphNodeMarkPass pass1(teller); + ASSERT_TRUE(pass1.Initialize(&argument)); + pass1.Run(argument.main_dfg.get()); + + int counter{0}; + for (auto& node : argument.main_dfg->nodes.nodes()) { + counter += node->attr(ATTR_supported_by_tensorrt).Bool(); + } + + LOG(INFO) << counter << " nodes marked"; +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc index c7f40d43c922a328febd343cea7240fcb09f3d02..9993de22800bc0aafdcbf46618e6b479ac1eb187 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc @@ -24,7 +24,7 @@ TensorRTSubGraphPass::TensorRTSubGraphPass( : node_inside_subgraph_teller_(teller) {} void TensorRTSubGraphPass::Run(DataFlowGraph *graph) { - SubGraphFuse(graph, node_inside_subgraph_teller_); + SubGraphFuse(graph, node_inside_subgraph_teller_)(); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h index 79e9e2bcc9e626a102dfdab6f1f50c8d58f9bbdd..11e088069538414c79371b920cb8fa1509b24bb1 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h +++ 
b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h @@ -38,6 +38,11 @@ class TensorRTSubGraphPass : public DataFlowGraphPass { // sub-graph into TensorRT. void Run(DataFlowGraph* graph) override; + bool Finalize() override { return true; } + + std::string repr() const { return "tensorrt-sub-graph"; } + std::string description() const { return "tensorrt sub graph pass"; } + private: NodeInsideSubgraphTeller node_inside_subgraph_teller_; }; diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc index d12dcf0d0fe7f9354f7ed1aac924aeab3403e9b8..1d749d3fa3f39b351ccee6ebeb82467f7220a0b6 100644 --- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc @@ -23,49 +23,48 @@ namespace paddle { namespace inference { namespace analysis { -DEFINE_string(model_dir, "", "inference test model dir"); +DEFINE_string(dot_dir, "./", ""); -TEST(TensorRTSubGraph, single_pass) { - auto desc = LoadProgramDesc(); - auto dfg = ProgramDescToDFG(desc); - - SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) { +TEST_F(DFG_Tester, tensorrt_single_pass) { + std::unordered_set teller_set( + {"elementwise_add", "mul", "sigmoid"}); + SubGraphSplitter::NodeInsideSubgraphTeller teller = [&](const Node* node) { if (node->type() != Node::Type::kFunction) return false; const auto* func = static_cast(node); - if (func->func_type() == "elementwise_add" || func->func_type() == "relu" || - func->func_type() == "conv2d" || func->func_type() == "mul" || - func->func_type() == "sigmoid" || func->func_type() == "softmax") { - LOG(INFO) << "sub-graph marked " << node->repr(); - return true; - } + if (teller_set.count(func->func_type())) return true; return false; }; - DFG_GraphvizDrawPass::Config config{"./", "test"}; - DFG_GraphvizDrawPass dfg_pass(config); - dfg_pass.Initialize(); - - DFG_GraphvizDrawPass 
dfg_pass1(config); - dfg_pass1.Initialize(); - - dfg_pass.Run(&dfg); + LOG(INFO) << "init"; + DFG_GraphvizDrawPass::Config config{FLAGS_dot_dir, "origin"}; + DFG_GraphvizDrawPass::Config config1{FLAGS_dot_dir, "fusion"}; + DFG_GraphvizDrawPass dfg_pass(config); + DFG_GraphvizDrawPass dfg_pass1(config1); + FluidToDataFlowGraphPass pass0; TensorRTSubGraphPass trt_pass(std::move(teller)); - trt_pass.Initialize(); - trt_pass.Run(&dfg); + LOG(INFO) << "Initialize"; + dfg_pass.Initialize(&argument); + dfg_pass1.Initialize(&argument); + pass0.Initialize(&argument); + trt_pass.Initialize(&argument); - dfg_pass1.Run(&dfg); + LOG(INFO) << "Run"; + argument.main_dfg.reset(new DataFlowGraph); + pass0.Run(argument.main_dfg.get()); + dfg_pass.Run(argument.main_dfg.get()); + trt_pass.Run(argument.main_dfg.get()); + dfg_pass1.Run(argument.main_dfg.get()); // Check the TRT op's block desc - for (auto node : dfg.nodes.nodes()) { + for (auto& node : argument.main_dfg->nodes.nodes()) { if (node->IsFunctionBlock()) { + LOG(INFO) << "get function block"; } } } -TEST(TensorRTSubGraph, pass_manager) {} - } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index 4c338c67d34fa229de17019ce97e8b8dc39ea737..9dc39ad0ddf8c5de3e1960a1171431e026de35ae 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -226,7 +226,8 @@ op_library(sequence_softmax_op DEPS softmax) if (WITH_GPU AND TENSORRT_FOUND) op_library(tensorrt_engine_op DEPS tensorrt_engine) nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc - DEPS tensorrt_engine_op tensorrt_engine tensorrt_converter) + DEPS tensorrt_engine_op tensorrt_engine tensorrt_converter + analysis) else() set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op) endif() diff --git a/paddle/fluid/operators/adam_op.cc b/paddle/fluid/operators/adam_op.cc index 
6ee73c3000fb45b4e1cd5bbb730da7d61b494b6f..5d670fe3b9d99a31a628ff707ff860564eca952e 100644 --- a/paddle/fluid/operators/adam_op.cc +++ b/paddle/fluid/operators/adam_op.cc @@ -56,9 +56,12 @@ class AdamOp : public framework::OperatorWithKernel { "Beta2 power accumulator should have 1 dimension"); auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ( - param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of AdamOp should have same dimension"); + if (ctx->GetInputsVarType("Grad")[0] == + framework::proto::VarType::LOD_TENSOR) { + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + "Param and Grad input of AdamOp should have same dimension"); + } PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Moment1"), "Param and Moment1 input of AdamOp should have same dimension"); diff --git a/paddle/fluid/operators/adam_op.h b/paddle/fluid/operators/adam_op.h index f82ff47b52490c354f383515d430d14e24cbf6af..a7a28b02b67f2ef180ec0e273dbe7ef555f88ce2 100644 --- a/paddle/fluid/operators/adam_op.h +++ b/paddle/fluid/operators/adam_op.h @@ -282,6 +282,10 @@ class AdamOpKernel : public framework::OpKernel { } else if (grad_var->IsType()) { auto& grad = Ref(ctx.Input("Grad"), "Must set Grad"); + if (grad.rows().size() == 0) { + VLOG(3) << "grad row size is 0!!"; + return; + } // merge duplicated rows if any. 
scatter::MergeAdd merge_func; auto grad_merge = diff --git a/paddle/fluid/operators/average_accumulates_op.cc b/paddle/fluid/operators/average_accumulates_op.cc index 25864e95d7e290c7f684501893e99c828c511979..f389eab605e087c535b9918264e6502217062505 100644 --- a/paddle/fluid/operators/average_accumulates_op.cc +++ b/paddle/fluid/operators/average_accumulates_op.cc @@ -19,28 +19,28 @@ namespace operators { template <> void GetAccumulators( - const framework::ExecutionContext& ctx, int64_t* num_updates_, - int64_t* num_accumulates_, int64_t* old_num_accumulates_) { + const framework::ExecutionContext& ctx, int64_t* num_updates, + int64_t* num_accumulates, int64_t* old_num_accumulates) { auto* in_old_num_accumulates = ctx.Input("in_old_num_accumulates"); auto* in_num_accumulates = ctx.Input("in_num_accumulates"); auto* in_num_updates = ctx.Input("in_num_updates"); - *old_num_accumulates_ = in_old_num_accumulates->data()[0]; - *num_accumulates_ = in_num_accumulates->data()[0]; - *num_updates_ = in_num_updates->data()[0]; + *old_num_accumulates = in_old_num_accumulates->data()[0]; + *num_accumulates = in_num_accumulates->data()[0]; + *num_updates = in_num_updates->data()[0]; } template <> void SetAccumulators( - const framework::ExecutionContext& ctx, int64_t num_updates_, - int64_t num_accumulates_, int64_t old_num_accumulates_) { + const framework::ExecutionContext& ctx, int64_t num_updates, + int64_t num_accumulates, int64_t old_num_accumulates) { auto* out_old_num_accumulates = ctx.Output("out_old_num_accumulates"); auto* out_num_accumulates = ctx.Output("out_num_accumulates"); auto* out_num_updates = ctx.Output("out_num_updates"); - out_old_num_accumulates->data()[0] = old_num_accumulates_; - out_num_accumulates->data()[0] = num_accumulates_; - out_num_updates->data()[0] = num_updates_; + out_old_num_accumulates->data()[0] = old_num_accumulates; + out_num_accumulates->data()[0] = num_accumulates; + out_num_updates->data()[0] = num_updates; } class 
AverageAccumulatesOp : public framework::OperatorWithKernel { @@ -177,7 +177,7 @@ class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( AverageAccumulates Operator. -Accumulate the sum of parameter whtin sliding window. The size of sliding window is +Accumulate the sum of parameter within sliding window. The size of sliding window is determined by 'average_window', 'max_average_window' and 'min_average_window'. Memory was shared by Input(in_sum_1) and Output(out_sum_1) which acts as an accumulator 'sum_1'. 'sum_2', 'sum_3', 'num_accumulates', 'old_num_accumulates' and 'num_updates' were the same as 'sum_1'. diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h index 07ac5ced11605f6d0d5164d1c0f69acbd7bbed60..3958d3f685470f2505abf0e8bfd269d3834970ae 100644 --- a/paddle/fluid/operators/average_accumulates_op.h +++ b/paddle/fluid/operators/average_accumulates_op.h @@ -54,8 +54,9 @@ class AverageAccumulatesKernel : public framework::OpKernel { float average_window = ctx.Attr("average_window"); int64_t max_average_window = ctx.Attr("max_average_window"); int64_t min_average_window = ctx.Attr("min_average_window"); - min_average_window = - std::min(min_average_window, max_average_window); + PADDLE_ENFORCE_LE(min_average_window, max_average_window, + "min_average_window shouldn't be larger than " + "max_average_window"); // Get inputs auto* param = ctx.Input("param"); diff --git a/paddle/fluid/operators/fill_zeros_like_op.cc b/paddle/fluid/operators/fill_zeros_like_op.cc index d67bec36b3248be8602da562a88aeb58f5effe39..a9d47c017275193cdacc7db8f31e8e874b9b84de 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cc @@ -26,8 +26,12 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { "Input(X) of FillZerosLikeOp should not be null."); PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) of FillZerosLikeOp should not 
be null."); - ctx->SetOutputDim("Out", ctx->GetInputDim("X")); - ctx->ShareLoD("X", /*->*/ "Out"); + + if (ctx->IsRuntime() && + ctx->GetOutputsVarType("Out")[0] == + framework::proto::VarType::LOD_TENSOR_ARRAY) { + return; // skip runtime infershape when is tensor array; + } } }; @@ -39,7 +43,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( FillZerosLike Operator. -Fill up a variable with zeros. +Fill up a variable with zeros, supporting both LoDTensor and LoDTensorArray. The output will have the same size as the input. )DOC"); diff --git a/paddle/fluid/operators/fill_zeros_like_op.h b/paddle/fluid/operators/fill_zeros_like_op.h index 4bbe0df6b6890122381c87494e510cf125792377..daa6521b32e583f733bc040afb61bf13c4236731 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.h +++ b/paddle/fluid/operators/fill_zeros_like_op.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" @@ -23,12 +24,29 @@ template class FillZerosLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* out = context.Output("Out"); - out->mutable_data(context.GetPlace()); - - math::SetConstant setter; - setter(context.template device_context(), out, - static_cast(0)); + auto var = context.InputVar("X"); + if (var->IsType()) { + auto& input = *context.Input("X"); + auto& output = *context.Output("Out"); + output.Resize(input.dims()); + output.set_lod(input.lod()); + output.mutable_data(context.GetPlace()); + math::SetConstant setter; + setter(context.template device_context(), &(output), + static_cast(0)); + } else if (var->IsType()) { + auto& input = *context.Input("X"); + auto& output = *context.Output("Out"); + output.resize(input.size()); + 
for (auto i = 0; i < input.size(); i++) { + output[i].Resize(input[i].dims()); + output[i].set_lod(input[i].lod()); + output[i].mutable_data(context.GetPlace()); + math::SetConstant setter; + setter(context.template device_context(), &(output[i]), + static_cast(0)); + } + } } }; diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index 295d6ba0395b68cabab3bd4117cedd912df48f5d..1602a913aeebe43fabe2f9c9036edd18ac4c70fd 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -53,6 +53,7 @@ template class TensorRTEngineKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { + VLOG(4) << "TensorRTEngineKernel executing"; auto engine_name = context.Attr("engine_uniq_key"); if (!Singleton::Global().HasEngine(engine_name)) { Prepare(context); diff --git a/paddle/fluid/operators/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt_engine_op_test.cc index 358e2d151bb8f990503ea8a51ba5f81e0a1dc816..82a16361e40513aeaf6f510e450f58989369fcdb 100644 --- a/paddle/fluid/operators/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt_engine_op_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" @@ -51,48 +52,10 @@ void AddTensorToBlockDesc(framework::proto::BlockDesc* block, *var = *desc.Proto(); } -template -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const T& data); - -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const std::string& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::STRING); - attr->set_s(data); -} -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const int& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::INT); - attr->set_i(data); -} -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const int64_t& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::LONG); - attr->set_l(data); -} -template <> -void SetAttr>(framework::proto::OpDesc* op, - const std::string& name, - const std::vector& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::STRINGS); - for (const auto& s : data) { - attr->add_strings(s.c_str()); - } -} - } // namespace +using inference::analysis::SetAttr; + TEST(TensorRTEngineOp, manual) { framework::ProgramDesc program; auto* block_ = program.Proto()->add_blocks(); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 017ac9d3ecb2c98c7343793c78c0747d3f5cbb6d..b66a05aaebda645196721fd6ed840e5584813348 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -107,6 
+107,7 @@ function cmake_gen() { -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_ANAKIN=${WITH_ANAKIN:-ON} + -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -134,7 +135,8 @@ EOF -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ - -DWITH_ANAKIN=${WITH_ANAKIN:-ON} + -DWITH_ANAKIN=${WITH_ANAKIN:-ON} \ + -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} } function abort(){ diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index 6a1b8b5fac223c0d134cae69a61a0c2c00bc1feb..9d05aeeb95c4f936cb773ece20407ecb32cbbf21 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -111,7 +111,7 @@ def fetch(): paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5) def convert(path): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 64f48e259ad96b510059f25de3f03ede169b2b81..bc379da4e3b72cc8cf59e1d2e090e75e5a323e4b 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -95,6 +95,7 @@ __all__ = [ 'relu', 'log', 'crop', + 'fill_zeros_like', ] @@ -5184,3 +5185,40 @@ def crop(x, shape=None, offsets=None, name=None): outputs={'Out': out}, attrs=None if len(attrs) == 0 else attrs) return out + + +def fill_zeros_like(x): + """ + This layer takes an input and outputs a variable that has the same structure as + the input and with all the element values as zero. The variable can be a Tensor + or TensorArray. + + .. 
code-block:: text + + + Given + X = [[0, 1, 2, 0], + [0, 3, 4, 0], + [0, 0, 0, 0]], + output is: + Out = [[0, 0, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 0]]. + + Args: + x (Variable): The input variable, which could be a tensor or tensor array + + Returns: + Variable: The zero-filled variable, which has the same type and shape as + the input variable. + + Examples: + + .. code-block:: python + y = fluid.layers.fill_zeros_like(x) + """ + helper = LayerHelper('fill_zeros_like', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='fill_zeros_like', inputs={'X': [x]}, outputs={'Out': [out]}) + return out diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 219ab9bc2cc74a3c16f7bda69d4d782283574d7e..5f27864c140573086d07415f83caca708889a068 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -51,3 +51,4 @@ py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) +set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 180) diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist.py b/python/paddle/fluid/tests/unittests/test_dist_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..ad2d57f7c5f127be87e963508e1dd150fdd30225 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist.py @@ -0,0 +1,210 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse +import time +import math + +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +from paddle.fluid import core +import unittest +from multiprocessing import Process +import os +import signal + +SEED = 1 +DTYPE = "float32" +paddle.dataset.mnist.fetch() + + +# random seed must set before configuring the network. +# fluid.default_startup_program().random_seed = SEED +def cnn_model(data): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=data, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + + # TODO(dzhwinter) : refine the initializer and random seed settting + SIZE = 10 + input_shape = conv_pool_2.shape + param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE] + scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5 + + predict = fluid.layers.fc( + input=conv_pool_2, + size=SIZE, + act="softmax", + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale))) + return predict + + +def get_model(batch_size): + # Input data + images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # Train program + predict = cnn_model(images) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # Evaluator + 
batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + + inference_program = fluid.default_main_program().clone() + # Optimization + opt = fluid.optimizer.AdamOptimizer( + learning_rate=0.001, beta1=0.9, beta2=0.999) + + # Reader + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=batch_size) + opt.minimize(avg_cost) + return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict + + +def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id=trainer_id, + program=main_program, + pservers=pserver_endpoints, + trainers=trainers) + return t + + +def run_pserver(pserver_endpoints, trainers, current_endpoint): + get_model(batch_size=20) + t = get_transpiler(0, + fluid.default_main_program(), pserver_endpoints, + trainers) + pserver_prog = t.get_pserver_program(current_endpoint) + startup_prog = t.get_startup_program(current_endpoint, pserver_prog) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + exe.run(pserver_prog) + + +class TestDistMnist(unittest.TestCase): + def setUp(self): + self._trainers = 1 + self._pservers = 1 + self._ps_endpoints = "127.0.0.1:9123" + + def start_pserver(self, endpoint): + p = Process( + target=run_pserver, + args=(self._ps_endpoints, self._trainers, endpoint)) + p.start() + return p.pid + + def _wait_ps_ready(self, pid): + retry_times = 5 + while True: + assert retry_times >= 0, "wait ps ready failed" + time.sleep(1) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. 
+ os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error: + retry_times -= 1 + + def stop_pserver(self, pid): + os.kill(pid, signal.SIGTERM) + + def test_with_place(self): + p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + pserver_pid = self.start_pserver(self._ps_endpoints) + self._wait_ps_ready(pserver_pid) + + self.run_trainer(p, 0) + + self.stop_pserver(pserver_pid) + + def run_trainer(self, place, trainer_id): + test_program, avg_cost, train_reader, test_reader, batch_acc, predict = get_model( + batch_size=20) + t = get_transpiler(trainer_id, + fluid.default_main_program(), self._ps_endpoints, + self._trainers) + + trainer_prog = t.get_trainer_program() + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + feed_var_list = [ + var for var in trainer_prog.global_block().vars.itervalues() + if var.is_data + ] + + feeder = fluid.DataFeeder(feed_var_list, place) + for pass_id in xrange(10): + for batch_id, data in enumerate(train_reader()): + exe.run(trainer_prog, feed=feeder.feed(data)) + + if (batch_id + 1) % 10 == 0: + acc_set = [] + avg_loss_set = [] + for test_data in test_reader(): + acc_np, avg_loss_np = exe.run( + program=test_program, + feed=feeder.feed(test_data), + fetch_list=[batch_acc, avg_cost]) + acc_set.append(float(acc_np)) + avg_loss_set.append(float(avg_loss_np)) + # get test acc and loss + acc_val = np.array(acc_set).mean() + avg_loss_val = np.array(avg_loss_set).mean() + if float(acc_val + ) > 0.8: # Smaller value to increase CI speed + return + else: + print( + 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. 
+ format(pass_id, batch_id + 1, + float(avg_loss_val), float(acc_val))) + if math.isnan(float(avg_loss_val)): + assert ("got Nan loss, training failed.") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py new file mode 100644 index 0000000000000000000000000000000000000000..23871508d8042ade5253c2f0b3bc9f32ec71a135 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_fill_zeros_like_op_for_array.py @@ -0,0 +1,88 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +import paddle.fluid.core as core +import numpy +import paddle.fluid.layers as layers +from paddle.fluid.framework import Program, program_guard +from paddle.fluid.executor import Executor + +import paddle.fluid as fluid +import paddle.fluid.core as core + + +class TestFillZerosLikeOpForTensorArray(unittest.TestCase): + def place(self): + return core.CPUPlace() + + def test_zero_filling_lod_tensor_array(self): + tensor = core.LoDTensor() + tensor.set( + numpy.arange(20).reshape(20, 1).astype('int32'), self.place()) + tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]]) + + expect = [ + numpy.array( + [0, 0, 0, 0, 0], dtype='int32'), numpy.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='int32'), + numpy.array( + [0, 0, 0], dtype='int32') + ] + + lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]] + self.main( + tensor=tensor, + expect_array=expect, + expect_lod=lod, + expect_max_len=3) + + def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): + place = self.place() + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[10]) + x.persistable = True + table = layers.lod_rank_table(x, level=level) + max_len = layers.max_sequence_len(table) + max_len.persistable = True + array = layers.lod_tensor_to_array(x, table) + array = layers.fill_zeros_like(array) + array.persistable = True + + result = layers.array_to_lod_tensor(array, table) + result.persistable = True + exe = Executor(place) + scope = core.Scope() + exe.run(program, feed={'x': tensor}, scope=scope) + var = scope.find_var(array.name) + array = var.get_lod_tensor_array() + if expect_array is not None and expect_lod is not None: + self.check_array_same(array, expect_array, expect_lod) + + self.assertEqual( + numpy.array(scope.find_var(max_len.name).get_tensor())[0], + expect_max_len) + + def check_array_same(self, array, expect_tensor, expect_lod): + self.assertEqual(len(expect_tensor), len(array)) + for i, exp in enumerate(zip(expect_tensor, 
expect_lod)): + exp_tensor, exp_lod = exp + exp_tensor = numpy.expand_dims(exp_tensor, axis=1) + self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i]))) + self.assertEqual(exp_lod, array[i].lod()) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index f191ef7df5caa04537e69ad9a0e018d161cd59ad..b6e0241265b18377874efb0d223441994b4650d0 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -315,7 +315,7 @@ class Trainer(object): for ip in worker_ips.split(","): worker_endpoints.append(':'.join([ip, port])) self.num_trainers = len(worker_endpoints) - current_endpoint = os.getenv("POD_IP") + ":" + port + current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port worker_endpoints.remove(current_endpoint) # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id # in ParallelExecutor to start diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 9f675bed895223e054cd3bb6e504fe1607f19858..2b959c48e4bc62e08f6f57981b61b7c5fe3a1d06 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -112,7 +112,7 @@ def fetch(): paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5) def convert(path):