diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 57e9a175b16f2406e9bf9c29eaacadaed30fa9f5..6c0daef26ffc8c76246e966ea0110014e844db27 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -15,8 +15,11 @@
 #include "paddle/fluid/framework/naive_executor.h"
 
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/platform/denormal.h"
 #ifdef PADDLE_WITH_MKLDNN
@@ -61,12 +64,31 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
 #endif
+
+    // According to reuse table, we share the out tensor's holder.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        it.first->ShareBufferWith(*cluster_buffer_[it.second]);
+      }
+    }
+
     op->Run(*scope_, place_);
+
+    // Update the shared_holder so that only records the max one.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        if (it.first->memory_size() >
+            cluster_buffer_[it.second]->memory_size()) {
+          cluster_buffer_[it.second] = it.first;
+        }
+      }
+    }
+
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    if (hookfunc_) {
-      hookfunc_(op.get());
+    for (auto &func : hookfunc_) {
+      func(op.get());
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -146,7 +168,46 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_ = hookfunc;
+  hookfunc_.push_back(hookfunc);
+}
+
+void NaiveExecutor::MakeReusePlan(
+    const std::unordered_map<std::string, std::string> &reuse_table) {
+  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
+  for (auto &it : reuse_table) {
+    clusters[it.second].insert(it.first);
+  }
+
+  std::vector<std::string> cluster_names;
+  for (auto &it : clusters) {
+    cluster_names.push_back(it.first);
+  }
+  cluster_buffer_.resize(cluster_names.size());
+
+  for (auto &op : ops_) {
+    for (auto &name : op->OutputVars(true)) {
+      if (reuse_table.count(name)) {
+        const auto &reuse_name = reuse_table.at(name);
+        auto it =
+            std::find(cluster_names.begin(), cluster_names.end(), reuse_name);
+        int idx = it - cluster_names.begin();
+        auto *var = scope_->FindVar(name);
+        auto *reuse_var = scope_->FindVar(reuse_name);
+        if (var && reuse_var && var->IsType<phi::DenseTensor>() &&
+            reuse_var->IsType<phi::DenseTensor>()) {
+          auto *tensor = var->GetMutable<phi::DenseTensor>();
+          auto *reuse_tensor = reuse_var->GetMutable<phi::DenseTensor>();
+          cluster_buffer_[idx] = reuse_tensor;
+          if (reuse_cache_.count(op.get())) {
+            reuse_cache_[op.get()].emplace(tensor, idx);
+          } else {
+            reuse_cache_[op.get()] =
+                std::unordered_map<phi::DenseTensor *, int>{{tensor, idx}};
+          }
+        }
+      }
+    }
+  }
 }
 
 NaiveExecutor::~NaiveExecutor() {
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 882f50b451a2957ce9ee4e2696d13f6679a72d2c..f1a4a036cde36b8dd83836e40e8a1c040ce47db9 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -17,6 +17,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "paddle/fluid/framework/operator.h"
@@ -67,6 +68,9 @@ class NaiveExecutor {
 
   Scope* GetScope() { return scope_; }
 
+  void MakeReusePlan(
+      const std::unordered_map<std::string, std::string>& reuse_table);
+
   void ResetTrtOps(int num);
 
   void RegisterOutputHook(const HookFunc& hookfunc);
@@ -82,7 +86,12 @@
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  HookFunc hookfunc_{nullptr};
+  std::vector<HookFunc> hookfunc_;
+
+  // Record information that tensor_a should ShareBufferWith tensor_b.
+  std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
+      reuse_cache_;
+  std::vector<phi::DenseTensor*> cluster_buffer_;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
index 10d67c69f134482aa4d287e20e57d10c0bd617c8..06c4a55c5c9b694bb891a90f46663cf1087d43cc 100644
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -20,7 +20,7 @@ cc_library(
 
 cc_library(
   ir_pass_manager
-  SRCS ir_pass_manager.cc
+  SRCS ir_pass_manager.cc pass_result_info.cc
   DEPS graph pass ${INFER_IR_PASSES} analysis_helper)
 
 cc_library(
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index fd5ba90eefb3fb8c81dd136197d2ec0e6ece323f..a8d1067c55471507bf783b53e1ab078d6a5d11ff 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -139,6 +139,7 @@ struct Argument {
   unique_ptr_t field__##_;
 
   DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
+  DECL_ARGUMENT_FIELD(root_predictor_id, RootPredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 862a019da6d57ca617ee5713bbe4e2474aa395df..b31f28a6a602f969cb8990641cd633ccdd71a0a4 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -229,6 +229,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                            argument->dlnne_input_shape_dict()));
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+    } else if (pass_name == "memory_optimize_pass") {
+      pass->Set("root_predictor_id", new int(argument->root_predictor_id()));
     }
     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
diff --git a/paddle/fluid/inference/analysis/pass_result_info.cc b/paddle/fluid/inference/analysis/pass_result_info.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d22d208588f334a72ad1f9a22d93d206e86f1c69
--- /dev/null
+++ b/paddle/fluid/inference/analysis/pass_result_info.cc
@@ -0,0 +1,15 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
diff --git a/paddle/fluid/inference/analysis/pass_result_info.h b/paddle/fluid/inference/analysis/pass_result_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..7e42573e959119c998babd694aa26f2102fd3ec3
--- /dev/null
+++ b/paddle/fluid/inference/analysis/pass_result_info.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/variant.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+class PassResultInfoForRuntime {
+ public:
+  using PassInfo =
+      paddle::variant<std::string,
+                      std::vector<std::string>,
+                      std::unordered_map<std::string, std::string>>;
+
+  static PassResultInfoForRuntime* Instance() {
+    static PassResultInfoForRuntime info;
+    return &info;
+  }
+
+  template <typename T>
+  void Set(int predictor_id, const std::string& pass_name, T infos) {
+    map[predictor_id].emplace(pass_name, infos);
+  }
+
+  template <typename T>
+  T Get(int predictor_id, const std::string& pass_name) {
+    PADDLE_ENFORCE_EQ(
+        map.count(predictor_id) && map[predictor_id].count(pass_name),
+        true,
+        phi::errors::InvalidArgument(
+            "Not find predictor_id %d and pass_name %s",
+            predictor_id,
+            pass_name));
+    return PADDLE_GET_CONST(T, map[predictor_id][pass_name]);
+  }
+
+ private:
+  using PassResultInfoMap =
+      std::unordered_map<int, std::unordered_map<std::string, PassInfo>>;
+  PassResultInfoMap map;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
index 63aaa7d97967a411df67f644089e7332674f6aaf..2ff82986e945caf3ecd0ee91bac02c9a9ad48272 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -19,6 +19,7 @@
 
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -310,7 +311,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   // mapping table.
   if (!argument->enable_memory_optim()) return;
   // Because of pass is a singleton, graph can not be member
-  // variables,otherwise,errors will be caused under multithreading
+  // variables,otherwise, errors will be caused under multithreading
   // conditions.
   auto graph = argument->main_graph_ptr();
 
@@ -323,7 +324,11 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   CollectLifeCycle(graph, &lifecycles, sort_kind);
   CollectVarMemorySize(graph, &space_table);
   MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
-  UpdateOpDescsByReuse(graph, node2cluster, sort_kind);
+
+  auto* pass_res_info = PassResultInfoForRuntime::Instance();
+  pass_res_info->Set(
+      argument->root_predictor_id(), "memory_optimize_pass", node2cluster);
+
   return;
 }
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 6a23f11e4522a72a85016e478a99246a11984415..1c27c008d8ca7b130502e00acb8d5a4180fb9010 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -38,6 +38,7 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
@@ -262,6 +263,10 @@ bool AnalysisPredictor::Init(
                       "generated.";
   }
 
+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -615,6 +620,15 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(
       sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);
 
+  if (config_.enable_memory_optim_) {
+    auto *pass_res_info =
+        inference::analysis::PassResultInfoForRuntime::Instance();
+    auto reuse_table =
+        pass_res_info->Get<std::unordered_map<std::string, std::string>>(
+            root_predictor_id_, "memory_optimize_pass");
+    executor_->MakeReusePlan(reuse_table);
+  }
+
   PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                           platform::errors::PreconditionNotMet(
                               "The sub_scope should not be nullptr."));
@@ -1079,6 +1093,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
+  argument_.SetRootPredictorID(root_predictor_id_);
   argument_.SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
@@ -2114,6 +2129,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
@@ -2175,12 +2191,6 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
 }
 
 void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
-  if (config_.enable_memory_optim()) {
-    LOG(WARNING) << "If you want to run output hook function, you should "
-                    "use config.EnableMemoryOptim(false) to turn off memory "
-                    "reuse!";
-    return;
-  }
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 37d1511fa272d67190842f56f8e904320574b1bc..25595d12cb44a5a21f58bcfa5859f1711c612bfb 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -102,7 +102,6 @@ class AnalysisPredictor : public PaddlePredictor {
   explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
     if (config_.shape_range_info_collected()) {
       config_.SwitchIrOptim(false);
-      config_.EnableMemoryOptim(false);
     }
     predictor_id_ = inference::GetUniqueId();
   }
@@ -518,6 +517,7 @@ class AnalysisPredictor : public PaddlePredictor {
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int>>> batch_var_shapes_;
   int predictor_id_;
+  int root_predictor_id_{-1};
 
  private:
   std::vector<Exp_OutputHookFunc> hookfuncs_;
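
Reviewer note (not part of the patch): a minimal, self-contained sketch of the runtime reuse bookkeeping that `MakeReusePlan` builds and `NaiveExecutor::Run` consults. All types and names below (`Tensor`, `Op`, the `main` driver) are simplified stand-ins, not the Paddle classes touched above; the real code keys `reuse_cache_` by `OperatorBase*` and stores `phi::DenseTensor*` cluster buffers.

```cpp
// Illustrative sketch only: simplified stand-ins for the executor's reuse logic.
#include <cstddef>
#include <iostream>
#include <memory>
#include <unordered_map>
#include <vector>

struct Tensor {
  std::shared_ptr<std::vector<char>> holder;  // shared storage ("buffer")
  size_t bytes{0};
  void ShareBufferWith(const Tensor &other) { holder = other.holder; }
  size_t memory_size() const { return bytes; }
};

struct Op {
  Tensor *out;       // the op's single output tensor
  size_t out_bytes;  // size produced by this run
  void Run() {
    out->bytes = out_bytes;
    if (!out->holder || out->holder->size() < out_bytes) {
      out->holder = std::make_shared<std::vector<char>>(out_bytes);
    }
  }
};

int main() {
  Tensor a, b;  // two outputs that the pass assigned to the same reuse cluster
  std::vector<Op> ops = {{&a, 64}, {&b, 256}};

  // MakeReusePlan analogue: map each op's output tensor to a cluster index
  // and seed the cluster with one representative tensor.
  std::unordered_map<const Op *, std::unordered_map<Tensor *, int>> reuse_cache;
  std::vector<Tensor *> cluster_buffer = {&a};
  reuse_cache[&ops[0]] = {{&a, 0}};
  reuse_cache[&ops[1]] = {{&b, 0}};

  // Run analogue: share the cluster's current buffer before the op runs,
  // then keep the largest tensor seen so far as the cluster's buffer.
  for (auto &op : ops) {
    for (auto &it : reuse_cache[&op]) {
      it.first->ShareBufferWith(*cluster_buffer[it.second]);
    }
    op.Run();
    for (auto &it : reuse_cache[&op]) {
      if (it.first->memory_size() > cluster_buffer[it.second]->memory_size()) {
        cluster_buffer[it.second] = it.first;
      }
    }
  }
  // b's larger 256-byte allocation is now the buffer later ops would share.
  std::cout << cluster_buffer[0]->memory_size() << "\n";
}
```

Because reuse is now applied at run time through `ShareBufferWith` rather than by rewriting variable names in the op descs (`UpdateOpDescsByReuse` is dropped from `MemoryOptimizePass::RunImpl`), output variables keep their original names, which is presumably why `RegisterOutputHook` no longer refuses to work when `EnableMemoryOptim` is on.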