From aa892113395a73f6789508b9ebb2e91213a5450a Mon Sep 17 00:00:00 2001
From: Wilber
Date: Thu, 1 Dec 2022 19:52:53 +0800
Subject: [PATCH] [Inference] Optimize memory_optimize pass. (#48476)

* update memory_optimize pass
---
 paddle/fluid/framework/naive_executor.cc       | 67 ++++++++++++++++++-
 paddle/fluid/framework/naive_executor.h        | 11 ++-
 .../fluid/inference/analysis/CMakeLists.txt    |  2 +-
 paddle/fluid/inference/analysis/argument.h     |  1 +
 .../inference/analysis/ir_pass_manager.cc      |  2 +
 .../inference/analysis/pass_result_info.cc     | 15 +++++
 .../inference/analysis/pass_result_info.h      | 66 ++++++++++++++++++
 .../analysis/passes/memory_optimize_pass.cc    |  9 ++-
 .../fluid/inference/api/analysis_predictor.cc  | 22 ++++--
 .../fluid/inference/api/analysis_predictor.h   |  2 +-
 10 files changed, 183 insertions(+), 14 deletions(-)
 create mode 100644 paddle/fluid/inference/analysis/pass_result_info.cc
 create mode 100644 paddle/fluid/inference/analysis/pass_result_info.h

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 57e9a175b16..6c0daef26ff 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -15,8 +15,11 @@
 #include "paddle/fluid/framework/naive_executor.h"
 
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/platform/denormal.h"
 #ifdef PADDLE_WITH_MKLDNN
@@ -61,12 +64,31 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
 #endif
+
+    // According to the reuse table, share the output tensor's holder.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        it.first->ShareBufferWith(*cluster_buffer_[it.second]);
+      }
+    }
+
     op->Run(*scope_, place_);
+
+    // Update the shared holder so that it only records the max-size tensor.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        if (it.first->memory_size() >
+            cluster_buffer_[it.second]->memory_size()) {
+          cluster_buffer_[it.second] = it.first;
+        }
+      }
+    }
+
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    if (hookfunc_) {
-      hookfunc_(op.get());
+    for (auto &func : hookfunc_) {
+      func(op.get());
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -146,7 +168,46 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_ = hookfunc;
+  hookfunc_.push_back(hookfunc);
+}
+
+void NaiveExecutor::MakeReusePlan(
+    const std::unordered_map<std::string, std::string> &reuse_table) {
+  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
+  for (auto &it : reuse_table) {
+    clusters[it.second].insert(it.first);
+  }
+
+  std::vector<std::string> cluster_names;
+  for (auto &it : clusters) {
+    cluster_names.push_back(it.first);
+  }
+  cluster_buffer_.resize(cluster_names.size());
+
+  for (auto &op : ops_) {
+    for (auto &name : op->OutputVars(true)) {
+      if (reuse_table.count(name)) {
+        const auto &reuse_name = reuse_table.at(name);
+        auto it =
+            std::find(cluster_names.begin(), cluster_names.end(), reuse_name);
+        int idx = it - cluster_names.begin();
+        auto *var = scope_->FindVar(name);
+        auto *reuse_var = scope_->FindVar(reuse_name);
+        if (var && reuse_var && var->IsType<phi::DenseTensor>() &&
+            reuse_var->IsType<phi::DenseTensor>()) {
+          auto *tensor = var->GetMutable<phi::DenseTensor>();
+          auto *reuse_tensor = reuse_var->GetMutable<phi::DenseTensor>();
+          cluster_buffer_[idx] = reuse_tensor;
+          if (reuse_cache_.count(op.get())) {
+            reuse_cache_[op.get()].emplace(tensor, idx);
+          } else {
+            reuse_cache_[op.get()] =
+                std::unordered_map<phi::DenseTensor *, int>{{tensor, idx}};
+          }
+        }
+      }
+    }
+  }
 }
 
 NaiveExecutor::~NaiveExecutor() {
diff --git a/paddle/fluid/framework/naive_executor.h b/paddle/fluid/framework/naive_executor.h
index 882f50b451a..f1a4a036cde 100644
--- a/paddle/fluid/framework/naive_executor.h
+++ b/paddle/fluid/framework/naive_executor.h
@@ -17,6 +17,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "paddle/fluid/framework/operator.h"
@@ -67,6 +68,9 @@ class NaiveExecutor {
 
   Scope* GetScope() { return scope_; }
 
+  void MakeReusePlan(
+      const std::unordered_map<std::string, std::string>& reuse_table);
+
   void ResetTrtOps(int num);
 
   void RegisterOutputHook(const HookFunc& hookfunc);
@@ -82,7 +86,12 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  HookFunc hookfunc_{nullptr};
+  std::vector<HookFunc> hookfunc_;
+
+  // Record information that tensor_a should ShareBufferWith tensor_b.
+  std::unordered_map<framework::OperatorBase*,
+                     std::unordered_map<phi::DenseTensor*, int>>
+      reuse_cache_;
+  std::vector<phi::DenseTensor*> cluster_buffer_;
 };
 
 }  // namespace framework
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
index 10d67c69f13..06c4a55c5c9 100644
--- a/paddle/fluid/inference/analysis/CMakeLists.txt
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -20,7 +20,7 @@ cc_library(
 
 cc_library(
   ir_pass_manager
-  SRCS ir_pass_manager.cc
+  SRCS ir_pass_manager.cc pass_result_info.cc
   DEPS graph pass ${INFER_IR_PASSES} analysis_helper)
 
 cc_library(
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index fd5ba90eefb..a8d1067c554 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -139,6 +139,7 @@ struct Argument {
     unique_ptr_t field__##_;
 
   DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
+  DECL_ARGUMENT_FIELD(root_predictor_id, RootPredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 862a019da6d..b31f28a6a60 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -229,6 +229,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                                argument->dlnne_input_shape_dict()));
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+    } else if (pass_name == "memory_optimize_pass") {
+      pass->Set("root_predictor_id", new int(argument->root_predictor_id()));
     }
     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
diff --git a/paddle/fluid/inference/analysis/pass_result_info.cc b/paddle/fluid/inference/analysis/pass_result_info.cc
new file mode 100644
index 00000000000..d22d208588f
--- /dev/null
+++ b/paddle/fluid/inference/analysis/pass_result_info.cc
@@ -0,0 +1,15 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
diff --git a/paddle/fluid/inference/analysis/pass_result_info.h b/paddle/fluid/inference/analysis/pass_result_info.h
new file mode 100644
index 00000000000..7e42573e959
--- /dev/null
+++ b/paddle/fluid/inference/analysis/pass_result_info.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "paddle/phi/core/enforce.h"
+#include "paddle/utils/variant.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+class PassResultInfoForRuntime {
+ public:
+  using PassInfo =
+      paddle::variant<bool, int, std::string, std::vector<std::string>,
+                      std::unordered_map<std::string, std::string>>;
+
+  static PassResultInfoForRuntime* Instance() {
+    static PassResultInfoForRuntime info;
+    return &info;
+  }
+
+  template <typename T>
+  void Set(int predictor_id, const std::string& pass_name, T infos) {
+    map[predictor_id].emplace(pass_name, infos);
+  }
+
+  template <typename T>
+  T Get(int predictor_id, const std::string& pass_name) {
+    PADDLE_ENFORCE_EQ(
+        map.count(predictor_id) && map[predictor_id].count(pass_name),
+        true,
+        phi::errors::InvalidArgument(
+            "Cannot find predictor_id %d and pass_name %s",
+            predictor_id,
+            pass_name));
+    return PADDLE_GET_CONST(T, map[predictor_id][pass_name]);
+  }
+
+ private:
+  using PassResultInfoMap =
+      std::unordered_map<int, std::unordered_map<std::string, PassInfo>>;
+  PassResultInfoMap map;
+};
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
index 63aaa7d9796..2ff82986e94 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -19,6 +19,7 @@
 
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -310,7 +311,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   // mapping table.
   if (!argument->enable_memory_optim()) return;
   // Because of pass is a singleton, graph can not be member
-  // variables,otherwise,errors will be caused under multithreading
+  // variables, otherwise, errors will be caused under multithreading
   // conditions.
   auto graph = argument->main_graph_ptr();
 
@@ -323,7 +324,11 @@
   CollectLifeCycle(graph, &lifecycles, sort_kind);
   CollectVarMemorySize(graph, &space_table);
   MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
-  UpdateOpDescsByReuse(graph, node2cluster, sort_kind);
+
+  auto* pass_res_info = PassResultInfoForRuntime::Instance();
+  pass_res_info->Set(
+      argument->root_predictor_id(), "memory_optimize_pass", node2cluster);
+
   return;
 }
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 6a23f11e452..1c27c008d8c 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -38,6 +38,7 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
@@ -262,6 +263,10 @@ bool AnalysisPredictor::Init(
                   "generated.";
   }
 
+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
 
@@ -615,6 +620,15 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(
       sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);
 
+  if (config_.enable_memory_optim_) {
+    auto *pass_res_info =
+        inference::analysis::PassResultInfoForRuntime::Instance();
+    auto reuse_table =
+        pass_res_info->Get<std::unordered_map<std::string, std::string>>(
+            root_predictor_id_, "memory_optimize_pass");
+    executor_->MakeReusePlan(reuse_table);
+  }
+
   PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                           platform::errors::PreconditionNotMet(
                               "The sub_scope should not be nullptr."));
@@ -1079,6 +1093,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
+  argument_.SetRootPredictorID(root_predictor_id_);
   argument_.SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
@@ -2114,6 +2129,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
@@ -2175,12 +2191,6 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
 }
 
 void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
-  if (config_.enable_memory_optim()) {
-    LOG(WARNING) << "If you want to run output hook function, you should "
-                    "use config.EnableMemoryOptim(false) to turn off memory "
-                    "reuse!";
-    return;
-  }
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h
index 37d1511fa27..25595d12cb4 100644
--- a/paddle/fluid/inference/api/analysis_predictor.h
+++ b/paddle/fluid/inference/api/analysis_predictor.h
@@ -102,7 +102,6 @@ class AnalysisPredictor : public PaddlePredictor {
   explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
     if (config_.shape_range_info_collected()) {
       config_.SwitchIrOptim(false);
-      config_.EnableMemoryOptim(false);
     }
     predictor_id_ = inference::GetUniqueId();
   }
@@ -518,6 +517,7 @@ class AnalysisPredictor : public PaddlePredictor {
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int32_t>>> batch_var_shapes_;
   int predictor_id_;
+  int root_predictor_id_{-1};
 
  private:
   std::vector<Exp_OutputHookFunc> hookfuncs_;
-- 
GitLab
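
Note on the runtime half of this patch: around each op's Run(), reused outputs first borrow the holder of their cluster's current representative tensor, and after the op runs the representative is updated to the largest tensor seen so far, so later ops in the cluster always share a buffer that is big enough. The analysis pass now only records the variable-to-cluster mapping through PassResultInfoForRuntime and no longer rewrites op descs, deferring the actual sharing to NaiveExecutor. The standalone sketch below models that bookkeeping outside Paddle; Tensor, Op, and the holder pointer are simplified, hypothetical stand-ins for phi::DenseTensor, framework::OperatorBase, and the real shared allocation, not actual Paddle APIs.

    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    // Simplified stand-in for phi::DenseTensor: only storage sharing and size.
    struct Tensor {
      std::vector<char> *holder = nullptr;  // storage shared between reused tensors
      std::size_t bytes = 0;
      std::size_t memory_size() const { return bytes; }
      void ShareBufferWith(const Tensor &other) { holder = other.holder; }
    };

    // Simplified stand-in for framework::OperatorBase.
    struct Op {
      void Run() { /* a real op would write its outputs here */ }
    };

    int main() {
      // One representative tensor per reuse cluster, as in cluster_buffer_.
      std::vector<Tensor *> cluster_buffer;
      // Per-op map from output tensor to cluster index, as in reuse_cache_.
      std::unordered_map<Op *, std::unordered_map<Tensor *, int>> reuse_cache;
      std::vector<Op *> ops;  // assume populated, like NaiveExecutor::ops_

      for (Op *op : ops) {
        // Before Run: reused outputs borrow the cluster's current holder.
        if (reuse_cache.count(op)) {
          for (auto &it : reuse_cache[op]) {
            it.first->ShareBufferWith(*cluster_buffer[it.second]);
          }
        }
        op->Run();
        // After Run: keep the largest tensor as the cluster's representative.
        if (reuse_cache.count(op)) {
          for (auto &it : reuse_cache[op]) {
            if (it.first->memory_size() >
                cluster_buffer[it.second]->memory_size()) {
              cluster_buffer[it.second] = it.first;
            }
          }
        }
      }
      return 0;
    }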