Unverified commit aa892113, authored by W Wilber, committed by GitHub

[Inference] Optimize memory_optimize pass. (#48476)

* update memory_optimize pass
Parent 93099bb8
@@ -15,8 +15,11 @@
 #include "paddle/fluid/framework/naive_executor.h"
 
 #include <string>
+#include <unordered_map>
+#include <unordered_set>
 
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable_helper.h"
 #include "paddle/fluid/platform/denormal.h"
 #ifdef PADDLE_WITH_MKLDNN
@@ -61,12 +64,31 @@ void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePush(op->Type(), platform::NvtxRangeColor::Green);
 #endif
+    // According to the reuse table, share the output tensor's holder.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        it.first->ShareBufferWith(*cluster_buffer_[it.second]);
+      }
+    }
+
     op->Run(*scope_, place_);
+
+    // Update the shared holder so that it records only the largest tensor.
+    if (reuse_cache_.count(op.get())) {
+      for (auto &it : reuse_cache_[op.get()]) {
+        if (it.first->memory_size() >
+            cluster_buffer_[it.second]->memory_size()) {
+          cluster_buffer_[it.second] = it.first;
+        }
+      }
+    }
+
 #ifdef PADDLE_WITH_INFERENCE_NVTX
     platform::CudaNvtxRangePop();
 #endif
-    if (hookfunc_) {
-      hookfunc_(op.get());
+    for (auto &func : hookfunc_) {
+      func(op.get());
     }
   }
 #ifdef PADDLE_WITH_INFERENCE_NVTX
@@ -146,7 +168,46 @@ phi::DenseTensor *NaiveExecutor::FindTensor(const std::string &name) {
 }
 
 void NaiveExecutor::RegisterOutputHook(const HookFunc &hookfunc) {
-  hookfunc_ = hookfunc;
+  hookfunc_.push_back(hookfunc);
 }
 
+void NaiveExecutor::MakeReusePlan(
+    const std::unordered_map<std::string, std::string> &reuse_table) {
+  std::unordered_map<std::string, std::unordered_set<std::string>> clusters;
+  for (auto &it : reuse_table) {
+    clusters[it.second].insert(it.first);
+  }
+
+  std::vector<std::string> cluster_names;
+  for (auto &it : clusters) {
+    cluster_names.push_back(it.first);
+  }
+  cluster_buffer_.resize(cluster_names.size());
+
+  for (auto &op : ops_) {
+    for (auto &name : op->OutputVars(true)) {
+      if (reuse_table.count(name)) {
+        const auto &reuse_name = reuse_table.at(name);
+        auto it =
+            std::find(cluster_names.begin(), cluster_names.end(), reuse_name);
+        int idx = it - cluster_names.begin();
+        auto *var = scope_->FindVar(name);
+        auto *reuse_var = scope_->FindVar(reuse_name);
+        if (var && reuse_var && var->IsType<phi::DenseTensor>() &&
+            reuse_var->IsType<phi::DenseTensor>()) {
+          auto *tensor = var->GetMutable<phi::DenseTensor>();
+          auto *reuse_tensor = reuse_var->GetMutable<phi::DenseTensor>();
+          cluster_buffer_[idx] = reuse_tensor;
+          if (reuse_cache_.count(op.get())) {
+            reuse_cache_[op.get()].emplace(tensor, idx);
+          } else {
+            reuse_cache_[op.get()] =
+                std::unordered_map<phi::DenseTensor *, int>{{tensor, idx}};
+          }
+        }
+      }
+    }
+  }
+}
+
 NaiveExecutor::~NaiveExecutor() {
......
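The runtime half of the change boils down to the two pieces of bookkeeping shown above: before an op runs, every output tensor that belongs to a reuse cluster points its buffer at the cluster's current backing tensor, and after the op runs the cluster remembers whichever tensor is largest, so later ops always share a buffer that is big enough. Below is a minimal standalone sketch of that idea; it uses plain structs instead of Paddle's `phi::DenseTensor`/`OperatorBase` types, and all names in it are illustrative, not Paddle APIs.

```cpp
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Illustrative stand-in for a tensor that can borrow another tensor's buffer.
struct FakeTensor {
  std::string name;
  size_t bytes{0};                     // how much memory this tensor needs
  const FakeTensor *holder{nullptr};   // whose buffer it currently borrows

  void ShareBufferWith(const FakeTensor &other) { holder = &other; }
};

int main() {
  // reuse_table: variable name -> cluster name, i.e. the kind of mapping a
  // MakeSimpleReusePlan-style analysis would produce.
  std::unordered_map<std::string, std::string> reuse_table = {
      {"conv_out", "cluster0"}, {"relu_out", "cluster0"}};

  FakeTensor conv_out{"conv_out", 1024};
  FakeTensor relu_out{"relu_out", 4096};
  std::vector<FakeTensor *> op_outputs = {&conv_out, &relu_out};

  // cluster_buffer: one "largest so far" tensor per cluster.
  std::unordered_map<std::string, FakeTensor *> cluster_buffer;
  cluster_buffer["cluster0"] = &conv_out;  // any member works as the seed

  for (FakeTensor *out : op_outputs) {  // stands in for the per-op loop in Run()
    const std::string &cluster = reuse_table.at(out->name);
    // Before the op runs: borrow the cluster's current backing buffer
    // (a real op may still enlarge it if the borrowed buffer is too small).
    out->ShareBufferWith(*cluster_buffer[cluster]);
    // ... the op would run here ...
    // After the op runs: keep the largest tensor as the cluster's buffer.
    if (out->bytes > cluster_buffer[cluster]->bytes) {
      cluster_buffer[cluster] = out;
    }
  }
  std::cout << "cluster0 backed by " << cluster_buffer["cluster0"]->name << "\n";
}
```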
@@ -17,6 +17,7 @@
 #include <functional>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 
 #include "paddle/fluid/framework/operator.h"
@@ -67,6 +68,9 @@ class NaiveExecutor {
   Scope* GetScope() { return scope_; }
 
+  void MakeReusePlan(
+      const std::unordered_map<std::string, std::string>& reuse_table);
+
   void ResetTrtOps(int num);
 
   void RegisterOutputHook(const HookFunc& hookfunc);
@@ -82,7 +86,12 @@ class NaiveExecutor {
   std::vector<std::unique_ptr<OperatorBase>> ops_;
   Scope* scope_{nullptr};
 
-  HookFunc hookfunc_{nullptr};
+  std::vector<HookFunc> hookfunc_;
+
+  // Record information that tensor_a should ShareBufferWith tensor_b.
+  std::unordered_map<OperatorBase*, std::unordered_map<phi::DenseTensor*, int>>
+      reuse_cache_;
+  std::vector<phi::DenseTensor*> cluster_buffer_;
 };
 
 }  // namespace framework
......
@@ -20,7 +20,7 @@ cc_library(
 cc_library(
   ir_pass_manager
-  SRCS ir_pass_manager.cc
+  SRCS ir_pass_manager.cc pass_result_info.cc
   DEPS graph pass ${INFER_IR_PASSES} analysis_helper)
 cc_library(
......
@@ -139,6 +139,7 @@ struct Argument {
   unique_ptr_t field__##_;
 
   DECL_ARGUMENT_FIELD(predictor_id, PredictorID, int);
+  DECL_ARGUMENT_FIELD(root_predictor_id, RootPredictorID, int);
   // Model path
   DECL_ARGUMENT_FIELD(model_dir, ModelDir, std::string);
   // Model specified with program and parameters files.
......
@@ -229,6 +229,8 @@ void IRPassManager::CreatePasses(Argument *argument,
           argument->dlnne_input_shape_dict()));
       pass->Set("program",
                 new framework::ProgramDesc *(&argument->main_program()));
+    } else if (pass_name == "memory_optimize_pass") {
+      pass->Set("root_predictor_id", new int(argument->root_predictor_id()));
     }
     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/analysis/pass_result_info.h"
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "paddle/phi/core/enforce.h"
#include "paddle/utils/variant.h"

namespace paddle {
namespace inference {
namespace analysis {

class PassResultInfoForRuntime {
 public:
  using PassInfo =
      paddle::variant<std::string,
                      std::vector<std::string>,
                      std::unordered_map<std::string, std::string>>;

  static PassResultInfoForRuntime* Instance() {
    static PassResultInfoForRuntime info;
    return &info;
  }

  template <typename T>
  void Set(int predictor_id, const std::string& pass_name, T infos) {
    map[predictor_id].emplace(pass_name, infos);
  }

  template <typename T>
  T Get(int predictor_id, const std::string& pass_name) {
    PADDLE_ENFORCE_EQ(
        map.count(predictor_id) && map[predictor_id].count(pass_name),
        true,
        phi::errors::InvalidArgument(
            "Cannot find predictor_id %d and pass_name %s",
            predictor_id,
            pass_name));
    return PADDLE_GET_CONST(T, map[predictor_id][pass_name]);
  }

 private:
  using PassResultInfoMap =
      std::unordered_map<int, std::unordered_map<std::string, PassInfo>>;
  PassResultInfoMap map;
};
} // namespace analysis
} // namespace inference
} // namespace paddle
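PassResultInfoForRuntime is a process-wide singleton keyed first by predictor id and then by pass name, so an analysis pass can publish a result during graph optimization and the predictor can read it back when it prepares the executor. A small usage sketch against the header above; the predictor id and variable names are made up for illustration:

```cpp
#include <string>
#include <unordered_map>

#include "paddle/fluid/inference/analysis/pass_result_info.h"

std::unordered_map<std::string, std::string> Example() {
  using paddle::inference::analysis::PassResultInfoForRuntime;

  // Pass side: publish the var -> reuse-target mapping under this predictor's id.
  std::unordered_map<std::string, std::string> reuse_table = {
      {"conv2d_0.tmp_0", "fc_0.tmp_0"}};  // hypothetical variable names
  PassResultInfoForRuntime::Instance()->Set(
      /*predictor_id=*/0, "memory_optimize_pass", reuse_table);

  // Predictor side: fetch it back with the same key before building the
  // executor's reuse plan.
  return PassResultInfoForRuntime::Instance()
      ->Get<std::unordered_map<std::string, std::string>>(
          /*predictor_id=*/0, "memory_optimize_pass");
}
```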
@@ -19,6 +19,7 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/platform/enforce.h"
 
 namespace paddle {
@@ -310,7 +311,7 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   // mapping table.
   if (!argument->enable_memory_optim()) return;
   // Because the pass is a singleton, graph can not be a member
-  // variables,otherwiseerrors will be caused under multithreading
+  // variable, otherwise errors will be caused under multithreading
   // conditions.
   auto graph = argument->main_graph_ptr();
@@ -323,7 +324,11 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   CollectLifeCycle(graph, &lifecycles, sort_kind);
   CollectVarMemorySize(graph, &space_table);
   MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
-  UpdateOpDescsByReuse(graph, node2cluster, sort_kind);
+
+  auto* pass_res_info = PassResultInfoForRuntime::Instance();
+  pass_res_info->Set(
+      argument->root_predictor_id(), "memory_optimize_pass", node2cluster);
   return;
 }
......
@@ -38,6 +38,7 @@
 #include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/version.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/pass_result_info.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 #include "paddle/fluid/inference/api/helper.h"
@@ -262,6 +263,10 @@ bool AnalysisPredictor::Init(
                  "generated.";
   }
 
+  if (!status_is_cloned_) {
+    root_predictor_id_ = predictor_id_;
+  }
+
   // no matter with or without MKLDNN
   paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
@@ -615,6 +620,15 @@ bool AnalysisPredictor::PrepareExecutor() {
   executor_->Prepare(
       sub_scope_, *inference_program_, 0, config_.use_feed_fetch_ops_);
 
+  if (config_.enable_memory_optim_) {
+    auto *pass_res_info =
+        inference::analysis::PassResultInfoForRuntime::Instance();
+    auto reuse_table =
+        pass_res_info->Get<std::unordered_map<std::string, std::string>>(
+            root_predictor_id_, "memory_optimize_pass");
+    executor_->MakeReusePlan(reuse_table);
+  }
+
   PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                           platform::errors::PreconditionNotMet(
                               "The sub_scope should not be nullptr."));
@@ -1079,6 +1093,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
+  argument_.SetRootPredictorID(root_predictor_id_);
   argument_.SetOptimCacheDir(config_.opt_cache_dir_);
   if (!config_.model_dir().empty()) {
     argument_.SetModelDir(config_.model_dir());
@@ -2114,6 +2129,7 @@ std::unique_ptr<PaddlePredictor> AnalysisPredictor::Clone(void *stream) {
   std::lock_guard<std::mutex> lk(clone_mutex_);
   auto *x = new AnalysisPredictor(config_);
   x->status_is_cloned_ = true;
+  x->root_predictor_id_ = this->root_predictor_id_;
   if (config_.use_external_stream_ && stream == nullptr) {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "config has been configured to use external stream, but the Clone "
@@ -2175,12 +2191,6 @@ void AnalysisPredictor::SaveOptimModel(const std::string &dir) {
 }
 
 void AnalysisPredictor::RegisterOutputHook(const Exp_OutputHookFunc &hookfunc) {
-  if (config_.enable_memory_optim()) {
-    LOG(WARNING) << "If you want to run output hook function, you should "
-                    "use config.EnableMemoryOptim(false) to turn off memory "
-                    "reuse!";
-    return;
-  }
   static std::once_flag register_hook_flag;
   std::call_once(register_hook_flag, [this] {
     executor_->RegisterOutputHook([this](framework::OperatorBase *op) {
......
@@ -102,7 +102,6 @@ class AnalysisPredictor : public PaddlePredictor {
   explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
     if (config_.shape_range_info_collected()) {
       config_.SwitchIrOptim(false);
-      config_.EnableMemoryOptim(false);
     }
     predictor_id_ = inference::GetUniqueId();
   }
@@ -518,6 +517,7 @@ class AnalysisPredictor : public PaddlePredictor {
   int need_collect_var_shapes_{-1};  // -1 for default, 0 for false, 1 for true.
   std::vector<std::map<std::string, std::vector<int>>> batch_var_shapes_;
   int predictor_id_;
+  int root_predictor_id_{-1};
 
  private:
   std::vector<Exp_OutputHookFunc> hookfuncs_;
......