From eb193d8d75011c650125d58c572fdc11a9a07388 Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com>
Date: Fri, 19 May 2023 11:12:44 +0800
Subject: [PATCH] [Paddle-TRT] Decrease peak memory (#53930)

* decrease_peak_memory
---
 paddle/fluid/framework/naive_executor.cc | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 556a167e74f..c31e0661140 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -66,13 +66,6 @@ void NaiveExecutor::Run() {
                                  platform::NvtxRangeColor::Green);
 #endif
 
-    // According to reuse table, we share the out tensor's holder.
-    if (reuse_cache_.count(op.get())) {
-      for (auto &it : reuse_cache_[op.get()]) {
-        it.first->ShareBufferWith(*cluster_buffer_[it.second], true);
-      }
-    }
-
     op->Run(*scope_, place_);
 
     // Update the shared_holder so that only records the max one.
@@ -81,6 +74,22 @@
         if (it.first->memory_size() >
             cluster_buffer_[it.second]->memory_size()) {
           cluster_buffer_[it.second] = it.first;
+          int updated_cluster_id = it.second;
+
+          // cluster_buffer_[it.second] now points to a new
+          // phi::DenseTensor*, so the shared holder of every
+          // phi::DenseTensor in this cluster must be updated. The two
+          // loops below look ugly but are correct, and although they may
+          // seem time-consuming, the cluster stops being updated once
+          // memory usage reaches its peak, so the cost is acceptable.
+          for (auto &op_map : reuse_cache_) {
+            // op_map.second maps each phi::DenseTensor* to its cluster id.
+            for (auto &it2 : op_map.second) {
+              if (it2.second == updated_cluster_id) {
+                it2.first->ShareBufferWith(*cluster_buffer_[it2.second], true);
+              }
+            }
+          }
         }
       }
     }
--
GitLab
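For readers outside the Paddle codebase, the standalone C++ sketch below (not the actual NaiveExecutor code) illustrates the bookkeeping the second hunk adds: after an op runs, the largest tensor seen so far in a reuse cluster becomes the cluster buffer, and every tensor assigned to that cluster is re-pointed at its holder. MiniTensor, its holder field, and the simplified one-argument ShareBufferWith are hypothetical stand-ins for phi::DenseTensor and its allocation sharing; reuse_cache and cluster_buffer mirror the executor's reuse_cache_ and cluster_buffer_ members in a reduced form.

// Minimal sketch of the post-patch cluster-buffer update; all types and
// names here are simplified stand-ins, not the real Paddle API.
#include <cstddef>
#include <iostream>
#include <memory>
#include <unordered_map>
#include <vector>

struct MiniTensor {
  std::shared_ptr<std::vector<char>> holder;  // shared allocation
  size_t memory_size() const { return holder ? holder->size() : 0; }
  // Reuse another tensor's allocation instead of owning a separate one.
  void ShareBufferWith(const MiniTensor &other) { holder = other.holder; }
};

int main() {
  // Two output tensors assigned to the same reuse cluster (id 0).
  MiniTensor a, b;
  a.holder = std::make_shared<std::vector<char>>(64);   // op 1 output: 64 B
  b.holder = std::make_shared<std::vector<char>>(256);  // op 2 output: 256 B

  // reuse_cache: op id -> (tensor -> cluster id); cluster_buffer records
  // the largest tensor seen so far in each cluster.
  std::unordered_map<int, std::unordered_map<MiniTensor *, int>> reuse_cache{
      {1, {{&a, 0}}}, {2, {{&b, 0}}}};
  std::vector<MiniTensor *> cluster_buffer{&a};

  for (int op_id : {1, 2}) {
    // (the op itself would run here)
    // After the op runs, keep only the max-sized tensor per cluster and
    // make every tensor in that cluster share its holder, as the patch does.
    for (auto &it : reuse_cache[op_id]) {
      if (it.first->memory_size() >
          cluster_buffer[it.second]->memory_size()) {
        cluster_buffer[it.second] = it.first;
        int updated_cluster_id = it.second;
        for (auto &op_map : reuse_cache) {
          for (auto &it2 : op_map.second) {
            if (it2.second == updated_cluster_id) {
              it2.first->ShareBufferWith(*cluster_buffer[it2.second]);
            }
          }
        }
      }
    }
  }

  // Both tensors now share the single 256-byte holder.
  std::cout << (a.holder == b.holder) << " " << a.memory_size() << "\n";
  return 0;
}

Running this prints "1 256": both cluster members end up backed by one 256-byte allocation instead of two separate ones, which is the peak-memory reduction the patch targets.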