Unverified commit eb193d8d authored by zhoutianzi666, committed by GitHub

[Paddle-TRT] Decrease peak memory (#53930)

* decrease_peak_memory
Parent 6179a3ec
@@ -66,13 +66,6 @@ void NaiveExecutor::Run() {
platform::NvtxRangeColor::Green);
#endif
// According to the reuse table, we share the output tensor's holder.
if (reuse_cache_.count(op.get())) {
for (auto &it : reuse_cache_[op.get()]) {
it.first->ShareBufferWith(*cluster_buffer_[it.second], true);
}
}
op->Run(*scope_, place_);
// Update the shared_holder so that only records the max one.
@@ -81,6 +74,22 @@ void NaiveExecutor::Run() {
if (it.first->memory_size() >
cluster_buffer_[it.second]->memory_size()) {
cluster_buffer_[it.second] = it.first;
int updated_cluster_id = it.second;
// cluster_buffer_[it.second] has been updated to a new
// phi::DenseTensor*, so we need to update every phi::DenseTensor's
// shared holder in this cluster. The following two loops look ugly,
// but they work. They may seem time-consuming, but once memory
// reaches its peak the cluster stops being updated, so the cost is
// acceptable.
for (auto &op_map : reuse_cache_) {
// op_map.second is std::unordered_map<phi::DenseTensor*, int>.
for (auto &it2 : op_map.second) {
if (it2.second == updated_cluster_id) {
it2.first->ShareBufferWith(*cluster_buffer_[it2.second], true);
}
}
}
}
}
}
......
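For reference, below is a minimal, self-contained sketch of the cluster-buffer reuse idea shown in the diff above. It assumes a simplified `Tensor` type with `ShareBufferWith()` and `memory_size()`, and it flattens the per-op reuse table into a single map; all names and types here are hypothetical and do not match Paddle's actual classes.

```cpp
#include <cstddef>
#include <unordered_map>
#include <vector>

// Hypothetical stand-in for phi::DenseTensor, for illustration only.
struct Tensor {
  size_t size = 0;
  const Tensor* holder = nullptr;  // buffer actually backing this tensor
  size_t memory_size() const { return size; }
  void ShareBufferWith(const Tensor& other) { holder = &other; }
};

// cluster_buffer[i] records the largest tensor seen so far in cluster i.
// When an output tensor outgrows its cluster buffer, it becomes the new
// cluster buffer, and every tensor in that cluster re-shares the new,
// larger buffer (mirroring the two nested loops added in the commit).
void UpdateClusterBuffers(
    std::unordered_map<Tensor*, int>& reuse_table,  // tensor -> cluster id
    std::vector<Tensor*>& cluster_buffer) {
  for (auto& it : reuse_table) {
    Tensor* t = it.first;
    int cid = it.second;
    if (t->memory_size() > cluster_buffer[cid]->memory_size()) {
      cluster_buffer[cid] = t;  // new peak size for this cluster
      // Re-point every tensor in the same cluster at the new buffer.
      for (auto& it2 : reuse_table) {
        if (it2.second == cid) {
          it2.first->ShareBufferWith(*cluster_buffer[cid]);
        }
      }
    }
  }
}
```

As the commit message notes, the re-sharing loop only runs while cluster sizes are still growing; once peak memory is reached, the cluster buffers stop changing and the extra loop is effectively skipped.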