Unverified commit 5925b82e, authored by J JingZhuangzhuang, committed by GitHub

multithread memory optimize error fix (#37894) (#38737)

* multithread_memory_optimize
Parent: aebc5a9c
@@ -52,11 +52,11 @@ typedef struct {
 // The traversal order also affect the lifecycles, so different sort_kind is
 // used.
 void MemoryOptimizePass::CollectLifeCycle(
-    std::unordered_map<std::string, lifecycle_t>* lifecycles,
+    Graph* graph, std::unordered_map<std::string, lifecycle_t>* lifecycles,
     int sort_kind) const {
-  max_lifecycle_ = 0;
+  int max_lifecycle = 0;
   for (auto* op_node : framework::ir::TopologyVarientSort(
-           *graph_, static_cast<framework::ir::SortKind>(sort_kind))) {
+           *graph, static_cast<framework::ir::SortKind>(sort_kind))) {
     if (!op_node->IsOp()) continue;
     auto reads = op_node->inputs;
     auto writes = op_node->outputs;
@@ -77,20 +77,20 @@ void MemoryOptimizePass::CollectLifeCycle(
         if (node->Var()->Persistable()) continue;
         std::string var = node->Name();
         if (!lifecycles->count(var)) {
-          (*lifecycles)[var] = std::make_pair(max_lifecycle_, max_lifecycle_);
+          (*lifecycles)[var] = std::make_pair(max_lifecycle, max_lifecycle);
         } else {
           (*lifecycles)[var].second =
-              std::max(max_lifecycle_, lifecycles->at(var).second);  // max()
+              std::max(max_lifecycle, lifecycles->at(var).second);  // max()
         }
       }
     }
-    ++max_lifecycle_;
+    ++max_lifecycle;
   }
 }

 void MemoryOptimizePass::CollectVarMemorySize(
-    space_table_t* space_table) const {
+    Graph* graph, space_table_t* space_table) const {
   const int fake_batch_size = 1;
   auto valid_var = [&](framework::ir::Node* node) -> bool {
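Note on the change above: CollectLifeCycle walks the ops in a fixed topological order and records, for every non-persistable variable, the first and last step at which it is read or written; the fix only turns `max_lifecycle_` into a stack local and takes the graph as a parameter. A minimal, self-contained sketch of that bookkeeping (illustrative types only, not Paddle's real `framework::ir::Graph` API):

```cpp
// Illustrative sketch only: Op and the containers below are stand-ins for the
// real graph nodes. The interval logic mirrors the loop in the diff above.
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using lifecycle_t = std::pair<int, int>;  // [first_use, last_use] op indices

struct Op {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

void CollectLifeCycle(const std::vector<Op>& topo_ops,
                      std::unordered_map<std::string, lifecycle_t>* lifecycles) {
  int max_lifecycle = 0;  // stack local, as in the fixed pass
  for (const auto& op : topo_ops) {
    for (const auto* vars : {&op.inputs, &op.outputs}) {
      for (const auto& var : *vars) {
        if (!lifecycles->count(var)) {
          (*lifecycles)[var] = std::make_pair(max_lifecycle, max_lifecycle);
        } else {
          (*lifecycles)[var].second =
              std::max(max_lifecycle, lifecycles->at(var).second);
        }
      }
    }
    ++max_lifecycle;
  }
}

int main() {
  // x -> (op0) -> a -> (op1) -> b -> (op2) -> y
  std::vector<Op> ops = {{{"x"}, {"a"}}, {{"a"}, {"b"}}, {{"b"}, {"y"}}};
  std::unordered_map<std::string, lifecycle_t> lifecycles;
  CollectLifeCycle(ops, &lifecycles);
  for (const auto& kv : lifecycles)
    std::cout << kv.first << ": [" << kv.second.first << ", "
              << kv.second.second << "]\n";  // e.g. a: [0, 1]
}
```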
@@ -130,7 +130,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
   // although it's not always the case. so black list is the best compromise
   // between performance and underlying principle.
   std::unordered_set<std::string> black_list;
-  for (auto* node : graph_->Nodes()) {
+  for (auto* node : graph->Nodes()) {
     if (node->IsVar() &&
         node->Var()->GetType() ==
             framework::proto::VarType::Type::VarType_Type_LOD_TENSOR) {
@@ -141,7 +141,7 @@ void MemoryOptimizePass::CollectVarMemorySize(
   }
   // Collect tensors from graph.
-  for (auto* node : graph_->Nodes()) {
+  for (auto* node : graph->Nodes()) {
     if (node->IsVar() &&
         node->Var()->GetType() ==
             framework::proto::VarType::Type::VarType_Type_LOD_TENSOR &&
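These two hunks only thread `graph` through as a parameter; the size accounting itself is collapsed in this view. Judging from the `fake_batch_size` constant above, the idea is to price each LoD tensor with the unknown batch dimension treated as 1. A hedged sketch of that kind of estimate, with purely illustrative names (`EstimateBytes` is not a Paddle function):

```cpp
// Illustrative only. Shows the idea of estimating a tensor's footprint when
// the batch dimension is unknown (-1), using a fake batch size of 1 as the
// constant in CollectVarMemorySize suggests.
#include <cstdint>
#include <iostream>
#include <vector>

int64_t EstimateBytes(const std::vector<int64_t>& shape, int64_t elem_bytes) {
  const int64_t fake_batch_size = 1;  // mirrors `const int fake_batch_size = 1;`
  int64_t numel = 1;
  for (int64_t d : shape) numel *= (d < 0 ? fake_batch_size : d);
  return numel * elem_bytes;
}

int main() {
  // A float LoD tensor of shape [-1, 3, 224, 224]: the -1 batch dim counts as 1.
  std::cout << EstimateBytes({-1, 3, 224, 224}, sizeof(float)) << " bytes\n";
}
```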
@@ -304,7 +304,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   // 3. Perform reuse plan: Replace all var's name in the model according to the
   // mapping table.
   if (!argument->enable_memory_optim()) return;
-  graph_ = argument->main_graph_ptr();
+  // Because the pass is a singleton, the graph cannot be a member
+  // variable; otherwise, errors will be caused under multithreaded
+  // conditions.
+  auto graph = argument->main_graph_ptr();
   int sort_kind = 0;
   std::unordered_map<std::string, lifecycle_t> lifecycles;
@@ -312,10 +315,10 @@ void MemoryOptimizePass::RunImpl(Argument* argument) {
   std::unordered_map<std::string, std::string> node2cluster;
   std::unordered_map<std::string, int> cluster_size;

-  CollectLifeCycle(&lifecycles, sort_kind);
-  CollectVarMemorySize(&space_table);
+  CollectLifeCycle(graph, &lifecycles, sort_kind);
+  CollectVarMemorySize(graph, &space_table);
   MakeSimpleReusePlan(lifecycles, space_table, &node2cluster, &cluster_size);
-  UpdateOpDescsByReuse(graph_, node2cluster, sort_kind);
+  UpdateOpDescsByReuse(graph, node2cluster, sort_kind);
   return;
 }
......
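RunImpl's pipeline is now: collect lifecycles, collect sizes, build a reuse plan (`node2cluster`), and rewrite the op descs. As a toy illustration of what such a plan can look like (this is not Paddle's `MakeSimpleReusePlan`), a greedy pass can map each variable onto the first cluster whose lifecycle does not overlap its own:

```cpp
// Toy greedy reuse plan, for illustration only. Variables whose [first, last]
// lifecycles do not overlap may share one memory cluster; node2cluster maps
// each variable to the representative variable of its cluster.
// Assumes `vars` is ordered by first use.
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using lifecycle_t = std::pair<int, int>;

std::unordered_map<std::string, std::string> MakeToyReusePlan(
    const std::vector<std::pair<std::string, lifecycle_t>>& vars) {
  std::unordered_map<std::string, std::string> node2cluster;
  // For each cluster representative, remember the last step it is alive.
  std::vector<std::pair<std::string, int>> cluster_last_use;
  for (const auto& [name, life] : vars) {
    bool reused = false;
    for (auto& [rep, last] : cluster_last_use) {
      if (life.first > last) {  // no overlap: safe to reuse this cluster
        node2cluster[name] = rep;
        last = life.second;
        reused = true;
        break;
      }
    }
    if (!reused) {  // open a new cluster for this variable
      node2cluster[name] = name;
      cluster_last_use.emplace_back(name, life.second);
    }
  }
  return node2cluster;
}

int main() {
  // a lives over ops [0,1], b over [2,3]: b can reuse a's buffer.
  auto plan = MakeToyReusePlan({{"a", {0, 1}}, {"b", {2, 3}}});
  for (const auto& kv : plan)
    std::cout << kv.first << " -> " << kv.second << "\n";  // b -> a, a -> a
}
```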
@@ -57,17 +57,15 @@ class MemoryOptimizePass : public AnalysisPass {
  private:
   void CollectLifeCycle(
+      framework::ir::Graph *graph,
       std::unordered_map<std::string, lifecycle_t> *lifecycles,
       int sort_kind) const;

-  void CollectVarMemorySize(space_table_t *space_table) const;
+  void CollectVarMemorySize(framework::ir::Graph *graph,
+                            space_table_t *space_table) const;

  public:
   std::string repr() const override;

- private:
-  mutable framework::ir::Graph *graph_{nullptr};
-  mutable int max_lifecycle_{-1};
 };

 }  // namespace analysis
......
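The header change deletes the mutable `graph_` and `max_lifecycle_` members outright. As the comment added in RunImpl explains, the pass object is a singleton shared across predictors, so per-run state kept in members races when several threads run the analysis at once. A minimal sketch of the failure mode and of the parameter-passing fix, using simplified stand-in types rather than Paddle's classes:

```cpp
// Simplified stand-ins, not Paddle classes. UnsafePass shows why per-run
// state stored in mutable members of a shared pass object races; SafePass
// keeps all per-run state in parameters and locals, as the commit does.
#include <cassert>
#include <thread>
#include <vector>

struct Graph { int num_nodes; };

struct UnsafePass {
  mutable Graph* graph_{nullptr};  // shared across concurrent Run() calls
  mutable int max_lifecycle_{-1};
  int Run(Graph* g) const {
    graph_ = g;  // data race: another thread may overwrite this pointer
    max_lifecycle_ = 0;
    for (int i = 0; i < graph_->num_nodes; ++i) ++max_lifecycle_;
    return max_lifecycle_;  // may reflect another thread's graph
  }
};

struct SafePass {
  int Run(Graph* graph) const {  // per-run state lives on the stack
    int max_lifecycle = 0;
    for (int i = 0; i < graph->num_nodes; ++i) ++max_lifecycle;
    return max_lifecycle;
  }
};

int main() {
  SafePass pass;  // a single shared instance is now fine
  Graph g1{100}, g2{200};
  std::vector<std::thread> workers;
  workers.emplace_back([&] { assert(pass.Run(&g1) == 100); });
  workers.emplace_back([&] { assert(pass.Run(&g2) == 200); });
  for (auto& t : workers) t.join();
}
```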