From 0b09e61aa103daf4f42f8f1cbdc5a17199fafbc4 Mon Sep 17 00:00:00 2001
From: hjchen2
Date: Mon, 18 Mar 2019 21:25:32 +0800
Subject: [PATCH] Add memory optimize pass

---
 src/framework/executor.cpp                 |   2 +
 src/framework/program/program_desc.cpp     |   2 +-
 src/framework/program/program_desc.h       |  19 +--
 src/framework/tensor.h                     |   3 +-
 .../kernel/arm/convolution/conv_common.cpp |   2 +-
 src/pass/memory_optimize.cpp               | 134 ++++++++++++++++++
 src/pass/memory_optimize.h                 |  67 +++++++++
 7 files changed, 218 insertions(+), 11 deletions(-)
 create mode 100644 src/pass/memory_optimize.cpp
 create mode 100644 src/pass/memory_optimize.h

diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index a15c0e6b4e..750c0da540 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -28,6 +28,7 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "memory/t_malloc.h"
+#include "pass/memory_optimize.h"
 
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
@@ -62,6 +63,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
       use_optimize_ ? program_.optimizeProgram : program_.originProgram;
   PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
                         "program_desc_ should not be nullptr");
+  pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
   // resize feed and fetch list
   // should init feed and fetch variables before infer shape
   InitFeedFetchList();
diff --git a/src/framework/program/program_desc.cpp b/src/framework/program/program_desc.cpp
index b66c7a0dcf..23781fe779 100644
--- a/src/framework/program/program_desc.cpp
+++ b/src/framework/program/program_desc.cpp
@@ -46,7 +46,7 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
   }
 }
 
-void ProgramDesc::Description(std::string header) {
+void ProgramDesc::Description(std::string header) const {
 #ifdef PADDLE_MOBILE_DEBUG
   if (header.size()) {
     LOG(kLOG_INFO) << header;
diff --git a/src/framework/program/program_desc.h b/src/framework/program/program_desc.h
index 5c75c91522..f4551509ee 100644
--- a/src/framework/program/program_desc.h
+++ b/src/framework/program/program_desc.h
@@ -30,6 +30,14 @@ class ProgramDesc {
   friend class ProgramOptimize;
   explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
 
+  ProgramDesc(const ProgramDesc &program_desc) {
+    for (auto &block : program_desc.blocks_) {
+      std::shared_ptr<BlockDesc> copy_block =
+          std::make_shared<BlockDesc>(*block);
+      blocks_.push_back(copy_block);
+    }
+  }
+
   std::shared_ptr<BlockDesc> Block(size_t idx);
 
   BlockDesc *MutableBlock(size_t idx) {
@@ -40,16 +48,11 @@ class ProgramDesc {
     }
   }
 
-  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
-  ProgramDesc(const ProgramDesc &program_desc) {
-    for (auto &block : program_desc.blocks_) {
-      std::shared_ptr<BlockDesc> copy_block =
-          std::make_shared<BlockDesc>(*block);
-      blocks_.push_back(copy_block);
-    }
+  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() const {
+    return blocks_;
   }
 
-  void Description(std::string header = "");
+  void Description(std::string header = "") const;
 
  private:
   std::vector<std::shared_ptr<BlockDesc>> blocks_;
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 24f09662ea..63f074f4af 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -69,7 +69,8 @@ class Tensor : public TensorBase {
   inline Tensor &ShareDataWith(const Tensor &src) {
     src.check_memory_size();
     if (holder_.get() != src.holder_.get()) {
-      *this = src;
+      // *this = src;
+      holder_ = src.holder_;
     }
     return *this;
   }
diff --git a/src/operators/kernel/arm/convolution/conv_common.cpp b/src/operators/kernel/arm/convolution/conv_common.cpp
index 2a3a5e17e1..b0d6c4a5d1 100644
--- a/src/operators/kernel/arm/convolution/conv_common.cpp
+++ b/src/operators/kernel/arm/convolution/conv_common.cpp
@@ -52,7 +52,7 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
   } else if (depth5x5 && param->Strides()[0] == param->Strides()[1] &&
              param->Strides()[0] == 1) {
     param->ExecMode() = ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT;
-  } else if (conv3x3 && !depth3x3 &&
+  } else if (conv3x3 && param->Groups() == 1 &&
              param->Strides()[0] == param->Strides()[1] &&
              param->Dilations()[0] == param->Dilations()[1] &&
              param->Strides()[0] == 1 && param->Dilations()[0] == 1
diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp
new file mode 100644
index 0000000000..7da698866b
--- /dev/null
+++ b/src/pass/memory_optimize.cpp
@@ -0,0 +1,134 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "pass/memory_optimize.h"
+#include "framework/lod_tensor.h"
+
+namespace paddle_mobile {
+namespace pass {
+
+void MemoryOptPass::InitBlockVars(const framework::BlockDesc *block) {
+  block_vars_.clear();
+  for (const auto var : block->Vars()) {
+    block_vars_[var->Name()] = var.get();
+  }
+}
+
+bool MemoryOptPass::IsPersistable(const std::string name) {
+  const auto it = block_vars_.find(name);
+  if (it != block_vars_.end()) {
+    return it->second->Persistable();
+  }
+  return false;
+}
+
+VarNode *MemoryOptPass::CreateNode(const std::string name) {
+  auto it = created_nodes_.find(name);
+  if (it != created_nodes_.end()) {
+    ++(it->second->count);
+    return it->second;
+  }
+  VarNode *var = new VarNode;
+  var->name = name;
+  var->count = 1;
+  var->visited = false;
+  created_nodes_[name] = var;
+  return var;
+}
+
+void MemoryOptPass::operator()(const framework::ProgramDesc *program,
+                               framework::Scope *scope) {
+  const auto &blocks = program->Blocks();
+  for (const auto &block : blocks) {
+    // visit all variables in the block and store them in a map
+    InitBlockVars(block.get());
+
+    visited_nodes_.clear();
+    reused_nodes_.clear();
+    // collect all non-persistable variables and accumulate
+    // their reference counts
+    std::stack<VarNode *> empty_var_nodes;
+    analysis_nodes_.swap(empty_var_nodes);
+
+    for (const auto &op : block->Ops()) {
+      DLOG << "op_desc->Type(): " << op->Type();
+      const auto &outputs_map = op->GetOutputs();
+      for (const auto &outputs : outputs_map) {
+        for (const auto &output : outputs.second) {
+          if (!IsPersistable(output)) {
+            DLOG << "output: " << output;
+            VarNode *node = CreateNode(output);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
+      const auto &inputs_map = op->GetInputs();
+      for (const auto &inputs : inputs_map) {
+        for (const auto &input : inputs.second) {
+          if (!IsPersistable(input)) {
+            DLOG << "input: " << input;
+            VarNode *node = CreateNode(input);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
+    }
+
+    // apply the optimization
+    while (!analysis_nodes_.empty()) {
+      auto *node = analysis_nodes_.top();
+      analysis_nodes_.pop();
+      // only an unvisited node can reuse memory from other nodes
+      // with 0 count, which indicates they will not be used any more
+      if (!node->visited) {
+        bool reused = false;
+        // find a possible reuse list
+        for (auto &list : reused_nodes_) {
+          if (list.back()->count == 0) {
+            list.push_back(node);
+            reused = true;
+            break;
+          }
+        }
+        // create a new list if no reusable list is found
+        if (!reused) {
+          std::vector<VarNode *> list;
+          list.push_back(node);
+          reused_nodes_.push_back(std::move(list));
+        }
+      }
+      node->visited = true;
+      node->count -= 1;
+    }
+  }
+  // share data among all variables in the same reuse list
+  for (const auto &list : reused_nodes_) {
+    DLOG << "\n";
+    DLOG << "share data among these variables";
+    std::string name = list[0]->name;
+    auto *reused_var = scope->Var(name);
+    auto *reuse_tensor =
+        reused_var->template GetMutable<framework::LoDTensor>();
+    reuse_tensor->mutable_data<float>();
+    for (const auto &node : list) {
+      DLOG << node->name;
+      auto *var = scope->Var(node->name);
+      auto *tensor = var->template GetMutable<framework::LoDTensor>();
+      tensor->ShareDataWith(*reuse_tensor);
+    }
+  }
+}
+
+}  // namespace pass
+}  // namespace paddle_mobile
diff --git a/src/pass/memory_optimize.h b/src/pass/memory_optimize.h
new file mode 100644
index 0000000000..f4e9b6c851
--- /dev/null
+++ b/src/pass/memory_optimize.h
@@ -0,0 +1,67 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "framework/program/program.h"
+
+namespace paddle_mobile {
+namespace pass {
+
+typedef struct {
+  std::string name;  // variable name
+  int count;         // reference count
+  bool visited;
+} VarNode;
+
+class PassBase {
+ public:
+  PassBase() {}
+  virtual ~PassBase() {}
+};
+
+// MemoryOptPass analyzes the program and reuses memory
+// between variables as much as possible
+class MemoryOptPass : public PassBase {
+ public:
+  MemoryOptPass() {}
+  virtual ~MemoryOptPass() {
+    for (auto &it : created_nodes_) {
+      delete it.second;
+    }
+  }
+
+  void operator()(const framework::ProgramDesc *program,
+                  framework::Scope *scope);
+
+  void InitBlockVars(const framework::BlockDesc *block);
+
+  bool IsPersistable(const std::string name);
+
+  VarNode *CreateNode(const std::string name);
+
+ private:
+  std::stack<VarNode *> analysis_nodes_;
+  std::vector<std::vector<VarNode *>> reused_nodes_;
+  std::unordered_map<std::string, VarNode *> created_nodes_;
+  std::unordered_map<std::string, VarNode *> visited_nodes_;
+  std::unordered_map<std::string, framework::VarDesc *> block_vars_;
+};
+
+}  // namespace pass
+}  // namespace paddle_mobile
--
GitLab
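
Reviewer note (not part of the patch): the heart of MemoryOptPass is the
reuse-list construction in operator()(). The self-contained sketch below
isolates that algorithm so it can be traced outside the framework. The Op
struct, the touch() helper, and the toy feed -> conv -> relu -> fetch graph
are inventions for this demo, not paddle-mobile APIs; only the counting and
list-reuse logic mirrors the pass.

#include <iostream>
#include <stack>
#include <string>
#include <unordered_map>
#include <vector>

struct VarNode {
  std::string name;
  int count = 0;    // remaining uses
  bool visited = false;
};

struct Op {
  std::vector<std::string> inputs;
  std::vector<std::string> outputs;
};

int main() {
  // A toy straight-line graph: feed -> conv -> relu -> fetch.
  const std::vector<Op> ops = {{{"feed"}, {"conv_out"}},
                               {{"conv_out"}, {"relu_out"}},
                               {{"relu_out"}, {"fetch"}}};

  // Pass 1: count every appearance of each variable and record the
  // appearance order on a stack (outputs before inputs, as in the pass).
  std::unordered_map<std::string, VarNode> nodes;
  std::stack<VarNode *> analysis;
  auto touch = [&](const std::string &name) {
    VarNode &n = nodes[name];  // unordered_map keeps references stable
    n.name = name;
    ++n.count;
    analysis.push(&n);
  };
  for (const auto &op : ops) {
    for (const auto &name : op.outputs) touch(name);
    for (const auto &name : op.inputs) touch(name);
  }

  // Pass 2: pop in reverse order; at its last occurrence a variable may
  // join a reuse list whose tail has no remaining uses, otherwise it
  // opens a new list (i.e. a new physical buffer).
  std::vector<std::vector<VarNode *>> reused;
  while (!analysis.empty()) {
    VarNode *node = analysis.top();
    analysis.pop();
    if (!node->visited) {
      bool placed = false;
      for (auto &list : reused) {
        if (list.back()->count == 0) {
          list.push_back(node);
          placed = true;
          break;
        }
      }
      if (!placed) reused.push_back({node});
    }
    node->visited = true;
    --node->count;
  }

  // Each list maps to one shared buffer, mirroring the final
  // ShareDataWith() loop in MemoryOptPass::operator()().
  for (size_t i = 0; i < reused.size(); ++i) {
    std::cout << "buffer " << i << ":";
    for (const auto *n : reused[i]) std::cout << ' ' << n->name;
    std::cout << '\n';
  }
  return 0;
}

Because nodes are popped in reverse order of appearance, a list whose tail
has count == 0 is a buffer every use of which lies later in the program
than the last use of the variable about to join it, so the two lifetimes
cannot overlap. For the toy graph the sketch prints two buffers,
"relu_out feed" and "fetch conv_out": four variables end up sharing two
allocations, matching what MemoryOptPass arranges via ShareDataWith().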