Add memory optimize pass

0b09e61a · hjchen2 · ba7458fa · 0b09e61a · 0b09e61a · 0b09e61a
7 changed files
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -28,6 +28,7 @@ limitations under the License. */
 #include "framework/scope.h"
 #include "framework/tensor.h"
 #include "memory/t_malloc.h"
+#include "pass/memory_optimize.h"
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
@@ -62,6 +63,7 @@ Executor<Device, T>::Executor(const Program<Device> &program,
      use_optimize_ ? program_.optimizeProgram : program_.originProgram;
  PADDLE_MOBILE_ENFORCE(program_desc_ != nullptr,
                        "program_desc_ should not be nullptr");
+  pass::MemoryOptPass()(program_desc_.get(), program_.scope.get());
  // resize feed and fetch list
  // should init feed and fetch variables before infer shape
  InitFeedFetchList();

--- a/src/framework/program/program_desc.cpp
+++ b/src/framework/program/program_desc.cpp
@@ -46,7 +46,7 @@ ProgramDesc::ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc) {
  }
 }
-void ProgramDesc::Description(std::string header) {
+void ProgramDesc::Description(std::string header) const {
 #ifdef PADDLE_MOBILE_DEBUG
  if (header.size()) {
    LOG(kLOG_INFO) << header;

--- a/src/framework/program/program_desc.h
+++ b/src/framework/program/program_desc.h
@@ -30,6 +30,14 @@ class ProgramDesc {
  friend class ProgramOptimize;
  explicit ProgramDesc(PaddleMobile__Framework__Proto__ProgramDesc *desc);
+  // Copy constructor: gives this program its own BlockDesc objects, each one
+  // copy-constructed from the corresponding block of `program_desc`.
+  ProgramDesc(const ProgramDesc &program_desc) {
+    for (const auto &src_block : program_desc.blocks_) {
+      blocks_.push_back(std::make_shared<BlockDesc>(*src_block));
+    }
+  }
  std::shared_ptr<BlockDesc> Block(size_t idx);
  BlockDesc *MutableBlock(size_t idx) {
@@ -40,16 +48,11 @@ class ProgramDesc {
    }
  }
-  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() { return blocks_; }
+  const std::vector<std::shared_ptr<BlockDesc>> &Blocks() const {
-  ProgramDesc(const ProgramDesc &program_desc) {
+    return blocks_;
-    for (auto &block : program_desc.blocks_) {
-      std::shared_ptr<BlockDesc> copy_block =
-          std::make_shared<BlockDesc>(*block);
-      blocks_.push_back(copy_block);
-    }
  }
-  void Description(std::string header = "");
+  void Description(std::string header = "") const;
 private:
  std::vector<std::shared_ptr<BlockDesc>> blocks_;

--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -69,7 +69,8 @@ class Tensor : public TensorBase {
  inline Tensor &ShareDataWith(const Tensor &src) {
    src.check_memory_size();
    if (holder_.get() != src.holder_.get()) {
-      *this = src;
+      // *this = src;
+      holder_ = src.holder_;
    }
    return *this;
  }

--- a/src/operators/kernel/arm/convolution/conv_common.cpp
+++ b/src/operators/kernel/arm/convolution/conv_common.cpp
@@ -52,7 +52,7 @@ void InitBaseConvKernel(ConvParam<CPU> *param) {
    } else if (depth5x5 && param->Strides()[0] == param->Strides()[1] &&
               param->Strides()[0] == 1) {
      param->ExecMode() = ConvParam<CPU>::EXEC_DEPTHWISE5x5_FLOAT;
-    } else if (conv3x3 && !depth3x3 &&
+    } else if (conv3x3 && param->Groups() == 1 &&
               param->Strides()[0] == param->Strides()[1] &&
               param->Dilations()[0] == param->Dilations()[1] &&
               param->Strides()[0] == 1 && param->Dilations()[0] == 1

--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "pass/memory_optimize.h"
+#include "framework/lod_tensor.h"
+namespace paddle_mobile {
+namespace pass {
+// Rebuilds `block_vars_` so that it maps the name of every variable declared
+// in `block` to its description. Entries left over from a previously
+// analyzed block are dropped first.
+void MemoryOptPass::InitBlockVars(const framework::BlockDesc *block) {
+  block_vars_.clear();
+  // Bind by reference: only the raw pointer is stored, so there is no reason
+  // to copy the smart pointer for each variable.
+  for (const auto &var : block->Vars()) {
+    block_vars_[var->Name()] = var.get();
+  }
+}
+// Returns true only when `name` is a variable of the block currently loaded
+// into `block_vars_` and that variable is marked persistable. Names that are
+// not known to the block are reported as not persistable.
+bool MemoryOptPass::IsPersistable(const std::string name) {
+  const auto entry = block_vars_.find(name);
+  return entry != block_vars_.end() && entry->second->Persistable();
+}
+// Returns the VarNode registered for `name`, bumping its reference count by
+// one. If no node exists yet, a new one is allocated with count 1 and
+// `visited` cleared, and recorded in `created_nodes_`.
+VarNode *MemoryOptPass::CreateNode(const std::string name) {
+  const auto found = created_nodes_.find(name);
+  if (found == created_nodes_.end()) {
+    // first occurrence of this name: register a fresh node
+    VarNode *fresh = new VarNode;
+    fresh->name = name;
+    fresh->count = 1;
+    fresh->visited = false;
+    created_nodes_[name] = fresh;
+    return fresh;
+  }
+  // name already known: one more reference to the same node
+  VarNode *existing = found->second;
+  ++(existing->count);
+  return existing;
+}
+// Runs the memory-reuse analysis over every block of `program` and makes the
+// tensors of variables that may reuse one another share one buffer in `scope`.
+//
+// For each block:
+//   1. every non-persistable input/output name of every op is turned into a
+//      VarNode (one per distinct name) whose `count` is the number of times
+//      the name occurs, and each occurrence is pushed on `analysis_nodes_`;
+//   2. the stack is unwound, so occurrences are visited from the last one
+//      pushed towards the first; the first time a node is met it is appended
+//      to a reuse list whose last member has no occurrences left
+//      (count == 0), or it starts a new list;
+//   3. all variables of one reuse list are made to share the tensor data of
+//      the list's first variable.
+//
+// Step 3 is performed per block: `reused_nodes_` is reset at the start of each
+// block, so applying it only once after the loop would drop the reuse lists
+// of every block except the last one.
+void MemoryOptPass::operator()(const framework::ProgramDesc *program,
+                               framework::Scope *scope) {
+  for (const auto &block : program->Blocks()) {
+    // access all variables in block, and store them in a map
+    InitBlockVars(block.get());
+    visited_nodes_.clear();
+    reused_nodes_.clear();
+    // start this block from an empty occurrence stack
+    std::stack<VarNode *> empty_var_nodes;
+    analysis_nodes_.swap(empty_var_nodes);
+    // collect all non-persistable variables, and accumulate
+    // their reference counts
+    for (const auto &op : block->Ops()) {
+      DLOG << "op_desc->Type(): " << op->Type();
+      for (const auto &outputs : op->GetOutputs()) {
+        for (const auto &output : outputs.second) {
+          if (!IsPersistable(output)) {
+            DLOG << "output: " << output;
+            analysis_nodes_.push(CreateNode(output));
+          }
+        }
+      }
+      for (const auto &inputs : op->GetInputs()) {
+        for (const auto &input : inputs.second) {
+          if (!IsPersistable(input)) {
+            DLOG << "input: " << input;
+            analysis_nodes_.push(CreateNode(input));
+          }
+        }
+      }
+    }
+    // apply optimize
+    // NOTE(review): occurrences are unwound strictly in stack order, so an
+    // output that is never read again reaches count == 0 before the other
+    // outputs of the same op are visited and could land in the same list.
+    // Confirm this cannot happen for the supported models.
+    while (!analysis_nodes_.empty()) {
+      auto *node = analysis_nodes_.top();
+      analysis_nodes_.pop();
+      // only a node that has not been visited yet can reuse the memory of
+      // nodes whose count is 0, which indicates they will not be used any more
+      if (!node->visited) {
+        bool reused = false;
+        // find a possible reuse list
+        for (auto &list : reused_nodes_) {
+          if (list.back()->count == 0) {
+            list.push_back(node);
+            reused = true;
+            break;
+          }
+        }
+        // create a new list if no reusable list was found
+        if (!reused) {
+          std::vector<VarNode *> list;
+          list.push_back(node);
+          reused_nodes_.push_back(std::move(list));
+        }
+      }
+      node->visited = true;
+      node->count -= 1;
+    }
+    // share data between all variables of the same reuse list; this must
+    // happen before the next block resets reused_nodes_
+    for (const auto &list : reused_nodes_) {
+      DLOG << "\n";
+      DLOG << "share data within these variables";
+      const std::string &name = list[0]->name;
+      auto *reused_var = scope->Var(name);
+      auto *reuse_tensor =
+          reused_var->template GetMutable<framework::LoDTensor>();
+      reuse_tensor->mutable_data<float>();
+      for (const auto &node : list) {
+        DLOG << node->name;
+        auto *var = scope->Var(node->name);
+        auto *tensor = var->template GetMutable<framework::LoDTensor>();
+        tensor->ShareDataWith(*reuse_tensor);
+      }
+    }
+  }
+}
+}  // namespace pass
+}  // namespace paddle_mobile
--- a/src/pass/memory_optimize.h
+++ b/src/pass/memory_optimize.h
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <stack>
+#include <string>
+#include <unordered_map>
+#include <vector>
+#include "framework/program/program.h"
+namespace paddle_mobile {
+namespace pass {
+// Bookkeeping record for one variable name during memory-reuse analysis.
+struct VarNode {
+  std::string name;  // variable name
+  int count;         // number of not-yet-processed occurrences of the name
+  bool visited;      // set once the node has been handled by the analysis
+};
+// Common polymorphic base for passes; it carries no state of its own.
+class PassBase {
+ public:
+  PassBase() = default;
+  virtual ~PassBase() = default;
+};
+// MemoryOptPass analyzes a program and lets variables reuse memory from one
+// another as much as possible.
+//
+// The pass owns every VarNode it creates (kept in created_nodes_) and frees
+// them in its destructor. It is therefore non-copyable: a copy would hold the
+// same raw pointers and delete them a second time.
+class MemoryOptPass : public PassBase {
+ public:
+  MemoryOptPass() {}
+  MemoryOptPass(const MemoryOptPass &) = delete;
+  MemoryOptPass &operator=(const MemoryOptPass &) = delete;
+  // Releases all VarNode objects allocated by CreateNode().
+  virtual ~MemoryOptPass() {
+    for (auto &it : created_nodes_) {
+      delete it.second;
+    }
+  }
+  // Runs the analysis on `program` and applies the resulting memory sharing
+  // to the variables held by `scope`.
+  void operator()(const framework::ProgramDesc *program,
+                  framework::Scope *scope);
+  // Reloads block_vars_ with the variables declared in `block`.
+  void InitBlockVars(const framework::BlockDesc *block);
+  // Tells whether `name` is a persistable variable of the current block.
+  bool IsPersistable(const std::string name);
+  // Returns the node for `name`, creating it on first use and counting one
+  // more reference on every call.
+  VarNode *CreateNode(const std::string name);
+ private:
+  // occurrences of the variables under analysis (nodes are not owned here)
+  std::stack<VarNode *> analysis_nodes_;
+  // each inner vector lists variables that are made to share one buffer
+  std::vector<std::vector<VarNode *>> reused_nodes_;
+  // variable name -> node; owns the nodes, deleted in the destructor
+  std::unordered_map<std::string, VarNode *> created_nodes_;
+  // NOTE(review): no use of this map is visible in the header; confirm
+  // whether it is still needed.
+  std::unordered_map<std::string, VarNode *> visited_nodes_;
+  // variable name -> description, filled by InitBlockVars() (not owned)
+  std::unordered_map<std::string, framework::VarDesc *> block_vars_;
+};
+}  // namespace pass
+}  // namespace paddle_mobile