From ba8d089d18d457d1aa30b6c7741e0a91905defe0 Mon Sep 17 00:00:00 2001
From: hjchen2 <chenhoujiangcug@gmail.com>
Date: Tue, 19 Mar 2019 12:15:12 +0800
Subject: [PATCH] Refine memory optimize

---
 src/framework/executor.cpp   |  8 ++++----
 src/framework/tensor.h       | 22 ++++++++++++++++++++--
 src/framework/tensor_base.h  |  2 ++
 src/pass/memory_optimize.cpp | 18 ++++++++++++------
 src/pass/memory_optimize.h   |  1 -
 5 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/src/framework/executor.cpp b/src/framework/executor.cpp
index 750c0da540..f1d50d6e99 100644
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "framework/executor.h"
 #include <algorithm>
 #include <utility>
 #include <vector>
 #include "common/enforce.h"
 #include "common/log.h"
+#include "memory/t_malloc.h"
 #include "framework/context.h"
 #include "framework/framework.pb-c.h"
 #include "framework/lod_tensor.h"
@@ -27,9 +27,8 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-#include "memory/t_malloc.h"
+#include "framework/executor.h"
 #include "pass/memory_optimize.h"
-
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
 #endif
@@ -217,6 +216,7 @@ void Executor<Device, T>::InitMemory() {
           var->template GetMutable<framework::LoDTensorArray>();
           continue;
         }
+        DLOG << "init persistable var: " << var_desc->Name();
         char *origin_data =
             ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
         char *data = origin_data;
@@ -329,7 +329,6 @@ bool Executor<Device, T>::varInputMemory(
   if (type == VARTYPE_TYPE_LOD_TENSOR) {
     auto data_type = var_desc->Tensor_desc().DataType();
     framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>();
-    tensor->mutable_data(TypeId(data_type));
   } else if (type == VARTYPE_TYPE_STEP_SCOPES) {
     std::vector<framework::Scope *> *step_scopes =
         var->template GetMutable<std::vector<framework::Scope *>>();
@@ -465,6 +464,7 @@ PMStatus Executor<Device, T>::Predict() {
     clock_gettime(CLOCK_MONOTONIC, &ts);
     profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
+    DLOG << "run op: " << op_handler->Type();
     if (lod_mode_) {
       op_handler->InferShape();
     }
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 63f074f4af..8b56dc77c9 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -28,6 +28,8 @@ limitations under the License. */
 #include "framework/tensor_base.h"
 #include "memory/t_malloc.h"
 
+#include <iostream>
+
 namespace paddle_mobile {
 namespace framework {
 
@@ -69,7 +71,6 @@ class Tensor : public TensorBase {
   inline Tensor &ShareDataWith(const Tensor &src) {
     src.check_memory_size();
     if (holder_.get() != src.holder_.get()) {
-      // *this = src;
       holder_ = src.holder_;
     }
     return *this;
@@ -82,7 +83,13 @@ class Tensor : public TensorBase {
     PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
     int64_t size = numel() * SizeOfType(type);
     if (holder_ == nullptr || holder_->size() < size + offset_) {
-      holder_.reset(new PlaceholderImpl(size, type));
+      if (holder_ == nullptr) {
+        std::cout << "reset holder... size " << size << std::endl;
+        holder_.reset(new PlaceholderImpl(size, type));
+      } else {
+        std::cout << "resize holder... size " << size << std::endl;
+        holder_->resize(size);
+      }
       offset_ = 0;
     }
     return reinterpret_cast<void *>(
@@ -181,6 +188,7 @@ class Tensor : public TensorBase {
         : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
                memory::PODDeleter<uint8_t>()),
           size_(size),
+          capatity_(size),
           type_(type) {
       PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
                             "Insufficient memory to allocation");
@@ -194,11 +202,32 @@ class Tensor : public TensorBase {
 
     virtual void set_type(std::type_index type) { type_ = type; }
 
+    /**
+     * Set the logical size of the block, growing the allocation on demand.
+     * A request that fits in the current capacity only updates size_; a
+     * larger one allocates a fresh buffer, so previous contents are not
+     * preserved.
+     */
+    virtual void resize(size_t size) {
+      if (size > capatity_) {
+        // Allocate first and record the new capacity only afterwards; apply
+        // the same null check the constructor uses for memory::Alloc.
+        ptr_.reset(static_cast<uint8_t *>(memory::Alloc(size)));
+        PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
+                              "Insufficient memory to allocation");
+        capatity_ = size;
+      }
+      size_ = size;
+    }
+
     std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
 
     /*! the size of memory block. */
     size_t size_;
 
+    /*! the number of bytes allocated behind ptr_ (see resize()). */
+    size_t capatity_;
+
     /* the current type of memory */
     std::type_index type_;
   };
diff --git a/src/framework/tensor_base.h b/src/framework/tensor_base.h
index b41d7786c1..e5ab7793c0 100644
--- a/src/framework/tensor_base.h
+++ b/src/framework/tensor_base.h
@@ -117,6 +117,8 @@ class TensorBase {
     virtual std::type_index type() const = 0;
 
     virtual void set_type(std::type_index type) = 0;
+
+    virtual void resize(size_t size) = 0;
   };
 
   /**
diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp
index 7da698866b..68c7d442f8 100644
--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
@@ -54,7 +54,6 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
     // access all variables in block, and stored in map
     InitBlockVars(block.get());
 
-    visited_nodes_.clear();
     reused_nodes_.clear();
     // collect all not persistable variables, and accumulate
     // it's reference count
@@ -63,8 +62,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
 
     for (const auto &op : block->Ops()) {
       DLOG << "op_desc->Type(): " << op->Type();
-      const auto &outputs_map = op->GetOutputs();
-      for (const auto &outputs : outputs_map) {
+      for (const auto &outputs : op->GetOutputs()) {
         for (const auto &output : outputs.second) {
           if (!IsPersistable(output)) {
             DLOG << "output: " << output;
@@ -73,8 +71,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
           }
         }
       }
-      const auto &inputs_map = op->GetInputs();
-      for (const auto &inputs : inputs_map) {
+      for (const auto &inputs : op->GetInputs()) {
         for (const auto &input : inputs.second) {
           if (!IsPersistable(input)) {
             DLOG << "input: " << input;
@@ -83,6 +80,15 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
           }
         }
       }
+      for (const auto &outputs : op->GetOutputs()) {
+        for (const auto &output : outputs.second) {
+          if (!IsPersistable(output)) {
+            DLOG << "output: " << output;
+            VarNode *node = CreateNode(output);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
     }
 
     // apply optimize
@@ -115,7 +121,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
   // shared data within all variables in the same reused list
   for (const auto &list : reused_nodes_) {
     DLOG << "\n";
-    DLOG << "share data within these variables";
+    DLOG << "share memory within these variables";
     std::string name = list[0]->name;
     auto *reused_var = scope->Var(name);
     auto *reuse_tensor =
diff --git a/src/pass/memory_optimize.h b/src/pass/memory_optimize.h
index f4e9b6c851..466af72e46 100644
--- a/src/pass/memory_optimize.h
+++ b/src/pass/memory_optimize.h
@@ -59,7 +59,6 @@ class MemoryOptPass : public PassBase {
   std::stack<VarNode *> analysis_nodes_;
   std::vector<std::vector<VarNode *>> reused_nodes_;
   std::unordered_map<std::string, VarNode *> created_nodes_;
-  std::unordered_map<std::string, VarNode *> visited_nodes_;
   std::unordered_map<std::string, framework::VarDesc *> block_vars_;
 };
 
-- 
GitLab