Refine memory optimize

ba8d089d · hjchen2 · be8788b4 · ba8d089d · ba8d089d · ba8d089d
5 changed file
--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "framework/executor.h"
 #include <algorithm>
 #include <utility>
 #include <vector>
 #include "common/enforce.h"
 #include "common/log.h"
+#include "memory/t_malloc.h"
 #include "framework/context.h"
 #include "framework/framework.pb-c.h"
 #include "framework/lod_tensor.h"
@@ -27,9 +27,8 @@ limitations under the License. */
 #include "framework/program/var_desc.h"
 #include "framework/scope.h"
 #include "framework/tensor.h"
-#include "memory/t_malloc.h"
+#include "framework/executor.h"
 #include "pass/memory_optimize.h"
-
 #ifdef PADDLE_MOBILE_CL
 #include "framework/cl/cl_image.h"
 #endif
@@ -217,6 +216,7 @@ void Executor<Device, T>::InitMemory() {
          var->template GetMutable<framework::LoDTensorArray>();
          continue;
        }
+        DLOG << "init persistable var: " << var_desc->Name();
        char *origin_data =
            ReadFileToBuff(program_.model_path + "/" + var_desc->Name());
        char *data = origin_data;
@@ -329,7 +329,6 @@ bool Executor<Device, T>::varInputMemory(
  if (type == VARTYPE_TYPE_LOD_TENSOR) {
    auto data_type = var_desc->Tensor_desc().DataType();
    framework::LoDTensor *tensor = var->template GetMutable<LoDTensor>();
-    tensor->mutable_data(TypeId(data_type));
  } else if (type == VARTYPE_TYPE_STEP_SCOPES) {
    std::vector<framework::Scope *> *step_scopes =
        var->template GetMutable<std::vector<framework::Scope *>>();
@@ -465,6 +464,7 @@ PMStatus Executor<Device, T>::Predict() {
    clock_gettime(CLOCK_MONOTONIC, &ts);
    profile[op_index].runBegin = (uint64_t)ts.tv_sec * 1e9 + ts.tv_nsec;
 #endif
+    DLOG << "run op: " << op_handler->Type();
    if (lod_mode_) {
      op_handler->InferShape();
    }

--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -28,6 +28,8 @@ limitations under the License. */
 #include "framework/tensor_base.h"
 #include "memory/t_malloc.h"

+#include <iostream>
+
 namespace paddle_mobile {
 namespace framework {

@@ -69,7 +71,6 @@ class Tensor : public TensorBase {
  inline Tensor &ShareDataWith(const Tensor &src) {
    src.check_memory_size();
    if (holder_.get() != src.holder_.get()) {
-      // *this = src;
      holder_ = src.holder_;
    }
    return *this;
@@ -82,7 +83,13 @@ class Tensor : public TensorBase {
    PADDLE_MOBILE_ENFORCE(numel() >= 0, "the Tensor's numel must >=0.")
    int64_t size = numel() * SizeOfType(type);
    if (holder_ == nullptr || holder_->size() < size + offset_) {
-      holder_.reset(new PlaceholderImpl(size, type));
+      if (holder_ == nullptr) {
+        std::cout << "reset holder... size " << size << std::endl;
+        holder_.reset(new PlaceholderImpl(size, type));
+      } else {
+        std::cout << "resize holder... size " << size << std::endl;
+        holder_->resize(size);
+      }
      offset_ = 0;
    }
    return reinterpret_cast<void *>(
@@ -181,6 +188,7 @@ class Tensor : public TensorBase {
        : ptr_(static_cast<uint8_t *>(memory::Alloc(size)),
               memory::PODDeleter<uint8_t>()),
          size_(size),
+          capatity_(size),
          type_(type) {
      PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
                            "Insufficient memory to allocation");
@@ -194,11 +202,21 @@ class Tensor : public TensorBase {

    virtual void set_type(std::type_index type) { type_ = type; }

+    virtual void resize(size_t size) {
+      if (size > capatity_) {
+        capatity_ = size;
+        ptr_.reset(static_cast<uint8_t *>(memory::Alloc(capatity_)));
+      }
+      size_ = size;
+    }
+
    std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;

    /*! the size of memory block. */
    size_t size_;

+    size_t capatity_;
+
    /* the current type of memory */
    std::type_index type_;
  };

--- a/src/framework/tensor_base.h
+++ b/src/framework/tensor_base.h
@@ -117,6 +117,8 @@ class TensorBase {
    virtual std::type_index type() const = 0;

    virtual void set_type(std::type_index type) = 0;
+
+    virtual void resize(size_t size) = 0;
  };

  /**

--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
@@ -54,7 +54,6 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
    // access all variables in block, and stored in map
    InitBlockVars(block.get());

-    visited_nodes_.clear();
    reused_nodes_.clear();
    // collect all not persistable variables, and accumulate
    // it's reference count
@@ -63,8 +62,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,

    for (const auto &op : block->Ops()) {
      DLOG << "op_desc->Type(): " << op->Type();
-      const auto &outputs_map = op->GetOutputs();
-      for (const auto &outputs : outputs_map) {
+      for (const auto &outputs : op->GetOutputs()) {
        for (const auto &output : outputs.second) {
          if (!IsPersistable(output)) {
            DLOG << "output: " << output;
@@ -73,8 +71,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
          }
        }
      }
-      const auto &inputs_map = op->GetInputs();
-      for (const auto &inputs : inputs_map) {
+      for (const auto &inputs : op->GetInputs()) {
        for (const auto &input : inputs.second) {
          if (!IsPersistable(input)) {
            DLOG << "input: " << input;
@@ -83,6 +80,15 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
          }
        }
      }
+      for (const auto &outputs : op->GetOutputs()) {
+        for (const auto &output : outputs.second) {
+          if (!IsPersistable(output)) {
+            DLOG << "output: " << output;
+            VarNode *node = CreateNode(output);
+            analysis_nodes_.push(node);
+          }
+        }
+      }
    }

    // apply optimize
@@ -115,7 +121,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
  // shared data within all variables in the same reused list
  for (const auto &list : reused_nodes_) {
    DLOG << "\n";
-    DLOG << "share data within these variables";
+    DLOG << "share memory within these variables";
    std::string name = list[0]->name;
    auto *reused_var = scope->Var(name);
    auto *reuse_tensor =

--- a/src/pass/memory_optimize.h
+++ b/src/pass/memory_optimize.h
@@ -59,7 +59,6 @@ class MemoryOptPass : public PassBase {
  std::stack<VarNode *> analysis_nodes_;
  std::vector<std::vector<VarNode *>> reused_nodes_;
  std::unordered_map<std::string, VarNode *> created_nodes_;
-  std::unordered_map<std::string, VarNode *> visited_nodes_;
  std::unordered_map<std::string, framework::VarDesc *> block_vars_;
 };