提交 84409bd8 编写于 作者: Y Yanzhan Yang 提交者: GitHub

support creating tensor with raw pointer. (#1714)

* support creating tensor with raw pointer.

* fix style

* fix fpga compilation error
上级 2292f6ef
...@@ -132,9 +132,15 @@ enum PowerMode { ...@@ -132,9 +132,15 @@ enum PowerMode {
AUTO = 4, // scheduled by system AUTO = 4, // scheduled by system
}; };
// Controls how aggressively the memory-optimization pass
// (pass::MemoryOptPass) reuses variable buffers at inference time.
enum MemoryOptimizationLevel {
  // Pass disabled: every variable keeps its own buffer.
  NoMemoryOptimization = 0,
  // Reuse buffers, but exclude the inputs of "feed" ops so that
  // externally supplied input tensors are never overwritten.
  MemoryOptimizationWithoutFeeds = 1,
  // Reuse buffers for all non-persistable variables.
  FullMemoryOptimization = 2,
};
struct PaddleMobileConfigInternal { struct PaddleMobileConfigInternal {
bool load_when_predict = false; bool load_when_predict = false;
bool enable_memory_optimization = true; MemoryOptimizationLevel memory_optimization_level = FullMemoryOptimization;
}; };
extern const char *G_OP_TYPE_CONV; extern const char *G_OP_TYPE_CONV;
......
...@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program, ...@@ -65,8 +65,9 @@ Executor<Device, T>::Executor(const Program<Device> &program,
"program_desc_ should not be nullptr"); "program_desc_ should not be nullptr");
#if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \ #if !defined(PADDLE_MOBILE_FPGA) && !defined(PADDLE_MOBILE_FPGA_KD) && \
!defined(PADDLE_MOBILE_CL) !defined(PADDLE_MOBILE_CL)
if (config_.enable_memory_optimization) { if (config_.memory_optimization_level != NoMemoryOptimization) {
pass::MemoryOptPass()(program_desc_.get(), program_.scope.get()); pass::MemoryOptPass()(program_desc_.get(), program_.scope.get(),
config_.memory_optimization_level);
} }
#endif #endif
// resize feed and fetch list // resize feed and fetch list
......
...@@ -57,6 +57,20 @@ class Tensor : public TensorBase { ...@@ -57,6 +57,20 @@ class Tensor : public TensorBase {
} }
} }
// Constructs a Tensor that adopts an existing raw element buffer
// instead of allocating fresh storage.
//
// @param input pointer to a buffer holding framework::product(ddim)
//              elements of type T; must not be null.
// @param ddim  the tensor's shape.
//
// NOTE(review): the holder wraps `input` with memory::PODDeleter (see
// PlaceholderImpl), so the tensor takes OWNERSHIP and will free the
// buffer when the last holder reference dies — callers must not free
// it themselves nor pass stack/static memory. TODO: confirm this
// contract with all call sites.
//
// The buffer length cannot be validated from a raw pointer (the old
// commented-out check `sizeof(input) / sizeof(input[0])` measured the
// pointer, not the array); the caller is trusted to size it correctly.
template <typename T>
Tensor(T *input, DDim ddim) {
  Resize(ddim);
  auto type = type_id<T>().hash_code();
  int64_t size = numel() * SizeOfType(type);
  // reinterpret_cast (not a C-style cast) for the byte-level view of
  // the caller-provided element buffer.
  holder_.reset(
      new PlaceholderImpl(size, type, reinterpret_cast<uint8_t *>(input)));
  holder_->set_type(type);
  offset_ = 0;
}
Tensor(const Tensor &inTensor) { Tensor(const Tensor &inTensor) {
this->dims_ = inTensor.dims_; this->dims_ = inTensor.dims_;
this->holder_ = inTensor.holder_; this->holder_ = inTensor.holder_;
...@@ -203,6 +217,15 @@ class Tensor : public TensorBase { ...@@ -203,6 +217,15 @@ class Tensor : public TensorBase {
"Insufficient memory to allocation"); "Insufficient memory to allocation");
} }
// Wraps an externally allocated buffer as this tensor's storage.
// `ptr` is ADOPTED: ptr_ is constructed with memory::PODDeleter, so
// the buffer is freed when this placeholder is destroyed — the caller
// must not free it. `size` is taken as both the logical size and the
// capacity of the buffer.
// NOTE(review): `capatity_` is a pre-existing member-name typo
// ("capacity") kept for consistency with the rest of the class.
PlaceholderImpl(size_t size, const kTypeId_t type, uint8_t *ptr)
    : ptr_(ptr, memory::PODDeleter<uint8_t>()),
      size_(size),
      capatity_(size),
      type_(type) {
  // Rejects a null buffer up front rather than failing on first use.
  PADDLE_MOBILE_ENFORCE(ptr_ != nullptr,
                        "Insufficient memory to allocation");
}
virtual size_t size() const { return size_; } virtual size_t size() const { return size_; }
virtual void *ptr() const { return static_cast<void *>(ptr_.get()); } virtual void *ptr() const { return static_cast<void *>(ptr_.get()); }
......
...@@ -14,6 +14,7 @@ limitations under the License. */ ...@@ -14,6 +14,7 @@ limitations under the License. */
#include "pass/memory_optimize.h" #include "pass/memory_optimize.h"
#include "framework/lod_tensor.h" #include "framework/lod_tensor.h"
#include <algorithm>
namespace paddle_mobile { namespace paddle_mobile {
namespace pass { namespace pass {
...@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) { ...@@ -47,8 +48,9 @@ VarNode *MemoryOptPass::CreateNode(const std::string name) {
return var; return var;
} }
void MemoryOptPass::operator()(const framework::ProgramDesc *program, void MemoryOptPass::operator()(
framework::Scope *scope) { const framework::ProgramDesc *program, framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level) {
const auto &blocks = program->Blocks(); const auto &blocks = program->Blocks();
for (const auto &block : blocks) { for (const auto &block : blocks) {
// access all variables in each block // access all variables in each block
...@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -60,12 +62,29 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
std::stack<VarNode *> empty_var_nodes; std::stack<VarNode *> empty_var_nodes;
analysis_nodes_.swap(empty_var_nodes); analysis_nodes_.swap(empty_var_nodes);
std::vector<std::string> exclude_var_names;
for (const auto &op : block->Ops()) {
for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) {
if (!IsPersistable(input)) {
if (memory_optimization_level == MemoryOptimizationWithoutFeeds) {
if (op->Type() == "feed") {
exclude_var_names.push_back(input);
}
}
}
}
}
}
std::vector<VarNode *> fetch_var_nodes; std::vector<VarNode *> fetch_var_nodes;
for (const auto &op : block->Ops()) { for (const auto &op : block->Ops()) {
DLOG << "op_desc->Type(): " << op->Type(); DLOG << "op_desc->Type(): " << op->Type();
for (const auto &outputs : op->GetOutputs()) { for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) { for (const auto &output : outputs.second) {
if (!IsPersistable(output)) { if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output; DLOG << "output: " << output;
VarNode *node = CreateNode(output); VarNode *node = CreateNode(output);
analysis_nodes_.push(node); analysis_nodes_.push(node);
...@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -74,7 +93,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
} }
for (const auto &inputs : op->GetInputs()) { for (const auto &inputs : op->GetInputs()) {
for (const auto &input : inputs.second) { for (const auto &input : inputs.second) {
if (!IsPersistable(input)) { if (!IsPersistable(input) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
input) == exclude_var_names.end()) {
DLOG << "input: " << input; DLOG << "input: " << input;
VarNode *node = CreateNode(input); VarNode *node = CreateNode(input);
analysis_nodes_.push(node); analysis_nodes_.push(node);
...@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program, ...@@ -86,7 +107,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
} }
for (const auto &outputs : op->GetOutputs()) { for (const auto &outputs : op->GetOutputs()) {
for (const auto &output : outputs.second) { for (const auto &output : outputs.second) {
if (!IsPersistable(output)) { if (!IsPersistable(output) &&
std::find(exclude_var_names.begin(), exclude_var_names.end(),
output) == exclude_var_names.end()) {
DLOG << "output: " << output; DLOG << "output: " << output;
VarNode *node = CreateNode(output); VarNode *node = CreateNode(output);
analysis_nodes_.push(node); analysis_nodes_.push(node);
......
...@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase { ...@@ -47,7 +47,8 @@ class MemoryOptPass : public PassBase {
} }
void operator()(const framework::ProgramDesc *program, void operator()(const framework::ProgramDesc *program,
framework::Scope *scope); framework::Scope *scope,
MemoryOptimizationLevel memory_optimization_level);
void AppendBlockVars(const framework::BlockDesc *block); void AppendBlockVars(const framework::BlockDesc *block);
......
...@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) { ...@@ -31,7 +31,9 @@ void test(int argc, char *argv[]) {
bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1; bool enable_memory_optimization = std::stoi(argv[arg_index]) == 1;
arg_index++; arg_index++;
paddle_mobile::PaddleMobileConfigInternal config; paddle_mobile::PaddleMobileConfigInternal config;
config.enable_memory_optimization = enable_memory_optimization; config.memory_optimization_level = enable_memory_optimization
? MemoryOptimizationWithoutFeeds
: NoMemoryOptimization;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config); paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
paddle_mobile.SetThreadNum(1); paddle_mobile.SetThreadNum(1);
...@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) { ...@@ -75,56 +77,74 @@ void test(int argc, char *argv[]) {
fuse, false, 1, true)) { fuse, false, 1, true)) {
auto time2 = time(); auto time2 = time();
std::cout << "auto-test" std::cout << "auto-test"
<< " load-time-cost :" << time_diff(time1, time1) << "ms" << " load-time-cost :" << time_diff(time1, time2) << "ms"
<< std::endl; << std::endl;
std::vector<float> input_data; float input_data_array[size];
std::ifstream in("input.txt", std::ios::in); std::ifstream in("input.txt", std::ios::in);
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
float num; float num;
in >> num; in >> num;
input_data.push_back(num); input_data_array[i] = num;
} }
in.close(); in.close();
paddle_mobile::framework::LoDTensor input_tensor; auto time3 = time();
// std::vector<float> input_data;
// for (int i = 0; i < size; i++) {
// float num = input_data_array[i];
// input_data.push_back(num);
// }
// paddle_mobile::framework::Tensor input_tensor(input_data,
// paddle_mobile::framework::make_ddim(dims));
paddle_mobile::framework::Tensor input_tensor(
input_data_array, paddle_mobile::framework::make_ddim(dims));
auto time4 = time();
std::cout << "auto-test"
<< " preprocess-time-cost :" << time_diff(time3, time4) << "ms"
<< std::endl;
paddle_mobile::framework::LoDTensor input_lod_tensor;
if (is_lod) { if (is_lod) {
input_tensor.Resize(paddle_mobile::framework::make_ddim(dims)); input_lod_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
input_tensor.set_lod(lod); input_lod_tensor.set_lod(lod);
auto *tensor_data = input_tensor.mutable_data<float>(); auto *tensor_data = input_lod_tensor.mutable_data<float>();
for (int i = 0; i < size; i++) { for (int i = 0; i < size; i++) {
tensor_data[i] = input_data[i]; tensor_data[i] = input_data_array[i];
} }
} }
// 预热10次 // 预热10次
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
} }
// 测速 // 测速
auto time3 = time(); auto time5 = time();
for (int i = 0; i < 50; i++) { for (int i = 0; i < 50; i++) {
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
} }
auto time4 = time(); auto time6 = time();
std::cout << "auto-test" std::cout << "auto-test"
<< " predict-time-cost " << time_diff(time3, time4) / 50 << "ms" << " predict-time-cost " << time_diff(time5, time6) / 50 << "ms"
<< std::endl; << std::endl;
// 测试正确性 // 测试正确性
if (is_lod) { if (is_lod) {
auto out = paddle_mobile.Predict(input_tensor); auto out = paddle_mobile.Predict(input_lod_tensor);
} else { } else {
auto out = paddle_mobile.Predict(input_data, dims); paddle_mobile.Feed(var_names[0], input_tensor);
paddle_mobile.Predict();
} }
for (auto var_name : var_names) { for (auto var_name : var_names) {
auto out = paddle_mobile.Fetch(var_name); auto out = paddle_mobile.Fetch(var_name);
......
...@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt): ...@@ -279,6 +279,8 @@ def check_mobile_results(args, fuse, mem_opt):
pp_green("load time cost : {}".format(parts[2]), 1) pp_green("load time cost : {}".format(parts[2]), 1)
elif parts[1] == "predict-time-cost": elif parts[1] == "predict-time-cost":
pp_green("predict time cost : {}".format(parts[2]), 1) pp_green("predict time cost : {}".format(parts[2]), 1)
elif parts[1] == "preprocess-time-cost":
pp_green("preprocess time cost : {}".format(parts[2]), 1)
elif parts[1] == "var": elif parts[1] == "var":
var_name = parts[2] var_name = parts[2]
values = list(map(lambda x: float(x), parts[3:])) values = list(map(lambda x: float(x), parts[3:]))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册