From e5f08787b3bda375bb14dd0f4618a420d0ea3781 Mon Sep 17 00:00:00 2001
From: Yanzhan Yang
Date: Mon, 24 Jun 2019 18:02:34 +0800
Subject: [PATCH] 1. add LAST-type sequence pool. 2. enable lod for auto debug
 tools. (#1700)

---
 src/common/types.h                        |  1 +
 .../kernel/arm/sequence_pool_kernel.cpp   | 18 ++++++++
 src/pass/memory_optimize.cpp              |  8 ++++
 test/net/test_net.cpp                     | 41 +++++++++++++++++--
 test/test_include.h                       |  1 +
 tools/python/fluidtools/run.py            | 36 ++++++++++++++--
 6 files changed, 98 insertions(+), 7 deletions(-)

diff --git a/src/common/types.h b/src/common/types.h
index 8e793e959b..4d9f9b3e31 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -109,6 +109,7 @@ enum PoolingType {
   AVG = 1,
   SUM = 2,
   FIRST = 3,
+  LAST = 4,
 };
 
 enum PowerMode {
diff --git a/src/operators/kernel/arm/sequence_pool_kernel.cpp b/src/operators/kernel/arm/sequence_pool_kernel.cpp
index 2be2accf58..db1939d4d0 100644
--- a/src/operators/kernel/arm/sequence_pool_kernel.cpp
+++ b/src/operators/kernel/arm/sequence_pool_kernel.cpp
@@ -163,6 +163,22 @@ void SequencePoolImpl<FIRST, float>(const framework::LoDTensor &input,
   }
 }
 
+template <>
+void SequencePoolImpl<LAST, float>(const framework::LoDTensor &input,
+                                   framework::LoDTensor *output) {
+  const float *input_ptr = input.data<float>();
+  float *output_ptr = output->mutable_data<float>();
+  const auto &lod = input.lod()[0];
+  int64_t width = input.numel() / input.dims()[0];
+
+  for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
+    int64_t seq_len = static_cast<int64_t>(lod[i + 1] - lod[i]);
+    const float *in_ptr = input_ptr + (lod[i] + seq_len) * width;
+    float *out_ptr = output_ptr + i * width;
+    memcpy(out_ptr, in_ptr - width, width * sizeof(float));
+  }
+}
+
 template <typename T>
 class SequencePoolKernel<CPU, T>
     : public framework::OpKernelBase<CPU, SequencePoolParam<CPU>> {
@@ -179,6 +195,8 @@ class SequencePoolKernel<CPU, T>
     SequencePoolImpl<MAX, T>(*input, output);
   } else if (param.pool_type_ == "FIRST") {
     SequencePoolImpl<FIRST, T>(*input, output);
+  } else if (param.pool_type_ == "LAST") {
+    SequencePoolImpl<LAST, T>(*input, output);
   } else if (param.pool_type_ == "SUM") {
     SequencePoolImpl<SUM, T>(*input, output);
   } else {
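A minimal standalone sketch of what the new LAST specialization computes, using plain arrays instead of framework::LoDTensor: for each sequence i (rows lod[i] .. lod[i+1]-1 of a row-major [rows x width] input), output row i is a copy of the sequence's final row. The lod offsets and values below are invented for illustration.

```cpp
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  const int width = 2;
  // lod offsets: two sequences, rows [0, 3) and [3, 5)
  const std::vector<size_t> lod = {0, 3, 5};
  const float input[5 * 2] = {1, 1, 2, 2, 3, 3, 4, 4, 5, 5};
  float output[2 * 2] = {};

  for (size_t i = 0; i + 1 < lod.size(); ++i) {
    // last row of sequence i starts at (lod[i + 1] - 1) * width
    const float *last_row = input + (lod[i + 1] - 1) * width;
    std::memcpy(output + i * width, last_row, width * sizeof(float));
  }
  for (float v : output) std::cout << v << ' ';  // prints: 3 3 5 5
  std::cout << '\n';
  return 0;
}
```

With lod offsets {0, 3, 5}, output row 0 is input row 2 and output row 1 is input row 4, which is the same indexing the kernel above performs with memcpy on LoDTensor storage.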
diff --git a/src/pass/memory_optimize.cpp b/src/pass/memory_optimize.cpp
index cc754491fa..96a610151c 100644
--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
@@ -60,6 +60,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
   std::stack<VarNode *> empty_var_nodes;
   analysis_nodes_.swap(empty_var_nodes);
 
+  std::vector<VarNode *> fetch_var_nodes;
   for (const auto &op : block->Ops()) {
     DLOG << "op_desc->Type(): " << op->Type();
     for (const auto &outputs : op->GetOutputs()) {
@@ -77,6 +78,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
         DLOG << "input: " << input;
         VarNode *node = CreateNode(input);
         analysis_nodes_.push(node);
+        if (op->Type() == "fetch") {
+          fetch_var_nodes.push_back(node);
+        }
       }
     }
   }
@@ -91,6 +95,10 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
     }
   }
 
+  for (const auto &node : fetch_var_nodes) {
+    analysis_nodes_.push(node);
+  }
+
   // apply optimize
   while (!analysis_nodes_.empty()) {
    auto *node = analysis_nodes_.top();
diff --git a/test/net/test_net.cpp b/test/net/test_net.cpp
index 24a5002111..be7c21db7b 100644
--- a/test/net/test_net.cpp
+++ b/test/net/test_net.cpp
@@ -46,6 +46,19 @@ void test(int argc, char *argv[]) {
   }
   arg_index += dim_count;
 
+  bool is_lod = std::stoi(argv[arg_index]) == 1;
+  arg_index++;
+  paddle_mobile::framework::LoD lod{{}};
+  if (is_lod) {
+    int lod_count = std::stoi(argv[arg_index]);
+    arg_index++;
+    for (int i = 0; i < lod_count; i++) {
+      int dim = std::stoi(argv[arg_index + i]);
+      lod[0].push_back(dim);
+    }
+    arg_index += lod_count;
+  }
+
   int var_count = std::stoi(argv[arg_index]);
   arg_index++;
   int sample_step = std::stoi(argv[arg_index]);
@@ -74,15 +87,33 @@ void test(int argc, char *argv[]) {
   }
   in.close();
 
+  paddle_mobile::framework::LoDTensor input_tensor;
+  if (is_lod) {
+    input_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
+    input_tensor.set_lod(lod);
+    auto *tensor_data = input_tensor.mutable_data<float>();
+    for (int i = 0; i < size; i++) {
+      tensor_data[i] = input_data[i];
+    }
+  }
+
   // warm up 10 times
   for (int i = 0; i < 10; i++) {
-    auto out = paddle_mobile.Predict(input_data, dims);
+    if (is_lod) {
+      auto out = paddle_mobile.Predict(input_tensor);
+    } else {
+      auto out = paddle_mobile.Predict(input_data, dims);
+    }
   }
 
   // measure speed
   auto time3 = time();
   for (int i = 0; i < 50; i++) {
-    auto out = paddle_mobile.Predict(input_data, dims);
+    if (is_lod) {
+      auto out = paddle_mobile.Predict(input_tensor);
+    } else {
+      auto out = paddle_mobile.Predict(input_data, dims);
+    }
   }
   auto time4 = time();
   std::cout << "auto-test"
@@ -90,7 +121,11 @@ void test(int argc, char *argv[]) {
             << std::endl;
 
   // check correctness
-  auto out = paddle_mobile.Predict(input_data, dims);
+  if (is_lod) {
+    auto out = paddle_mobile.Predict(input_tensor);
+  } else {
+    auto out = paddle_mobile.Predict(input_data, dims);
+  }
   for (auto var_name : var_names) {
     auto out = paddle_mobile.Fetch(var_name);
     auto len = out->numel();
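The intent of the memory_optimize.cpp change: analysis_nodes_ drives the buffer-reuse decision, and a variable whose last recorded use is some intermediate op can have its storage recycled afterwards, even though the caller still reads it back through Fetch. Re-pushing the inputs of fetch ops after the op scan moves their last use to the end of the program. A toy model of that idea, under the simplifying assumption that "last use" is just the latest position in the pop order (names invented; the real pass works on VarNode graphs, not strings):

```cpp
#include <iostream>
#include <stack>
#include <string>

int main() {
  std::stack<std::string> analysis_nodes;
  // one push per use, in op-scan order
  for (const auto &use : {"conv_out", "softmax_out", "softmax_out"}) {
    analysis_nodes.push(use);
  }
  // fetched var: extend its last use to the end of the program,
  // so its buffer is never handed to another tensor
  analysis_nodes.push("softmax_out");
  while (!analysis_nodes.empty()) {
    std::cout << "last-use candidate: " << analysis_nodes.top() << "\n";
    analysis_nodes.pop();
  }
  return 0;
}
```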
diff --git a/test/test_include.h b/test/test_include.h
index 4728a46933..7f45d6e2c4 100644
--- a/test/test_include.h
+++ b/test/test_include.h
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "common/enforce.h"
 #include "common/log.h"
 #include "executor_for_test.h"
+#include "framework/ddim.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
 #include "framework/program/block_desc.h"
diff --git a/tools/python/fluidtools/run.py b/tools/python/fluidtools/run.py
index 8eb67b3c38..e7ac827b99 100644
--- a/tools/python/fluidtools/run.py
+++ b/tools/python/fluidtools/run.py
@@ -11,6 +11,7 @@ checked_model_path = "checked_model"
 feed_path = "feeds"
 output_path = "outputs"
 diff_threshold = 0.01
+is_lod = True
 
 np.set_printoptions(linewidth=150)
 
@@ -59,7 +60,7 @@ def load_model(model_path):
 prog, feeds, fetches = load_model(model_path)
 
 # force every tensor shape to be consistent between model and params, then re-save the model
-def resave_model():
+def resave_model(feed_kv):
     ops = prog.current_block().ops
     vars = prog.current_block().vars
     # force all vars to be persistable
@@ -70,7 +71,7 @@ def resave_model():
         if not v.persistable:
             v.persistable = True
             p_names.append(name)
-    outputs = run_model()
+    outputs = run_model(feed_kv=feed_kv)
     has_found_wrong_shape = False
     # fix the shape of every var
     for name in vars:
@@ -121,12 +122,14 @@ def save_feed_kv(feed_kv):
 
 last_feed_var_name = None
 last_feed_file_name = None
+last_feed_var_lod = None
 # load the feed key-value pairs
 def load_feed_kv():
     if not os.path.exists(feed_path):
         return None
     global last_feed_var_name
     global last_feed_file_name
+    global last_feed_var_lod
     feed_kv = {}
     pp_yellow(dot + dot + " checking feed info")
     pp_green("feed data is saved into directory 【{}】".format(feed_path), 1)
@@ -146,7 +149,23 @@ def load_feed_kv():
         if len(data) != expected_len:
             return None
         data = data.reshape(feed_shape).astype("float32")
-        feed_kv[feed_name] = data
+
+        if is_lod:
+            data = data.reshape((1, *feed_shape)).astype("float32")
+            tensor = fluid.LoDTensor()
+            seq_lens = [len(seq) for seq in data]
+            cur_len = 0
+            lod = [cur_len]
+            for l in seq_lens:
+                cur_len += l
+                lod.append(cur_len)
+            data = data.reshape(feed_shape)
+            tensor.set(data, fluid.CPUPlace())
+            tensor.set_lod([lod])
+            last_feed_var_lod = lod
+            feed_kv[feed_name] = tensor
+        else:
+            feed_kv[feed_name] = data
     return feed_kv
 
 # run the model
@@ -204,6 +223,8 @@ def save_all_op_output(feed_kv=None):
             var_name = name
             if "tmp" in name:
                 break
+        if "sequence_pool" in name:
+            continue
         try:
             data = get_var_data(var_name, feed_kv=feed_kv).flatten().tolist()
             sample = tensor_sample(data)
@@ -311,7 +332,7 @@ def main():
     pp_tab("fluid output : {}".format(outputs), 1)
     # re-save the model
     pp_yellow(dot + dot + " checking model correctness")
-    resave_model()
+    resave_model(feed_kv=feed_kv)
     # dump the output of every op
     pp_yellow(dot + dot + " checking output result of every op")
     save_all_op_output(feed_kv=feed_kv)
@@ -328,6 +349,13 @@ def main():
     args = str(len(last_feed_var_shape))
     for dim in last_feed_var_shape:
         args += " " + str(dim)
+    if is_lod:
+        args += " 1"
+        args += " " + str(len(last_feed_var_lod))
+        for dim in last_feed_var_lod:
+            args += " " + str(dim)
+    else:
+        args += " 0"
     args += " " + str(len(output_var_cache))
     args += " " + str(sample_step)
     for var_name in output_var_cache.keys():
-- 
GitLab
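Taken together, run.py now emits the lod flag and offsets into the auto-test argument string, and test_net.cpp parses them back in the same order. A hypothetical argument vector in the layout the updated binary expects; the shape, lod offsets, and var names below are invented for illustration:

```cpp
// Hypothetical argv for the updated test binary (argv[0] omitted).
// Layout: dim_count dims... is_lod [lod_count lod...] var_count sample_step
// followed by the fetched var names.
const char *kArgs[] = {
    "2", "3", "4",       // dim_count = 2, dims = {3, 4}
    "1",                 // is_lod = 1, so a lod section follows
    "3", "0", "1", "3",  // lod_count = 3, level-0 lod offsets = {0, 1, 3}
    "2",                 // var_count = 2
    "100",               // sample_step = 100
    "var_a", "var_b",    // fetch var names (hypothetical)
};
```

When is_lod is 0, the lod_count/lod fields are omitted entirely, which is exactly what the else branch in run.py produces and what the `if (is_lod)` guard in test_net.cpp skips.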