1.add last type sequence pool. 2.enable lod for auto debug tools. (#1700)

e5f08787 · Yanzhan Yang · GitHub · 21c7a2ba · e5f08787 · e5f08787
6 changed files
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -109,6 +109,7 @@ enum PoolingType {
  AVG = 1,
  SUM = 2,
  FIRST = 3,
+  LAST = 4,
 };

 enum PowerMode {

--- a/src/operators/kernel/arm/sequence_pool_kernel.cpp
+++ b/src/operators/kernel/arm/sequence_pool_kernel.cpp
@@ -163,6 +163,22 @@ void SequencePoolImpl<FIRST, float>(const framework::LoDTensor &input,
  }
 }

+// LAST pooling: for each sequence delimited by the level-0 LoD of `input`,
+// copies that sequence's final row into row `i` of `output`.
+//
+// input  - source tensor; lod()[0] is used as a list of row offsets, where
+//          sequence i spans rows [lod[i], lod[i + 1]).
+// output - destination tensor; row i receives `width` floats. Assumes the
+//          caller has already sized it to one row per sequence -- TODO confirm.
+template <>
+void SequencePoolImpl<LAST, float>(const framework::LoDTensor &input,
+                                   framework::LoDTensor *output) {
+  const float *input_ptr = input.data<float>();
+  float *output_ptr = output->mutable_data<float>();
+  const auto &lod = input.lod()[0];
+  // Number of floats per row: total element count divided by the row count.
+  int64_t width = input.numel() / input.dims()[0];
+
+  for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
+    // lod[i + 1] is the absolute row offset one past the end of sequence i.
+    // Offsetting by the sequence length (lod[i + 1] - lod[i]) instead would
+    // only be correct for a sequence that starts at row 0.
+    const int64_t seq_end = static_cast<int64_t>(lod[i + 1]);
+    const float *in_ptr = input_ptr + seq_end * width;
+    float *out_ptr = output_ptr + i * width;
+    // The last row of the sequence begins `width` floats before its end.
+    // NOTE(review): an empty sequence (lod[i + 1] == lod[i]) would copy the
+    // row preceding it; presumably callers never pass one -- confirm.
+    memcpy(out_ptr, in_ptr - width, width * sizeof(float));
+  }
+}
+
 template <typename T>
 class SequencePoolKernel<CPU, T>
    : public framework::OpKernelBase<CPU, SequencePoolParam<CPU>> {
@@ -179,6 +195,8 @@ class SequencePoolKernel<CPU, T>
      SequencePoolImpl<MAX, T>(*input, output);
    } else if (param.pool_type_ == "FIRST") {
      SequencePoolImpl<FIRST, T>(*input, output);
+    } else if (param.pool_type_ == "LAST") {
+      SequencePoolImpl<LAST, T>(*input, output);
    } else if (param.pool_type_ == "SUM") {
      SequencePoolImpl<SUM, T>(*input, output);
    } else {

--- a/src/pass/memory_optimize.cpp
+++ b/src/pass/memory_optimize.cpp
@@ -60,6 +60,7 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
    std::stack<VarNode *> empty_var_nodes;
    analysis_nodes_.swap(empty_var_nodes);

+    std::vector<VarNode *> fetch_var_nodes;
    for (const auto &op : block->Ops()) {
      DLOG << "op_desc->Type(): " << op->Type();
      for (const auto &outputs : op->GetOutputs()) {
@@ -77,6 +78,9 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
            DLOG << "input: " << input;
            VarNode *node = CreateNode(input);
            analysis_nodes_.push(node);
+            if (op->Type() == "fetch") {
+              fetch_var_nodes.push_back(node);
+            }
          }
        }
      }
@@ -91,6 +95,10 @@ void MemoryOptPass::operator()(const framework::ProgramDesc *program,
      }
    }

+    for (const auto &node : fetch_var_nodes) {
+      analysis_nodes_.push(node);
+    }
+
    // apply optimize
    while (!analysis_nodes_.empty()) {
      auto *node = analysis_nodes_.top();

--- a/test/net/test_net.cpp
+++ b/test/net/test_net.cpp
@@ -46,6 +46,19 @@ void test(int argc, char *argv[]) {
  }
  arg_index += dim_count;

+  bool is_lod = std::stoi(argv[arg_index]) == 1;
+  arg_index++;
+  paddle_mobile::framework::LoD lod{{}};
+  if (is_lod) {
+    int lod_count = std::stoi(argv[arg_index]);
+    arg_index++;
+    for (int i = 0; i < lod_count; i++) {
+      int dim = std::stoi(argv[arg_index + i]);
+      lod[0].push_back(dim);
+    }
+    arg_index += lod_count;
+  }
+
  int var_count = std::stoi(argv[arg_index]);
  arg_index++;
  int sample_step = std::stoi(argv[arg_index]);
@@ -74,23 +87,45 @@ void test(int argc, char *argv[]) {
    }
    in.close();

+    paddle_mobile::framework::LoDTensor input_tensor;
+    if (is_lod) {
+      input_tensor.Resize(paddle_mobile::framework::make_ddim(dims));
+      input_tensor.set_lod(lod);
+      auto *tensor_data = input_tensor.mutable_data<float>();
+      for (int i = 0; i < size; i++) {
+        tensor_data[i] = input_data[i];
+      }
+    }
+
    // 预热10次
    for (int i = 0; i < 10; i++) {
+      if (is_lod) {
+        auto out = paddle_mobile.Predict(input_tensor);
+      } else {
        auto out = paddle_mobile.Predict(input_data, dims);
      }
+    }

    // 测速
    auto time3 = time();
    for (int i = 0; i < 50; i++) {
+      if (is_lod) {
+        auto out = paddle_mobile.Predict(input_tensor);
+      } else {
        auto out = paddle_mobile.Predict(input_data, dims);
      }
+    }
    auto time4 = time();
    std::cout << "auto-test"
              << " predict-time-cost " << time_diff(time3, time4) / 50 << "ms"
              << std::endl;

    // 测试正确性
+    if (is_lod) {
+      auto out = paddle_mobile.Predict(input_tensor);
+    } else {
      auto out = paddle_mobile.Predict(input_data, dims);
+    }
    for (auto var_name : var_names) {
      auto out = paddle_mobile.Fetch(var_name);
      auto len = out->numel();

--- a/test/test_include.h
+++ b/test/test_include.h
@@ -22,6 +22,7 @@ limitations under the License. */
 #include "common/enforce.h"
 #include "common/log.h"
 #include "executor_for_test.h"
+#include "framework/ddim.h"
 #include "framework/lod_tensor.h"
 #include "framework/operator.h"
 #include "framework/program/block_desc.h"

--- a/tools/python/fluidtools/run.py
+++ b/tools/python/fluidtools/run.py
@@ -11,6 +11,7 @@ checked_model_path = "checked_model"
 feed_path = "feeds"
 output_path = "outputs"
 diff_threshold = 0.01
+is_lod = True

 np.set_printoptions(linewidth=150)

@@ -59,7 +60,7 @@ def load_model(model_path):
 prog, feeds, fetches = load_model(model_path)

 # 强制要求所有张量的形状，在model和params中一致，并重新保存模型
-def resave_model():
+def resave_model(feed_kv):
    ops = prog.current_block().ops
    vars = prog.current_block().vars
    # 强制所有var为可持久化
@@ -70,7 +71,7 @@ def resave_model():
        if not v.persistable:
            v.persistable = True
            p_names.append(name)
-    outputs = run_model()
+    outputs = run_model(feed_kv=feed_kv)
    has_found_wrong_shape = False
    # 修正每个var的形状
    for name in vars:
@@ -121,12 +122,14 @@ def save_feed_kv(feed_kv):

 last_feed_var_name = None
 last_feed_file_name = None
+last_feed_var_lod = None
 # 加载feed的key-value对
 def load_feed_kv():
    if not os.path.exists(feed_path):
        return None
    global last_feed_var_name
    global last_feed_file_name
+    global last_feed_var_lod
    feed_kv = {}
    pp_yellow(dot + dot + " checking feed info")
    pp_green("feed data is saved into directory 【{}】".format(feed_path), 1)
@@ -146,6 +149,22 @@ def load_feed_kv():
        if len(data) != expected_len:
            return None
        data = data.reshape(feed_shape).astype("float32")
+        
+        if is_lod:
+            data = data.reshape((1, *feed_shape)).astype("float32")
+            tensor = fluid.LoDTensor()
+            seq_lens = [len(seq) for seq in data]
+            cur_len = 0
+            lod = [cur_len]
+            for l in seq_lens:
+                cur_len += 1
+                lod.append(cur_len)
+            data = data.reshape(feed_shape)
+            tensor.set(data, fluid.CPUPlace())
+            tensor.set_lod([lod])
+            last_feed_var_lod = lod
+            feed_kv[feed_name] = tensor
+        else:
            feed_kv[feed_name] = data
    return feed_kv

@@ -204,6 +223,8 @@ def save_all_op_output(feed_kv=None):
            var_name = name
            if "tmp" in name:
                break
+        if "sequence_pool" in name:
+            continue
        try:
            data = get_var_data(var_name, feed_kv=feed_kv).flatten().tolist()
            sample = tensor_sample(data)
@@ -311,7 +332,7 @@ def main():
    pp_tab("fluid output : {}".format(outputs), 1)
    # 重新保存模型
    pp_yellow(dot + dot + " checking model correctness")
-    resave_model()
+    resave_model(feed_kv=feed_kv)
    # 输出所有中间结果
    pp_yellow(dot + dot + " checking output result of every op")
    save_all_op_output(feed_kv=feed_kv)
@@ -328,6 +349,13 @@ def main():
    args = str(len(last_feed_var_shape))
    for dim in last_feed_var_shape:
        args += " " + str(dim)
+    if is_lod:
+        args += " 1"
+        args += " " + str(len(last_feed_var_lod))
+        for dim in last_feed_var_lod:
+            args += " " + str(dim)
+    else:
+        args += " 0"
    args += " " + str(len(output_var_cache))
    args += " " + str(sample_step)
    for var_name in output_var_cache.keys():