From 196267f8c83712477bebaa729a31faf831b5fa78 Mon Sep 17 00:00:00 2001
From: zhangyang <zhangyang49@baidu.com>
Date: Sat, 15 Sep 2018 14:56:58 +0800
Subject: [PATCH] implement predict_from_to for FPGA track

---
 src/io/executor.cpp                       | 63 +++++++++++++++++++++++
 src/io/executor.h                         |  9 ++++
 src/io/paddle_mobile.cpp                  | 27 ++++++++++
 src/io/paddle_mobile.h                    |  9 ++++
 src/operators/kernel/fpga/pool_kernel.cpp |  2 +-
 test/net/test_resnet.cpp                  | 15 +++++-
 6 files changed, 122 insertions(+), 3 deletions(-)
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 562ba92adb..7543a9f7a4 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -93,6 +93,7 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
       depManager[i].analysisDep(ops_of_block_[*block_desc.get()]);
 #endif
     }
+    DLOG << "Total " << ops.size() << " ops have been created ";
   }
   if (program_.combined) {
     InitCombineMemory();
@@ -643,6 +644,68 @@ std::vector<typename Executor<Dtype, P>::Ptype> Executor<Dtype, P>::Predict(
   return result_vector;
 }
 
+#ifdef PADDLE_MOBILE_FPGA
+// Resizes the scope variable "feed" to t.dims() and calls ShareDataWith(t).
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
+  framework::Tensor *feed_tensor =
+      program_.scope->Var("feed")->GetMutable<framework::LoDTensor>();
+  feed_tensor->Resize(t.dims());
+  feed_tensor->ShareDataWith(t);
+}
+
+// Returns a Tensor copy of the variable under the last op's first output key.
+template <typename Dtype, Precision P>
+std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult() {
+  auto &ops = ops_of_block_[*to_predict_program_->Block(0)];
+  PADDLE_MOBILE_ENFORCE(!ops.empty(), "no op to fetch a result from");
+  auto &last_op = ops.back();  // ops is expected to be non-empty here
+  auto output_map = last_op->Outputs();
+  std::vector<std::string> out_keys = last_op->GetOutKeys();
+  PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output");
+  auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
+      out_keys[0], output_map, *(program_.scope));
+  return std::make_shared<framework::Tensor>(*output_tensor);
+}
+
+// Runs ops [start, end) of block 0 in order; end < 0 runs through the last op.
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_From_To(int start, int end) {
+  auto &ops = ops_of_block_[*to_predict_program_->Block(0)];
+  // Do the range check in int: comparing int `end` with size_t mixes signs.
+  const int op_count = static_cast<int>(ops.size());
+  end = end < 0 ? op_count : end;
+  PADDLE_MOBILE_ENFORCE(start >= 0 && start < end && end <= op_count,
+                        "start or end parameter is wrong");
+#ifdef PADDLE_MOBILE_PROFILE
+  // NOTE(review): timings are recorded but never reported in this function.
+  std::vector<ProfInfo> profile(ops.size());
+#endif
+  for (int i = start; i < end; i++) {
+#ifdef PADDLE_MOBILE_PROFILE
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[i].runBegin = static_cast<uint64_t>(ts.tv_sec) * 1e9 + ts.tv_nsec;
+#endif
+    ops[i]->Run();
+#ifdef PADDLE_MOBILE_PROFILE
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    profile[i].runEnd = static_cast<uint64_t>(ts.tv_sec) * 1e9 + ts.tv_nsec;
+#endif
+  }
+}
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_From(int start) {
+  Predict_From_To(start);  // end takes its default of -1: run to the last op
+}
+
+template <typename Dtype, Precision P>
+void Executor<Dtype, P>::Predict_To(int end) {
+  Predict_From_To(0, end);  // start at the first op, stop before op `end`
+}
+#endif
+
 template class Executor<CPU, Precision::FP32>;
 template class Executor<GPU_MALI, Precision::FP32>;
 template class Executor<FPGA, Precision::FP32>;
diff --git a/src/io/executor.h b/src/io/executor.h
index 6074942c18..f1f3f9da7c 100644
--- a/src/io/executor.h
+++ b/src/io/executor.h
@@ -92,6 +92,15 @@ class Executor {
   bool varInputMemory(const std::shared_ptr<framework::VarDesc> &var_desc,
                       framework::Variable *var,
                       framework::LoDTensor *tensor) const;
+
+#ifdef PADDLE_MOBILE_FPGA
+ public:
+  void FeedData(const framework::Tensor &t);  // bind t to the "feed" variable
+  std::shared_ptr<framework::Tensor> FetchResult();  // last op's first output
+  void Predict_From_To(int start = 0, int end = -1);  // run ops [start, end)
+  void Predict_From(int start);  // run ops from `start` through the last op
+  void Predict_To(int end);      // run ops [0, end)
+#endif
 };
 
 }  // namespace paddle_mobile
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index a69af82427..9056bac206 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -124,6 +124,33 @@ PaddleMobile<Dtype, P>::~PaddleMobile() {
   loader_ = nullptr;
 }
 
+#ifdef PADDLE_MOBILE_FPGA
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
+  executor_->FeedData(t);  // plain forward; a void call needs no `return`
+}
+
+template <typename Dtype, Precision P>
+std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult() {
+  return executor_->FetchResult();  // forwarded unchanged from the executor
+}
+
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::Predict_From_To(int start, int end) {
+  executor_->Predict_From_To(start, end);  // forwards to the executor
+}
+
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::Predict_From(int start) {
+  executor_->Predict_From(start);  // forwards to the executor
+}
+
+template <typename Dtype, Precision P>
+void PaddleMobile<Dtype, P>::Predict_To(int end) {
+  executor_->Predict_To(end);  // forwards to the executor
+}
+#endif
+
 template class PaddleMobile<CPU, Precision::FP32>;
 
 template class PaddleMobile<FPGA, Precision::FP32>;
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 2ea8614cc7..66d70b87f2 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -92,6 +92,15 @@ class PaddleMobile {
  private:
   std::shared_ptr<Loader<Dtype, P>> loader_;
   std::shared_ptr<Executor<Dtype, P>> executor_;
+
+#ifdef PADDLE_MOBILE_FPGA
+ public:  // FPGA-only entry points; each one forwards to executor_
+  void FeedData(const framework::Tensor &t);
+  std::shared_ptr<framework::Tensor> FetchResult();
+  void Predict_From_To(int start = 0, int end = -1);  // run ops [start, end)
+  void Predict_From(int start);  // run ops from `start` through the last op
+  void Predict_To(int end);      // run ops [0, end)
+#endif
 };
 
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/pool_kernel.cpp b/src/operators/kernel/fpga/pool_kernel.cpp
index 39c3977ce1..96599f3059 100644
--- a/src/operators/kernel/fpga/pool_kernel.cpp
+++ b/src/operators/kernel/fpga/pool_kernel.cpp
@@ -39,7 +39,7 @@ bool PoolKernel<FPGA, float>::Init(PoolParam<FPGA> *param) {
   poolArgs.image.pad_width = (uint32_t)paddings[1];
   poolArgs.image.scale_address = input->scale;
   poolArgs.output.address = output_ptr;
-  poolArgs.output.scale_address = input->scale;
+  poolArgs.output.scale_address = output->scale;
   poolArgs.kernel.height = (uint32_t)ksize[0];
   poolArgs.kernel.width = (uint32_t)ksize[1];
   poolArgs.kernel.stride_h = (uint32_t)strides[0];
diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp
index f890646466..dfc40f86c4 100644
--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -36,7 +36,8 @@ int main() {
 
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
-    // 预热十次
+#ifndef PADDLE_MOBILE_FPGA
+    // Warm up ten times
     for (int i = 0; i < 10; ++i) {
       paddle_mobile.Predict(input, dims);
     }
@@ -47,7 +48,17 @@ int main() {
     auto time4 = time();
     std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
               << std::endl;
-  }
 
+#else
+    auto time3 = time();
+    paddle_mobile.FeedData(input_tensor);
+    paddle_mobile.Predict_To(10);
+    paddle_mobile.Predict_From(10);
+    paddle_mobile.FetchResult();
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
+#endif
+  }
   return 0;
 }
-- 
GitLab