Add parameter for FetchResult() for FPGA track

95f8b158 · zhangyang · 041a31a2 · 95f8b158 · 95f8b158 · 95f8b158
7 changed files
--- a/src/fpga/api.h
+++ b/src/fpga/api.h
@@ -20,8 +20,6 @@ limitations under the License. */
 #include <limits>
 #include "framework/tensor.h"
-// memory management;
 namespace paddle_mobile {
 namespace fpga {
@@ -45,9 +43,6 @@ struct MemoryCopyArgs {
  size_t size;
 };
-/**
-Conv and Pooling kernel
-*/
 struct KernelArgs {
  uint32_t width;
  uint32_t height;
@@ -109,7 +104,6 @@ struct PoolingArgs {
  struct ImageOutputArgs output;
 };
-// elementwise add arguments
 struct EWAddArgs {
  bool relu_enabled;

--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -289,12 +289,8 @@ class Tensor {
    virtual std::type_index type() const { return type_; }
    virtual void set_type(std::type_index type) { type_ = type; }
-#ifndef PADDLE_MOBILE_FPGA
-    /*! the pointer of memory block. */
    std::unique_ptr<uint8_t, memory::PODDeleter<uint8_t>> ptr_;
-#else
-    std::shared_ptr<uint8_t> ptr_;
-#endif
    /*! the size of memory block. */
    size_t size_;

--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -662,13 +662,15 @@ void Executor<Dtype, P>::FeedData(const framework::Tensor &t) {
 };
 template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult() {
+std::shared_ptr<framework::Tensor> Executor<Dtype, P>::FetchResult(int id) {
  std::shared_ptr<framework::BlockDesc> to_predict_block =
      to_predict_program_->Block(0);
  auto &ops = ops_of_block_[*to_predict_block.get()];
-  auto last_op = ops.rbegin();
-  auto output_map = (*last_op)->Outputs();
+  // Fetch the output of op `id`; any negative id (the default is -1) selects
+  // the last op of block 0.
+  // The bound check must be done in signed arithmetic: `id < ops.size()`
+  // converts -1 to a huge unsigned value and would reject the default id.
+  PADDLE_MOBILE_ENFORCE(!ops.empty(), "No op to fetch result from");
+  PADDLE_MOBILE_ENFORCE(id < static_cast<int>(ops.size()),
+                        "Index out of range");
-  std::vector<std::string> out_keys = (*last_op)->GetOutKeys();
+  auto last_op = id < 0 ? ops[ops.size() - 1] : ops[id];
+  auto output_map = last_op->Outputs();
+  std::vector<std::string> out_keys = last_op->GetOutKeys();
  PADDLE_MOBILE_ENFORCE(!out_keys.empty(), "the last op contains no output");
  auto *output_tensor = framework::GetVarValue<framework::LoDTensor>(
      out_keys[0], output_map, *(program_.scope));

--- a/src/io/executor.h
+++ b/src/io/executor.h
@@ -99,7 +99,7 @@ class Executor {
 public:
  void InjectVariable(const framework::Tensor &t, string var_name);
  void FeedData(const framework::Tensor &t);
-  std::shared_ptr<framework::Tensor> FetchResult();
+  std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
  void Predict_From_To(int start = 0, int end = -1);
  void Predict_From(int start);
  void Predict_To(int end);

--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -138,8 +138,8 @@ void PaddleMobile<Dtype, P>::FeedData(const framework::Tensor &t) {
 };
 template <typename Dtype, Precision P>
-std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult() {
+std::shared_ptr<framework::Tensor> PaddleMobile<Dtype, P>::FetchResult(int id) {
-  return executor_->FetchResult();
+  return executor_->FetchResult(id);
 };
 template <typename Dtype, Precision P>

--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -97,7 +97,7 @@ class PaddleMobile {
 public:
  void InjectVariable(const framework::Tensor &t, string var_name);
  void FeedData(const framework::Tensor &t);
-  std::shared_ptr<framework::Tensor> FetchResult();
+  std::shared_ptr<framework::Tensor> FetchResult(int id = -1);
  void Predict_From_To(int start = 0, int end = -1);
  void Predict_From(int start);
  void Predict_To(int end);

--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -54,7 +54,13 @@ int main() {
    paddle_mobile.FeedData(input_tensor);
    paddle_mobile.Predict_To(10);
    paddle_mobile.Predict_From(10);
-    paddle_mobile.FetchResult();
+    auto tensor_ptr = paddle_mobile.FetchResult(9);
+    std::cout << "Tensor element number for op[9]: " << tensor_ptr->numel()
+              << std::endl;
+    auto result_ptr = paddle_mobile.FetchResult();
+    std::cout << "Result tensor element number: " << result_ptr->numel()
+              << std::endl;
    auto time4 = time();
    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
              << std::endl;