From 3cb6c0a0599730e2ba5de6dbfcb8fa72cedec78b Mon Sep 17 00:00:00 2001
From: liu zhengxi <380185688@qq.com>
Date: Wed, 20 Nov 2019 14:18:53 +0800
Subject: [PATCH] Fix the CAPI ZeroCopy shape error and reuse the code to get
 output (#21240)

* fix the CAPI ZeroCopy shape error and refactor how the output is obtained

* use an anonymous namespace to cover the functor

* fix unit tests, because the output of typeid(T).name() differs between Linux and Windows, test=develop
---
 paddle/fluid/inference/capi/c_api.h           |   2 +-
 paddle/fluid/inference/capi/pd_predictor.cc   | 110 +++++++++++-------
 .../fluid/inference/tests/api/CMakeLists.txt  |   6 +-
 .../tests/api/analyzer_capi_int_tester.cc     |  42 +++----
 .../tests/api/analyzer_capi_tester.cc         |  50 ++++----
 5 files changed, 111 insertions(+), 99 deletions(-)
diff --git a/paddle/fluid/inference/capi/c_api.h b/paddle/fluid/inference/capi/c_api.h
index b5ef410aad..7a36587398 100644
--- a/paddle/fluid/inference/capi/c_api.h
+++ b/paddle/fluid/inference/capi/c_api.h
@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
 
 PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
     const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
-    PD_ZeroCopyData** output, int** out_size);
+    PD_ZeroCopyData** output, int* out_size);
 
 // AnalysisConfig
 enum Precision { kFloat32 = 0, kInt8, kHalf };
diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc
index 51f8237c95..d4d5e9c35f 100644
--- a/paddle/fluid/inference/capi/pd_predictor.cc
+++ b/paddle/fluid/inference/capi/pd_predictor.cc
@@ -14,6 +14,7 @@
 
 #include <algorithm>
 #include <map>
+#include <memory>
 #include <numeric>
 #include <vector>
 #include "paddle/fluid/inference/capi/c_api.h"
@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType;
 using paddle::ConvertToPDDataType;
 using paddle::ConvertToACPrecision;
 
-extern "C" {
+namespace {
+// Invokes CALLBACK(cpp_type, PD_DataType::X) once per supported data type.
+#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \
+  CALLBACK(CPP_TYPE, PD_DataType::PD_TYPE);
+#define _DataType_(CALLBACK)                     \
+  _DataTypeHelper_(CALLBACK, float, PD_FLOAT32); \
+  _DataTypeHelper_(CALLBACK, int32_t, PD_INT32); \
+  _DataTypeHelper_(CALLBACK, int64_t, PD_INT64); \
+  _DataTypeHelper_(CALLBACK, uint8_t, PD_UINT8);
+
+// Calls visitor.apply<T>() for the T paired with `type`; errors if unmatched.
+template <typename Visitor>
+inline void VisitDataType(PD_DataType type, Visitor visitor) {
+#define VisitDataTypeCallback(CPP_TYPE, PD_TYPE) \
+  do {                                           \
+    if (type == PD_TYPE) {                       \
+      visitor.template apply<CPP_TYPE>();        \
+      return;                                    \
+    }                                            \
+  } while (0)
+
+  _DataType_(VisitDataTypeCallback);
+#undef VisitDataTypeCallback
+  PADDLE_THROW_ERROR("Unsupported data type. ");
+}
+
+// VisitDataType visitor that copies output_t's contents into output_i->data.
+struct PD_ZeroCopyFunctor {
+  PD_ZeroCopyData* output_i;
+  paddle::ZeroCopyTensor* output_t;
 
+  PD_ZeroCopyFunctor(PD_ZeroCopyData* output_i_,
+                     paddle::ZeroCopyTensor* output_t_)
+      : output_i(output_i_), output_t(output_t_) {}
+
+  // Mallocs room for prod(shape) OutT values and fills it from the tensor.
+  template <typename OutT>
+  void apply() {
+    const int out_num =
+        std::accumulate(output_i->shape, output_i->shape + output_i->shape_size,
+                        1, std::multiplies<int>());
+    output_i->data = malloc(out_num * sizeof(OutT));
+    if (out_num > 0) PADDLE_ENFORCE_NOT_NULL(output_i->data);
+    output_t->copy_to_cpu(static_cast<OutT*>(output_i->data));
+  }
+};
+
+}  // namespace
+
+extern "C" {
 bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
                      int in_size, PD_Tensor** output_data, int* out_size,
                      int batch_size) {
   PADDLE_ENFORCE_NOT_NULL(config);
+  VLOG(3) << "Predictor: PD_PredictorRun. ";
   static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
       predictors;
   if (!predictors.count(config->config.model_dir())) {
@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
     in.emplace_back(inputs->tensor);
   }
   std::vector<paddle::PaddleTensor> out;
+  VLOG(3) << "Run predictor in CAPI encapsulation. ";
   if (predictor->Run(in, &out, batch_size)) {
     int osize = out.size();
     *output_data = new PD_Tensor[osize];
@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
 
 bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
                              PD_ZeroCopyData* inputs, int in_size,
-                             PD_ZeroCopyData** output, int** out_size) {
+                             PD_ZeroCopyData** output, int* out_size) {
   PADDLE_ENFORCE_NOT_NULL(config);
-  auto predictor = paddle::CreatePaddlePredictor(config->config);
+  static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
+      predictors;
+  if (!predictors.count(config->config.model_dir())) {
+    predictors[config->config.model_dir()] =
+        paddle::CreatePaddlePredictor(config->config);
+  }
+  auto& predictor = predictors[config->config.model_dir()];
   auto input_names = predictor->GetInputNames();
   VLOG(3) << "The inputs' size is " << input_names.size();
   PADDLE_ENFORCE_EQ(
@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
         break;
     }
   }
+  VLOG(3) << "Run ZeroCopyRun() in CAPI encapsulation. ";
   CHECK(predictor->ZeroCopyRun());
   auto output_names = predictor->GetOutputNames();
   int osize = output_names.size();
-  *out_size = &osize;
+  *out_size = osize;
   *output = new PD_ZeroCopyData[osize];
   VLOG(3) << "The output size is " << osize;
-  for (int i = 0; i < osize; ++i) {
+  for (int i = 0; i < *out_size; ++i) {
     auto& output_i = (*output)[i];
     output_i.name = new char[output_names[i].length() + 1];
     snprintf(output_i.name, output_names[i].length() + 1, "%s",
@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
     output_i.dtype = ConvertToPDDataType(output_t->type());
     std::vector<int> output_shape = output_t->shape();
     output_i.shape = new int[output_shape.size()];
-    output_i.shape = output_shape.data();
+    memmove(output_i.shape, output_shape.data(),
+            output_shape.size() * sizeof(int));
     output_i.shape_size = output_shape.size();
-    switch (output_i.dtype) {
-      case PD_FLOAT32: {
-        std::vector<float> out_data;
-        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                      1, std::multiplies<int>());
-        out_data.resize(out_num);
-        output_t->copy_to_cpu(out_data.data());
-        output_i.data = static_cast<void*>(out_data.data());
-      } break;
-      case PD_INT32: {
-        std::vector<int32_t> out_data;
-        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                      1, std::multiplies<int>());
-        out_data.resize(out_num);
-        output_t->copy_to_cpu(out_data.data());
-        output_i.data = static_cast<void*>(out_data.data());
-      } break;
-      case PD_INT64: {
-        std::vector<int64_t> out_data;
-        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                      1, std::multiplies<int>());
-        out_data.resize(out_num);
-        output_t->copy_to_cpu(out_data.data());
-        output_i.data = static_cast<void*>(out_data.data());
-      } break;
-      case PD_UINT8: {
-        std::vector<uint8_t> out_data;
-        int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
-                                      1, std::multiplies<int>());
-        out_data.resize(out_num);
-        output_t->copy_to_cpu(out_data.data());
-        output_i.data = static_cast<void*>(out_data.data());
-      } break;
-      default:
-        CHECK(false) << "Unsupport data type.";
-        break;
-    }
+    VisitDataType(output_i.dtype,
+                  PD_ZeroCopyFunctor(&output_i, output_t.get()));
   }
   return true;
 }
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index 715350c52f..b4fc327d67 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
             ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
 endif()
 
-set(CAPI_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_tests_models")
-if (NOT EXISTS ${CAPI_MODEL_INSTALL_DIR})
-    inference_download_and_uncompress(${CAPI_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
-endif()
 inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
             EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
-            ARGS --infer_model=${CAPI_MODEL_INSTALL_DIR}/trt_inference_test_models)
+            ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
 
 inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
             EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
index 58d0053662..06d56ec6c9 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <fstream>
 #include <iostream>
 #include <string>
-#include <typeinfo>
 #include <vector>
 #include "paddle/fluid/inference/capi/c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
@@ -27,7 +26,6 @@ namespace paddle {
 namespace inference {
 namespace analysis {
 
-template <typename T>
 void zero_copy_run() {
   std::string model_dir = FLAGS_infer_model;
   PD_AnalysisConfig *config = PD_NewAnalysisConfig();
@@ -46,26 +44,15 @@ void zero_copy_run() {
   const int channels = 3;
   const int height = 224;
   const int width = 224;
-  T input[batch_size * channels * height * width] = {0};
+  float input[batch_size * channels * height * width] = {0};
   int shape[4] = {batch_size, channels, height, width};
   int shape_size = 4;
   int in_size = 2;
-  int *out_size;
+  int out_size;
   PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2];
-  PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
+  PD_ZeroCopyData *outputs = nullptr;
   inputs[0].data = static_cast<void *>(input);
-  std::string nm = typeid(T).name();
-  if ("f" == nm) {
-    inputs[0].dtype = PD_FLOAT32;
-  } else if ("i" == nm) {
-    inputs[0].dtype = PD_INT32;
-  } else if ("x" == nm) {
-    inputs[0].dtype = PD_INT64;
-  } else if ("h" == nm) {
-    inputs[0].dtype = PD_UINT8;
-  } else {
-    CHECK(false) << "Unsupport dtype. ";
-  }
+  inputs[0].dtype = PD_FLOAT32;
   inputs[0].name = new char[6];
   inputs[0].name[0] = 'i';
   inputs[0].name[1] = 'm';
@@ -94,15 +81,24 @@ void zero_copy_run() {
 
   PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
 
+  LOG(INFO) << "output size is: " << out_size;
   LOG(INFO) << outputs[0].name;
-  LOG(INFO) << outputs[0].shape_size;
+  for (int j = 0; j < out_size; ++j) {  // dump every returned output
+    LOG(INFO) << "output[" << j
+              << "]'s shape_size is: " << outputs[j].shape_size;
+    for (int i = 0; i < outputs[j].shape_size; ++i) {
+      LOG(INFO) << "output[" << j << "]'s shape is: " << outputs[j].shape[i];
+    }
+    LOG(INFO) << "output[" << j
+              << "]'s DATA is: " << *(static_cast<float *>(outputs[j].data));
+  }
+  delete[] outputs;
+  delete[] inputs;
 }
 
-TEST(PD_ZeroCopyRun, zero_copy_run) {
-  // zero_copy_run<int32_t>();
-  // zero_copy_run<int64_t>();
-  zero_copy_run<float>();
-}
+#ifdef PADDLE_WITH_MKLDNN
+TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
+#endif
 
 }  // namespace analysis
 }  // namespace inference
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
index 7aa85cec14..ee4725f1c5 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
@@ -18,7 +18,6 @@ limitations under the License. */
 #include <fstream>
 #include <iostream>
 #include <string>
-#include <typeinfo>
 #include <vector>
 #include "paddle/fluid/inference/capi/c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
@@ -27,16 +26,17 @@ namespace paddle {
 namespace inference {
 namespace analysis {
 
-template <typename T>
 void zero_copy_run() {
-  std::string model_dir = FLAGS_infer_model + "/mobilenet";
+  std::string model_dir = FLAGS_infer_model;
+  std::string prog_file = model_dir + "/model";
+  std::string params_file = model_dir + "/params";
   PD_AnalysisConfig *config = PD_NewAnalysisConfig();
   PD_DisableGpu(config);
   PD_SetCpuMathLibraryNumThreads(config, 10);
   PD_SwitchUseFeedFetchOps(config, false);
   PD_SwitchSpecifyInputNames(config, true);
   PD_SwitchIrDebug(config, true);
-  PD_SetModel(config, model_dir.c_str());  //, params_file1.c_str());
+  PD_SetModel(config, prog_file.c_str(), params_file.c_str());
   bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
   CHECK(!use_feed_fetch) << "NO";
   bool specify_input_names = PD_SpecifyInputName(config);
@@ -44,44 +44,40 @@ void zero_copy_run() {
 
   const int batch_size = 1;
   const int channels = 3;
-  const int height = 224;
-  const int width = 224;
-  T input[batch_size * channels * height * width] = {0};
+  const int height = 318;
+  const int width = 318;
+  float input[batch_size * channels * height * width] = {0};
 
   int shape[4] = {batch_size, channels, height, width};
   int shape_size = 4;
   int in_size = 1;
-  int *out_size;
+  int out_size;
   PD_ZeroCopyData *inputs = new PD_ZeroCopyData;
   PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
   inputs->data = static_cast<void *>(input);
-  std::string nm = typeid(T).name();
-  if ("f" == nm) {
-    inputs->dtype = PD_FLOAT32;
-  } else if ("i" == nm) {
-    inputs->dtype = PD_INT32;
-  } else if ("x" == nm) {
-    inputs->dtype = PD_INT64;
-  } else if ("h" == nm) {
-    inputs->dtype = PD_UINT8;
-  } else {
-    CHECK(false) << "Unsupport dtype. ";
-  }
-  inputs->name = new char[2];
-  inputs->name[0] = 'x';
-  inputs->name[1] = '\0';
-  LOG(INFO) << inputs->name;
+  inputs->dtype = PD_FLOAT32;
+  inputs->name = new char[5];
+  inputs->name[0] = 'd';
+  inputs->name[1] = 'a';
+  inputs->name[2] = 't';
+  inputs->name[3] = 'a';
+  inputs->name[4] = '\0';
   inputs->shape = shape;
   inputs->shape_size = shape_size;
 
   PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
+
+  delete inputs;  // allocated with scalar new, so scalar delete
+  delete[] outputs;
 }
 
-TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run<float>(); }
+TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
 
 #ifdef PADDLE_WITH_MKLDNN
 TEST(PD_AnalysisConfig, profile_mkldnn) {
-  std::string model_dir = FLAGS_infer_model + "/mobilenet";
+  std::string model_dir = FLAGS_infer_model;
+  std::string prog_file = model_dir + "/model";
+  std::string params_file = model_dir + "/params";
   PD_AnalysisConfig *config = PD_NewAnalysisConfig();
   PD_DisableGpu(config);
   PD_SetCpuMathLibraryNumThreads(config, 10);
@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
   bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
   CHECK(quantizer_enable) << "NO";
   PD_SetMkldnnCacheCapacity(config, 0);
-  PD_SetModel(config, model_dir.c_str());
+  PD_SetModel(config, prog_file.c_str(), params_file.c_str());
   PD_EnableAnakinEngine(config);
   bool anakin_enable = PD_AnakinEngineEnabled(config);
   LOG(INFO) << anakin_enable;
-- 
GitLab