未验证 提交 3cb6c0a0 编写于 作者: L liu zhengxi 提交者: GitHub

Fix the CAPI ZeroCopy shape error and reuse the code to get output (#21240)

* fix the C API ZeroCopy shape error and restructure how the output is obtained

* use an anonymous namespace to scope the functor to this translation unit

* fix unit tests because the output of typeid(T).name() differs between Linux and Windows, test=develop
上级 b0fc8227
...@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config, ...@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun( PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size, const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size); PD_ZeroCopyData** output, int* out_size);
// AnalysisConfig // AnalysisConfig
enum Precision { kFloat32 = 0, kInt8, kHalf }; enum Precision { kFloat32 = 0, kInt8, kHalf };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <memory>
#include <numeric> #include <numeric>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
...@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType; ...@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType; using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
extern "C" { namespace {
#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \
CALLBACK(CPP_TYPE, PD_DataType::PD_TYPE);
#define _DataType_(CALLBACK) \
_DataTypeHelper_(CALLBACK, float, PD_FLOAT32); \
_DataTypeHelper_(CALLBACK, int32_t, PD_INT32); \
_DataTypeHelper_(CALLBACK, int64_t, PD_INT64); \
_DataTypeHelper_(CALLBACK, uint8_t, PD_UINT8);
template <typename Visitor>
inline void VisitDataType(PD_DataType type, Visitor visitor) {
#define VisitDataTypeCallback(CPP_TYPE, PD_TYPE) \
do { \
if (type == PD_TYPE) { \
visitor.template apply<CPP_TYPE>(); \
return; \
} \
} while (0)
_DataType_(VisitDataTypeCallback);
#undef VisitDataTypeCallback
PADDLE_THROW_ERROR("Unsupported data type. ");
}
struct PD_ZeroCopyFunctor {
PD_ZeroCopyData* output_i;
paddle::ZeroCopyTensor* output_t;
PD_ZeroCopyFunctor(PD_ZeroCopyData* output_i_,
paddle::ZeroCopyTensor* output_t_)
: output_i(output_i_), output_t(output_t_) {}
template <typename OutT>
void apply() {
std::vector<OutT> out_data;
int out_num =
std::accumulate(output_i->shape, output_i->shape + output_i->shape_size,
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i->data = reinterpret_cast<void*>(malloc(out_num * sizeof(OutT)));
memmove(static_cast<OutT*>(output_i->data), out_data.data(),
out_num * sizeof(OutT));
}
};
} // namespace
extern "C" {
bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
int in_size, PD_Tensor** output_data, int* out_size, int in_size, PD_Tensor** output_data, int* out_size,
int batch_size) { int batch_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(config);
VLOG(3) << "Predoctor: PD_PredictorRun. ";
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>> static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors; predictors;
if (!predictors.count(config->config.model_dir())) { if (!predictors.count(config->config.model_dir())) {
...@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, ...@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
in.emplace_back(inputs->tensor); in.emplace_back(inputs->tensor);
} }
std::vector<paddle::PaddleTensor> out; std::vector<paddle::PaddleTensor> out;
VLOG(3) << "Run predictor in CAPI encapsulation. ";
if (predictor->Run(in, &out, batch_size)) { if (predictor->Run(in, &out, batch_size)) {
int osize = out.size(); int osize = out.size();
*output_data = new PD_Tensor[osize]; *output_data = new PD_Tensor[osize];
...@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, ...@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
PD_ZeroCopyData* inputs, int in_size, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size) { PD_ZeroCopyData** output, int* out_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(config);
auto predictor = paddle::CreatePaddlePredictor(config->config); static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
if (!predictors.count(config->config.model_dir())) {
predictors[config->config.model_dir()] =
paddle::CreatePaddlePredictor(config->config);
}
auto& predictor = predictors[config->config.model_dir()];
auto input_names = predictor->GetInputNames(); auto input_names = predictor->GetInputNames();
VLOG(3) << "The inputs' size is " << input_names.size(); VLOG(3) << "The inputs' size is " << input_names.size();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
break; break;
} }
} }
VLOG(3) << "Run ZeroCopyRun() in CAPI encapsulation. ";
CHECK(predictor->ZeroCopyRun()); CHECK(predictor->ZeroCopyRun());
auto output_names = predictor->GetOutputNames(); auto output_names = predictor->GetOutputNames();
int osize = output_names.size(); int osize = output_names.size();
*out_size = &osize; *out_size = osize;
*output = new PD_ZeroCopyData[osize]; *output = new PD_ZeroCopyData[osize];
VLOG(3) << "The output size is " << osize; VLOG(3) << "The output size is " << osize;
for (int i = 0; i < osize; ++i) { for (int i = 0; i < *out_size; ++i) {
auto& output_i = (*output)[i]; auto& output_i = (*output)[i];
output_i.name = new char[output_names[i].length() + 1]; output_i.name = new char[output_names[i].length() + 1];
snprintf(output_i.name, output_names[i].length() + 1, "%s", snprintf(output_i.name, output_names[i].length() + 1, "%s",
...@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
output_i.dtype = ConvertToPDDataType(output_t->type()); output_i.dtype = ConvertToPDDataType(output_t->type());
std::vector<int> output_shape = output_t->shape(); std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()]; output_i.shape = new int[output_shape.size()];
output_i.shape = output_shape.data(); memmove(output_i.shape, output_shape.data(),
output_shape.size() * sizeof(int));
output_i.shape_size = output_shape.size(); output_i.shape_size = output_shape.size();
switch (output_i.dtype) { VisitDataType(output_i.dtype,
case PD_FLOAT32: { PD_ZeroCopyFunctor(&output_i, std::move(output_t.get())));
std::vector<float> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT32: {
std::vector<int32_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT64: {
std::vector<int64_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_UINT8: {
std::vector<uint8_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
default:
CHECK(false) << "Unsupport data type.";
break;
}
} }
return true; return true;
} }
......
...@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
endif() endif()
set(CAPI_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_tests_models")
if (NOT EXISTS ${CAPI_MODEL_INSTALL_DIR})
inference_download_and_uncompress(${CAPI_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
endif()
inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${CAPI_MODEL_INSTALL_DIR}/trt_inference_test_models) ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <typeinfo>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h"
...@@ -27,7 +26,6 @@ namespace paddle { ...@@ -27,7 +26,6 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
template <typename T>
void zero_copy_run() { void zero_copy_run() {
std::string model_dir = FLAGS_infer_model; std::string model_dir = FLAGS_infer_model;
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
...@@ -46,26 +44,15 @@ void zero_copy_run() { ...@@ -46,26 +44,15 @@ void zero_copy_run() {
const int channels = 3; const int channels = 3;
const int height = 224; const int height = 224;
const int width = 224; const int width = 224;
T input[batch_size * channels * height * width] = {0}; float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width}; int shape[4] = {batch_size, channels, height, width};
int shape_size = 4; int shape_size = 4;
int in_size = 2; int in_size = 2;
int *out_size; int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2]; PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2];
PD_ZeroCopyData *outputs = new PD_ZeroCopyData; PD_ZeroCopyData *outputs = nullptr;
inputs[0].data = static_cast<void *>(input); inputs[0].data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs[0].dtype = PD_FLOAT32; inputs[0].dtype = PD_FLOAT32;
} else if ("i" == nm) {
inputs[0].dtype = PD_INT32;
} else if ("x" == nm) {
inputs[0].dtype = PD_INT64;
} else if ("h" == nm) {
inputs[0].dtype = PD_UINT8;
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs[0].name = new char[6]; inputs[0].name = new char[6];
inputs[0].name[0] = 'i'; inputs[0].name[0] = 'i';
inputs[0].name[1] = 'm'; inputs[0].name[1] = 'm';
...@@ -94,15 +81,24 @@ void zero_copy_run() { ...@@ -94,15 +81,24 @@ void zero_copy_run() {
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
LOG(INFO) << "output size is: " << out_size;
LOG(INFO) << outputs[0].name; LOG(INFO) << outputs[0].name;
LOG(INFO) << outputs[0].shape_size; for (int j = 0; j < out_size; ++j) {
LOG(INFO) << "output[" << j
<< "]'s shape_size is: " << outputs[j].shape_size;
for (int i = 0; i < outputs[0].shape_size; ++i) {
LOG(INFO) << "output[" << j << "]'s shape is: " << outputs[j].shape[i];
}
LOG(INFO) << "output[" << j
<< "]'s DATA is: " << *(static_cast<float *>(outputs[j].data));
}
delete[] outputs;
delete[] inputs;
} }
TEST(PD_ZeroCopyRun, zero_copy_run) { #ifdef PADDLE_WITH_MKLDNN
// zero_copy_run<int32_t>(); TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
// zero_copy_run<int64_t>(); #endif
zero_copy_run<float>();
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <typeinfo>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h"
...@@ -27,16 +26,17 @@ namespace paddle { ...@@ -27,16 +26,17 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
template <typename T>
void zero_copy_run() { void zero_copy_run() {
std::string model_dir = FLAGS_infer_model + "/mobilenet"; std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config); PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10); PD_SetCpuMathLibraryNumThreads(config, 10);
PD_SwitchUseFeedFetchOps(config, false); PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true); PD_SwitchSpecifyInputNames(config, true);
PD_SwitchIrDebug(config, true); PD_SwitchIrDebug(config, true);
PD_SetModel(config, model_dir.c_str()); //, params_file1.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config); bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO"; CHECK(!use_feed_fetch) << "NO";
bool specify_input_names = PD_SpecifyInputName(config); bool specify_input_names = PD_SpecifyInputName(config);
...@@ -44,44 +44,40 @@ void zero_copy_run() { ...@@ -44,44 +44,40 @@ void zero_copy_run() {
const int batch_size = 1; const int batch_size = 1;
const int channels = 3; const int channels = 3;
const int height = 224; const int height = 318;
const int width = 224; const int width = 318;
T input[batch_size * channels * height * width] = {0}; float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width}; int shape[4] = {batch_size, channels, height, width};
int shape_size = 4; int shape_size = 4;
int in_size = 1; int in_size = 1;
int *out_size; int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData; PD_ZeroCopyData *inputs = new PD_ZeroCopyData;
PD_ZeroCopyData *outputs = new PD_ZeroCopyData; PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
inputs->data = static_cast<void *>(input); inputs->data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs->dtype = PD_FLOAT32; inputs->dtype = PD_FLOAT32;
} else if ("i" == nm) { inputs->name = new char[5];
inputs->dtype = PD_INT32; inputs->name[0] = 'd';
} else if ("x" == nm) { inputs->name[1] = 'a';
inputs->dtype = PD_INT64; inputs->name[2] = 't';
} else if ("h" == nm) { inputs->name[3] = 'a';
inputs->dtype = PD_UINT8; inputs->name[4] = '\0';
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs->name = new char[2];
inputs->name[0] = 'x';
inputs->name[1] = '\0';
LOG(INFO) << inputs->name;
inputs->shape = shape; inputs->shape = shape;
inputs->shape_size = shape_size; inputs->shape_size = shape_size;
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
delete[] inputs;
delete[] outputs;
} }
TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run<float>(); } TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
TEST(PD_AnalysisConfig, profile_mkldnn) { TEST(PD_AnalysisConfig, profile_mkldnn) {
std::string model_dir = FLAGS_infer_model + "/mobilenet"; std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config); PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10); PD_SetCpuMathLibraryNumThreads(config, 10);
...@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { ...@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO"; CHECK(quantizer_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0); PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, model_dir.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_EnableAnakinEngine(config); PD_EnableAnakinEngine(config);
bool anakin_enable = PD_AnakinEngineEnabled(config); bool anakin_enable = PD_AnakinEngineEnabled(config);
LOG(INFO) << anakin_enable; LOG(INFO) << anakin_enable;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册