未验证 提交 3cb6c0a0 编写于 作者: L liu zhengxi 提交者: GitHub

Fix the CAPI ZeroCopy shape error and reuse the code to get output (#21240)

* fix the CAPI ZeroCopy shape error and refactor how the output is obtained

* use an anonymous namespace to cover the functor

* fix unit tests because the output of typeid(T).name() differs between Linux and Windows, test=develop
上级 b0fc8227
......@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size);
PD_ZeroCopyData** output, int* out_size);
// AnalysisConfig
enum Precision { kFloat32 = 0, kInt8, kHalf };
......
......@@ -14,6 +14,7 @@
#include <algorithm>
#include <map>
#include <memory>
#include <numeric>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
......@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision;
extern "C" {
namespace {
#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \
CALLBACK(CPP_TYPE, PD_DataType::PD_TYPE);
#define _DataType_(CALLBACK) \
_DataTypeHelper_(CALLBACK, float, PD_FLOAT32); \
_DataTypeHelper_(CALLBACK, int32_t, PD_INT32); \
_DataTypeHelper_(CALLBACK, int64_t, PD_INT64); \
_DataTypeHelper_(CALLBACK, uint8_t, PD_UINT8);
template <typename Visitor>
inline void VisitDataType(PD_DataType type, Visitor visitor) {
#define VisitDataTypeCallback(CPP_TYPE, PD_TYPE) \
do { \
if (type == PD_TYPE) { \
visitor.template apply<CPP_TYPE>(); \
return; \
} \
} while (0)
_DataType_(VisitDataTypeCallback);
#undef VisitDataTypeCallback
PADDLE_THROW_ERROR("Unsupported data type. ");
}
struct PD_ZeroCopyFunctor {
PD_ZeroCopyData* output_i;
paddle::ZeroCopyTensor* output_t;
PD_ZeroCopyFunctor(PD_ZeroCopyData* output_i_,
paddle::ZeroCopyTensor* output_t_)
: output_i(output_i_), output_t(output_t_) {}
template <typename OutT>
void apply() {
std::vector<OutT> out_data;
int out_num =
std::accumulate(output_i->shape, output_i->shape + output_i->shape_size,
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i->data = reinterpret_cast<void*>(malloc(out_num * sizeof(OutT)));
memmove(static_cast<OutT*>(output_i->data), out_data.data(),
out_num * sizeof(OutT));
}
};
} // namespace
extern "C" {
bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
int in_size, PD_Tensor** output_data, int* out_size,
int batch_size) {
PADDLE_ENFORCE_NOT_NULL(config);
VLOG(3) << "Predoctor: PD_PredictorRun. ";
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
if (!predictors.count(config->config.model_dir())) {
......@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
in.emplace_back(inputs->tensor);
}
std::vector<paddle::PaddleTensor> out;
VLOG(3) << "Run predictor in CAPI encapsulation. ";
if (predictor->Run(in, &out, batch_size)) {
int osize = out.size();
*output_data = new PD_Tensor[osize];
......@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size) {
PD_ZeroCopyData** output, int* out_size) {
PADDLE_ENFORCE_NOT_NULL(config);
auto predictor = paddle::CreatePaddlePredictor(config->config);
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
if (!predictors.count(config->config.model_dir())) {
predictors[config->config.model_dir()] =
paddle::CreatePaddlePredictor(config->config);
}
auto& predictor = predictors[config->config.model_dir()];
auto input_names = predictor->GetInputNames();
VLOG(3) << "The inputs' size is " << input_names.size();
PADDLE_ENFORCE_EQ(
......@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
break;
}
}
VLOG(3) << "Run ZeroCopyRun() in CAPI encapsulation. ";
CHECK(predictor->ZeroCopyRun());
auto output_names = predictor->GetOutputNames();
int osize = output_names.size();
*out_size = &osize;
*out_size = osize;
*output = new PD_ZeroCopyData[osize];
VLOG(3) << "The output size is " << osize;
for (int i = 0; i < osize; ++i) {
for (int i = 0; i < *out_size; ++i) {
auto& output_i = (*output)[i];
output_i.name = new char[output_names[i].length() + 1];
snprintf(output_i.name, output_names[i].length() + 1, "%s",
......@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
output_i.dtype = ConvertToPDDataType(output_t->type());
std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()];
output_i.shape = output_shape.data();
memmove(output_i.shape, output_shape.data(),
output_shape.size() * sizeof(int));
output_i.shape_size = output_shape.size();
switch (output_i.dtype) {
case PD_FLOAT32: {
std::vector<float> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT32: {
std::vector<int32_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT64: {
std::vector<int64_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_UINT8: {
std::vector<uint8_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
default:
CHECK(false) << "Unsupport data type.";
break;
}
VisitDataType(output_i.dtype,
PD_ZeroCopyFunctor(&output_i, std::move(output_t.get())));
}
return true;
}
......
......@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
endif()
set(CAPI_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_tests_models")
if (NOT EXISTS ${CAPI_MODEL_INSTALL_DIR})
inference_download_and_uncompress(${CAPI_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
endif()
inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${CAPI_MODEL_INSTALL_DIR}/trt_inference_test_models)
ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream>
#include <iostream>
#include <string>
#include <typeinfo>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
......@@ -27,7 +26,6 @@ namespace paddle {
namespace inference {
namespace analysis {
template <typename T>
void zero_copy_run() {
std::string model_dir = FLAGS_infer_model;
PD_AnalysisConfig *config = PD_NewAnalysisConfig();
......@@ -46,26 +44,15 @@ void zero_copy_run() {
const int channels = 3;
const int height = 224;
const int width = 224;
T input[batch_size * channels * height * width] = {0};
float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width};
int shape_size = 4;
int in_size = 2;
int *out_size;
int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2];
PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
PD_ZeroCopyData *outputs = nullptr;
inputs[0].data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs[0].dtype = PD_FLOAT32;
} else if ("i" == nm) {
inputs[0].dtype = PD_INT32;
} else if ("x" == nm) {
inputs[0].dtype = PD_INT64;
} else if ("h" == nm) {
inputs[0].dtype = PD_UINT8;
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs[0].dtype = PD_FLOAT32;
inputs[0].name = new char[6];
inputs[0].name[0] = 'i';
inputs[0].name[1] = 'm';
......@@ -94,15 +81,24 @@ void zero_copy_run() {
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
LOG(INFO) << "output size is: " << out_size;
LOG(INFO) << outputs[0].name;
LOG(INFO) << outputs[0].shape_size;
for (int j = 0; j < out_size; ++j) {
LOG(INFO) << "output[" << j
<< "]'s shape_size is: " << outputs[j].shape_size;
for (int i = 0; i < outputs[0].shape_size; ++i) {
LOG(INFO) << "output[" << j << "]'s shape is: " << outputs[j].shape[i];
}
LOG(INFO) << "output[" << j
<< "]'s DATA is: " << *(static_cast<float *>(outputs[j].data));
}
delete[] outputs;
delete[] inputs;
}
TEST(PD_ZeroCopyRun, zero_copy_run) {
// zero_copy_run<int32_t>();
// zero_copy_run<int64_t>();
zero_copy_run<float>();
}
#ifdef PADDLE_WITH_MKLDNN
TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
#endif
} // namespace analysis
} // namespace inference
......
......@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream>
#include <iostream>
#include <string>
#include <typeinfo>
#include <vector>
#include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
......@@ -27,16 +26,17 @@ namespace paddle {
namespace inference {
namespace analysis {
template <typename T>
void zero_copy_run() {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10);
PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true);
PD_SwitchIrDebug(config, true);
PD_SetModel(config, model_dir.c_str()); //, params_file1.c_str());
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO";
bool specify_input_names = PD_SpecifyInputName(config);
......@@ -44,44 +44,40 @@ void zero_copy_run() {
const int batch_size = 1;
const int channels = 3;
const int height = 224;
const int width = 224;
T input[batch_size * channels * height * width] = {0};
const int height = 318;
const int width = 318;
float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width};
int shape_size = 4;
int in_size = 1;
int *out_size;
int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData;
PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
inputs->data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs->dtype = PD_FLOAT32;
} else if ("i" == nm) {
inputs->dtype = PD_INT32;
} else if ("x" == nm) {
inputs->dtype = PD_INT64;
} else if ("h" == nm) {
inputs->dtype = PD_UINT8;
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs->name = new char[2];
inputs->name[0] = 'x';
inputs->name[1] = '\0';
LOG(INFO) << inputs->name;
inputs->dtype = PD_FLOAT32;
inputs->name = new char[5];
inputs->name[0] = 'd';
inputs->name[1] = 'a';
inputs->name[2] = 't';
inputs->name[3] = 'a';
inputs->name[4] = '\0';
inputs->shape = shape;
inputs->shape_size = shape_size;
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
delete[] inputs;
delete[] outputs;
}
TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run<float>(); }
TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(PD_AnalysisConfig, profile_mkldnn) {
std::string model_dir = FLAGS_infer_model + "/mobilenet";
std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10);
......@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, model_dir.c_str());
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_EnableAnakinEngine(config);
bool anakin_enable = PD_AnakinEngineEnabled(config);
LOG(INFO) << anakin_enable;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册