未验证 提交 3cb6c0a0 编写于 作者: L liu zhengxi 提交者: GitHub

Fix the CAPI ZeroCopy shape error and reuse the code to get output (#21240)

* fix the C API ZeroCopy shape error and restructure how the output is obtained

* use an anonymous namespace to scope the functor to this translation unit

* fix unit tests because the output of typeid(T).name() differs between Linux and Windows, test=develop
上级 b0fc8227
...@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config, ...@@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config,
PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun( PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun(
const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size, const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size); PD_ZeroCopyData** output, int* out_size);
// AnalysisConfig // AnalysisConfig
enum Precision { kFloat32 = 0, kInt8, kHalf }; enum Precision { kFloat32 = 0, kInt8, kHalf };
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <memory>
#include <numeric> #include <numeric>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
...@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType; ...@@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType;
using paddle::ConvertToPDDataType; using paddle::ConvertToPDDataType;
using paddle::ConvertToACPrecision; using paddle::ConvertToACPrecision;
extern "C" { namespace {
#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \
CALLBACK(CPP_TYPE, PD_DataType::PD_TYPE);
#define _DataType_(CALLBACK) \
_DataTypeHelper_(CALLBACK, float, PD_FLOAT32); \
_DataTypeHelper_(CALLBACK, int32_t, PD_INT32); \
_DataTypeHelper_(CALLBACK, int64_t, PD_INT64); \
_DataTypeHelper_(CALLBACK, uint8_t, PD_UINT8);
template <typename Visitor>
inline void VisitDataType(PD_DataType type, Visitor visitor) {
#define VisitDataTypeCallback(CPP_TYPE, PD_TYPE) \
do { \
if (type == PD_TYPE) { \
visitor.template apply<CPP_TYPE>(); \
return; \
} \
} while (0)
_DataType_(VisitDataTypeCallback);
#undef VisitDataTypeCallback
PADDLE_THROW_ERROR("Unsupported data type. ");
}
struct PD_ZeroCopyFunctor {
PD_ZeroCopyData* output_i;
paddle::ZeroCopyTensor* output_t;
PD_ZeroCopyFunctor(PD_ZeroCopyData* output_i_,
paddle::ZeroCopyTensor* output_t_)
: output_i(output_i_), output_t(output_t_) {}
template <typename OutT>
void apply() {
std::vector<OutT> out_data;
int out_num =
std::accumulate(output_i->shape, output_i->shape + output_i->shape_size,
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i->data = reinterpret_cast<void*>(malloc(out_num * sizeof(OutT)));
memmove(static_cast<OutT*>(output_i->data), out_data.data(),
out_num * sizeof(OutT));
}
};
} // namespace
extern "C" {
bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
int in_size, PD_Tensor** output_data, int* out_size, int in_size, PD_Tensor** output_data, int* out_size,
int batch_size) { int batch_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(config);
VLOG(3) << "Predoctor: PD_PredictorRun. ";
static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>> static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors; predictors;
if (!predictors.count(config->config.model_dir())) { if (!predictors.count(config->config.model_dir())) {
...@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, ...@@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
in.emplace_back(inputs->tensor); in.emplace_back(inputs->tensor);
} }
std::vector<paddle::PaddleTensor> out; std::vector<paddle::PaddleTensor> out;
VLOG(3) << "Run predictor in CAPI encapsulation. ";
if (predictor->Run(in, &out, batch_size)) { if (predictor->Run(in, &out, batch_size)) {
int osize = out.size(); int osize = out.size();
*output_data = new PD_Tensor[osize]; *output_data = new PD_Tensor[osize];
...@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, ...@@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs,
bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
PD_ZeroCopyData* inputs, int in_size, PD_ZeroCopyData* inputs, int in_size,
PD_ZeroCopyData** output, int** out_size) { PD_ZeroCopyData** output, int* out_size) {
PADDLE_ENFORCE_NOT_NULL(config); PADDLE_ENFORCE_NOT_NULL(config);
auto predictor = paddle::CreatePaddlePredictor(config->config); static std::map<std::string, std::unique_ptr<paddle::PaddlePredictor>>
predictors;
if (!predictors.count(config->config.model_dir())) {
predictors[config->config.model_dir()] =
paddle::CreatePaddlePredictor(config->config);
}
auto& predictor = predictors[config->config.model_dir()];
auto input_names = predictor->GetInputNames(); auto input_names = predictor->GetInputNames();
VLOG(3) << "The inputs' size is " << input_names.size(); VLOG(3) << "The inputs' size is " << input_names.size();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
...@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
break; break;
} }
} }
VLOG(3) << "Run ZeroCopyRun() in CAPI encapsulation. ";
CHECK(predictor->ZeroCopyRun()); CHECK(predictor->ZeroCopyRun());
auto output_names = predictor->GetOutputNames(); auto output_names = predictor->GetOutputNames();
int osize = output_names.size(); int osize = output_names.size();
*out_size = &osize; *out_size = osize;
*output = new PD_ZeroCopyData[osize]; *output = new PD_ZeroCopyData[osize];
VLOG(3) << "The output size is " << osize; VLOG(3) << "The output size is " << osize;
for (int i = 0; i < osize; ++i) { for (int i = 0; i < *out_size; ++i) {
auto& output_i = (*output)[i]; auto& output_i = (*output)[i];
output_i.name = new char[output_names[i].length() + 1]; output_i.name = new char[output_names[i].length() + 1];
snprintf(output_i.name, output_names[i].length() + 1, "%s", snprintf(output_i.name, output_names[i].length() + 1, "%s",
...@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, ...@@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config,
output_i.dtype = ConvertToPDDataType(output_t->type()); output_i.dtype = ConvertToPDDataType(output_t->type());
std::vector<int> output_shape = output_t->shape(); std::vector<int> output_shape = output_t->shape();
output_i.shape = new int[output_shape.size()]; output_i.shape = new int[output_shape.size()];
output_i.shape = output_shape.data(); memmove(output_i.shape, output_shape.data(),
output_shape.size() * sizeof(int));
output_i.shape_size = output_shape.size(); output_i.shape_size = output_shape.size();
switch (output_i.dtype) { VisitDataType(output_i.dtype,
case PD_FLOAT32: { PD_ZeroCopyFunctor(&output_i, std::move(output_t.get())));
std::vector<float> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT32: {
std::vector<int32_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_INT64: {
std::vector<int64_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
case PD_UINT8: {
std::vector<uint8_t> out_data;
int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
1, std::multiplies<int>());
out_data.resize(out_num);
output_t->copy_to_cpu(out_data.data());
output_i.data = static_cast<void*>(out_data.data());
} break;
default:
CHECK(false) << "Unsupport data type.";
break;
}
} }
return true; return true;
} }
......
...@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND) ...@@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR})
endif() endif()
set(CAPI_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_tests_models")
if (NOT EXISTS ${CAPI_MODEL_INSTALL_DIR})
inference_download_and_uncompress(${CAPI_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
endif()
inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${CAPI_MODEL_INSTALL_DIR}/trt_inference_test_models) ARGS --infer_model=${RESNET50_MODEL_DIR}/model)
inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <typeinfo>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h"
...@@ -27,7 +26,6 @@ namespace paddle { ...@@ -27,7 +26,6 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
template <typename T>
void zero_copy_run() { void zero_copy_run() {
std::string model_dir = FLAGS_infer_model; std::string model_dir = FLAGS_infer_model;
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
...@@ -46,26 +44,15 @@ void zero_copy_run() { ...@@ -46,26 +44,15 @@ void zero_copy_run() {
const int channels = 3; const int channels = 3;
const int height = 224; const int height = 224;
const int width = 224; const int width = 224;
T input[batch_size * channels * height * width] = {0}; float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width}; int shape[4] = {batch_size, channels, height, width};
int shape_size = 4; int shape_size = 4;
int in_size = 2; int in_size = 2;
int *out_size; int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2]; PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2];
PD_ZeroCopyData *outputs = new PD_ZeroCopyData; PD_ZeroCopyData *outputs = nullptr;
inputs[0].data = static_cast<void *>(input); inputs[0].data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs[0].dtype = PD_FLOAT32; inputs[0].dtype = PD_FLOAT32;
} else if ("i" == nm) {
inputs[0].dtype = PD_INT32;
} else if ("x" == nm) {
inputs[0].dtype = PD_INT64;
} else if ("h" == nm) {
inputs[0].dtype = PD_UINT8;
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs[0].name = new char[6]; inputs[0].name = new char[6];
inputs[0].name[0] = 'i'; inputs[0].name[0] = 'i';
inputs[0].name[1] = 'm'; inputs[0].name[1] = 'm';
...@@ -94,15 +81,24 @@ void zero_copy_run() { ...@@ -94,15 +81,24 @@ void zero_copy_run() {
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
LOG(INFO) << "output size is: " << out_size;
LOG(INFO) << outputs[0].name; LOG(INFO) << outputs[0].name;
LOG(INFO) << outputs[0].shape_size; for (int j = 0; j < out_size; ++j) {
LOG(INFO) << "output[" << j
<< "]'s shape_size is: " << outputs[j].shape_size;
for (int i = 0; i < outputs[0].shape_size; ++i) {
LOG(INFO) << "output[" << j << "]'s shape is: " << outputs[j].shape[i];
}
LOG(INFO) << "output[" << j
<< "]'s DATA is: " << *(static_cast<float *>(outputs[j].data));
}
delete[] outputs;
delete[] inputs;
} }
TEST(PD_ZeroCopyRun, zero_copy_run) { #ifdef PADDLE_WITH_MKLDNN
// zero_copy_run<int32_t>(); TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
// zero_copy_run<int64_t>(); #endif
zero_copy_run<float>();
}
} // namespace analysis } // namespace analysis
} // namespace inference } // namespace inference
......
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <string> #include <string>
#include <typeinfo>
#include <vector> #include <vector>
#include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/capi/c_api.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h"
...@@ -27,16 +26,17 @@ namespace paddle { ...@@ -27,16 +26,17 @@ namespace paddle {
namespace inference { namespace inference {
namespace analysis { namespace analysis {
template <typename T>
void zero_copy_run() { void zero_copy_run() {
std::string model_dir = FLAGS_infer_model + "/mobilenet"; std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config); PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10); PD_SetCpuMathLibraryNumThreads(config, 10);
PD_SwitchUseFeedFetchOps(config, false); PD_SwitchUseFeedFetchOps(config, false);
PD_SwitchSpecifyInputNames(config, true); PD_SwitchSpecifyInputNames(config, true);
PD_SwitchIrDebug(config, true); PD_SwitchIrDebug(config, true);
PD_SetModel(config, model_dir.c_str()); //, params_file1.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config); bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config);
CHECK(!use_feed_fetch) << "NO"; CHECK(!use_feed_fetch) << "NO";
bool specify_input_names = PD_SpecifyInputName(config); bool specify_input_names = PD_SpecifyInputName(config);
...@@ -44,44 +44,40 @@ void zero_copy_run() { ...@@ -44,44 +44,40 @@ void zero_copy_run() {
const int batch_size = 1; const int batch_size = 1;
const int channels = 3; const int channels = 3;
const int height = 224; const int height = 318;
const int width = 224; const int width = 318;
T input[batch_size * channels * height * width] = {0}; float input[batch_size * channels * height * width] = {0};
int shape[4] = {batch_size, channels, height, width}; int shape[4] = {batch_size, channels, height, width};
int shape_size = 4; int shape_size = 4;
int in_size = 1; int in_size = 1;
int *out_size; int out_size;
PD_ZeroCopyData *inputs = new PD_ZeroCopyData; PD_ZeroCopyData *inputs = new PD_ZeroCopyData;
PD_ZeroCopyData *outputs = new PD_ZeroCopyData; PD_ZeroCopyData *outputs = new PD_ZeroCopyData;
inputs->data = static_cast<void *>(input); inputs->data = static_cast<void *>(input);
std::string nm = typeid(T).name();
if ("f" == nm) {
inputs->dtype = PD_FLOAT32; inputs->dtype = PD_FLOAT32;
} else if ("i" == nm) { inputs->name = new char[5];
inputs->dtype = PD_INT32; inputs->name[0] = 'd';
} else if ("x" == nm) { inputs->name[1] = 'a';
inputs->dtype = PD_INT64; inputs->name[2] = 't';
} else if ("h" == nm) { inputs->name[3] = 'a';
inputs->dtype = PD_UINT8; inputs->name[4] = '\0';
} else {
CHECK(false) << "Unsupport dtype. ";
}
inputs->name = new char[2];
inputs->name[0] = 'x';
inputs->name[1] = '\0';
LOG(INFO) << inputs->name;
inputs->shape = shape; inputs->shape = shape;
inputs->shape_size = shape_size; inputs->shape_size = shape_size;
PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size);
delete[] inputs;
delete[] outputs;
} }
TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run<float>(); } TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); }
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
TEST(PD_AnalysisConfig, profile_mkldnn) { TEST(PD_AnalysisConfig, profile_mkldnn) {
std::string model_dir = FLAGS_infer_model + "/mobilenet"; std::string model_dir = FLAGS_infer_model;
std::string prog_file = model_dir + "/model";
std::string params_file = model_dir + "/params";
PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_AnalysisConfig *config = PD_NewAnalysisConfig();
PD_DisableGpu(config); PD_DisableGpu(config);
PD_SetCpuMathLibraryNumThreads(config, 10); PD_SetCpuMathLibraryNumThreads(config, 10);
...@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { ...@@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO"; CHECK(quantizer_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0); PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, model_dir.c_str()); PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_EnableAnakinEngine(config); PD_EnableAnakinEngine(config);
bool anakin_enable = PD_AnakinEngineEnabled(config); bool anakin_enable = PD_AnakinEngineEnabled(config);
LOG(INFO) << anakin_enable; LOG(INFO) << anakin_enable;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册