diff --git a/paddle/fluid/inference/capi/c_api.h b/paddle/fluid/inference/capi/c_api.h index b5ef410aadabe46c856261e5b8b9cee1c661b2df..7a36587398c3e384003b227b93c7088b4c026da6 100644 --- a/paddle/fluid/inference/capi/c_api.h +++ b/paddle/fluid/inference/capi/c_api.h @@ -104,7 +104,7 @@ PADDLE_CAPI_EXPORT extern bool PD_PredictorRun(const PD_AnalysisConfig* config, PADDLE_CAPI_EXPORT extern bool PD_PredictorZeroCopyRun( const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size, - PD_ZeroCopyData** output, int** out_size); + PD_ZeroCopyData** output, int* out_size); // AnalysisConfig enum Precision { kFloat32 = 0, kInt8, kHalf }; diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 51f8237c95afa6ab8bc151f85401944799da9e3a..d4d5e9c35f9ee5e06af8ea73e624b33d3c1a2a7a 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -14,6 +14,7 @@ #include #include +#include #include #include #include "paddle/fluid/inference/capi/c_api.h" @@ -23,12 +24,61 @@ using paddle::ConvertToPaddleDType; using paddle::ConvertToPDDataType; using paddle::ConvertToACPrecision; -extern "C" { +namespace { +#define _DataTypeHelper_(CALLBACK, CPP_TYPE, PD_TYPE) \ + CALLBACK(CPP_TYPE, PD_DataType::PD_TYPE); + +#define _DataType_(CALLBACK) \ + _DataTypeHelper_(CALLBACK, float, PD_FLOAT32); \ + _DataTypeHelper_(CALLBACK, int32_t, PD_INT32); \ + _DataTypeHelper_(CALLBACK, int64_t, PD_INT64); \ + _DataTypeHelper_(CALLBACK, uint8_t, PD_UINT8); + +template +inline void VisitDataType(PD_DataType type, Visitor visitor) { +#define VisitDataTypeCallback(CPP_TYPE, PD_TYPE) \ + do { \ + if (type == PD_TYPE) { \ + visitor.template apply(); \ + return; \ + } \ + } while (0) + + _DataType_(VisitDataTypeCallback); +#undef VisitDataTypeCallback + PADDLE_THROW_ERROR("Unsupported data type. "); +} + +struct PD_ZeroCopyFunctor { + PD_ZeroCopyData* output_i; + paddle::ZeroCopyTensor* output_t; + PD_ZeroCopyFunctor(PD_ZeroCopyData* output_i_, + paddle::ZeroCopyTensor* output_t_) + : output_i(output_i_), output_t(output_t_) {} + + template + void apply() { + std::vector out_data; + int out_num = + std::accumulate(output_i->shape, output_i->shape + output_i->shape_size, + 1, std::multiplies()); + out_data.resize(out_num); + output_t->copy_to_cpu(out_data.data()); + output_i->data = reinterpret_cast(malloc(out_num * sizeof(OutT))); + memmove(static_cast(output_i->data), out_data.data(), + out_num * sizeof(OutT)); + } +}; + +} // namespace + +extern "C" { bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, int in_size, PD_Tensor** output_data, int* out_size, int batch_size) { PADDLE_ENFORCE_NOT_NULL(config); + VLOG(3) << "Predoctor: PD_PredictorRun. "; static std::map> predictors; if (!predictors.count(config->config.model_dir())) { @@ -41,6 +91,7 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, in.emplace_back(inputs->tensor); } std::vector out; + VLOG(3) << "Run predictor in CAPI encapsulation. "; if (predictor->Run(in, &out, batch_size)) { int osize = out.size(); *output_data = new PD_Tensor[osize]; @@ -55,9 +106,15 @@ bool PD_PredictorRun(const PD_AnalysisConfig* config, PD_Tensor* inputs, bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, PD_ZeroCopyData* inputs, int in_size, - PD_ZeroCopyData** output, int** out_size) { + PD_ZeroCopyData** output, int* out_size) { PADDLE_ENFORCE_NOT_NULL(config); - auto predictor = paddle::CreatePaddlePredictor(config->config); + static std::map> + predictors; + if (!predictors.count(config->config.model_dir())) { + predictors[config->config.model_dir()] = + paddle::CreatePaddlePredictor(config->config); + } + auto& predictor = predictors[config->config.model_dir()]; auto input_names = predictor->GetInputNames(); VLOG(3) << "The inputs' size is " << input_names.size(); PADDLE_ENFORCE_EQ( @@ -87,13 +144,14 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, break; } } + VLOG(3) << "Run ZeroCopyRun() in CAPI encapsulation. "; CHECK(predictor->ZeroCopyRun()); auto output_names = predictor->GetOutputNames(); int osize = output_names.size(); - *out_size = &osize; + *out_size = osize; *output = new PD_ZeroCopyData[osize]; VLOG(3) << "The output size is " << osize; - for (int i = 0; i < osize; ++i) { + for (int i = 0; i < *out_size; ++i) { auto& output_i = (*output)[i]; output_i.name = new char[output_names[i].length() + 1]; snprintf(output_i.name, output_names[i].length() + 1, "%s", @@ -102,45 +160,11 @@ bool PD_PredictorZeroCopyRun(const PD_AnalysisConfig* config, output_i.dtype = ConvertToPDDataType(output_t->type()); std::vector output_shape = output_t->shape(); output_i.shape = new int[output_shape.size()]; - output_i.shape = output_shape.data(); + memmove(output_i.shape, output_shape.data(), + output_shape.size() * sizeof(int)); output_i.shape_size = output_shape.size(); - switch (output_i.dtype) { - case PD_FLOAT32: { - std::vector out_data; - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), - 1, std::multiplies()); - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - output_i.data = static_cast(out_data.data()); - } break; - case PD_INT32: { - std::vector out_data; - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), - 1, std::multiplies()); - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - output_i.data = static_cast(out_data.data()); - } break; - case PD_INT64: { - std::vector out_data; - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), - 1, std::multiplies()); - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - output_i.data = static_cast(out_data.data()); - } break; - case PD_UINT8: { - std::vector out_data; - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), - 1, std::multiplies()); - out_data.resize(out_num); - output_t->copy_to_cpu(out_data.data()); - output_i.data = static_cast(out_data.data()); - } break; - default: - CHECK(false) << "Unsupport data type."; - break; - } + VisitDataType(output_i.dtype, + PD_ZeroCopyFunctor(&output_i, std::move(output_t.get()))); } return true; } diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 715350c52f73946cd06957e9151a0b123c443dcf..b4fc327d6794d2881e75b23c206522bb09c63f36 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -313,13 +313,9 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) endif() -set(CAPI_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/capi_tests_models") -if (NOT EXISTS ${CAPI_MODEL_INSTALL_DIR}) - inference_download_and_uncompress(${CAPI_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz") -endif() inference_analysis_test(test_analyzer_capi SRCS analyzer_capi_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c - ARGS --infer_model=${CAPI_MODEL_INSTALL_DIR}/trt_inference_test_models) + ARGS --infer_model=${RESNET50_MODEL_DIR}/model) inference_analysis_test(test_analyzer_capi_pd_tensor SRCS analyzer_capi_pd_tensor_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc index 58d0053662e2c72e5e26a3ad1422c1c0d6e6e7ef..06d56ec6c97c9ad3c7f6f63bd6489db734e3335e 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc @@ -18,7 +18,6 @@ limitations under the License. */ #include #include #include -#include #include #include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" @@ -27,7 +26,6 @@ namespace paddle { namespace inference { namespace analysis { -template void zero_copy_run() { std::string model_dir = FLAGS_infer_model; PD_AnalysisConfig *config = PD_NewAnalysisConfig(); @@ -46,26 +44,15 @@ void zero_copy_run() { const int channels = 3; const int height = 224; const int width = 224; - T input[batch_size * channels * height * width] = {0}; + float input[batch_size * channels * height * width] = {0}; int shape[4] = {batch_size, channels, height, width}; int shape_size = 4; int in_size = 2; - int *out_size; + int out_size; PD_ZeroCopyData *inputs = new PD_ZeroCopyData[2]; - PD_ZeroCopyData *outputs = new PD_ZeroCopyData; + PD_ZeroCopyData *outputs = nullptr; inputs[0].data = static_cast(input); - std::string nm = typeid(T).name(); - if ("f" == nm) { - inputs[0].dtype = PD_FLOAT32; - } else if ("i" == nm) { - inputs[0].dtype = PD_INT32; - } else if ("x" == nm) { - inputs[0].dtype = PD_INT64; - } else if ("h" == nm) { - inputs[0].dtype = PD_UINT8; - } else { - CHECK(false) << "Unsupport dtype. "; - } + inputs[0].dtype = PD_FLOAT32; inputs[0].name = new char[6]; inputs[0].name[0] = 'i'; inputs[0].name[1] = 'm'; @@ -94,15 +81,24 @@ void zero_copy_run() { PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); + LOG(INFO) << "output size is: " << out_size; LOG(INFO) << outputs[0].name; - LOG(INFO) << outputs[0].shape_size; + for (int j = 0; j < out_size; ++j) { + LOG(INFO) << "output[" << j + << "]'s shape_size is: " << outputs[j].shape_size; + for (int i = 0; i < outputs[0].shape_size; ++i) { + LOG(INFO) << "output[" << j << "]'s shape is: " << outputs[j].shape[i]; + } + LOG(INFO) << "output[" << j + << "]'s DATA is: " << *(static_cast(outputs[j].data)); + } + delete[] outputs; + delete[] inputs; } -TEST(PD_ZeroCopyRun, zero_copy_run) { - // zero_copy_run(); - // zero_copy_run(); - zero_copy_run(); -} +#ifdef PADDLE_WITH_MKLDNN +TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); } +#endif } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc index 7aa85cec14eac152db99a791466a4f4ee4207ae1..ee4725f1c551d06867562fa7f50cc2979569e0ec 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc @@ -18,7 +18,6 @@ limitations under the License. */ #include #include #include -#include #include #include "paddle/fluid/inference/capi/c_api.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" @@ -27,16 +26,17 @@ namespace paddle { namespace inference { namespace analysis { -template void zero_copy_run() { - std::string model_dir = FLAGS_infer_model + "/mobilenet"; + std::string model_dir = FLAGS_infer_model; + std::string prog_file = model_dir + "/model"; + std::string params_file = model_dir + "/params"; PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_DisableGpu(config); PD_SetCpuMathLibraryNumThreads(config, 10); PD_SwitchUseFeedFetchOps(config, false); PD_SwitchSpecifyInputNames(config, true); PD_SwitchIrDebug(config, true); - PD_SetModel(config, model_dir.c_str()); //, params_file1.c_str()); + PD_SetModel(config, prog_file.c_str(), params_file.c_str()); bool use_feed_fetch = PD_UseFeedFetchOpsEnabled(config); CHECK(!use_feed_fetch) << "NO"; bool specify_input_names = PD_SpecifyInputName(config); @@ -44,44 +44,40 @@ void zero_copy_run() { const int batch_size = 1; const int channels = 3; - const int height = 224; - const int width = 224; - T input[batch_size * channels * height * width] = {0}; + const int height = 318; + const int width = 318; + float input[batch_size * channels * height * width] = {0}; int shape[4] = {batch_size, channels, height, width}; int shape_size = 4; int in_size = 1; - int *out_size; + int out_size; PD_ZeroCopyData *inputs = new PD_ZeroCopyData; PD_ZeroCopyData *outputs = new PD_ZeroCopyData; inputs->data = static_cast(input); - std::string nm = typeid(T).name(); - if ("f" == nm) { - inputs->dtype = PD_FLOAT32; - } else if ("i" == nm) { - inputs->dtype = PD_INT32; - } else if ("x" == nm) { - inputs->dtype = PD_INT64; - } else if ("h" == nm) { - inputs->dtype = PD_UINT8; - } else { - CHECK(false) << "Unsupport dtype. "; - } - inputs->name = new char[2]; - inputs->name[0] = 'x'; - inputs->name[1] = '\0'; - LOG(INFO) << inputs->name; + inputs->dtype = PD_FLOAT32; + inputs->name = new char[5]; + inputs->name[0] = 'd'; + inputs->name[1] = 'a'; + inputs->name[2] = 't'; + inputs->name[3] = 'a'; + inputs->name[4] = '\0'; inputs->shape = shape; inputs->shape_size = shape_size; PD_PredictorZeroCopyRun(config, inputs, in_size, &outputs, &out_size); + + delete[] inputs; + delete[] outputs; } -TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); } +TEST(PD_ZeroCopyRun, zero_copy_run) { zero_copy_run(); } #ifdef PADDLE_WITH_MKLDNN TEST(PD_AnalysisConfig, profile_mkldnn) { - std::string model_dir = FLAGS_infer_model + "/mobilenet"; + std::string model_dir = FLAGS_infer_model; + std::string prog_file = model_dir + "/model"; + std::string params_file = model_dir + "/params"; PD_AnalysisConfig *config = PD_NewAnalysisConfig(); PD_DisableGpu(config); PD_SetCpuMathLibraryNumThreads(config, 10); @@ -95,7 +91,7 @@ TEST(PD_AnalysisConfig, profile_mkldnn) { bool quantizer_enable = PD_MkldnnQuantizerEnabled(config); CHECK(quantizer_enable) << "NO"; PD_SetMkldnnCacheCapacity(config, 0); - PD_SetModel(config, model_dir.c_str()); + PD_SetModel(config, prog_file.c_str(), params_file.c_str()); PD_EnableAnakinEngine(config); bool anakin_enable = PD_AnakinEngineEnabled(config); LOG(INFO) << anakin_enable;