From 68e0560c2f607f59f579d6bdb52c37bce5cfa8a2 Mon Sep 17 00:00:00 2001 From: Wilber Date: Fri, 28 Aug 2020 20:01:50 +0800 Subject: [PATCH] refine paddle inference api (#26774) * refine paddle inference api Co-authored-by: nhzlx --- paddle/fluid/inference/api/analysis_config.cc | 5 +- .../fluid/inference/api/analysis_predictor.cc | 122 +++++++++++++++++- paddle/fluid/inference/api/api.cc | 6 + paddle/fluid/inference/api/api_impl.cc | 4 + paddle/fluid/inference/api/paddle_api.h | 6 +- .../inference/api/paddle_inference_api.h | 115 +++++++++++++++++ .../fluid/inference/tests/api/CMakeLists.txt | 6 + .../inference/tests/api/lite_resnet50_test.cc | 56 ++++++++ .../tests/api/paddle_infer_api_test.cc | 95 ++++++++++++++ .../inference/tests/api/trt_mobilenet_test.cc | 26 +++- paddle/fluid/pybind/inference_api.cc | 4 +- 11 files changed, 435 insertions(+), 10 deletions(-) create mode 100644 paddle/fluid/inference/tests/api/paddle_infer_api_test.cc diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index a1c1e6de5fd..9fbc97d5509 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -15,7 +15,6 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gpu_info.h" @@ -103,8 +102,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { // params_file_ fields. CP_MEMBER(opt_cache_dir_); - prog_file_ = std::move(other.prog_file_); - params_file_ = std::move(other.params_file_); + CP_MEMBER(prog_file_); + CP_MEMBER(params_file_); CP_MEMBER(use_fc_padding_); // GPU related. 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index a8c8058c6b7..127a41aee89 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -32,7 +32,6 @@ #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h" #include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h" #include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/memory/memcpy.h" @@ -517,6 +516,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() { template <> std::unique_ptr CreatePaddlePredictor< AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { + // TODO(NHZlX): Should add the link to the doc of + // paddle_infer::CreatePredictor if (config.glog_info_disabled()) { FLAGS_logtostderr = 1; FLAGS_minloglevel = 2; // GLOG_ERROR @@ -1058,3 +1059,122 @@ USE_TRT_CONVERTER(skip_layernorm); USE_TRT_CONVERTER(slice); USE_TRT_CONVERTER(scale); #endif + +namespace paddle_infer { + +void Tensor::Reshape(const std::vector &shape) { tensor_->Reshape(shape); } + +std::vector Tensor::shape() const { return tensor_->shape(); } + +void Tensor::SetLoD(const std::vector> &x) { + return tensor_->SetLoD(x); +} + +std::vector> Tensor::lod() const { return tensor_->lod(); } + +const std::string &Tensor::name() const { return tensor_->name(); } + +DataType Tensor::type() const { return tensor_->type(); } + +Predictor::Predictor(const Config &config) { + const_cast(&config)->SwitchUseFeedFetchOps(false); + // The second parameter indicates that the discard log is not printed + predictor_ = paddle::CreatePaddlePredictor< + Config, paddle::PaddleEngineKind::kAnalysis>(config); +} + +std::vector Predictor::GetInputNames() { + return predictor_->GetInputNames(); +} + +std::unique_ptr 
Predictor::GetInputHandle(const std::string &name) { + auto zero_copy_tensor = predictor_->GetInputTensor(name); + std::unique_ptr tensor(new Tensor(std::move(zero_copy_tensor))); + return tensor; +} + +std::vector Predictor::GetOutputNames() { + return predictor_->GetOutputNames(); +} + +std::unique_ptr Predictor::GetOutputHandle(const std::string &name) { + auto zero_copy_tensor = predictor_->GetOutputTensor(name); + std::unique_ptr tensor(new Tensor(std::move(zero_copy_tensor))); + return tensor; +} + +bool Predictor::Run() { return predictor_->ZeroCopyRun(); } + +std::unique_ptr Predictor::Clone() { + auto analysis_pred = predictor_->Clone(); + std::unique_ptr pred(new Predictor(std::move(analysis_pred))); + return pred; +} + +void Predictor::ClearIntermediateTensor() { + predictor_->ClearIntermediateTensor(); +} + +int GetNumBytesOfDataType(DataType dtype) { + switch (dtype) { + case DataType::FLOAT32: + return sizeof(float); + case DataType::INT64: + return sizeof(int64_t); + case DataType::INT32: + return sizeof(int32_t); + case DataType::UINT8: + return sizeof(uint8_t); + default: + assert(false); + return -1; + } +} + +std::string GetVersion() { return paddle::get_version(); } + +std::string UpdateDllFlag(const char *name, const char *value) { + return paddle::UpdateDllFlag(name, value); +} + +} // namespace paddle_infer + +namespace paddle_infer { +std::shared_ptr CreatePredictor(const Config &config) { // NOLINT + std::shared_ptr predictor(new Predictor(config)); + return predictor; +} + +namespace services { +PredictorPool::PredictorPool(const Config &config, size_t size) { + PADDLE_ENFORCE_GE( + size, 1UL, + paddle::platform::errors::InvalidArgument( + "The predictor pool size should be at least 1, but it's (%d)", + size)); + Config copy_config(config); + main_pred_.reset(new Predictor(config)); + for (size_t i = 0; i < size - 1; i++) { + if (config.tensorrt_engine_enabled()) { + Config config_tmp(copy_config); + preds_.push_back( 
std::move(std::unique_ptr(new Predictor(config_tmp)))); + } else { + preds_.push_back(std::move(main_pred_->Clone())); + } + } +} + +Predictor *PredictorPool::Retrive(size_t idx) { + PADDLE_ENFORCE_LT( + idx, preds_.size() + 1, + paddle::platform::errors::InvalidArgument( + "There are (%d) predictors in the pool, but the idx is (%d)", + preds_.size() + 1, idx)); + if (idx == 0) { + return main_pred_.get(); + } + return preds_[idx - 1].get(); +} +} // namespace services +} // namespace paddle_infer diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 458eecfeea6..2f608da531f 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -112,6 +112,12 @@ void PaddleBuf::Free() { } } +NativeConfig::NativeConfig() { + LOG(WARNING) << "The paddle::NativeConfig interface is going to be " + "deprecated in the next release, please use the latest " + "paddle_infer::Config instead."; +} + std::string get_version() { std::stringstream ss; ss << "version: " << framework::paddle_version() << "\n"; diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 3d5b40c93da..07d6dcf86e9 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ limitations under the License. 
*/ #include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/api/details/reset_tensor_array.h" #include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/profiler.h" @@ -311,6 +313,8 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, template <> std::unique_ptr CreatePaddlePredictor< NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) { + // TODO(NHZlX): Should add the link to the doc of + // paddle_infer::CreatePredictor VLOG(3) << "create NativePaddlePredictor"; if (config.use_gpu) { // 1. GPU memory diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 386d20103a7..064f6354268 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -347,6 +347,7 @@ class PD_INFER_DECL PaddlePredictor { /// place of inference, etc.) /// struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config { + NativeConfig(); /// GPU related fields. 
bool use_gpu{false}; int device{0}; @@ -421,7 +422,8 @@ enum class PaddleEngineKind { }; template -std::unique_ptr CreatePaddlePredictor(const ConfigT& config); +PD_INFER_DECL std::unique_ptr CreatePaddlePredictor( + const ConfigT& config); template <> PD_INFER_DECL std::unique_ptr CreatePaddlePredictor< @@ -437,6 +439,4 @@ PD_INFER_DECL std::string get_version(); PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value); -PD_INFER_DECL std::shared_ptr MakeCipher( - const std::string& config_file); } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 6f30ad95f16..da5d7411693 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -22,9 +22,124 @@ limitations under the License. */ #pragma once #include +#include #include #include +#include #include #include "paddle_analysis_config.h" // NOLINT #include "paddle_api.h" // NOLINT + +namespace paddle_infer { +using DataType = paddle::PaddleDType; +using PlaceType = paddle::PaddlePlace; +using PrecisionType = paddle::AnalysisConfig::Precision; +using Config = paddle::AnalysisConfig; + +class PD_INFER_DECL Tensor { + public: + // Can only be created by predictor->GetInputHandle(const std::string& name) + // or predictor->GetOutputHandle(const std::string& name) + Tensor() = delete; + explicit Tensor(std::unique_ptr&& tensor) + : tensor_(std::move(tensor)) {} + void Reshape(const std::vector& shape); + + template + void CopyFromCpu(const T* data); + + // should add the place + template + T* mutable_data(PlaceType place); + + template + void CopyToCpu(T* data); + + template + T* data(PlaceType* place, int* size) const; + + void SetLoD(const std::vector>& x); + std::vector> lod() const; + + DataType type() const; + + std::vector shape() const; + const std::string& name() const; + + private: + std::unique_ptr tensor_; +}; + +class PD_INFER_DECL Predictor 
{ + public: + Predictor() = default; + ~Predictor() {} + // Use for clone + explicit Predictor(std::unique_ptr&& pred) + : predictor_(std::move(pred)) {} + + explicit Predictor(const Config& config); + + std::vector GetInputNames(); + std::unique_ptr GetInputHandle(const std::string& name); + + bool Run(); + + std::vector GetOutputNames(); + std::unique_ptr GetOutputHandle(const std::string& name); + + std::unique_ptr Clone(); + void ClearIntermediateTensor(); + + private: + std::unique_ptr predictor_; +}; + +PD_INFER_DECL std::shared_ptr CreatePredictor( + const Config& config); // NOLINT +PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype); + +PD_INFER_DECL std::string GetVersion(); +PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value); + +template +void Tensor::CopyFromCpu(const T* data) { + tensor_->copy_from_cpu(data); +} + +template +void Tensor::CopyToCpu(T* data) { + return tensor_->copy_to_cpu(data); +} + +template +T* Tensor::mutable_data(PlaceType place) { + return tensor_->mutable_data(place); +} + +template +T* Tensor::data(PlaceType* place, int* size) const { + return tensor_->data(place, size); +} + +} // namespace paddle_infer + +namespace paddle_infer { +namespace services { + +class PD_INFER_DECL PredictorPool { + public: + PredictorPool() = delete; + PredictorPool(const PredictorPool&) = delete; + PredictorPool& operator=(const PredictorPool&) = delete; + + explicit PredictorPool(const Config& config, size_t size = 1); + Predictor* Retrive(size_t idx); + + private: + std::shared_ptr main_pred_; + std::vector> preds_; +}; +} // namespace services +} // namespace paddle_infer diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 814deda6729..2bd30bc0517 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -515,3 +515,9 @@ if(WITH_MKLDNN) inference_analysis_test(test_analyzer_capi_ner SRCS 
analyzer_capi_ner_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model) + +if(WITH_GPU) + inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc + EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} + ARGS --infer_model=${RESNET50_MODEL_DIR}) +endif() diff --git a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc index 5840a4c42b3..31701c59ec3 100644 --- a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc @@ -72,3 +72,59 @@ TEST(AnalysisPredictor, use_gpu) { } // namespace inference } // namespace paddle + +namespace paddle_infer { + +TEST(Predictor, use_gpu) { + std::string model_dir = FLAGS_infer_model + "/" + "model"; + Config config; + config.EnableUseGpu(100, 0); + config.SetModel(model_dir + "/model", model_dir + "/params"); + config.EnableLiteEngine(PrecisionType::kFloat32); + + auto predictor = CreatePredictor(config); + const int batch = 1; + const int channel = 3; + const int height = 318; + const int width = 318; + const int input_num = batch * channel * height * width; + std::vector input(input_num, 1); + + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputHandle(input_names[0]); + + input_t->Reshape({1, 3, 318, 318}); + input_t->CopyFromCpu(input.data()); + predictor->Run(); + + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputHandle(output_names[0]); + std::vector output_shape = output_t->shape(); + size_t out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + + std::vector out_data; + out_data.resize(out_num); + output_t->CopyToCpu(out_data.data()); + + const std::vector truth_values = { + 127.780396f, 738.16656f, 1013.2264f, -438.17206f, 366.4022f, + 927.66187f, 736.2241f, -633.68567f, -329.92737f, -430.15637f, + -633.0639f, -146.54858f, -1324.2804f, 
-1349.3661f, -242.67671f, + 117.44864f, -801.7251f, -391.51495f, -404.8202f, 454.16132f, + 515.48206f, -133.03114f, 69.293076f, 590.09753f, -1434.6917f, + -1070.8903f, 307.0744f, 400.52573f, -316.12177f, -587.1265f, + -161.05742f, 800.3663f, -96.47157f, 748.708f, 868.17645f, + -447.9403f, 112.73656f, 1127.1992f, 47.43518f, 677.7219f, + 593.1881f, -336.4011f, 551.3634f, 397.82474f, 78.39835f, + -715.4006f, 405.96988f, 404.25684f, 246.01978f, -8.430191f, + 131.36617f, -648.0528f}; + + float* data_o = out_data.data(); + for (size_t j = 0; j < out_num; j += 10) { + EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0., + 10e-5); + } +} + +} // namespace paddle_infer diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc new file mode 100644 index 00000000000..fee7c35581d --- /dev/null +++ b/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/inference/tests/api/trt_test_helper.h" + +namespace paddle_infer { + +TEST(Predictor, use_gpu) { + LOG(INFO) << GetVersion(); + UpdateDllFlag("conv_workspace_size_limit", "4000"); + std::string model_dir = FLAGS_infer_model + "/model"; + Config config; + config.SetModel(model_dir + "/model", model_dir + "/params"); + config.EnableUseGpu(100, 0); + + auto predictor = CreatePredictor(config); + auto pred_clone = predictor->Clone(); + + std::vector in_shape = {1, 3, 318, 318}; + int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1, + [](const int &a, const int &b) { return a * b; }); + + std::vector input(in_num, 0); + + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputHandle(input_names[0]); + + input_t->Reshape(in_shape); + input_t->CopyFromCpu(input.data()); + predictor->Run(); + + auto output_names = predictor->GetOutputNames(); + auto output_t = predictor->GetOutputHandle(output_names[0]); + std::vector output_shape = output_t->shape(); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); + + std::vector out_data; + out_data.resize(out_num); + output_t->CopyToCpu(out_data.data()); + predictor->ClearIntermediateTensor(); +} + +TEST(PredictorPool, basic) { + LOG(INFO) << GetVersion(); + UpdateDllFlag("conv_workspace_size_limit", "4000"); + std::string model_dir = FLAGS_infer_model + "/model"; + Config config; + config.SetModel(model_dir + "/model", model_dir + "/params"); + config.EnableUseGpu(100, 0); + + services::PredictorPool pred_pool(config, 4); + auto pred = pred_pool.Retrive(2); + + std::vector in_shape = {1, 3, 318, 318}; + int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1, + [](const int &a, const int &b) { return a * b; }); + std::vector input(in_num, 0); + + auto in_names = pred->GetInputNames(); + auto input_t = pred->GetInputHandle(in_names[0]); + input_t->name(); + 
input_t->Reshape(in_shape); + input_t->CopyFromCpu(input.data()); + pred->Run(); + auto out_names = pred->GetOutputNames(); + auto output_t = pred->GetOutputHandle(out_names[0]); + auto out_type = output_t->type(); + LOG(INFO) << GetNumBytesOfDataType(out_type); + if (out_type == DataType::FLOAT32) { + PlaceType place; + int size; + output_t->data(&place, &size); + } +} + +} // namespace paddle_infer diff --git a/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc b/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc index 8ffa3efdf05..c7c7356b6e8 100644 --- a/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc +++ b/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc @@ -41,7 +41,7 @@ TEST(AnalysisPredictor, use_gpu) { SetFakeImageInput(&inputs_all, model_dir, false, "__model__", ""); std::vector outputs; - for (auto& input : inputs_all) { + for (auto &input : inputs_all) { ASSERT_TRUE(predictor->Run(input, &outputs)); predictor->ClearIntermediateTensor(); } @@ -49,3 +49,27 @@ TEST(AnalysisPredictor, use_gpu) { } // namespace inference } // namespace paddle + +namespace paddle_infer { +TEST(PredictorPool, use_gpu) { + std::string model_dir = FLAGS_infer_model + "/" + "mobilenet"; + Config config; + config.EnableUseGpu(100, 0); + config.SetModel(model_dir); + config.EnableTensorRtEngine(); + services::PredictorPool pred_pool(config, 1); + + auto predictor = pred_pool.Retrive(0); + auto input_names = predictor->GetInputNames(); + auto input_t = predictor->GetInputHandle(input_names[0]); + std::vector in_shape = {1, 3, 224, 224}; + int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1, + [](const int &a, const int &b) { return a * b; }); + + std::vector input(in_num, 0); + input_t->Reshape(in_shape); + input_t->CopyFromCpu(input.data()); + predictor->Run(); +} + +} // namespace paddle_infer diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index cf0dac022f7..040dd313f1c 100644 --- 
a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -206,9 +206,9 @@ void BindInferenceApi(py::module *m) { BindMkldnnQuantizerConfig(m); #endif m->def("create_paddle_predictor", - &paddle::CreatePaddlePredictor); + &paddle::CreatePaddlePredictor, py::arg("config")); m->def("create_paddle_predictor", - &paddle::CreatePaddlePredictor); + &paddle::CreatePaddlePredictor, py::arg("config")); m->def("paddle_dtype_size", &paddle::PaddleDtypeSize); m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes); } -- GitLab