// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_compatible_info.h" #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/api/details/reset_tensor_array.h" #include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/fluid/string/printf.h" #include "onnxruntime_c_api.h" // NOLINT #include "onnxruntime_cxx_api.h" // NOLINT #include "paddle2onnx/converter.h" #ifdef PADDLE_WITH_TESTING #include #include #endif /// /// \file onnxruntime_predictor.h /// /// \brief A predictor using ONNXRuntime /// /// \author heliqi@baidu.com /// \date 2022-02-14 /// \since 2.3.0 /// namespace paddle { bool CheckConvertToONNX(const AnalysisConfig &config); struct ONNXDesc { std::string name; std::vector shape; ONNXTensorElementDataType dtype; }; /// /// \class ONNXRuntimePredictor /// /// \brief The ONNXRuntimePredictor using ONNXRuntime for inference /// /// The predictor has the following typical uses: /// /// Get predictor /// \code{cpp} /// auto predictor = CreatePaddlePredictor(config); /// \endcode /// /// Get input or output names /// \code{cpp} /// auto input_names = predictor->GetInputNames(); /// auto output_names = predictor->GetOutputNames(); /// \endcode /// /// Get input or output tensors /// \code{cpp} /// auto input_t = predictor->GetInputTensor(input_names[0]); /// auto output_t = predictor->GetOutputTensor(output_names[0]); /// \endcode /// /// Run predictor /// \code{cpp} /// predictor->ZeroCopyRun(); /// \endcode /// class ONNXRuntimePredictor : public PaddlePredictor { public: /// /// \brief Construct a new ONNXRuntime Predictor object /// /// \param[in] AnalysisConfig config /// explicit ONNXRuntimePredictor(const AnalysisConfig &config) : config_(config) { predictor_id_ = inference::GetUniqueId(); env_ = Ort::Env(ORT_LOGGING_LEVEL_INFO, "onnx"); } /// /// \brief Destroy the ONNXRuntime Predictor object /// ~ONNXRuntimePredictor(); /// /// \brief Initialize predictor /// /// \return Whether the init function executed successfully /// bool Init(); /// /// \brief Get the input names /// /// \return input names /// std::vector GetInputNames(); /// /// \brief Get the output names /// /// \return output names /// std::vector GetOutputNames(); /// /// \brief Get the Input Tensor object /// /// \param[in] name input name /// \return input tensor /// std::unique_ptr GetInputTensor( const std::string &name) override; /// /// \brief Get the Output Tensor object /// /// \param[in] name otuput name /// \return output tensor /// std::unique_ptr GetOutputTensor( const std::string &name) override; /// /// \brief Get all input names and their corresponding shapes /// /// \return the map of input names and shapes /// std::map> GetInputTensorShape() override; /// Not supoort bool Run(const std::vector &inputs, std::vector *output_data, int batch_size = -1) override; /// /// \brief Run the prediction engine /// /// \return Whether the function executed successfully /// bool ZeroCopyRun() override; /// /// \brief Release all tmp tensor to compress the size of the memory pool. /// The memory pool is considered to be composed of a list of chunks, if /// the chunk is not occupied, it can be released. /// /// \return Number of bytes released. It may be smaller than the actual /// released memory, because part of the memory is not managed by the /// MemoryPool. /// uint64_t TryShrinkMemory() override; /// /// \brief Clone to get the new predictor. thread safe. /// /// \return get a new predictor /// std::unique_ptr Clone() override; std::shared_ptr scope_; private: /// /// \brief get the Ort Value(input Tensor). /// /// \param[in] desc ONNXDesce(name、shape、dtype) /// /// \param[in] device_name "cpu" or "gpu" of device /// /// \return get a Ort::Value /// Ort::Value GetOrtValue(const ONNXDesc &desc, const char *device_name); /// /// \brief Ort::Value to Paddle::ZeroCopyTensor. /// /// \param[in] value Ort::Value(output Tensor) /// /// \param[in] desc a ONNXDesce(name、shape、dtype) /// /// \return get a Ort::Value /// void AsTensor(const Ort::Value &value, const ONNXDesc &desc); private: AnalysisConfig config_; // ONNXRuntime Ort::Env env_; Ort::Session session_{nullptr}; platform::Place place_; framework::Scope *sub_scope_{nullptr}; std::vector input_desc_; std::vector output_desc_; int predictor_id_; // Some more detailed tests, they are made the friends of the predictor, so that // the all the details can be tested. #if PADDLE_WITH_TESTING FRIEND_TEST(ONNXRuntimePredictor, onnxruntime_on); #endif }; } // namespace paddle