// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <NvInfer.h>
#include <NvInferRuntime.h>
#include "paddle/infrt/backends/tensorrt/trt_options.h"
#include "paddle/infrt/backends/tensorrt/trt_utils.h"
#include "paddle/phi/backends/dynload/tensorrt.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/dense_tensor.h"

namespace infrt {
namespace backends {
namespace tensorrt {
using namespace nvinfer1;  // NOLINT

// The TensorRT programming model is as follows:
// 1. Create a builder:
// IBuilder* builder = createInferBuilder(logger);
// 2. Create a network definition:
// INetworkDefinition* network = builder->createNetworkV2(...);
// 3. Populate the network with layers:
// network->addInput(...); network->addActivation(...); ...
// 4. Configure the builder:
// IBuilderConfig* config = builder->createBuilderConfig();
// config->setMaxWorkspaceSize(...);
// 5. Build a serialized plan and deserialize it into an engine:
// IHostMemory* serialized_model = builder->buildSerializedNetwork(...);
// IRuntime* runtime = createInferRuntime(logger);
// ICudaEngine* engine = runtime->deserializeCudaEngine(...);
// 6. Create an execution context:
// IExecutionContext* exec_context = engine->createExecutionContext();
// 7. Set up the input and output buffers:
// int32_t input_index = engine->getBindingIndex("input");
// int32_t output_index = engine->getBindingIndex("output");
// void* buffers[2];
// buffers[input_index] = input_buffer;
// buffers[output_index] = output_buffer;
// 8. Perform inference:
// exec_context->enqueueV2(buffers, stream, nullptr);
//
// We have encapsulated this logic; use the following programming model
// instead:
//
// TrtEngine trt_engine;
// trt_engine.Build(...);
// trt_engine.SetUpInference(...);
// trt_engine.Run(...);
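//
// A fuller usage sketch (illustrative only: the `network` object, the
// `gpu_ctx` context, and the tensor names below are assumptions, not part
// of this header):
//
// TrtEngine trt_engine(/*device_id=*/0);
//
// BuildOptions build_options;
// trt_engine.Build(std::move(network), build_options);
//
// InferenceOptions inference_options;
// ::phi::DenseTensor input_tensor;  // filled with input data beforehand
// std::unordered_map<std::string, ::phi::DenseTensor*> inputs;
// inputs["input"] = &input_tensor;
// trt_engine.PrepareOutputHandle("output_0");
// trt_engine.SetUpInference(inference_options, inputs);
//
// trt_engine.Run(gpu_ctx);  // gpu_ctx is a ::phi::GPUContext
// ::phi::DenseTensor* output = trt_engine.GetOutput("output_0");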
class TrtEngine {
 public:
  explicit TrtEngine(int device_id = 0);

  TrtEngine(const TrtEngine&) = delete;
  TrtEngine& operator=(const TrtEngine&) = delete;
  TrtEngine(TrtEngine&&) = default;
  TrtEngine& operator=(TrtEngine&&) = default;

  nvinfer1::IBuilder* GetTrtBuilder();

  // TODO(wilber): Modify signature after infrt-trt ready.
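  // Build an engine from `network` according to `build_options`; the
  // resulting engine is owned by this object.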
  void Build(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
             const BuildOptions& build_options);

  // TODO(wilber): Modify signature after infrt-trt ready.
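  // Run inference on the stream of `ctx`. Internally this dispatches to
  // StaticRun or DynamicRun, depending on whether the engine was built
  // with dynamic shapes.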
  void Run(const ::phi::GPUContext& ctx);

  // TODO(wilber): How to support multiple execution contexts?
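  // Create the execution context and bind the given input tensors. Call
  // this after Build() and before Run().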
  bool SetUpInference(
      const InferenceOptions& inference,
      const std::unordered_map<std::string, ::phi::DenseTensor*>& inputs);

  void GetEngineInfo();

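  // Create a DenseTensor for the output binding named `out_name`; the
  // result can be fetched later with GetOutput().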
  void PrepareOutputHandle(const std::string& out_name);

  // TODO(wilber): The output tensor names are: output_0, output_1, ...
  ::phi::DenseTensor* GetOutput(const std::string&);

  size_t GetOutputNum() const;

 private:
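  // Make `device_id_` the active CUDA device for the calling thread.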
  void FreshDeviceId();

  bool SetupNetworkAndConfig(const BuildOptions& build,
                             INetworkDefinition& network,  // NOLINT
                             IBuilderConfig& config);      // NOLINT

  bool NetworkToEngine(const BuildOptions& build);

  bool ModelToBuildEnv(TrtUniquePtr<nvinfer1::INetworkDefinition> network,
                       const BuildOptions& build);

  void StaticRun(const ::phi::GPUContext& ctx);

  void DynamicRun(const ::phi::GPUContext& ctx);

 private:
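  // TensorRT objects, listed in creation order: logger -> builder ->
  // network -> serialized plan -> engine -> execution contexts and their
  // bindings.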
  std::unique_ptr<TrtLogger> logger_{nullptr};
  TrtUniquePtr<nvinfer1::IBuilder> builder_{nullptr};
  TrtUniquePtr<INetworkDefinition> network_{nullptr};
  std::unique_ptr<IHostMemory> serialized_engine_{nullptr};
  TrtUniquePtr<nvinfer1::ICudaEngine> engine_{nullptr};
  std::vector<TrtUniquePtr<nvinfer1::IExecutionContext>> contexts_;
  std::vector<std::unique_ptr<Bindings>> bindings_;
  int device_id_{0};
  bool is_dynamic_shape_{false};
  std::unordered_map<std::string, ::phi::DenseTensor> outputs_;
};

}  // namespace tensorrt
}  // namespace backends
}  // namespace infrt