未验证 提交 ae79a56b 编写于 作者: Z zlsh80826 提交者: GitHub

[Paddle-TRT] TensorRT8 void** compatibility (#33662)

* Add the `IS_TRT_VERSION_LT` helper macro for checking whether the TensorRT version is lower than a given version

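* TensorRT 8 requires the `outputs` parameter of plugin `enqueue` to be `void* const*` instead of `void**`; select the signature with `#if IS_TRT_VERSION_LT(8000)`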
上级 1def9e05
...@@ -166,7 +166,11 @@ int AnchorGeneratorPlugin::enqueue_impl(int batch_size, ...@@ -166,7 +166,11 @@ int AnchorGeneratorPlugin::enqueue_impl(int batch_size,
} }
int AnchorGeneratorPlugin::enqueue(int batch_size, const void* const* inputs, int AnchorGeneratorPlugin::enqueue(int batch_size, const void* const* inputs,
#if IS_TRT_VERSION_LT(8000)
void** outputs, void* workspace, void** outputs, void* workspace,
#else
void* const* outputs, void* workspace,
#endif
cudaStream_t stream) { cudaStream_t stream) {
return enqueue_impl<float>(batch_size, inputs, outputs, workspace, stream); return enqueue_impl<float>(batch_size, inputs, outputs, workspace, stream);
} }
......
...@@ -42,7 +42,11 @@ class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext { ...@@ -42,7 +42,11 @@ class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext {
bool supportsFormat(nvinfer1::DataType type, bool supportsFormat(nvinfer1::DataType type,
nvinfer1::TensorFormat format) const override; nvinfer1::TensorFormat format) const override;
size_t getWorkspaceSize(int max_batch_size) const override; size_t getWorkspaceSize(int max_batch_size) const override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
int initialize() override; int initialize() override;
void terminate() override; void terminate() override;
......
...@@ -122,7 +122,11 @@ int ElementWisePlugin::initialize() { ...@@ -122,7 +122,11 @@ int ElementWisePlugin::initialize() {
} }
int ElementWisePlugin::enqueue(int batch_size, const void *const *inputs, int ElementWisePlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, void **outputs, void *workspace,
#else
void *const *outputs, void *workspace,
#endif
cudaStream_t stream) { cudaStream_t stream) {
const float *x = reinterpret_cast<const float *>(inputs[0]); const float *x = reinterpret_cast<const float *>(inputs[0]);
const float *y = reinterpret_cast<const float *>(inputs[1]); const float *y = reinterpret_cast<const float *>(inputs[1]);
......
...@@ -58,8 +58,11 @@ class ElementWisePlugin : public PluginTensorRT { ...@@ -58,8 +58,11 @@ class ElementWisePlugin : public PluginTensorRT {
int initialize() override; int initialize() override;
// execute the layer #if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream); void* workspace, cudaStream_t stream);
protected: protected:
......
...@@ -100,7 +100,11 @@ __global__ void no_exact_gelu_kernel(const T a, const T b, const T c, int n, ...@@ -100,7 +100,11 @@ __global__ void no_exact_gelu_kernel(const T a, const T b, const T c, int n,
} }
int GeluPlugin::enqueue(int batch_size, const void* const* inputs, int GeluPlugin::enqueue(int batch_size, const void* const* inputs,
#if IS_TRT_VERSION_LT(8000)
void** outputs, void*, cudaStream_t stream) { void** outputs, void*, cudaStream_t stream) {
#else
void* const* outputs, void*, cudaStream_t stream) {
#endif
const auto& input_dims = this->getInputDims(0); const auto& input_dims = this->getInputDims(0);
int num = batch_size; int num = batch_size;
for (int i = 0; i < input_dims.nbDims; i++) { for (int i = 0; i < input_dims.nbDims; i++) {
......
...@@ -44,7 +44,11 @@ class GeluPlugin : public PluginTensorRT { ...@@ -44,7 +44,11 @@ class GeluPlugin : public PluginTensorRT {
nvinfer1::PluginFormat format) const override; nvinfer1::PluginFormat format) const override;
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nb_input_dims) override; int nb_input_dims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
protected: protected:
......
...@@ -59,7 +59,11 @@ __global__ void hard_swish_kernel(float threshold, float scale, float offset, ...@@ -59,7 +59,11 @@ __global__ void hard_swish_kernel(float threshold, float scale, float offset,
} }
int HardSwishPlugin::enqueue(int batch_size, const void* const* inputs, int HardSwishPlugin::enqueue(int batch_size, const void* const* inputs,
#if IS_TRT_VERSION_LT(8000)
void** outputs, void*, cudaStream_t stream) { void** outputs, void*, cudaStream_t stream) {
#else
void* const* outputs, void*, cudaStream_t stream) {
#endif
const auto& input_dims = this->getInputDims(0); const auto& input_dims = this->getInputDims(0);
int num = batch_size; int num = batch_size;
for (int i = 0; i < input_dims.nbDims; i++) { for (int i = 0; i < input_dims.nbDims; i++) {
......
...@@ -49,7 +49,11 @@ class HardSwishPlugin : public PluginTensorRT { ...@@ -49,7 +49,11 @@ class HardSwishPlugin : public PluginTensorRT {
int initialize() override { return 0; } int initialize() override { return 0; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nbInputDims) override; int nbInputDims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void* const* inputs, void** outputs, int enqueue(int batchSize, const void* const* inputs, void** outputs,
#else
int enqueue(int batchSize, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
protected: protected:
......
...@@ -59,7 +59,11 @@ nvinfer1::Dims InstanceNormPlugin::getOutputDimensions( ...@@ -59,7 +59,11 @@ nvinfer1::Dims InstanceNormPlugin::getOutputDimensions(
} }
int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs, int InstanceNormPlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, void **outputs, void *workspace,
#else
void *const *outputs, void *workspace,
#endif
cudaStream_t stream) { cudaStream_t stream) {
const auto &input_dims = this->getInputDims(0); const auto &input_dims = this->getInputDims(0);
......
...@@ -101,7 +101,11 @@ class InstanceNormPlugin : public PluginTensorRT { ...@@ -101,7 +101,11 @@ class InstanceNormPlugin : public PluginTensorRT {
int getNbOutputs() const override { return 1; } int getNbOutputs() const override { return 1; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs,
int nbInputDims) override; int nbInputDims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void *const *inputs, void **outputs, int enqueue(int batchSize, const void *const *inputs, void **outputs,
#else
int enqueue(int batchSize, const void *const *inputs, void *const *outputs,
#endif
void *workspace, cudaStream_t stream) override; void *workspace, cudaStream_t stream) override;
bool supportsFormat(nvinfer1::DataType type, bool supportsFormat(nvinfer1::DataType type,
......
...@@ -43,7 +43,11 @@ nvinfer1::Dims LayerNormPlugin::getOutputDimensions( ...@@ -43,7 +43,11 @@ nvinfer1::Dims LayerNormPlugin::getOutputDimensions(
} }
int LayerNormPlugin::enqueue(int batch_size, const void *const *inputs, int LayerNormPlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, void **outputs, void *workspace,
#else
void *const *outputs, void *workspace,
#endif
cudaStream_t stream) { cudaStream_t stream) {
const auto &input_dims = this->getInputDims(0); const auto &input_dims = this->getInputDims(0);
const float *input = reinterpret_cast<const float *>(inputs[0]); const float *input = reinterpret_cast<const float *>(inputs[0]);
......
...@@ -100,7 +100,11 @@ class LayerNormPlugin : public PluginTensorRT { ...@@ -100,7 +100,11 @@ class LayerNormPlugin : public PluginTensorRT {
int getNbOutputs() const override { return 1; } int getNbOutputs() const override { return 1; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nbInputDims) override; int nbInputDims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void* const* inputs, void** outputs, int enqueue(int batchSize, const void* const* inputs, void** outputs,
#else
int enqueue(int batchSize, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
}; };
......
...@@ -42,7 +42,12 @@ nvinfer1::Dims PoolPlugin::getOutputDimensions(int index, ...@@ -42,7 +42,12 @@ nvinfer1::Dims PoolPlugin::getOutputDimensions(int index,
} }
int PoolPlugin::enqueue(int batchSize, const void *const *inputs, int PoolPlugin::enqueue(int batchSize, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, cudaStream_t stream) { void **outputs, void *workspace, cudaStream_t stream) {
#else
void *const *outputs, void *workspace,
cudaStream_t stream) {
#endif
auto const &input_dims = this->getInputDims(0); auto const &input_dims = this->getInputDims(0);
int input_size = 0; int input_size = 0;
float const *idata = reinterpret_cast<float const *>(inputs[0]); float const *idata = reinterpret_cast<float const *>(inputs[0]);
......
...@@ -128,7 +128,11 @@ class PoolPlugin : public PluginTensorRT { ...@@ -128,7 +128,11 @@ class PoolPlugin : public PluginTensorRT {
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nbInputDims) override; int nbInputDims) override;
int initialize() override { return 0; } int initialize() override { return 0; }
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void* const* inputs, void** outputs, int enqueue(int batchSize, const void* const* inputs, void** outputs,
#else
int enqueue(int batchSize, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
private: private:
......
...@@ -57,7 +57,12 @@ nvinfer1::Dims PReluPlugin::getOutputDimensions(int index, ...@@ -57,7 +57,12 @@ nvinfer1::Dims PReluPlugin::getOutputDimensions(int index,
} }
int PReluPlugin::enqueue(int batch_size, const void *const *inputs, int PReluPlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, cudaStream_t stream) { void **outputs, void *workspace, cudaStream_t stream) {
#else
void *const *outputs, void *workspace,
cudaStream_t stream) {
#endif
// input dims is CHW. // input dims is CHW.
const auto &input_dims = this->getInputDims(0); const auto &input_dims = this->getInputDims(0);
const float *input = reinterpret_cast<const float *>(inputs[0]); const float *input = reinterpret_cast<const float *>(inputs[0]);
......
...@@ -80,7 +80,11 @@ class PReluPlugin : public PluginTensorRT { ...@@ -80,7 +80,11 @@ class PReluPlugin : public PluginTensorRT {
int getNbOutputs() const override { return 1; } int getNbOutputs() const override { return 1; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nbInputDims) override; int nbInputDims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void* const* inputs, void** outputs, int enqueue(int batchSize, const void* const* inputs, void** outputs,
#else
int enqueue(int batchSize, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
}; };
......
...@@ -111,7 +111,12 @@ nvinfer1::Dims SlicePlugin::getOutputDimensions(int index, ...@@ -111,7 +111,12 @@ nvinfer1::Dims SlicePlugin::getOutputDimensions(int index,
} }
int SlicePlugin::enqueue(int batch_size, const void *const *inputs, int SlicePlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, cudaStream_t stream) { void **outputs, void *workspace, cudaStream_t stream) {
#else
void *const *outputs, void *workspace,
cudaStream_t stream) {
#endif
auto input_dims = getInputDims(0); auto input_dims = getInputDims(0);
// notice input dims is [C, H, W], add input batch dim here // notice input dims is [C, H, W], add input batch dim here
......
...@@ -44,7 +44,11 @@ class SlicePlugin : public PluginTensorRT { ...@@ -44,7 +44,11 @@ class SlicePlugin : public PluginTensorRT {
nvinfer1::PluginFormat format) const override; nvinfer1::PluginFormat format) const override;
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nb_input_dims) override; int nb_input_dims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
protected: protected:
......
...@@ -126,7 +126,12 @@ __global__ void split_kernel(int nsegment, ...@@ -126,7 +126,12 @@ __global__ void split_kernel(int nsegment,
} }
int SplitPlugin::enqueue(int batchSize, const void* const* inputs, int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
#if IS_TRT_VERSION_LT(8000)
void** outputs, void* workspace, cudaStream_t stream) { void** outputs, void* workspace, cudaStream_t stream) {
#else
void* const* outputs, void* workspace,
cudaStream_t stream) {
#endif
const int* d_segment_offsets_ptr = const int* d_segment_offsets_ptr =
thrust::raw_pointer_cast(&d_segment_offsets_[0]); thrust::raw_pointer_cast(&d_segment_offsets_[0]);
float const* input_ptr = reinterpret_cast<float const*>(inputs[0]); float const* input_ptr = reinterpret_cast<float const*>(inputs[0]);
......
...@@ -60,7 +60,11 @@ class SplitPlugin : public PluginTensorRTV2Ext { ...@@ -60,7 +60,11 @@ class SplitPlugin : public PluginTensorRTV2Ext {
int initialize() override; int initialize() override;
void terminate() override; void terminate() override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
void destroy() override { delete this; } void destroy() override { delete this; }
......
...@@ -85,7 +85,12 @@ __global__ void swish_kernel<half>(int num, const half *input, half *output, ...@@ -85,7 +85,12 @@ __global__ void swish_kernel<half>(int num, const half *input, half *output,
} }
int SwishPlugin::enqueue(int batch_size, const void *const *inputs, int SwishPlugin::enqueue(int batch_size, const void *const *inputs,
#if IS_TRT_VERSION_LT(8000)
void **outputs, void *workspace, cudaStream_t stream) { void **outputs, void *workspace, cudaStream_t stream) {
#else
void *const *outputs, void *workspace,
cudaStream_t stream) {
#endif
// input dims is CHW. // input dims is CHW.
const auto &input_dims = this->getInputDims(0); const auto &input_dims = this->getInputDims(0);
const float *input = reinterpret_cast<const float *>(inputs[0]); const float *input = reinterpret_cast<const float *>(inputs[0]);
......
...@@ -67,7 +67,11 @@ class SwishPlugin : public PluginTensorRT { ...@@ -67,7 +67,11 @@ class SwishPlugin : public PluginTensorRT {
int getNbOutputs() const override { return 1; } int getNbOutputs() const override { return 1; }
nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
int nbInputDims) override; int nbInputDims) override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batchSize, const void* const* inputs, void** outputs, int enqueue(int batchSize, const void* const* inputs, void** outputs,
#else
int enqueue(int batchSize, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
}; };
......
...@@ -82,8 +82,13 @@ class PluginTensorRT : public nvinfer1::IPluginExt { ...@@ -82,8 +82,13 @@ class PluginTensorRT : public nvinfer1::IPluginExt {
int initialize() override { return 0; } int initialize() override { return 0; }
// Shutdown the layer. This is called when the engine is destroyed // Shutdown the layer. This is called when the engine is destroyed
void terminate() override {} void terminate() override {}
// Execute the layer // Execute the layer
#if IS_TRT_VERSION_LT(8000)
virtual int enqueue(int batch_size, const void* const* inputs, void** outputs, virtual int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
virtual int enqueue(int batch_size, const void* const* inputs,
void* const* outputs,
#endif
void* workspace, cudaStream_t stream) = 0; void* workspace, cudaStream_t stream) = 0;
// Find the size of the serialization buffer required // Find the size of the serialization buffer required
...@@ -188,8 +193,13 @@ class PluginTensorRTV2Ext : public nvinfer1::IPluginV2Ext { ...@@ -188,8 +193,13 @@ class PluginTensorRTV2Ext : public nvinfer1::IPluginV2Ext {
// Find the workspace size required by the layer // Find the workspace size required by the layer
size_t getWorkspaceSize(int) const override { return 0; } size_t getWorkspaceSize(int) const override { return 0; }
// Execute the layer // Execute the layer
#if IS_TRT_VERSION_LT(8000)
virtual int enqueue(int batch_size, const void* const* inputs, void** outputs, virtual int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
virtual int enqueue(int batch_size, const void* const* inputs,
void* const* outputs,
#endif
void* workspace, cudaStream_t stream) = 0; void* workspace, cudaStream_t stream) = 0;
// Find the size of the serialization buffer required // Find the size of the serialization buffer required
......
...@@ -243,7 +243,11 @@ int YoloBoxPlugin::enqueue_impl(int batch_size, const void* const* inputs, ...@@ -243,7 +243,11 @@ int YoloBoxPlugin::enqueue_impl(int batch_size, const void* const* inputs,
} }
int YoloBoxPlugin::enqueue(int batch_size, const void* const* inputs, int YoloBoxPlugin::enqueue(int batch_size, const void* const* inputs,
#if IS_TRT_VERSION_LT(8000)
void** outputs, void* workspace, void** outputs, void* workspace,
#else
void* const* outputs, void* workspace,
#endif
cudaStream_t stream) { cudaStream_t stream) {
if (data_type_ == nvinfer1::DataType::kFLOAT) { if (data_type_ == nvinfer1::DataType::kFLOAT) {
return enqueue_impl<float>(batch_size, inputs, outputs, workspace, stream); return enqueue_impl<float>(batch_size, inputs, outputs, workspace, stream);
......
...@@ -43,7 +43,11 @@ class YoloBoxPlugin : public nvinfer1::IPluginV2Ext { ...@@ -43,7 +43,11 @@ class YoloBoxPlugin : public nvinfer1::IPluginV2Ext {
bool supportsFormat(nvinfer1::DataType type, bool supportsFormat(nvinfer1::DataType type,
nvinfer1::TensorFormat format) const override; nvinfer1::TensorFormat format) const override;
size_t getWorkspaceSize(int max_batch_size) const override; size_t getWorkspaceSize(int max_batch_size) const override;
#if IS_TRT_VERSION_LT(8000)
int enqueue(int batch_size, const void* const* inputs, void** outputs, int enqueue(int batch_size, const void* const* inputs, void** outputs,
#else
int enqueue(int batch_size, const void* const* inputs, void* const* outputs,
#endif
void* workspace, cudaStream_t stream) override; void* workspace, cudaStream_t stream) override;
template <typename T> template <typename T>
int enqueue_impl(int batch_size, const void* const* inputs, void** outputs, int enqueue_impl(int batch_size, const void* const* inputs, void** outputs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册