Unverified commit e3a38d79, authored by zlsh80826, committed by GitHub

[Paddle-TRT] roi_align_plugin (#31732)

* add roi_align_plugin

* add roi align unit_test

* add roi align serialization

* remove roi align static plugin because of batch dim issue

* refine roi align unittest and add fp16/serialization

* add trt roi align condition to op_teller

* refine error message

* remove unnecessary reshape layer
Parent bfb5cf55
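For context, a minimal sketch of how an application would reach this plugin: the converter below requires TRT dynamic-shape mode, which is enabled through the inference config. This is an illustrative assumption, not part of the diff; API names follow the public paddle_infer C++ interface, and the tensor names ("data", "rois") and shapes mirror the unit test at the end of this diff.

#include <map>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // paddle_infer::Config

void EnableTrtRoiAlign(paddle_infer::Config* config) {
  // TRT subgraph engine; the roi_align plugin supports fp32 and fp16.
  config->EnableTensorRtEngine(1 << 30 /* workspace bytes */,
                               8 /* max_batch_size */,
                               3 /* min_subgraph_size */,
                               paddle_infer::PrecisionType::kFloat32,
                               false /* use_static */,
                               false /* use_calib_mode */);
  // roi_align is only offloaded under dynamic shape (see op_teller below).
  std::map<std::string, std::vector<int>> min_shape{{"data", {1, 16, 16, 16}},
                                                    {"rois", {1, 4}}};
  std::map<std::string, std::vector<int>> max_shape{{"data", {2, 16, 64, 64}},
                                                    {"rois", {8, 4}}};
  std::map<std::string, std::vector<int>> opt_shape{{"data", {2, 16, 32, 32}},
                                                    {"rois", {8, 4}}};
  config->SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}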
@@ -1192,6 +1192,7 @@ USE_TRT_CONVERTER(scale);
USE_TRT_CONVERTER(stack);
USE_TRT_CONVERTER(clip);
USE_TRT_CONVERTER(gather);
USE_TRT_CONVERTER(roi_align);
USE_TRT_CONVERTER(affine_channel);
USE_TRT_CONVERTER(multiclass_nms);
USE_TRT_CONVERTER(nearest_interp);
......
@@ -6,6 +6,7 @@ nv_library(tensorrt_converter
shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
gather_op.cc
roi_align_op.cc
affine_channel_op.cc
multiclass_nms_op.cc
nearest_interp_op.cc
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
/*
* Roi Align Op
*/
class RoiAlignOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(3) << "convert a fluid roi align op to tensorrt plugin";
framework::OpDesc op_desc(op, nullptr);
std::string input_name = op_desc.Input("X").front();
std::string rois_name = op_desc.Input("ROIs").front();
std::string output_name = op_desc.Output("Out").front();
const auto pooled_height =
BOOST_GET_CONST(int, op_desc.GetAttr("pooled_height"));
const auto pooled_width =
BOOST_GET_CONST(int, op_desc.GetAttr("pooled_width"));
const auto spatial_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("spatial_scale"));
const auto sampling_ratio =
BOOST_GET_CONST(int, op_desc.GetAttr("sampling_ratio"));
const auto input_tensor = engine_->GetITensor(input_name);
const auto rois_tensor = engine_->GetITensor(rois_name);
    const nvinfer1::DataType data_type = engine_->WithFp16()
                                             ? nvinfer1::DataType::kHALF
                                             : nvinfer1::DataType::kFLOAT;
std::vector<nvinfer1::ITensor*> inputs{input_tensor, rois_tensor};
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(
engine_->with_dynamic_shape(), true,
platform::errors::InvalidArgument(
"TRT roi align plugin only accept the dynamic shape, because that "
"the roi_align will change the batch size."));
    auto* roi_align_plugin = new plugin::RoiAlignPluginDynamic(
        data_type, pooled_height, pooled_width, spatial_scale, sampling_ratio);
auto roi_align_layer = engine_->network()->addPluginV2(
inputs.data(), inputs.size(), *roi_align_plugin);
layer = roi_align_layer;
std::vector<std::string> output_names{output_name};
RreplenishLayerAndOutput(layer, "roi_align", output_names, test_mode);
}
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(roi_align, RoiAlignOpConverter);
@@ -111,6 +111,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"flatten2",
"flatten",
"gather",
"roi_align",
"affine_channel",
"multiclass_nms",
"nearest_interp",
@@ -263,6 +264,29 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
BOOST_GET_CONST(std::string, desc.GetAttr("interp_method"));
if (interp_method != "nearest") return false;
}
if (op_type == "roi_align") {
if (!with_dynamic_shape) return false;
std::vector<std::string> attrs{"pooled_height", "pooled_width",
"spatial_scale", "sampling_ratio"};
      for (const auto& attr : attrs) {
if (!desc.HasAttr(attr)) return false;
}
const auto pooled_height =
BOOST_GET_CONST(int, desc.GetAttr("pooled_height"));
if (pooled_height <= 0) return false;
const auto pooled_width =
BOOST_GET_CONST(int, desc.GetAttr("pooled_width"));
if (pooled_width <= 0) return false;
const auto spatial_scale =
BOOST_GET_CONST(float, desc.GetAttr("spatial_scale"));
if (spatial_scale <= 0.f) return false;
}
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
}
return false;
......
@@ -5,6 +5,7 @@ nv_library(tensorrt_plugin
instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
roi_align_op_plugin.cu
DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <algorithm>
#include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
template <class T>
__inline__ __device__ T BilinearInterpolate(const T* input_data,
const int height, const int width,
T y, T x) {
if (y < -1.f || y > height || x < -1.f || x > width) return 0;
y = y <= 0.f ? 0.f : y;
x = x <= 0.f ? 0.f : x;
int y_low = static_cast<int>(y);
int x_low = static_cast<int>(x);
int y_high;
int x_high;
if (y_low >= height - 1) {
y_high = y_low = height - 1;
y = static_cast<T>(y_low);
} else {
y_high = y_low + 1;
}
if (x_low >= width - 1) {
x_high = x_low = width - 1;
x = static_cast<T>(x_low);
} else {
x_high = x_low + 1;
}
T ly = y - y_low, lx = x - x_low;
T hy = 1.f - ly, hx = 1.f - lx;
T v1 = input_data[y_low * width + x_low];
T v2 = input_data[y_low * width + x_high];
T v3 = input_data[y_high * width + x_low];
T v4 = input_data[y_high * width + x_high];
T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;
T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
return val;
}
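For reference, BilinearInterpolate above evaluates the standard bilinear formula; with v1..v4 the four neighboring pixels and ly, lx the fractional offsets (hy = 1 - ly, hx = 1 - lx), the returned value is

\[ v(y, x) = h_y h_x\,v_1 + h_y l_x\,v_2 + l_y h_x\,v_3 + l_y l_x\,v_4 . \]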
template <typename T, typename OutT, bool USE_SMEM>
__global__ void GPUROIAlignOpt(const int nthreads,
const T* __restrict__ input_data,
const T* __restrict__ input_rois,
const float spatial_scale, const int channels,
const int height, const int width,
const int pooled_height, const int pooled_width,
const int sampling_ratio, const int num_rois,
OutT* __restrict__ output_data) {
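  // One thread block per (batch, channel) slice of the input; each thread
  // then strides over all (roi, ph, pw) output bins of that slice. With
  // USE_SMEM, the H x W slice is first staged into shared memory.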
const int batch = blockIdx.x;
const int channel = blockIdx.y;
const T* offset_input_data =
input_data + (batch * channels + channel) * height * width;
extern __shared__ T s_input_data[];
if (USE_SMEM) {
for (int idx = threadIdx.x; idx < height * width; idx += blockDim.x) {
s_input_data[idx] = offset_input_data[idx];
}
__syncthreads();
}
for (int idx = threadIdx.x; idx < num_rois * pooled_height * pooled_width;
idx += blockDim.x) {
const int pw = idx % pooled_width;
const int ph = (idx / pooled_width) % pooled_height;
const int roi_idx = (idx / pooled_width / pooled_height) % num_rois;
const int n = batch * num_rois + roi_idx;
const float4 rois_offset = reinterpret_cast<const float4*>(input_rois)[n];
const T roi_xmin = rois_offset.x * spatial_scale;
const T roi_ymin = rois_offset.y * spatial_scale;
const T roi_xmax = rois_offset.z * spatial_scale;
const T roi_ymax = rois_offset.w * spatial_scale;
const T roi_width = max(roi_xmax - roi_xmin, static_cast<T>(1.f));
const T roi_height = max(roi_ymax - roi_ymin, static_cast<T>(1.f));
const T bin_size_h = roi_height / static_cast<T>(pooled_height);
const T bin_size_w = roi_width / static_cast<T>(pooled_width);
const int roi_bin_grid_h = (sampling_ratio > 0)
? sampling_ratio
: ceil(roi_height / pooled_height);
const int roi_bin_grid_w =
(sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width);
const T count = roi_bin_grid_h * roi_bin_grid_w;
T output_val = 0.f;
for (int iy = 0; iy < roi_bin_grid_h; ++iy) {
const T y = roi_ymin + ph * bin_size_h +
static_cast<T>(iy + .5f) * bin_size_h /
static_cast<T>(roi_bin_grid_h);
for (int ix = 0; ix < roi_bin_grid_w; ++ix) {
const T x = roi_xmin + pw * bin_size_w +
static_cast<T>(ix + .5f) * bin_size_w /
static_cast<T>(roi_bin_grid_w);
if (USE_SMEM) {
T val = BilinearInterpolate<T>(s_input_data, height, width, y, x);
output_val += val;
} else {
T val =
BilinearInterpolate<T>(offset_input_data, height, width, y, x);
output_val += val;
}
}
}
output_val /= count;
const int out_offset =
batch * num_rois * channels * pooled_height * pooled_width +
roi_idx * channels * pooled_height * pooled_width +
channel * pooled_height * pooled_width + ph * pooled_width + pw;
output_data[out_offset] = static_cast<OutT>(output_val);
}
}
#if IS_TRT_VERSION_GE(6000)
RoiAlignPluginDynamic::RoiAlignPluginDynamic(const nvinfer1::DataType data_type,
const int pooled_height,
const int pooled_width,
float spatial_scale,
int sampling_ratio)
: data_type_(data_type),
pooled_height_(pooled_height),
pooled_width_(pooled_width),
spatial_scale_(spatial_scale),
sampling_ratio_(sampling_ratio) {
bool data_type_is_valid = data_type_ == nvinfer1::DataType::kFLOAT ||
data_type_ == nvinfer1::DataType::kHALF;
PADDLE_ENFORCE_EQ(data_type_is_valid, true,
platform::errors::InvalidArgument(
"TRT RoiAlign plugin only accepts kFLOAT(%d) or "
"kHALF(%d) data type, but the received data type = %d",
static_cast<int>(nvinfer1::DataType::kFLOAT),
static_cast<int>(nvinfer1::DataType::kHALF),
static_cast<int>(data_type_)));
PADDLE_ENFORCE_GT(pooled_height_, 0,
platform::errors::InvalidArgument(
"TRT RoiAlign plugin only accepts pooled_height "
"greater than %d, but the received pooled_height = %d",
0, pooled_height_));
  PADDLE_ENFORCE_GT(pooled_width_, 0,
                    platform::errors::InvalidArgument(
                        "TRT RoiAlign plugin only accepts pooled_width greater "
                        "than %d, but the received pooled_width = %d",
                        0, pooled_width_));
  PADDLE_ENFORCE_GT(spatial_scale_, 0.f,
                    platform::errors::InvalidArgument(
                        "TRT RoiAlign plugin only accepts spatial_scale "
                        "greater than %f, but the received spatial_scale = %f",
                        0.f, spatial_scale_));
int smem_per_block = -1;
int device = -1;
cudaGetDevice(&device);
PADDLE_ENFORCE_GE(
device, 0,
platform::errors::InvalidArgument(
"The cuda device ID should be greater than %d, but device ID is %d",
          device));
cudaDeviceGetAttribute(&smem_per_block, cudaDevAttrMaxSharedMemoryPerBlock,
device);
smem_per_block_ = smem_per_block;
}
RoiAlignPluginDynamic::RoiAlignPluginDynamic(void const* data, size_t length) {
DeserializeValue(&data, &length, &data_type_);
DeserializeValue(&data, &length, &pooled_height_);
DeserializeValue(&data, &length, &pooled_width_);
DeserializeValue(&data, &length, &spatial_scale_);
DeserializeValue(&data, &length, &sampling_ratio_);
int smem_per_block = -1;
int device = -1;
cudaGetDevice(&device);
PADDLE_ENFORCE_GE(
device, 0,
platform::errors::InvalidArgument(
"The cuda device ID should be greater than %d, but device ID is %d",
          device));
cudaDeviceGetAttribute(&smem_per_block, cudaDevAttrMaxSharedMemoryPerBlock,
device);
smem_per_block_ = smem_per_block;
}
nvinfer1::IPluginV2DynamicExt* RoiAlignPluginDynamic::clone() const {
auto* plugin =
new RoiAlignPluginDynamic(data_type_, pooled_height_, pooled_width_,
spatial_scale_, sampling_ratio_);
plugin->setPluginNamespace(namespace_.c_str());
return plugin;
}
nvinfer1::DimsExprs RoiAlignPluginDynamic::getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
nvinfer1::IExprBuilder& exprBuilder) {
nvinfer1::DimsExprs ret{};
ret.nbDims = 4;
  ret.d[0] = inputs[1].d[0];  // number of RoIs, taken from the ROIs input
  ret.d[1] = inputs[0].d[1];  // channels, taken from the X feature map
ret.d[2] = exprBuilder.constant(pooled_height_);
ret.d[3] = exprBuilder.constant(pooled_width_);
return ret;
}
bool RoiAlignPluginDynamic::supportsFormatCombination(
int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs,
int nbOutputs) {
if (inOut[pos].format != nvinfer1::TensorFormat::kLINEAR) {
return false;
}
if (pos < 2) { // input
return inOut[pos].type == nvinfer1::DataType::kFLOAT;
}
return inOut[pos].type == data_type_;
}
void RoiAlignPluginDynamic::configurePlugin(
const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {}
size_t RoiAlignPluginDynamic::getWorkspaceSize(
const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const {
return 0;
}
template <typename T, typename OutT>
int RoiAlignPluginDynamic::enqueue_impl(
const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs,
void* const* outputs, void* workspace, cudaStream_t stream) {
auto in_dims = inputDesc[0].dims;
auto rois_dims = inputDesc[1].dims;
auto out_dims = outputDesc[0].dims;
int rois_num = rois_dims.d[0];
if (rois_num == 0) return cudaGetLastError() != cudaSuccess;
int batch = in_dims.d[0];
int channels = in_dims.d[1];
int height = in_dims.d[2];
int width = in_dims.d[3];
int output_size =
out_dims.d[0] * out_dims.d[1] * out_dims.d[2] * out_dims.d[3];
const dim3 blocks(batch, channels);
const int threads = 512;
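  // Cache the H x W feature-map slice in shared memory only when it fits in
  // the per-block limit queried at plugin construction time.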
if (smem_per_block_ < width * height * sizeof(T)) {
GPUROIAlignOpt<T, OutT, false><<<blocks, threads, 0, stream>>>(
output_size, static_cast<const T*>(inputs[0]),
static_cast<const T*>(inputs[1]), spatial_scale_, channels, height,
width, pooled_height_, pooled_width_, sampling_ratio_, rois_num / batch,
static_cast<OutT*>(outputs[0]));
} else {
GPUROIAlignOpt<
T, OutT, true><<<blocks, threads, width * height * sizeof(T), stream>>>(
output_size, static_cast<const T*>(inputs[0]),
static_cast<const T*>(inputs[1]), spatial_scale_, channels, height,
width, pooled_height_, pooled_width_, sampling_ratio_, rois_num / batch,
static_cast<OutT*>(outputs[0]));
}
return cudaGetLastError() != cudaSuccess;
}
int RoiAlignPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs,
void* const* outputs, void* workspace,
cudaStream_t stream) {
  PADDLE_ENFORCE_EQ(outputDesc[0].type, data_type_,
                    platform::errors::InvalidArgument(
                        "TRT RoiAlignPluginDynamic expects outputDesc[0].type "
                        "to equal the plugin's data_type_."));
if (data_type_ == nvinfer1::DataType::kHALF) {
return enqueue_impl<float, half>(inputDesc, outputDesc, inputs, outputs,
workspace, stream);
}
return enqueue_impl<float, float>(inputDesc, outputDesc, inputs, outputs,
workspace, stream);
}
nvinfer1::DataType RoiAlignPluginDynamic::getOutputDataType(
int index, const nvinfer1::DataType* inputTypes, int nbInputs) const {
return data_type_;
}
const char* RoiAlignPluginDynamic::getPluginType() const {
return "roi_align_plugin_dynamic";
}
int RoiAlignPluginDynamic::getNbOutputs() const { return 1; }
int RoiAlignPluginDynamic::initialize() { return 0; }
void RoiAlignPluginDynamic::terminate() {}
size_t RoiAlignPluginDynamic::getSerializationSize() const {
size_t serialize_size = 0;
serialize_size += SerializedSize(data_type_);
serialize_size += SerializedSize(pooled_height_);
serialize_size += SerializedSize(pooled_width_);
serialize_size += SerializedSize(spatial_scale_);
serialize_size += SerializedSize(sampling_ratio_);
return serialize_size;
}
void RoiAlignPluginDynamic::serialize(void* buffer) const {
SerializeValue(&buffer, data_type_);
SerializeValue(&buffer, pooled_height_);
SerializeValue(&buffer, pooled_width_);
SerializeValue(&buffer, spatial_scale_);
SerializeValue(&buffer, sampling_ratio_);
}
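All five serialized fields above are trivially copyable, so (de)serialization is a plain byte copy per field. Below is a minimal standalone sketch of that pattern; WritePod/ReadPod are hypothetical helpers standing in for Paddle's SerializeValue/DeserializeValue, not part of this diff.

#include <cstddef>
#include <cstring>

// Append one POD value to the buffer and advance the write cursor,
// mirroring the serialize() method above.
template <typename T>
void WritePod(void** buffer, const T& value) {
  std::memcpy(*buffer, &value, sizeof(T));
  *buffer = static_cast<char*>(*buffer) + sizeof(T);
}

// Read one POD value, advancing the read cursor and shrinking the
// remaining length, mirroring the deserializing constructor above.
template <typename T>
void ReadPod(const void** data, size_t* length, T* value) {
  std::memcpy(value, *data, sizeof(T));
  *data = static_cast<const char*>(*data) + sizeof(T);
  *length -= sizeof(T);
}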
void RoiAlignPluginDynamic::destroy() {}
RoiAlignPluginDynamicCreator::RoiAlignPluginDynamicCreator() {}
void RoiAlignPluginDynamicCreator::setPluginNamespace(
const char* lib_namespace) {
namespace_ = std::string(lib_namespace);
}
const char* RoiAlignPluginDynamicCreator::getPluginNamespace() const {
return namespace_.c_str();
}
const char* RoiAlignPluginDynamicCreator::getPluginName() const {
return "roi_align_plugin_dynamic";
}
const char* RoiAlignPluginDynamicCreator::getPluginVersion() const {
return "1";
}
const nvinfer1::PluginFieldCollection*
RoiAlignPluginDynamicCreator::getFieldNames() {
return &field_collection_;
}
nvinfer1::IPluginV2Ext* RoiAlignPluginDynamicCreator::createPlugin(
    const char* name, const nvinfer1::PluginFieldCollection* fc) {
  // This plugin is only re-created from a serialized engine (see
  // deserializePlugin below); creation from a PluginFieldCollection is not
  // supported, so report failure by returning nullptr.
  return nullptr;
}
nvinfer1::IPluginV2Ext* RoiAlignPluginDynamicCreator::deserializePlugin(
const char* name, const void* serial_data, size_t serial_length) {
auto plugin = new RoiAlignPluginDynamic(serial_data, serial_length);
plugin->setPluginNamespace(namespace_.c_str());
return plugin;
}
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
#if IS_TRT_VERSION_GE(6000)
class RoiAlignPluginDynamic : public DynamicPluginTensorRT {
public:
explicit RoiAlignPluginDynamic(const nvinfer1::DataType data_type,
const int pooled_height,
const int pooled_width, float spatial_scale,
int sampling_ratio);
RoiAlignPluginDynamic(void const* data, size_t length);
~RoiAlignPluginDynamic() = default;
nvinfer1::IPluginV2DynamicExt* clone() const override;
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
nvinfer1::IExprBuilder& exprBuilder) override;
bool supportsFormatCombination(int pos,
const nvinfer1::PluginTensorDesc* inOut,
int nbInputs, int nbOutputs) override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
int nbInputs,
const nvinfer1::DynamicPluginTensorDesc* out,
int nbOutputs) override;
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
int nbInputs,
const nvinfer1::PluginTensorDesc* outputs,
int nbOutputs) const override;
int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream) override;
nvinfer1::DataType getOutputDataType(int index,
const nvinfer1::DataType* inputTypes,
int nbInputs) const override;
const char* getPluginType() const override;
int getNbOutputs() const override;
int initialize() override;
void terminate() override;
size_t getSerializationSize() const override;
void serialize(void* buffer) const override;
void destroy() override;
private:
template <typename T, typename OutT>
int enqueue_impl(const nvinfer1::PluginTensorDesc* inputDesc,
const nvinfer1::PluginTensorDesc* outputDesc,
const void* const* inputs, void* const* outputs,
void* workspace, cudaStream_t stream);
nvinfer1::DataType data_type_;
int pooled_height_;
int pooled_width_;
float spatial_scale_;
int sampling_ratio_;
int smem_per_block_;
std::string namespace_;
};
class RoiAlignPluginDynamicCreator : public nvinfer1::IPluginCreator {
public:
RoiAlignPluginDynamicCreator();
~RoiAlignPluginDynamicCreator() override = default;
void setPluginNamespace(const char* lib_namespace) override;
const char* getPluginNamespace() const override;
const char* getPluginName() const override;
const char* getPluginVersion() const override;
const nvinfer1::PluginFieldCollection* getFieldNames() override;
nvinfer1::IPluginV2Ext* createPlugin(
const char* name, const nvinfer1::PluginFieldCollection* fc) override;
nvinfer1::IPluginV2Ext* deserializePlugin(const char* name,
const void* serial_data,
size_t serial_length) override;
private:
std::string namespace_;
nvinfer1::PluginFieldCollection field_collection_;
};
REGISTER_TRT_PLUGIN_V2(RoiAlignPluginDynamicCreator);
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTRoiAlignTest(InferencePassTest):
def setUp(self):
self.bs = 2
self.num_rois = 4
self.channel = 16
self.height = 32
self.width = 32
self.precision = AnalysisConfig.Precision.Float32
self.serialize = False
self.enable_trt = True
def build(self):
self.trt_parameters = TRTRoiAlignTest.TensorRTParam(
1 << 30, self.bs * self.num_rois, 1, self.precision, self.serialize,
False)
with fluid.program_guard(self.main_program, self.startup_program):
data_shape = [-1, self.channel, self.height, self.width]
data = fluid.data(name='data', shape=data_shape, dtype='float32')
rois = fluid.data(
name='rois', shape=[-1, 4], dtype='float32', lod_level=1)
roi_align_out = fluid.layers.roi_align(data, rois)
out = fluid.layers.batch_norm(roi_align_out, is_test=True)
rois_lod = fluid.create_lod_tensor(
np.random.random([self.bs * self.num_rois, 4]).astype('float32'),
[[self.num_rois, self.num_rois]], fluid.CPUPlace())
data_shape[0] = self.bs
self.feeds = {
'data': np.random.random(data_shape).astype('float32'),
'rois': rois_lod,
}
self.fetch_list = [out]
def check_output(self):
if core.is_compiled_with_cuda():
use_gpu = True
atol = 1e-5
if self.trt_parameters.precision == AnalysisConfig.Precision.Half:
atol = 1e-3
self.check_output_with_option(use_gpu, atol, flatten=True)
self.assertTrue(
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
def set_dynamic(self):
min_shape_spec = dict()
max_shape_spec = dict()
opt_shape_spec = dict()
min_shape_spec['data'] = [
self.bs, self.channel, self.height // 2, self.width // 2
]
min_shape_spec['rois'] = [1, 4]
max_shape_spec[
'data'] = [self.bs, self.channel, self.height * 2, self.width * 2]
max_shape_spec['rois'] = [self.bs * self.num_rois, 4]
opt_shape_spec[
'data'] = [self.bs, self.channel, self.height, self.width]
opt_shape_spec['rois'] = [self.bs * self.num_rois, 4]
self.dynamic_shape_params = InferencePassTest.DynamicShapeParam(
min_shape_spec, max_shape_spec, opt_shape_spec, False)
def run_test(self):
self.build()
self.check_output()
def test_base(self):
self.run_test()
def test_fp16(self):
self.precision = AnalysisConfig.Precision.Half
self.run_test()
def test_serialize(self):
self.serialize = True
self.run_test()
def test_dynamic(self):
self.set_dynamic()
self.run_test()
def test_dynamic_fp16(self):
self.set_dynamic()
self.precision = AnalysisConfig.Precision.Half
self.run_test()
def test_dynamic_serialize(self):
self.set_dynamic()
self.serialize = True
self.run_test()
if __name__ == "__main__":
unittest.main()