未验证 提交 b807e408 编写于 作者: Z zlsh80826 提交者: GitHub

[Paddle-TRT] add anchor generator op plugin (#31730)

* add anchor generator op plugin

* add anchor generator unit_test

* remove dbg info

* remove redundant line

* replace assertion with paddle enforce

* dynamic plugin replaces assertion with paddle enforce

* anchor generator support dynamic shape on spatial axis

* anchor generator test with fp16, dynamic shape

* add anchor generator test all

* add back main

* reduce test input size so the test does not exceed the CI time limit

* change super to InferencePassTest for python2 compatibility

* reuse paddle operator anchor generator

* move creator construct to header with default

* add cuda ifdef

* reduce line

* change super to InferencePassTest for python2 compatibility

* fix anchor generator fp16 serialize setting

* split unittest from test_all

* restrict anchor generator input format before version 7234

* anchor generator only support greater than trt7.1

* change min_graph_size to 2

* min_graph size to 3 if dynamic shape

* reduce dynamic shape size to avoid trt search tactic too long to exceed time limit

* remove anchor from fetch list

* anchor generator support all trt version

* fix device memory not being allocated when a plugin is constructed from serialized data
上级 4acc87be
...@@ -1192,6 +1192,7 @@ USE_TRT_CONVERTER(scale); ...@@ -1192,6 +1192,7 @@ USE_TRT_CONVERTER(scale);
USE_TRT_CONVERTER(stack); USE_TRT_CONVERTER(stack);
USE_TRT_CONVERTER(clip); USE_TRT_CONVERTER(clip);
USE_TRT_CONVERTER(gather); USE_TRT_CONVERTER(gather);
USE_TRT_CONVERTER(anchor_generator);
USE_TRT_CONVERTER(yolo_box); USE_TRT_CONVERTER(yolo_box);
USE_TRT_CONVERTER(roi_align); USE_TRT_CONVERTER(roi_align);
USE_TRT_CONVERTER(affine_channel); USE_TRT_CONVERTER(affine_channel);
......
...@@ -6,6 +6,7 @@ nv_library(tensorrt_converter ...@@ -6,6 +6,7 @@ nv_library(tensorrt_converter
shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
gather_op.cc gather_op.cc
anchor_generator_op.cc
yolo_box_op.cc yolo_box_op.cc
roi_align_op.cc roi_align_op.cc
affine_channel_op.cc affine_channel_op.cc
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {
/* Anchor Generator Op */
/* Anchor Generator Op: converts the fluid anchor_generator op into a
 * TensorRT plugin layer. A static-shape plugin is used when the engine is
 * built with fixed shapes; otherwise the dynamic-shape variant is emitted. */
class AnchorGeneratorOpConverter : public OpConverter {
 public:
  void operator()(const paddle::framework::proto::OpDesc& op,
                  const paddle::framework::Scope& scope,
                  bool test_mode) override {
    VLOG(3) << "convert a fluid anchor generator op to tensorrt plugin";
    framework::OpDesc op_desc(op, nullptr);
    const std::string input_tensor_name = op_desc.Input("Input").front();
    auto* input = engine_->GetITensor(input_tensor_name);
    const auto input_dims = input->getDimensions();  // C, H, W
    std::vector<std::string> output_names{op_desc.Output("Anchors").front(),
                                          op_desc.Output("Variances").front()};
    // Pull the op attributes that parameterize anchor generation.
    const auto anchor_sizes =
        BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("anchor_sizes"));
    const auto aspect_ratios =
        BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("aspect_ratios"));
    const auto stride =
        BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("stride"));
    const auto variances =
        BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("variances"));
    const auto offset = BOOST_GET_CONST(float, op_desc.GetAttr("offset"));
    const int num_anchors = aspect_ratios.size() * anchor_sizes.size();
    // Anchors feed generate_proposals, which expects float32.
    const nvinfer1::DataType data_type = nvinfer1::DataType::kFLOAT;
    nvinfer1::IPluginV2* anchor_generator_plugin = nullptr;
    if (engine_->with_dynamic_shape()) {
      // Spatial extents are resolved at runtime for the dynamic plugin.
      anchor_generator_plugin = new plugin::AnchorGeneratorPluginDynamic(
          data_type, anchor_sizes, aspect_ratios, stride, variances, offset,
          num_anchors);
    } else {
      // Static shapes: bake H/W and the total box count into the plugin.
      const auto height = input_dims.d[1];
      const auto width = input_dims.d[2];
      const int box_num = width * height * num_anchors;
      anchor_generator_plugin = new plugin::AnchorGeneratorPlugin(
          data_type, anchor_sizes, aspect_ratios, stride, variances, offset,
          height, width, num_anchors, box_num);
    }
    std::vector<nvinfer1::ITensor*> plugin_inputs{input};
    auto* anchor_generator_layer = engine_->network()->addPluginV2(
        plugin_inputs.data(), plugin_inputs.size(), *anchor_generator_plugin);
    RreplenishLayerAndOutput(anchor_generator_layer, "anchor_generator",
                             output_names, test_mode);
  }
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(anchor_generator, AnchorGeneratorOpConverter);
...@@ -116,6 +116,7 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -116,6 +116,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"affine_channel", "affine_channel",
"multiclass_nms", "multiclass_nms",
"nearest_interp", "nearest_interp",
"anchor_generator",
}; };
}; };
......
...@@ -5,6 +5,7 @@ nv_library(tensorrt_plugin ...@@ -5,6 +5,7 @@ nv_library(tensorrt_plugin
instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu
qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu
hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu
anchor_generator_op_plugin.cu
yolo_box_op_plugin.cu yolo_box_op_plugin.cu
roi_align_op_plugin.cu roi_align_op_plugin.cu
DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <algorithm>
#include <cassert>
#include "paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
#include "paddle/fluid/operators/detection/anchor_generator_op.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
// Allocates device buffers for the anchor sizes / aspect ratios / strides /
// variances and uploads the host vectors into them. Shared (as a macro) by
// the static and dynamic plugin constructors, including the deserializing
// ones, so every construction path ends with valid device-side parameters.
// data_size is sizeof(float); the member vectors hold float32 values.
// NOTE(review): cudaMalloc/cudaMemcpy return codes are not checked here.
#define PrepareParamsOnDevice()                                          \
  constexpr int data_size = 4;                                           \
  cudaMalloc(&anchor_sizes_device_, anchor_sizes_.size() * data_size);   \
  cudaMalloc(&aspect_ratios_device_, aspect_ratios_.size() * data_size); \
  cudaMalloc(&stride_device_, stride_.size() * data_size);               \
  cudaMalloc(&variances_device_, variances_.size() * data_size);         \
  cudaMemcpy(anchor_sizes_device_, anchor_sizes_.data(),                 \
             anchor_sizes_.size() * data_size, cudaMemcpyHostToDevice);  \
  cudaMemcpy(aspect_ratios_device_, aspect_ratios_.data(),               \
             aspect_ratios_.size() * data_size, cudaMemcpyHostToDevice); \
  cudaMemcpy(stride_device_, stride_.data(), stride_.size() * data_size, \
             cudaMemcpyHostToDevice);                                    \
  cudaMemcpy(variances_device_, variances_.data(),                       \
             variances_.size() * data_size, cudaMemcpyHostToDevice);
// Static-shape plugin constructor. All geometry (height/width/num_anchors/
// box_num) is fixed at build time; parameters are copied to the device at
// the end via PrepareParamsOnDevice().
// Fix: the PADDLE_ENFORCE_GE(x, 0) messages previously claimed the plugin
// "only accepts ... greater than 0", contradicting the >= 0 check that is
// actually performed; the messages now state the real condition.
AnchorGeneratorPlugin::AnchorGeneratorPlugin(
    const nvinfer1::DataType data_type, const std::vector<float>& anchor_sizes,
    const std::vector<float>& aspect_ratios, const std::vector<float>& stride,
    const std::vector<float>& variances, const float offset, const int height,
    const int width, const int num_anchors, const int box_num)
    : data_type_(data_type),
      anchor_sizes_(anchor_sizes),
      aspect_ratios_(aspect_ratios),
      stride_(stride),
      variances_(variances),
      offset_(offset),
      height_(height),
      width_(width),
      num_anchors_(num_anchors),
      box_num_(box_num) {
  // anchors must be float32, which is the generator proposals' input
  PADDLE_ENFORCE_EQ(data_type_, nvinfer1::DataType::kFLOAT,
                    platform::errors::InvalidArgument(
                        "TRT anchor generator plugin only accepts float32."));
  PADDLE_ENFORCE_GE(height_, 0,
                    platform::errors::InvalidArgument(
                        "TRT anchor generator plugin only accepts height "
                        "greater than or equal to 0, but receive height = %d.",
                        height_));
  PADDLE_ENFORCE_GE(width_, 0,
                    platform::errors::InvalidArgument(
                        "TRT anchor generator plugin only accepts width "
                        "greater than or equal to 0, but receive width = %d.",
                        width_));
  PADDLE_ENFORCE_GE(
      num_anchors_, 0,
      platform::errors::InvalidArgument(
          "TRT anchor generator plugin only accepts number of anchors "
          "greater than or equal to 0, but receive number of anchors = %d.",
          num_anchors_));
  PADDLE_ENFORCE_GE(
      box_num_, 0,
      platform::errors::InvalidArgument(
          "TRT anchor generator plugin only accepts box_num "
          "greater than or equal to 0, but receive box_num = %d.",
          box_num_));
  // Upload the parameter vectors to device memory for enqueue().
  PrepareParamsOnDevice();
}
// Releases the four device-side parameter buffers allocated by
// PrepareParamsOnDevice(). Null buffers (never allocated) are skipped.
AnchorGeneratorPlugin::~AnchorGeneratorPlugin() {
  void* device_buffers[] = {anchor_sizes_device_, aspect_ratios_device_,
                            stride_device_, variances_device_};
  for (void* buffer : device_buffers) {
    if (buffer != nullptr) {
      cudaFree(buffer);
    }
  }
}
// Deserializing constructor: restores every host-side member from the
// serialized buffer. The read order must exactly match serialize() and
// getSerializationSize().
AnchorGeneratorPlugin::AnchorGeneratorPlugin(const void* data, size_t length) {
  DeserializeValue(&data, &length, &data_type_);
  DeserializeValue(&data, &length, &anchor_sizes_);
  DeserializeValue(&data, &length, &aspect_ratios_);
  DeserializeValue(&data, &length, &stride_);
  DeserializeValue(&data, &length, &variances_);
  DeserializeValue(&data, &length, &offset_);
  DeserializeValue(&data, &length, &height_);
  DeserializeValue(&data, &length, &width_);
  DeserializeValue(&data, &length, &num_anchors_);
  DeserializeValue(&data, &length, &box_num_);
  // Device buffers are not part of the serialized image; rebuild them so a
  // deserialized plugin can run enqueue() immediately.
  PrepareParamsOnDevice();
}
const char* AnchorGeneratorPlugin::getPluginType() const {
  return "anchor_generator_plugin";
}
const char* AnchorGeneratorPlugin::getPluginVersion() const { return "1"; }
// Two outputs: anchors and variances.
int AnchorGeneratorPlugin::getNbOutputs() const { return 2; }
// Both outputs share the same shape: [height, width, num_anchors, 4]
// (per batch item), so `index` is intentionally ignored.
nvinfer1::Dims AnchorGeneratorPlugin::getOutputDimensions(
    int index, const nvinfer1::Dims* inputs, int nb_input_dims) {
  nvinfer1::Dims dims{};
  dims.nbDims = 4;
  dims.d[0] = height_;
  dims.d[1] = width_;
  dims.d[2] = num_anchors_;
  dims.d[3] = 4;
  return dims;
}
bool AnchorGeneratorPlugin::supportsFormat(
    nvinfer1::DataType type, nvinfer1::TensorFormat format) const {
  // static shape plugin can't support different type between input/out
  // it may cause addition overhead in half mode
  return (type == data_type_ && format == nvinfer1::TensorFormat::kLINEAR);
}
// No scratch memory is needed; both kernels write directly to the outputs.
size_t AnchorGeneratorPlugin::getWorkspaceSize(int max_batch_size) const {
  return 0;
}
// Launches the Paddle GenAnchors and SetVariance kernels on `stream`.
// The input tensor's data is never read: only shape info (already baked in
// as height_/width_) matters. Returns non-zero if a launch failed.
template <typename T>
int AnchorGeneratorPlugin::enqueue_impl(int batch_size,
                                        const void* const* inputs,
                                        void** outputs, void* workspace,
                                        cudaStream_t stream) {
  const int block = 512;
  // One thread per box for anchor generation.
  const int gen_anchor_grid = (box_num_ + block - 1) / block;
  T* anchors = static_cast<T*>(outputs[0]);
  T* vars = static_cast<T*>(outputs[1]);
  const T* anchor_sizes_device = static_cast<const T*>(anchor_sizes_device_);
  const T* aspect_ratios_device = static_cast<const T*>(aspect_ratios_device_);
  const T* stride_device = static_cast<const T*>(stride_device_);
  const T* variances_device = static_cast<const T*>(variances_device_);
  paddle::operators::GenAnchors<T><<<gen_anchor_grid, block, 0, stream>>>(
      anchors, aspect_ratios_device, aspect_ratios_.size(), anchor_sizes_device,
      anchor_sizes_.size(), stride_device, stride_.size(), height_, width_,
      offset_);
  // One thread per box coordinate (4 per box) for the variance fill.
  const int var_grid = (box_num_ * 4 + block - 1) / block;
  paddle::operators::SetVariance<T><<<var_grid, block, 0, stream>>>(
      vars, variances_device, variances_.size(), box_num_ * 4);
  return cudaGetLastError() != cudaSuccess;
}
// The constructor enforces float32, so only the float instantiation is used.
int AnchorGeneratorPlugin::enqueue(int batch_size, const void* const* inputs,
                                   void** outputs, void* workspace,
                                   cudaStream_t stream) {
  return enqueue_impl<float>(batch_size, inputs, outputs, workspace, stream);
}
// No lazy initialization needed; device buffers are set up in the ctor.
int AnchorGeneratorPlugin::initialize() { return 0; }
void AnchorGeneratorPlugin::terminate() {}
// Byte count of the serialized image; must account for exactly the fields
// written by serialize(), in any order (sizes are additive).
size_t AnchorGeneratorPlugin::getSerializationSize() const {
  size_t serialize_size = 0;
  serialize_size += SerializedSize(data_type_);
  serialize_size += SerializedSize(anchor_sizes_);
  serialize_size += SerializedSize(aspect_ratios_);
  serialize_size += SerializedSize(stride_);
  serialize_size += SerializedSize(variances_);
  serialize_size += SerializedSize(offset_);
  serialize_size += SerializedSize(height_);
  serialize_size += SerializedSize(width_);
  serialize_size += SerializedSize(num_anchors_);
  serialize_size += SerializedSize(box_num_);
  return serialize_size;
}
// Write order must mirror the read order in the deserializing constructor.
void AnchorGeneratorPlugin::serialize(void* buffer) const {
  SerializeValue(&buffer, data_type_);
  SerializeValue(&buffer, anchor_sizes_);
  SerializeValue(&buffer, aspect_ratios_);
  SerializeValue(&buffer, stride_);
  SerializeValue(&buffer, variances_);
  SerializeValue(&buffer, offset_);
  SerializeValue(&buffer, height_);
  SerializeValue(&buffer, width_);
  SerializeValue(&buffer, num_anchors_);
  SerializeValue(&buffer, box_num_);
}
// Intentionally empty: device memory is released by the destructor, which
// TensorRT invokes when the engine/plugin is destroyed.
void AnchorGeneratorPlugin::destroy() {}
void AnchorGeneratorPlugin::setPluginNamespace(const char* lib_namespace) {
  namespace_ = std::string(lib_namespace);
}
const char* AnchorGeneratorPlugin::getPluginNamespace() const {
  return namespace_.c_str();
}
// Both outputs carry data_type_ (enforced to be float32 in the ctor).
nvinfer1::DataType AnchorGeneratorPlugin::getOutputDataType(
    int index, const nvinfer1::DataType* input_type, int nb_inputs) const {
  return data_type_;
}
// Anchors depend only on shape, so the outputs are identical across the
// batch and can be broadcast.
bool AnchorGeneratorPlugin::isOutputBroadcastAcrossBatch(
    int output_index, const bool* input_is_broadcast, int nb_inputs) const {
  return true;
}
bool AnchorGeneratorPlugin::canBroadcastInputAcrossBatch(
    int input_index) const {
  return false;
}
// Nothing to configure: all geometry was fixed at construction time.
void AnchorGeneratorPlugin::configurePlugin(
    const nvinfer1::Dims* input_dims, int nb_inputs,
    const nvinfer1::Dims* output_dims, int nb_outputs,
    const nvinfer1::DataType* input_types,
    const nvinfer1::DataType* output_types, const bool* input_is_broadcast,
    const bool* output_is_broadcast, nvinfer1::PluginFormat float_format,
    int max_batct_size) {}
// Deep copy via the full constructor so the clone gets its own device
// buffers (PrepareParamsOnDevice runs again inside the ctor).
nvinfer1::IPluginV2Ext* AnchorGeneratorPlugin::clone() const {
  auto plugin = new AnchorGeneratorPlugin(
      data_type_, anchor_sizes_, aspect_ratios_, stride_, variances_, offset_,
      height_, width_, num_anchors_, box_num_);
  plugin->setPluginNamespace(namespace_.c_str());
  return plugin;
}
void AnchorGeneratorPluginCreator::setPluginNamespace(
    const char* lib_namespace) {
  namespace_ = std::string(lib_namespace);
}
const char* AnchorGeneratorPluginCreator::getPluginNamespace() const {
  return namespace_.c_str();
}
// Must match AnchorGeneratorPlugin::getPluginType()/getPluginVersion() so
// TensorRT can pair serialized plugins with this creator.
const char* AnchorGeneratorPluginCreator::getPluginName() const {
  return "anchor_generator_plugin";
}
const char* AnchorGeneratorPluginCreator::getPluginVersion() const {
  return "1";
}
const nvinfer1::PluginFieldCollection*
AnchorGeneratorPluginCreator::getFieldNames() {
  return &field_collection_;
}
// Builds a static-shape plugin from a PluginFieldCollection.
// Fix: std::string::compare returns 0 on equality, so every branch must
// test `compare(...) == 0`. The previous bare `compare(...)` was truthy
// whenever the names did NOT match, which routed each field's data into
// the wrong vector (e.g. anchor_sizes landed in aspect_ratios) and made
// the trailing "unknown field" assert unreachable.
nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::createPlugin(
    const char* name, const nvinfer1::PluginFieldCollection* fc) {
  const nvinfer1::PluginField* fields = fc->fields;
  int type_id = -1;
  std::vector<float> anchor_sizes, aspect_ratios, stride, variances;
  float offset = .5;
  int height = -1, width = -1;
  int num_anchors = -1;
  int box_num = -1;
  for (int i = 0; i < fc->nbFields; ++i) {
    const std::string field_name(fc->fields[i].name);
    const auto length = fc->fields[i].length;
    if (field_name.compare("type_id") == 0) {
      type_id = *static_cast<const int*>(fc->fields[i].data);
    } else if (field_name.compare("anchor_sizes") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      anchor_sizes.insert(anchor_sizes.end(), data, data + length);
    } else if (field_name.compare("aspect_ratios") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      aspect_ratios.insert(aspect_ratios.end(), data, data + length);
    } else if (field_name.compare("stride") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      stride.insert(stride.end(), data, data + length);
    } else if (field_name.compare("variances") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      variances.insert(variances.end(), data, data + length);
    } else if (field_name.compare("offset") == 0) {
      offset = *static_cast<const float*>(fc->fields[i].data);
    } else if (field_name.compare("height") == 0) {
      height = *static_cast<const int*>(fc->fields[i].data);
    } else if (field_name.compare("width") == 0) {
      width = *static_cast<const int*>(fc->fields[i].data);
    } else if (field_name.compare("num_anchors") == 0) {
      num_anchors = *static_cast<const int*>(fc->fields[i].data);
    } else if (field_name.compare("box_num") == 0) {
      box_num = *static_cast<const int*>(fc->fields[i].data);
    } else {
      assert(false && "unknown plugin field name.");
    }
  }
  return new AnchorGeneratorPlugin(nvinfer1::DataType::kFLOAT, anchor_sizes,
                                   aspect_ratios, stride, variances, offset,
                                   height, width, num_anchors, box_num);
}
// Reconstructs a plugin from its serialized image (engine deserialization
// path); the plugin ctor re-allocates the device-side parameter buffers.
nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::deserializePlugin(
    const char* name, const void* serial_data, size_t serial_length) {
  auto plugin = new AnchorGeneratorPlugin(serial_data, serial_length);
  plugin->setPluginNamespace(namespace_.c_str());
  return plugin;
}
#if IS_TRT_VERSION_GE(6000)
// Dynamic-shape plugin constructor. Spatial extents are not stored: they
// are read from the input descriptor at enqueue time.
// Fix: the PADDLE_ENFORCE_GE(num_anchors_, 0) message previously claimed
// "greater than 0", contradicting the >= 0 check actually performed; the
// message now states the real condition.
AnchorGeneratorPluginDynamic::AnchorGeneratorPluginDynamic(
    const nvinfer1::DataType data_type, const std::vector<float>& anchor_sizes,
    const std::vector<float>& aspect_ratios, const std::vector<float>& stride,
    const std::vector<float>& variances, const float offset,
    const int num_anchors)
    : data_type_(data_type),
      anchor_sizes_(anchor_sizes),
      aspect_ratios_(aspect_ratios),
      stride_(stride),
      variances_(variances),
      offset_(offset),
      num_anchors_(num_anchors) {
  // data_type_ is used to determine the output data type
  // data_type_ can only be float32
  // height, width, num_anchors are calculated at configurePlugin
  PADDLE_ENFORCE_EQ(data_type_, nvinfer1::DataType::kFLOAT,
                    platform::errors::InvalidArgument(
                        "TRT anchor generator plugin only accepts float32."));
  PADDLE_ENFORCE_GE(
      num_anchors_, 0,
      platform::errors::InvalidArgument(
          "TRT anchor generator plugin only accepts number of anchors "
          "greater than or equal to 0, but receive number of anchors = %d.",
          num_anchors_));
  // Upload the parameter vectors to device memory for enqueue().
  PrepareParamsOnDevice();
}
// Releases the device-side parameter buffers allocated by
// PrepareParamsOnDevice(); null (never-allocated) buffers are skipped.
AnchorGeneratorPluginDynamic::~AnchorGeneratorPluginDynamic() {
  void* device_buffers[] = {anchor_sizes_device_, aspect_ratios_device_,
                            stride_device_, variances_device_};
  for (void* buffer : device_buffers) {
    if (buffer != nullptr) {
      cudaFree(buffer);
    }
  }
}
// Deserializing constructor; read order must match serialize() exactly.
AnchorGeneratorPluginDynamic::AnchorGeneratorPluginDynamic(void const* data,
                                                           size_t length) {
  DeserializeValue(&data, &length, &data_type_);
  DeserializeValue(&data, &length, &anchor_sizes_);
  DeserializeValue(&data, &length, &aspect_ratios_);
  DeserializeValue(&data, &length, &stride_);
  DeserializeValue(&data, &length, &variances_);
  DeserializeValue(&data, &length, &offset_);
  DeserializeValue(&data, &length, &num_anchors_);
  // Device buffers are not serialized; rebuild them so a deserialized
  // plugin can run enqueue() immediately (fixes the missing-allocation
  // bug on the deserialization path).
  PrepareParamsOnDevice();
}
// Deep copy via the full constructor so the clone owns fresh device
// buffers.
nvinfer1::IPluginV2DynamicExt* AnchorGeneratorPluginDynamic::clone() const {
  auto plugin = new AnchorGeneratorPluginDynamic(
      data_type_, anchor_sizes_, aspect_ratios_, stride_, variances_, offset_,
      num_anchors_);
  plugin->setPluginNamespace(namespace_.c_str());
  return plugin;
}
// Output shape is [H, W, num_anchors, 4]; H/W are taken from the NCHW
// input's dims 2 and 3, so they may be runtime-dynamic expressions.
nvinfer1::DimsExprs AnchorGeneratorPluginDynamic::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
    nvinfer1::IExprBuilder& exprBuilder) {
  nvinfer1::DimsExprs ret{};
  ret.nbDims = 4;
  ret.d[0] = inputs[0].d[2];  // feature height
  ret.d[1] = inputs[0].d[3];  // feature width
  ret.d[2] = exprBuilder.constant(num_anchors_);
  ret.d[3] = exprBuilder.constant(4);
  return ret;
}
bool AnchorGeneratorPluginDynamic::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs,
    int nbOutputs) {
  // input can be any, doesn't matter
  // anchor generator doesn't read input raw data, only need the shape info
  auto type = inOut[pos].type;
  auto format = inOut[pos].format;
#if IS_TRT_VERSION_GE(7234)
  if (pos == 0) return true;
#else
  // Older TRT versions require a linear input layout here.
  if (pos == 0) return format == nvinfer1::TensorFormat::kLINEAR;
#endif
  // Outputs must be linear float32.
  return (type == nvinfer1::DataType::kFLOAT &&
          format == nvinfer1::TensorFormat::kLINEAR);
}
// Nothing to configure: H/W are read per-invocation inside enqueue.
void AnchorGeneratorPluginDynamic::configurePlugin(
    const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs,
    const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {}
// No scratch memory needed; kernels write directly to the outputs.
size_t AnchorGeneratorPluginDynamic::getWorkspaceSize(
    const nvinfer1::PluginTensorDesc* inputs, int nbInputs,
    const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const {
  return 0;
}
// Launches the Paddle GenAnchors and SetVariance kernels. H/W come from
// the runtime input descriptor (NCHW dims 2/3); the input's data itself is
// never read. Returns non-zero if a launch failed.
template <typename T>
int AnchorGeneratorPluginDynamic::enqueue_impl(
    const nvinfer1::PluginTensorDesc* inputDesc,
    const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs,
    void* const* outputs, void* workspace, cudaStream_t stream) {
  const int height = inputDesc[0].dims.d[2];
  const int width = inputDesc[0].dims.d[3];
  const int box_num = height * width * num_anchors_;
  const int block = 512;
  // One thread per box for anchor generation.
  const int gen_anchor_grid = (box_num + block - 1) / block;
  T* anchors = static_cast<T*>(outputs[0]);
  T* vars = static_cast<T*>(outputs[1]);
  const T* anchor_sizes_device = static_cast<const T*>(anchor_sizes_device_);
  const T* aspect_ratios_device = static_cast<const T*>(aspect_ratios_device_);
  const T* stride_device = static_cast<const T*>(stride_device_);
  const T* variances_device = static_cast<const T*>(variances_device_);
  paddle::operators::GenAnchors<T><<<gen_anchor_grid, block, 0, stream>>>(
      anchors, aspect_ratios_device, aspect_ratios_.size(), anchor_sizes_device,
      anchor_sizes_.size(), stride_device, stride_.size(), height, width,
      offset_);
  // One thread per box coordinate (4 per box) for the variance fill.
  const int var_grid = (box_num * 4 + block - 1) / block;
  paddle::operators::SetVariance<T><<<var_grid, block, 0, stream>>>(
      vars, variances_device, variances_.size(), box_num * 4);
  return cudaGetLastError() != cudaSuccess;
}
// Outputs are expected to be float32 (see getOutputDataType), so only the
// float instantiation is dispatched.
int AnchorGeneratorPluginDynamic::enqueue(
    const nvinfer1::PluginTensorDesc* inputDesc,
    const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs,
    void* const* outputs, void* workspace, cudaStream_t stream) {
  assert(outputDesc[0].type == nvinfer1::DataType::kFLOAT);
  assert(outputDesc[1].type == nvinfer1::DataType::kFLOAT);
  return enqueue_impl<float>(inputDesc, outputDesc, inputs, outputs, workspace,
                             stream);
}
// Both outputs carry data_type_ (enforced to be float32 in the ctor).
nvinfer1::DataType AnchorGeneratorPluginDynamic::getOutputDataType(
    int index, const nvinfer1::DataType* inputTypes, int nbInputs) const {
  return data_type_;
}
const char* AnchorGeneratorPluginDynamic::getPluginType() const {
  return "anchor_generator_plugin_dynamic";
}
// Two outputs: anchors and variances.
int AnchorGeneratorPluginDynamic::getNbOutputs() const { return 2; }
int AnchorGeneratorPluginDynamic::initialize() { return 0; }
void AnchorGeneratorPluginDynamic::terminate() {}
// Byte count of the serialized image; must cover exactly the fields
// written by serialize(). Note: no height/width/box_num here — those are
// runtime-dynamic.
size_t AnchorGeneratorPluginDynamic::getSerializationSize() const {
  size_t serialize_size = 0;
  serialize_size += SerializedSize(data_type_);
  serialize_size += SerializedSize(anchor_sizes_);
  serialize_size += SerializedSize(aspect_ratios_);
  serialize_size += SerializedSize(stride_);
  serialize_size += SerializedSize(variances_);
  serialize_size += SerializedSize(offset_);
  serialize_size += SerializedSize(num_anchors_);
  return serialize_size;
}
// Write order must mirror the read order in the deserializing constructor.
void AnchorGeneratorPluginDynamic::serialize(void* buffer) const {
  SerializeValue(&buffer, data_type_);
  SerializeValue(&buffer, anchor_sizes_);
  SerializeValue(&buffer, aspect_ratios_);
  SerializeValue(&buffer, stride_);
  SerializeValue(&buffer, variances_);
  SerializeValue(&buffer, offset_);
  SerializeValue(&buffer, num_anchors_);
}
// Intentionally empty: device memory is released by the destructor.
void AnchorGeneratorPluginDynamic::destroy() {}
void AnchorGeneratorPluginDynamicCreator::setPluginNamespace(
    const char* lib_namespace) {
  namespace_ = std::string(lib_namespace);
}
const char* AnchorGeneratorPluginDynamicCreator::getPluginNamespace() const {
  return namespace_.c_str();
}
// Must match AnchorGeneratorPluginDynamic::getPluginType()/version so
// TensorRT can pair serialized plugins with this creator.
const char* AnchorGeneratorPluginDynamicCreator::getPluginName() const {
  return "anchor_generator_plugin_dynamic";
}
const char* AnchorGeneratorPluginDynamicCreator::getPluginVersion() const {
  return "1";
}
const nvinfer1::PluginFieldCollection*
AnchorGeneratorPluginDynamicCreator::getFieldNames() {
  return &field_collection_;
}
// Builds a dynamic-shape plugin from a PluginFieldCollection.
// Fix: std::string::compare returns 0 on equality, so every branch must
// test `compare(...) == 0`. The previous bare `compare(...)` was truthy
// whenever the names did NOT match, routing field data into the wrong
// vector and making the trailing "unknown field" assert unreachable.
nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::createPlugin(
    const char* name, const nvinfer1::PluginFieldCollection* fc) {
  const nvinfer1::PluginField* fields = fc->fields;
  int type_id = -1;
  std::vector<float> anchor_sizes, aspect_ratios, stride, variances;
  float offset = .5;
  int num_anchors = -1;
  for (int i = 0; i < fc->nbFields; ++i) {
    const std::string field_name(fc->fields[i].name);
    const auto length = fc->fields[i].length;
    if (field_name.compare("type_id") == 0) {
      type_id = *static_cast<const int*>(fc->fields[i].data);
    } else if (field_name.compare("anchor_sizes") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      anchor_sizes.insert(anchor_sizes.end(), data, data + length);
    } else if (field_name.compare("aspect_ratios") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      aspect_ratios.insert(aspect_ratios.end(), data, data + length);
    } else if (field_name.compare("stride") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      stride.insert(stride.end(), data, data + length);
    } else if (field_name.compare("variances") == 0) {
      const auto* data = static_cast<const float*>(fc->fields[i].data);
      variances.insert(variances.end(), data, data + length);
    } else if (field_name.compare("offset") == 0) {
      offset = *static_cast<const float*>(fc->fields[i].data);
    } else if (field_name.compare("num_anchors") == 0) {
      num_anchors = *static_cast<const int*>(fc->fields[i].data);
    } else {
      assert(false && "unknown plugin field name.");
    }
  }
  return new AnchorGeneratorPluginDynamic(nvinfer1::DataType::kFLOAT,
                                          anchor_sizes, aspect_ratios, stride,
                                          variances, offset, num_anchors);
}
// Reconstructs a dynamic plugin from its serialized image; the plugin ctor
// re-allocates the device-side parameter buffers.
nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::deserializePlugin(
    const char* name, const void* serial_data, size_t serial_length) {
  auto plugin = new AnchorGeneratorPluginDynamic(serial_data, serial_length);
  plugin->setPluginNamespace(namespace_.c_str());
  return plugin;
}
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
namespace paddle {
namespace inference {
namespace tensorrt {
namespace plugin {
// Static-shape TensorRT plugin wrapping Paddle's anchor_generator op.
// Outputs two tensors (anchors, variances) of shape [H, W, num_anchors, 4];
// all geometry is fixed at construction time. Parameters are mirrored to
// device memory (the *_device_ members) for use by the CUDA kernels.
class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext {
 public:
  explicit AnchorGeneratorPlugin(
      const nvinfer1::DataType, const std::vector<float>& anchor_sizes,
      const std::vector<float>& aspect_ratios, const std::vector<float>& stride,
      const std::vector<float>& variances, const float offset, const int height,
      const int width, const int num_anchors, const int box_num);
  // Deserializing constructor (engine-load path).
  AnchorGeneratorPlugin(const void* data, size_t length);
  ~AnchorGeneratorPlugin() override;
  const char* getPluginType() const override;
  const char* getPluginVersion() const override;
  int getNbOutputs() const override;
  nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs,
                                     int nb_input_dims) override;
  bool supportsFormat(nvinfer1::DataType type,
                      nvinfer1::TensorFormat format) const override;
  size_t getWorkspaceSize(int max_batch_size) const override;
  int enqueue(int batch_size, const void* const* inputs, void** outputs,
              void* workspace, cudaStream_t stream) override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void* buffer) const override;
  void destroy() override;
  void setPluginNamespace(const char* lib_namespace) override;
  const char* getPluginNamespace() const override;
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType* input_type,
                                       int nb_inputs) const override;
  bool isOutputBroadcastAcrossBatch(int output_index,
                                    const bool* input_is_broadcast,
                                    int nb_inputs) const override;
  bool canBroadcastInputAcrossBatch(int input_index) const override;
  void configurePlugin(const nvinfer1::Dims* input_dims, int nb_inputs,
                       const nvinfer1::Dims* output_dims, int nb_outputs,
                       const nvinfer1::DataType* input_types,
                       const nvinfer1::DataType* output_types,
                       const bool* input_is_broadcast,
                       const bool* output_is_broadcast,
                       nvinfer1::PluginFormat float_format,
                       int max_batct_size) override;
  nvinfer1::IPluginV2Ext* clone() const override;

 private:
  // Shared kernel-launch body; instantiated for float only.
  template <typename T>
  int enqueue_impl(int batch_size, const void* const* inputs, void** outputs,
                   void* workspace, cudaStream_t stream);
  nvinfer1::DataType data_type_;
  // Host-side copies of the op attributes (serialized with the plugin).
  std::vector<float> anchor_sizes_;
  std::vector<float> aspect_ratios_;
  std::vector<float> stride_;
  std::vector<float> variances_;
  float offset_;
  // Device-side mirrors of the vectors above; owned by this plugin and
  // freed in the destructor.
  void* anchor_sizes_device_;
  void* aspect_ratios_device_;
  void* stride_device_;
  void* variances_device_;
  int height_;
  int width_;
  int num_anchors_;
  int box_num_;
  std::string namespace_;
};
// Creator for AnchorGeneratorPlugin; registered with TensorRT below so
// serialized engines can re-instantiate the plugin at load time.
class AnchorGeneratorPluginCreator : public nvinfer1::IPluginCreator {
 public:
  AnchorGeneratorPluginCreator() = default;
  ~AnchorGeneratorPluginCreator() override = default;
  void setPluginNamespace(const char* lib_namespace) override;
  const char* getPluginNamespace() const override;
  const char* getPluginName() const override;
  const char* getPluginVersion() const override;
  const nvinfer1::PluginFieldCollection* getFieldNames() override;
  nvinfer1::IPluginV2Ext* createPlugin(
      const char* name, const nvinfer1::PluginFieldCollection* fc) override;
  nvinfer1::IPluginV2Ext* deserializePlugin(const char* name,
                                            const void* serial_data,
                                            size_t serial_length) override;

 private:
  std::string namespace_;
  nvinfer1::PluginFieldCollection field_collection_;
};
REGISTER_TRT_PLUGIN_V2(AnchorGeneratorPluginCreator);
#if IS_TRT_VERSION_GE(6000)
// Dynamic-shape variant of the anchor generator plugin. Height/width are
// not stored: they are read from the input descriptor at enqueue time, so
// the spatial axes may vary between inferences.
class AnchorGeneratorPluginDynamic : public DynamicPluginTensorRT {
 public:
  explicit AnchorGeneratorPluginDynamic(const nvinfer1::DataType data_type,
                                        const std::vector<float>& anchor_sizes,
                                        const std::vector<float>& aspect_ratios,
                                        const std::vector<float>& stride,
                                        const std::vector<float>& variances,
                                        const float offset,
                                        const int num_anchors);
  // Deserializing constructor (engine-load path).
  AnchorGeneratorPluginDynamic(void const* data, size_t length);
  ~AnchorGeneratorPluginDynamic();
  nvinfer1::IPluginV2DynamicExt* clone() const override;
  nvinfer1::DimsExprs getOutputDimensions(
      int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs,
      nvinfer1::IExprBuilder& exprBuilder) override;
  bool supportsFormatCombination(int pos,
                                 const nvinfer1::PluginTensorDesc* inOut,
                                 int nbInputs, int nbOutputs) override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in,
                       int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc* out,
                       int nbOutputs) override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs,
                          int nbInputs,
                          const nvinfer1::PluginTensorDesc* outputs,
                          int nbOutputs) const override;
  int enqueue(const nvinfer1::PluginTensorDesc* inputDesc,
              const nvinfer1::PluginTensorDesc* outputDesc,
              const void* const* inputs, void* const* outputs, void* workspace,
              cudaStream_t stream) override;
  nvinfer1::DataType getOutputDataType(int index,
                                       const nvinfer1::DataType* inputTypes,
                                       int nbInputs) const override;
  const char* getPluginType() const override;
  int getNbOutputs() const override;
  int initialize() override;
  void terminate() override;
  size_t getSerializationSize() const override;
  void serialize(void* buffer) const override;
  void destroy() override;

 private:
  // Shared kernel-launch body; instantiated for float only.
  template <typename T>
  int enqueue_impl(const nvinfer1::PluginTensorDesc* inputDesc,
                   const nvinfer1::PluginTensorDesc* outputDesc,
                   const void* const* inputs, void* const* outputs,
                   void* workspace, cudaStream_t stream);
  nvinfer1::DataType data_type_;
  // Host-side copies of the op attributes (serialized with the plugin).
  std::vector<float> anchor_sizes_;
  std::vector<float> aspect_ratios_;
  std::vector<float> stride_;
  std::vector<float> variances_;
  float offset_;
  // Device-side mirrors of the vectors above; owned by this plugin and
  // freed in the destructor.
  void* anchor_sizes_device_;
  void* aspect_ratios_device_;
  void* stride_device_;
  void* variances_device_;
  int num_anchors_;
  std::string namespace_;
};
// Factory for the dynamic-shape plugin; mirrors AnchorGeneratorPluginCreator
// but produces AnchorGeneratorPluginDynamic instances.
class AnchorGeneratorPluginDynamicCreator : public nvinfer1::IPluginCreator {
 public:
  AnchorGeneratorPluginDynamicCreator() = default;
  ~AnchorGeneratorPluginDynamicCreator() override = default;
  void setPluginNamespace(const char* lib_namespace) override;
  const char* getPluginNamespace() const override;
  const char* getPluginName() const override;
  const char* getPluginVersion() const override;
  const nvinfer1::PluginFieldCollection* getFieldNames() override;
  // Builds a fresh plugin instance from the attributes carried in `fc`.
  nvinfer1::IPluginV2Ext* createPlugin(
      const char* name, const nvinfer1::PluginFieldCollection* fc) override;
  // Reconstructs a plugin from bytes written by serialize() during engine
  // deserialization.
  nvinfer1::IPluginV2Ext* deserializePlugin(const char* name,
                                            const void* serial_data,
                                            size_t serial_length) override;
 private:
  std::string namespace_;  // value returned by getPluginNamespace()
  // NOTE(review): default-constructed; presumably filled by getFieldNames()
  // in the .cc file -- confirm before relying on it.
  nvinfer1::PluginFieldCollection field_collection_;
};
// Registers the dynamic-shape creator with TensorRT's plugin registry.
REGISTER_TRT_PLUGIN_V2(AnchorGeneratorPluginDynamicCreator);
#endif
} // namespace plugin
} // namespace tensorrt
} // namespace inference
} // namespace paddle
...@@ -49,14 +49,11 @@ __global__ void GenAnchors(T* out, const T* aspect_ratios, const int ar_num, ...@@ -49,14 +49,11 @@ __global__ void GenAnchors(T* out, const T* aspect_ratios, const int ar_num,
anchor_width = scale_w * base_w; anchor_width = scale_w * base_w;
anchor_height = scale_h * base_h; anchor_height = scale_h * base_h;
T xmin = (x_ctr - 0.5 * (anchor_width - 1)); T xmin = (x_ctr - .5f * (anchor_width - 1));
T ymin = (y_ctr - 0.5 * (anchor_height - 1)); T ymin = (y_ctr - .5f * (anchor_height - 1));
T xmax = (x_ctr + 0.5 * (anchor_width - 1)); T xmax = (x_ctr + .5f * (anchor_width - 1));
T ymax = (y_ctr + 0.5 * (anchor_height - 1)); T ymax = (y_ctr + .5f * (anchor_height - 1));
out[i * 4] = xmin; reinterpret_cast<float4*>(out)[i] = make_float4(xmin, ymin, xmax, ymax);
out[i * 4 + 1] = ymin;
out[i * 4 + 2] = xmax;
out[i * 4 + 3] = ymax;
} }
} }
......
...@@ -22,6 +22,19 @@ limitations under the License. */ ...@@ -22,6 +22,19 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
#ifdef PADDLE_WITH_CUDA
template <typename T>
extern __global__ void GenAnchors(T* out, const T* aspect_ratios,
const int ar_num, const T* anchor_sizes,
const int as_num, const T* stride,
const int sd_num, const int height,
const int width, const T offset);
template <typename T>
extern __global__ void SetVariance(T* out, const T* var, const int vnum,
const int num);
#endif
template <typename T> template <typename T>
class AnchorGeneratorOpKernel : public framework::OpKernel<T> { class AnchorGeneratorOpKernel : public framework::OpKernel<T> {
public: public:
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import itertools
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTAnchorGeneratorBaseTest(InferencePassTest):
    """Inference-pass test for the TensorRT anchor_generator converter.

    Builds a small network containing an anchor_generator op and checks the
    TensorRT-accelerated result against the native Paddle result under every
    combination of precision (fp32/fp16), engine serialization, and
    static/dynamic input shape.
    """

    def setUp(self):
        # Input tensor configuration shared by all test variants.
        self.bs = 1
        self.channel = 16
        self.height = 32
        self.width = 32
        # anchor_generator op attributes.
        self.anchor_sizes = [64., 128., 256., 512.]
        self.aspect_ratios = [.5, 1., 2.]
        self.variance = [.1, .1, .2, .2]
        self.stride = [8., 8.]
        # Per-test knobs; individual test_* methods override these.
        self.precision = AnalysisConfig.Precision.Float32
        self.serialize = False
        self.enable_trt = True
        input_shape = [self.bs, self.channel, self.height, self.width]
        self.feeds = {
            'data': np.random.random(input_shape).astype('float32'),
        }

    def build(self):
        """Construct the program and the TensorRT parameters."""
        use_dynamic = self.dynamic_shape_params is not None
        # Dynamic-shape mode keeps one more op outside TensorRT, so the
        # minimum subgraph size differs between the two modes.
        min_graph_size = 3 if use_dynamic else 2
        self.trt_parameters = InferencePassTest.TensorRTParam(
            1 << 30, self.bs, min_graph_size, self.precision, self.serialize,
            False)
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name='data',
                shape=[-1, self.channel, self.height, self.width],
                dtype='float32')
            anchor, var = fluid.layers.detection.anchor_generator(
                data,
                anchor_sizes=self.anchor_sizes,
                aspect_ratios=self.aspect_ratios,
                variance=self.variance,
                stride=self.stride)
            if use_dynamic:
                anchor = fluid.layers.transpose(anchor, [2, 3, 0, 1])
            out = fluid.layers.batch_norm(anchor, is_test=True)
        self.fetch_list = [out, var]

    def run_test(self):
        self.build()
        self.check_output()

    def set_dynamic(self):
        """Enable dynamic input shapes on the spatial axes."""
        min_shape = [self.bs, self.channel, self.height // 2, self.width // 2]
        full_shape = [self.bs, self.channel, self.height, self.width]
        self.dynamic_shape_params = InferencePassTest.DynamicShapeParam(
            {'data': min_shape}, {'data': full_shape}, {'data': full_shape},
            False)

    def test_base(self):
        self.run_test()

    def test_fp16(self):
        self.precision = AnalysisConfig.Precision.Half
        self.run_test()

    def test_serialize(self):
        self.serialize = True
        self.run_test()

    def test_dynamic(self):
        self.set_dynamic()
        self.run_test()

    def test_dynamic_fp16(self):
        self.precision = AnalysisConfig.Precision.Half
        self.set_dynamic()
        self.run_test()

    def test_dynamic_serialize(self):
        self.serialize = True
        self.set_dynamic()
        self.run_test()

    def test_dynamic_fp16_serialize(self):
        self.serialize = True
        self.precision = AnalysisConfig.Precision.Half
        self.set_dynamic()
        self.run_test()

    def check_output(self):
        # The comparison only makes sense when the GPU/TensorRT path can run.
        if core.is_compiled_with_cuda():
            is_half = (
                self.trt_parameters.precision == AnalysisConfig.Precision.Half)
            # fp16 needs a looser tolerance than fp32.
            atol = 1e-3 if is_half else 1e-5
            self.check_output_with_option(True, atol, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
# Allow running this test file directly as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册