From b807e4081ec8231ada96c1fd96aa0a93ebf9651d Mon Sep 17 00:00:00 2001 From: zlsh80826 Date: Thu, 1 Apr 2021 12:05:26 +0800 Subject: [PATCH] [Paddle-TRT] add anchor generator op plugin (#31730) * add anchor generator op plugin * add anchor generator unit_test * remove dbg info * remove redundant line * replace assertion with paddle enforce * dynamic plugin replaces assertion with paddle enforce * anchor generator support dynamic shape on spatial axis * anchor generator test with fp16, dynamic shape * add anchor generator test all * add back main * reduce test input size to not exceed the timelimit of ci * change super to InferencePassTest for python2 compatibility * reuse paddle operator anchor generator * move creator construct to header with default * add cuda ifdef * reduce line * change super to InferencePassTest for python2 compatibility * fix anchor generator fp16 serialize setting * split unittest from test_all * restrict anchor generator input format before version 7234 * anchor generator only support greater than trt7.1 * change min_graph_size to 2 * min_graph size to 3 if dynamic shape * reduce dynamic shape size to avoid trt search tactic too long to exceed time limit * remove anchor from fetch list * anchor generator support all trt version * fix memory not allocated but if serialized --- .../fluid/inference/api/analysis_predictor.cc | 1 + .../inference/tensorrt/convert/CMakeLists.txt | 1 + .../tensorrt/convert/anchor_generator_op.cc | 79 +++ paddle/fluid/inference/tensorrt/op_teller.cc | 1 + .../inference/tensorrt/plugin/CMakeLists.txt | 1 + .../plugin/anchor_generator_op_plugin.cu | 566 ++++++++++++++++++ .../plugin/anchor_generator_op_plugin.h | 201 +++++++ .../detection/anchor_generator_op.cu | 13 +- .../operators/detection/anchor_generator_op.h | 13 + .../inference/test_trt_anchor_generator_op.py | 122 ++++ 10 files changed, 990 insertions(+), 8 deletions(-) create mode 100644 paddle/fluid/inference/tensorrt/convert/anchor_generator_op.cc create mode 100644 paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu create mode 100644 paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 21ef3b2312f..4b6c746d575 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1192,6 +1192,7 @@ USE_TRT_CONVERTER(scale); USE_TRT_CONVERTER(stack); USE_TRT_CONVERTER(clip); USE_TRT_CONVERTER(gather); +USE_TRT_CONVERTER(anchor_generator); USE_TRT_CONVERTER(yolo_box); USE_TRT_CONVERTER(roi_align); USE_TRT_CONVERTER(affine_channel); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 3f792300942..3820ac5d7cc 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -6,6 +6,7 @@ nv_library(tensorrt_converter shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc gather_op.cc + anchor_generator_op.cc yolo_box_op.cc roi_align_op.cc affine_channel_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/anchor_generator_op.cc b/paddle/fluid/inference/tensorrt/convert/anchor_generator_op.cc new file mode 100644 index 00000000000..56aab9785c9 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/anchor_generator_op.cc @@ -0,0 +1,79 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* Anchor Generator Op */ +class AnchorGeneratorOpConverter : public OpConverter { + public: + void operator()(const paddle::framework::proto::OpDesc& op, + const paddle::framework::Scope& scope, + bool test_mode) override { + VLOG(3) << "convert a fluid anchor generator op to tensorrt plugin"; + framework::OpDesc op_desc(op, nullptr); + std::string input_name = op_desc.Input("Input").front(); + std::string anchor_name = op_desc.Output("Anchors").front(); + std::string variance_name = op_desc.Output("Variances").front(); + + auto* input = engine_->GetITensor(input_name); + const auto input_dims = input->getDimensions(); // C, H, W + std::vector output_names{anchor_name, variance_name}; + + const auto anchor_sizes = + BOOST_GET_CONST(std::vector, op_desc.GetAttr("anchor_sizes")); + const auto aspect_ratios = + BOOST_GET_CONST(std::vector, op_desc.GetAttr("aspect_ratios")); + const auto stride = + BOOST_GET_CONST(std::vector, op_desc.GetAttr("stride")); + const auto variances = + BOOST_GET_CONST(std::vector, op_desc.GetAttr("variances")); + const auto offset = BOOST_GET_CONST(float, op_desc.GetAttr("offset")); + const int num_anchors = aspect_ratios.size() * anchor_sizes.size(); + bool is_dynamic = engine_->with_dynamic_shape(); + const auto height = input_dims.d[1]; + const auto width = input_dims.d[2]; + const int box_num = width * height * num_anchors; + const nvinfer1::DataType data_type = nvinfer1::DataType::kFLOAT; + + nvinfer1::IPluginV2* anchor_generator_plugin = nullptr; + if (is_dynamic) { + anchor_generator_plugin = new plugin::AnchorGeneratorPluginDynamic( + data_type, anchor_sizes, aspect_ratios, stride, variances, offset, + num_anchors); + } else { + anchor_generator_plugin = new plugin::AnchorGeneratorPlugin( + data_type, anchor_sizes, aspect_ratios, stride, variances, offset, + height, width, num_anchors, box_num); + } + + std::vector anchor_generator_inputs{input}; + auto* anchor_generator_layer = engine_->network()->addPluginV2( + anchor_generator_inputs.data(), anchor_generator_inputs.size(), + *anchor_generator_plugin); + + RreplenishLayerAndOutput(anchor_generator_layer, "anchor_generator", + output_names, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(anchor_generator, AnchorGeneratorOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index c95912a931e..f4e7c334632 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -116,6 +116,7 @@ struct SimpleOpTypeSetTeller : public Teller { "affine_channel", "multiclass_nms", "nearest_interp", + "anchor_generator", }; }; diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index b4e948edd8a..1804e6c5571 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -5,6 +5,7 @@ nv_library(tensorrt_plugin instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu hard_swish_op_plugin.cu stack_op_plugin.cu special_slice_plugin.cu + anchor_generator_op_plugin.cu yolo_box_op_plugin.cu roi_align_op_plugin.cu DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) diff --git a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu new file mode 100644 index 00000000000..01ee86ceb48 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu @@ -0,0 +1,566 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h" +#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h" + +#include "paddle/fluid/operators/detection/anchor_generator_op.h" + +namespace paddle { +namespace inference { +namespace tensorrt { +namespace plugin { + +#define PrepareParamsOnDevice() \ + constexpr int data_size = 4; \ + cudaMalloc(&anchor_sizes_device_, anchor_sizes_.size() * data_size); \ + cudaMalloc(&aspect_ratios_device_, aspect_ratios_.size() * data_size); \ + cudaMalloc(&stride_device_, stride_.size() * data_size); \ + cudaMalloc(&variances_device_, variances_.size() * data_size); \ + cudaMemcpy(anchor_sizes_device_, anchor_sizes_.data(), \ + anchor_sizes_.size() * data_size, cudaMemcpyHostToDevice); \ + cudaMemcpy(aspect_ratios_device_, aspect_ratios_.data(), \ + aspect_ratios_.size() * data_size, cudaMemcpyHostToDevice); \ + cudaMemcpy(stride_device_, stride_.data(), stride_.size() * data_size, \ + cudaMemcpyHostToDevice); \ + cudaMemcpy(variances_device_, variances_.data(), \ + variances_.size() * data_size, cudaMemcpyHostToDevice); + +AnchorGeneratorPlugin::AnchorGeneratorPlugin( + const nvinfer1::DataType data_type, const std::vector& anchor_sizes, + const std::vector& aspect_ratios, const std::vector& stride, + const std::vector& variances, const float offset, const int height, + const int width, const int num_anchors, const int box_num) + : data_type_(data_type), + anchor_sizes_(anchor_sizes), + aspect_ratios_(aspect_ratios), + stride_(stride), + variances_(variances), + offset_(offset), + height_(height), + width_(width), + num_anchors_(num_anchors), + box_num_(box_num) { + // anchors must be float32, which is the generator proposals' input + PADDLE_ENFORCE_EQ(data_type_, nvinfer1::DataType::kFLOAT, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts float32.")); + PADDLE_ENFORCE_GE(height_, 0, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts height " + "greater than 0, but receive height = %d.", + height_)); + PADDLE_ENFORCE_GE(width_, 0, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts width " + "greater than 0, but receive width = %d.", + width_)); + PADDLE_ENFORCE_GE( + num_anchors_, 0, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts number of anchors greater " + "than 0, but receive number of anchors = %d.", + num_anchors_)); + PADDLE_ENFORCE_GE(box_num_, 0, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts box_num " + "greater than 0, but receive box_num = %d.", + box_num_)); + PrepareParamsOnDevice(); +} + +AnchorGeneratorPlugin::~AnchorGeneratorPlugin() { + auto release_device_ptr = [](void* ptr) { + if (ptr) { + cudaFree(ptr); + ptr = nullptr; + } + }; + release_device_ptr(anchor_sizes_device_); + release_device_ptr(aspect_ratios_device_); + release_device_ptr(stride_device_); + release_device_ptr(variances_device_); +} + +AnchorGeneratorPlugin::AnchorGeneratorPlugin(const void* data, size_t length) { + DeserializeValue(&data, &length, &data_type_); + DeserializeValue(&data, &length, &anchor_sizes_); + DeserializeValue(&data, &length, &aspect_ratios_); + DeserializeValue(&data, &length, &stride_); + DeserializeValue(&data, &length, &variances_); + DeserializeValue(&data, &length, &offset_); + DeserializeValue(&data, &length, &height_); + DeserializeValue(&data, &length, &width_); + DeserializeValue(&data, &length, &num_anchors_); + DeserializeValue(&data, &length, &box_num_); + PrepareParamsOnDevice(); +} + +const char* AnchorGeneratorPlugin::getPluginType() const { + return "anchor_generator_plugin"; +} + +const char* AnchorGeneratorPlugin::getPluginVersion() const { return "1"; } + +int AnchorGeneratorPlugin::getNbOutputs() const { return 2; } + +nvinfer1::Dims AnchorGeneratorPlugin::getOutputDimensions( + int index, const nvinfer1::Dims* inputs, int nb_input_dims) { + nvinfer1::Dims dims{}; + dims.nbDims = 4; + dims.d[0] = height_; + dims.d[1] = width_; + dims.d[2] = num_anchors_; + dims.d[3] = 4; + return dims; +} + +bool AnchorGeneratorPlugin::supportsFormat( + nvinfer1::DataType type, nvinfer1::TensorFormat format) const { + // static shape plugin can't support different type between input/out + // it may cause addition overhead in half mode + return (type == data_type_ && format == nvinfer1::TensorFormat::kLINEAR); +} + +size_t AnchorGeneratorPlugin::getWorkspaceSize(int max_batch_size) const { + return 0; +} + +template +int AnchorGeneratorPlugin::enqueue_impl(int batch_size, + const void* const* inputs, + void** outputs, void* workspace, + cudaStream_t stream) { + const int block = 512; + const int gen_anchor_grid = (box_num_ + block - 1) / block; + T* anchors = static_cast(outputs[0]); + T* vars = static_cast(outputs[1]); + const T* anchor_sizes_device = static_cast(anchor_sizes_device_); + const T* aspect_ratios_device = static_cast(aspect_ratios_device_); + const T* stride_device = static_cast(stride_device_); + const T* variances_device = static_cast(variances_device_); + paddle::operators::GenAnchors<<>>( + anchors, aspect_ratios_device, aspect_ratios_.size(), anchor_sizes_device, + anchor_sizes_.size(), stride_device, stride_.size(), height_, width_, + offset_); + const int var_grid = (box_num_ * 4 + block - 1) / block; + paddle::operators::SetVariance<<>>( + vars, variances_device, variances_.size(), box_num_ * 4); + return cudaGetLastError() != cudaSuccess; +} + +int AnchorGeneratorPlugin::enqueue(int batch_size, const void* const* inputs, + void** outputs, void* workspace, + cudaStream_t stream) { + return enqueue_impl(batch_size, inputs, outputs, workspace, stream); +} + +int AnchorGeneratorPlugin::initialize() { return 0; } + +void AnchorGeneratorPlugin::terminate() {} + +size_t AnchorGeneratorPlugin::getSerializationSize() const { + size_t serialize_size = 0; + serialize_size += SerializedSize(data_type_); + serialize_size += SerializedSize(anchor_sizes_); + serialize_size += SerializedSize(aspect_ratios_); + serialize_size += SerializedSize(stride_); + serialize_size += SerializedSize(variances_); + serialize_size += SerializedSize(offset_); + serialize_size += SerializedSize(height_); + serialize_size += SerializedSize(width_); + serialize_size += SerializedSize(num_anchors_); + serialize_size += SerializedSize(box_num_); + return serialize_size; +} + +void AnchorGeneratorPlugin::serialize(void* buffer) const { + SerializeValue(&buffer, data_type_); + SerializeValue(&buffer, anchor_sizes_); + SerializeValue(&buffer, aspect_ratios_); + SerializeValue(&buffer, stride_); + SerializeValue(&buffer, variances_); + SerializeValue(&buffer, offset_); + SerializeValue(&buffer, height_); + SerializeValue(&buffer, width_); + SerializeValue(&buffer, num_anchors_); + SerializeValue(&buffer, box_num_); +} + +void AnchorGeneratorPlugin::destroy() {} + +void AnchorGeneratorPlugin::setPluginNamespace(const char* lib_namespace) { + namespace_ = std::string(lib_namespace); +} + +const char* AnchorGeneratorPlugin::getPluginNamespace() const { + return namespace_.c_str(); +} + +nvinfer1::DataType AnchorGeneratorPlugin::getOutputDataType( + int index, const nvinfer1::DataType* input_type, int nb_inputs) const { + return data_type_; +} + +bool AnchorGeneratorPlugin::isOutputBroadcastAcrossBatch( + int output_index, const bool* input_is_broadcast, int nb_inputs) const { + return true; +} + +bool AnchorGeneratorPlugin::canBroadcastInputAcrossBatch( + int input_index) const { + return false; +} + +void AnchorGeneratorPlugin::configurePlugin( + const nvinfer1::Dims* input_dims, int nb_inputs, + const nvinfer1::Dims* output_dims, int nb_outputs, + const nvinfer1::DataType* input_types, + const nvinfer1::DataType* output_types, const bool* input_is_broadcast, + const bool* output_is_broadcast, nvinfer1::PluginFormat float_format, + int max_batct_size) {} + +nvinfer1::IPluginV2Ext* AnchorGeneratorPlugin::clone() const { + auto plugin = new AnchorGeneratorPlugin( + data_type_, anchor_sizes_, aspect_ratios_, stride_, variances_, offset_, + height_, width_, num_anchors_, box_num_); + plugin->setPluginNamespace(namespace_.c_str()); + return plugin; +} + +void AnchorGeneratorPluginCreator::setPluginNamespace( + const char* lib_namespace) { + namespace_ = std::string(lib_namespace); +} + +const char* AnchorGeneratorPluginCreator::getPluginNamespace() const { + return namespace_.c_str(); +} + +const char* AnchorGeneratorPluginCreator::getPluginName() const { + return "anchor_generator_plugin"; +} + +const char* AnchorGeneratorPluginCreator::getPluginVersion() const { + return "1"; +} + +const nvinfer1::PluginFieldCollection* +AnchorGeneratorPluginCreator::getFieldNames() { + return &field_collection_; +} + +nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) { + const nvinfer1::PluginField* fields = fc->fields; + int type_id = -1; + std::vector anchor_sizes, aspect_ratios, stride, variances; + float offset = .5; + int height = -1, width = -1; + int num_anchors = -1; + int box_num = -1; + + for (int i = 0; i < fc->nbFields; ++i) { + const std::string field_name(fc->fields[i].name); + const auto length = fc->fields[i].length; + if (field_name.compare("type_id") == 0) { + type_id = *static_cast(fc->fields[i].data); + } else if (field_name.compare("anchor_sizes")) { + const auto* data = static_cast(fc->fields[i].data); + anchor_sizes.insert(anchor_sizes.end(), data, data + length); + } else if (field_name.compare("aspect_ratios")) { + const auto* data = static_cast(fc->fields[i].data); + aspect_ratios.insert(aspect_ratios.end(), data, data + length); + } else if (field_name.compare("stride")) { + const auto* data = static_cast(fc->fields[i].data); + stride.insert(stride.end(), data, data + length); + } else if (field_name.compare("variances")) { + const auto* data = static_cast(fc->fields[i].data); + variances.insert(variances.end(), data, data + length); + } else if (field_name.compare("offset")) { + offset = *static_cast(fc->fields[i].data); + } else if (field_name.compare("height")) { + height = *static_cast(fc->fields[i].data); + } else if (field_name.compare("width")) { + width = *static_cast(fc->fields[i].data); + } else if (field_name.compare("num_anchors")) { + num_anchors = *static_cast(fc->fields[i].data); + } else if (field_name.compare("box_num")) { + box_num = *static_cast(fc->fields[i].data); + } else { + assert(false && "unknown plugin field name."); + } + } + return new AnchorGeneratorPlugin(nvinfer1::DataType::kFLOAT, anchor_sizes, + aspect_ratios, stride, variances, offset, + height, width, num_anchors, box_num); +} + +nvinfer1::IPluginV2Ext* AnchorGeneratorPluginCreator::deserializePlugin( + const char* name, const void* serial_data, size_t serial_length) { + auto plugin = new AnchorGeneratorPlugin(serial_data, serial_length); + plugin->setPluginNamespace(namespace_.c_str()); + return plugin; +} + +#if IS_TRT_VERSION_GE(6000) +AnchorGeneratorPluginDynamic::AnchorGeneratorPluginDynamic( + const nvinfer1::DataType data_type, const std::vector& anchor_sizes, + const std::vector& aspect_ratios, const std::vector& stride, + const std::vector& variances, const float offset, + const int num_anchors) + : data_type_(data_type), + anchor_sizes_(anchor_sizes), + aspect_ratios_(aspect_ratios), + stride_(stride), + variances_(variances), + offset_(offset), + num_anchors_(num_anchors) { + // data_type_ is used to determine the output data type + // data_type_ can only be float32 + // height, width, num_anchors are calculated at configurePlugin + PADDLE_ENFORCE_EQ(data_type_, nvinfer1::DataType::kFLOAT, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts float32.")); + PADDLE_ENFORCE_GE( + num_anchors_, 0, + platform::errors::InvalidArgument( + "TRT anchor generator plugin only accepts number of anchors greater " + "than 0, but receive number of anchors = %d.", + num_anchors_)); + PrepareParamsOnDevice(); +} + +AnchorGeneratorPluginDynamic::~AnchorGeneratorPluginDynamic() { + auto release_device_ptr = [](void* ptr) { + if (ptr) { + cudaFree(ptr); + ptr = nullptr; + } + }; + release_device_ptr(anchor_sizes_device_); + release_device_ptr(aspect_ratios_device_); + release_device_ptr(stride_device_); + release_device_ptr(variances_device_); +} + +AnchorGeneratorPluginDynamic::AnchorGeneratorPluginDynamic(void const* data, + size_t length) { + DeserializeValue(&data, &length, &data_type_); + DeserializeValue(&data, &length, &anchor_sizes_); + DeserializeValue(&data, &length, &aspect_ratios_); + DeserializeValue(&data, &length, &stride_); + DeserializeValue(&data, &length, &variances_); + DeserializeValue(&data, &length, &offset_); + DeserializeValue(&data, &length, &num_anchors_); + PrepareParamsOnDevice(); +} + +nvinfer1::IPluginV2DynamicExt* AnchorGeneratorPluginDynamic::clone() const { + auto plugin = new AnchorGeneratorPluginDynamic( + data_type_, anchor_sizes_, aspect_ratios_, stride_, variances_, offset_, + num_anchors_); + plugin->setPluginNamespace(namespace_.c_str()); + return plugin; +} + +nvinfer1::DimsExprs AnchorGeneratorPluginDynamic::getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) { + nvinfer1::DimsExprs ret{}; + ret.nbDims = 4; + ret.d[0] = inputs[0].d[2]; // feature height + ret.d[1] = inputs[0].d[3]; // feature width + ret.d[2] = exprBuilder.constant(num_anchors_); + ret.d[3] = exprBuilder.constant(4); + return ret; +} + +bool AnchorGeneratorPluginDynamic::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc* inOut, int nbInputs, + int nbOutputs) { + // input can be any, doesn't matter + // anchor generator doesn't read input raw data, only need the shape info + auto type = inOut[pos].type; + auto format = inOut[pos].format; +#if IS_TRT_VERSION_GE(7234) + if (pos == 0) return true; +#else + if (pos == 0) return format == nvinfer1::TensorFormat::kLINEAR; +#endif + return (type == nvinfer1::DataType::kFLOAT && + format == nvinfer1::TensorFormat::kLINEAR); +} + +void AnchorGeneratorPluginDynamic::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + +size_t AnchorGeneratorPluginDynamic::getWorkspaceSize( + const nvinfer1::PluginTensorDesc* inputs, int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + return 0; +} + +template +int AnchorGeneratorPluginDynamic::enqueue_impl( + const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream) { + const int height = inputDesc[0].dims.d[2]; + const int width = inputDesc[0].dims.d[3]; + const int box_num = height * width * num_anchors_; + const int block = 512; + const int gen_anchor_grid = (box_num + block - 1) / block; + T* anchors = static_cast(outputs[0]); + T* vars = static_cast(outputs[1]); + const T* anchor_sizes_device = static_cast(anchor_sizes_device_); + const T* aspect_ratios_device = static_cast(aspect_ratios_device_); + const T* stride_device = static_cast(stride_device_); + const T* variances_device = static_cast(variances_device_); + paddle::operators::GenAnchors<<>>( + anchors, aspect_ratios_device, aspect_ratios_.size(), anchor_sizes_device, + anchor_sizes_.size(), stride_device, stride_.size(), height, width, + offset_); + const int var_grid = (box_num * 4 + block - 1) / block; + paddle::operators::SetVariance<<>>( + vars, variances_device, variances_.size(), box_num * 4); + return cudaGetLastError() != cudaSuccess; +} + +int AnchorGeneratorPluginDynamic::enqueue( + const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, const void* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream) { + assert(outputDesc[0].type == nvinfer1::DataType::kFLOAT); + assert(outputDesc[1].type == nvinfer1::DataType::kFLOAT); + return enqueue_impl(inputDesc, outputDesc, inputs, outputs, workspace, + stream); +} + +nvinfer1::DataType AnchorGeneratorPluginDynamic::getOutputDataType( + int index, const nvinfer1::DataType* inputTypes, int nbInputs) const { + return data_type_; +} + +const char* AnchorGeneratorPluginDynamic::getPluginType() const { + return "anchor_generator_plugin_dynamic"; +} + +int AnchorGeneratorPluginDynamic::getNbOutputs() const { return 2; } + +int AnchorGeneratorPluginDynamic::initialize() { return 0; } + +void AnchorGeneratorPluginDynamic::terminate() {} + +size_t AnchorGeneratorPluginDynamic::getSerializationSize() const { + size_t serialize_size = 0; + serialize_size += SerializedSize(data_type_); + serialize_size += SerializedSize(anchor_sizes_); + serialize_size += SerializedSize(aspect_ratios_); + serialize_size += SerializedSize(stride_); + serialize_size += SerializedSize(variances_); + serialize_size += SerializedSize(offset_); + serialize_size += SerializedSize(num_anchors_); + return serialize_size; +} + +void AnchorGeneratorPluginDynamic::serialize(void* buffer) const { + SerializeValue(&buffer, data_type_); + SerializeValue(&buffer, anchor_sizes_); + SerializeValue(&buffer, aspect_ratios_); + SerializeValue(&buffer, stride_); + SerializeValue(&buffer, variances_); + SerializeValue(&buffer, offset_); + SerializeValue(&buffer, num_anchors_); +} + +void AnchorGeneratorPluginDynamic::destroy() {} + +void AnchorGeneratorPluginDynamicCreator::setPluginNamespace( + const char* lib_namespace) { + namespace_ = std::string(lib_namespace); +} + +const char* AnchorGeneratorPluginDynamicCreator::getPluginNamespace() const { + return namespace_.c_str(); +} + +const char* AnchorGeneratorPluginDynamicCreator::getPluginName() const { + return "anchor_generator_plugin_dynamic"; +} + +const char* AnchorGeneratorPluginDynamicCreator::getPluginVersion() const { + return "1"; +} + +const nvinfer1::PluginFieldCollection* +AnchorGeneratorPluginDynamicCreator::getFieldNames() { + return &field_collection_; +} + +nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) { + const nvinfer1::PluginField* fields = fc->fields; + int type_id = -1; + std::vector anchor_sizes, aspect_ratios, stride, variances; + float offset = .5; + int num_anchors = -1; + for (int i = 0; i < fc->nbFields; ++i) { + const std::string field_name(fc->fields[i].name); + const auto length = fc->fields[i].length; + if (field_name.compare("type_id") == 0) { + type_id = *static_cast(fc->fields[i].data); + } else if (field_name.compare("anchor_sizes")) { + const auto* data = static_cast(fc->fields[i].data); + anchor_sizes.insert(anchor_sizes.end(), data, data + length); + } else if (field_name.compare("aspect_ratios")) { + const auto* data = static_cast(fc->fields[i].data); + aspect_ratios.insert(aspect_ratios.end(), data, data + length); + } else if (field_name.compare("stride")) { + const auto* data = static_cast(fc->fields[i].data); + stride.insert(stride.end(), data, data + length); + } else if (field_name.compare("variances")) { + const auto* data = static_cast(fc->fields[i].data); + variances.insert(variances.end(), data, data + length); + } else if (field_name.compare("offset")) { + offset = *static_cast(fc->fields[i].data); + } else if (field_name.compare("num_anchors")) { + num_anchors = *static_cast(fc->fields[i].data); + } else { + assert(false && "unknown plugin field name."); + } + } + return new AnchorGeneratorPluginDynamic(nvinfer1::DataType::kFLOAT, + anchor_sizes, aspect_ratios, stride, + variances, offset, num_anchors); +} + +nvinfer1::IPluginV2Ext* AnchorGeneratorPluginDynamicCreator::deserializePlugin( + const char* name, const void* serial_data, size_t serial_length) { + auto plugin = new AnchorGeneratorPluginDynamic(serial_data, serial_length); + plugin->setPluginNamespace(namespace_.c_str()); + return plugin; +} +#endif + +} // namespace plugin +} // namespace tensorrt +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h new file mode 100644 index 00000000000..aff0b6a6802 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.h @@ -0,0 +1,201 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "paddle/fluid/inference/tensorrt/engine.h" +#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" + +namespace paddle { +namespace inference { +namespace tensorrt { +namespace plugin { + +class AnchorGeneratorPlugin : public nvinfer1::IPluginV2Ext { + public: + explicit AnchorGeneratorPlugin( + const nvinfer1::DataType, const std::vector& anchor_sizes, + const std::vector& aspect_ratios, const std::vector& stride, + const std::vector& variances, const float offset, const int height, + const int width, const int num_anchors, const int box_num); + AnchorGeneratorPlugin(const void* data, size_t length); + ~AnchorGeneratorPlugin() override; + const char* getPluginType() const override; + const char* getPluginVersion() const override; + int getNbOutputs() const override; + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nb_input_dims) override; + bool supportsFormat(nvinfer1::DataType type, + nvinfer1::TensorFormat format) const override; + size_t getWorkspaceSize(int max_batch_size) const override; + int enqueue(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream) override; + int initialize() override; + void terminate() override; + size_t getSerializationSize() const override; + void serialize(void* buffer) const override; + void destroy() override; + void setPluginNamespace(const char* lib_namespace) override; + const char* getPluginNamespace() const override; + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType* input_type, + int nb_inputs) const override; + bool isOutputBroadcastAcrossBatch(int output_index, + const bool* input_is_broadcast, + int nb_inputs) const override; + bool canBroadcastInputAcrossBatch(int input_index) const override; + void configurePlugin(const nvinfer1::Dims* input_dims, int nb_inputs, + const nvinfer1::Dims* output_dims, int nb_outputs, + const nvinfer1::DataType* input_types, + const nvinfer1::DataType* output_types, + const bool* input_is_broadcast, + const bool* output_is_broadcast, + nvinfer1::PluginFormat float_format, + int max_batct_size) override; + nvinfer1::IPluginV2Ext* clone() const override; + + private: + template + int enqueue_impl(int batch_size, const void* const* inputs, void** outputs, + void* workspace, cudaStream_t stream); + nvinfer1::DataType data_type_; + std::vector anchor_sizes_; + std::vector aspect_ratios_; + std::vector stride_; + std::vector variances_; + float offset_; + void* anchor_sizes_device_; + void* aspect_ratios_device_; + void* stride_device_; + void* variances_device_; + int height_; + int width_; + int num_anchors_; + int box_num_; + std::string namespace_; +}; + +class AnchorGeneratorPluginCreator : public nvinfer1::IPluginCreator { + public: + AnchorGeneratorPluginCreator() = default; + ~AnchorGeneratorPluginCreator() override = default; + void setPluginNamespace(const char* lib_namespace) override; + const char* getPluginNamespace() const override; + const char* getPluginName() const override; + const char* getPluginVersion() const override; + const nvinfer1::PluginFieldCollection* getFieldNames() override; + nvinfer1::IPluginV2Ext* createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) override; + nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, + const void* serial_data, + size_t serial_length) override; + + private: + std::string namespace_; + nvinfer1::PluginFieldCollection field_collection_; +}; + +REGISTER_TRT_PLUGIN_V2(AnchorGeneratorPluginCreator); + +#if IS_TRT_VERSION_GE(6000) +class AnchorGeneratorPluginDynamic : public DynamicPluginTensorRT { + public: + explicit AnchorGeneratorPluginDynamic(const nvinfer1::DataType data_type, + const std::vector& anchor_sizes, + const std::vector& aspect_ratios, + const std::vector& stride, + const std::vector& variances, + const float offset, + const int num_anchors); + AnchorGeneratorPluginDynamic(void const* data, size_t length); + ~AnchorGeneratorPluginDynamic(); + nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) override; + bool supportsFormatCombination(int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, int nbOutputs) override; + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, + int nbOutputs) override; + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const override; + int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, void* workspace, + cudaStream_t stream) override; + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType* inputTypes, + int nbInputs) const override; + const char* getPluginType() const override; + int getNbOutputs() const override; + int initialize() override; + void terminate() override; + size_t getSerializationSize() const override; + void serialize(void* buffer) const override; + void destroy() override; + + private: + template + int enqueue_impl(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, + void* workspace, cudaStream_t stream); + nvinfer1::DataType data_type_; + std::vector anchor_sizes_; + std::vector aspect_ratios_; + std::vector stride_; + std::vector variances_; + float offset_; + void* anchor_sizes_device_; + void* aspect_ratios_device_; + void* stride_device_; + void* variances_device_; + int num_anchors_; + std::string namespace_; +}; + +class AnchorGeneratorPluginDynamicCreator : public nvinfer1::IPluginCreator { + public: + AnchorGeneratorPluginDynamicCreator() = default; + ~AnchorGeneratorPluginDynamicCreator() override = default; + void setPluginNamespace(const char* lib_namespace) override; + const char* getPluginNamespace() const override; + const char* getPluginName() const override; + const char* getPluginVersion() const override; + const nvinfer1::PluginFieldCollection* getFieldNames() override; + nvinfer1::IPluginV2Ext* createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) override; + nvinfer1::IPluginV2Ext* deserializePlugin(const char* name, + const void* serial_data, + size_t serial_length) override; + + private: + std::string namespace_; + nvinfer1::PluginFieldCollection field_collection_; +}; +REGISTER_TRT_PLUGIN_V2(AnchorGeneratorPluginDynamicCreator); +#endif + +} // namespace plugin +} // namespace tensorrt +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cu b/paddle/fluid/operators/detection/anchor_generator_op.cu index b4c27a63dbd..388b8531571 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.cu +++ b/paddle/fluid/operators/detection/anchor_generator_op.cu @@ -49,14 +49,11 @@ __global__ void GenAnchors(T* out, const T* aspect_ratios, const int ar_num, anchor_width = scale_w * base_w; anchor_height = scale_h * base_h; - T xmin = (x_ctr - 0.5 * (anchor_width - 1)); - T ymin = (y_ctr - 0.5 * (anchor_height - 1)); - T xmax = (x_ctr + 0.5 * (anchor_width - 1)); - T ymax = (y_ctr + 0.5 * (anchor_height - 1)); - out[i * 4] = xmin; - out[i * 4 + 1] = ymin; - out[i * 4 + 2] = xmax; - out[i * 4 + 3] = ymax; + T xmin = (x_ctr - .5f * (anchor_width - 1)); + T ymin = (y_ctr - .5f * (anchor_height - 1)); + T xmax = (x_ctr + .5f * (anchor_width - 1)); + T ymax = (y_ctr + .5f * (anchor_height - 1)); + reinterpret_cast(out)[i] = make_float4(xmin, ymin, xmax, ymax); } } diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h index e0e499d76a1..599f6935736 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.h +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -22,6 +22,19 @@ limitations under the License. */ namespace paddle { namespace operators { +#ifdef PADDLE_WITH_CUDA +template +extern __global__ void GenAnchors(T* out, const T* aspect_ratios, + const int ar_num, const T* anchor_sizes, + const int as_num, const T* stride, + const int sd_num, const int height, + const int width, const T offset); + +template +extern __global__ void SetVariance(T* out, const T* var, const int vnum, + const int num); +#endif + template class AnchorGeneratorOpKernel : public framework::OpKernel { public: diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py new file mode 100644 index 00000000000..1d6f1c2c459 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_anchor_generator_op.py @@ -0,0 +1,122 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import itertools +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TRTAnchorGeneratorBaseTest(InferencePassTest): + def setUp(self): + self.bs = 1 + self.channel = 16 + self.height = 32 + self.width = 32 + self.anchor_sizes = [64., 128., 256., 512.] + self.aspect_ratios = [.5, 1., 2.] + self.variance = [.1, .1, .2, .2] + self.stride = [8., 8.] + self.precision = AnalysisConfig.Precision.Float32 + self.serialize = False + self.enable_trt = True + self.feeds = { + 'data': + np.random.random([self.bs, self.channel, self.height, + self.width]).astype('float32'), + } + + def build(self): + min_graph_size = 3 if self.dynamic_shape_params is not None else 2 + self.trt_parameters = InferencePassTest.TensorRTParam( + 1 << 30, self.bs, min_graph_size, self.precision, self.serialize, + False) + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name='data', + shape=[-1, self.channel, self.height, self.width], + dtype='float32') + anchor, var = fluid.layers.detection.anchor_generator( + data, + anchor_sizes=self.anchor_sizes, + aspect_ratios=self.aspect_ratios, + variance=self.variance, + stride=self.stride) + if self.dynamic_shape_params is not None: + anchor = fluid.layers.transpose(anchor, [2, 3, 0, 1]) + out = fluid.layers.batch_norm(anchor, is_test=True) + + self.fetch_list = [out, var] + + def run_test(self): + self.build() + self.check_output() + + def set_dynamic(self): + self.dynamic_shape_params = InferencePassTest.DynamicShapeParam({ + 'data': [self.bs, self.channel, self.height // 2, self.width // 2] + }, { + 'data': [self.bs, self.channel, self.height, self.width] + }, {'data': [self.bs, self.channel, self.height, self.width]}, False) + + def test_base(self): + self.run_test() + + def test_fp16(self): + self.precision = AnalysisConfig.Precision.Half + self.run_test() + + def test_serialize(self): + self.serialize = True + self.run_test() + + def test_dynamic(self): + self.set_dynamic() + self.run_test() + + def test_dynamic_fp16(self): + self.precision = AnalysisConfig.Precision.Half + self.set_dynamic() + self.run_test() + + def test_dynamic_serialize(self): + self.serialize = True + self.set_dynamic() + self.run_test() + + def test_dynamic_fp16_serialize(self): + self.serialize = True + self.precision = AnalysisConfig.Precision.Half + self.set_dynamic() + self.run_test() + + def check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + atol = 1e-5 + if self.trt_parameters.precision == AnalysisConfig.Precision.Half: + atol = 1e-3 + self.check_output_with_option(use_gpu, atol, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +if __name__ == "__main__": + unittest.main() -- GitLab