From 1948210cf499e945de0afc3aef2c6aba7b719d01 Mon Sep 17 00:00:00 2001
From: Pei Yang
Date: Thu, 31 Oct 2019 12:24:43 +0800
Subject: [PATCH] Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d
 op converter and "num" attribute in split op converter (#20733) (#20902)

* fix pool2d trt converter, test=develop

* add fix for split op converter, test=develop
---
 .../ir_passes/tensorrt_subgraph_pass.cc         |  2 +-
 .../fluid/inference/api/analysis_predictor.cc   |  1 -
 .../inference/tensorrt/convert/pool2d_op.cc     | 22 +++--
 .../inference/tensorrt/convert/split_op.cc      | 17 +++-
 .../inference/tensorrt/plugin/CMakeLists.txt    |  2 +-
 ...vg_pool_op_plugin.cu => pool_op_plugin.cu}   | 41 +++++----
 ...{avg_pool_op_plugin.h => pool_op_plugin.h}   | 88 ++++++++++++-------
 .../fluid/inference/tests/api/CMakeLists.txt    |  7 ++
 .../tests/api/trt_split_converter_test.cc       | 52 +++++++++++
 paddle/fluid/operators/math/pooling.cu          |  5 +-
 paddle/fluid/operators/math/pooling.h           |  3 +-
 11 files changed, 174 insertions(+), 66 deletions(-)
 rename paddle/fluid/inference/tensorrt/plugin/{avg_pool_op_plugin.cu => pool_op_plugin.cu} (54%)
 rename paddle/fluid/inference/tensorrt/plugin/{avg_pool_op_plugin.h => pool_op_plugin.h} (63%)
 create mode 100644 paddle/fluid/inference/tests/api/trt_split_converter_test.cc

diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index bd2f79a12a..8c181ba2fd 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -213,7 +213,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   for (auto *x : node->inputs) {
     if (x->IsVar() && x->Var()) {
       framework::VarDesc *var = x->Var();
-      SetAttr(op_desc->Proto(), var->Name() + "_shape", var->GetShape());
+      op_desc->SetAttr(var->Name() + "_shape", var->GetShape());
     }
   }
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 42209d9b0c..e00ba4d840 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -507,7 +507,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     }
   }
   if (config.glog_info_disabled()) {
-    google::InitGoogleLogging("Init");
     FLAGS_logtostderr = 1;
     FLAGS_minloglevel = google::WARNING;
     LOG(WARNING) << " - GLOG's LOG(INFO) is disabled.";
diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
index 1752c52c3f..09659af7af 100644
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
+#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
 
 namespace paddle {
 namespace inference {
@@ -75,12 +75,19 @@ class Pool2dOpConverter : public OpConverter {
     std::vector<int> paddings =
         boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
     bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
+    bool adaptive = false;
+    if (op_desc.HasAttr("adaptive"))
+      adaptive = boost::get<bool>(op_desc.GetAttr("adaptive"));
 
     nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
+    plugin::PoolPlugin::PoolType plugin_pool_type =
+        plugin::PoolPlugin::PoolType::max;
     if (pool_type == "max") {
       nv_pool_type = nvinfer1::PoolingType::kMAX;
+      plugin_pool_type = plugin::PoolPlugin::PoolType::max;
     } else if (pool_type == "avg") {
       nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
+      plugin_pool_type = plugin::PoolPlugin::PoolType::avg;
     } else {
       PADDLE_THROW("TensorRT unsupported pooling type!");
     }
@@ -108,7 +115,7 @@ class Pool2dOpConverter : public OpConverter {
       return;
     }
 
-    if (pool_type == "max") {
+    if (!adaptive && pool_type == "max") {
       // Under ceil mode, the pre_pad and post_pad are used to
       // record the the padding size. In some ceil mode cases,
       // we do not need padding, so we initialize the two vars to 0.
@@ -141,10 +148,13 @@ class Pool2dOpConverter : public OpConverter {
     for (int i = 0; i < input_dims; i++) {
       input_shape_v.push_back(input_shape.d[i]);
     }
-    plugin::AvgPoolPlugin *plugin = new plugin::AvgPoolPlugin(
-        ceil_mode, ksize, strides, paddings, input_shape_v);
-    auto *avg_pool_layer = engine_->AddPlugin(&input1, 1, plugin);
-    layer = avg_pool_layer;
+    plugin::PoolPlugin *plugin =
+        new plugin::PoolPlugin(ceil_mode, plugin_pool_type, adaptive, ksize,
+                               strides, paddings, input_shape_v);
+    PADDLE_ENFORCE_NOT_NULL(plugin->getPluginType(),
+                            "The plugin used must not be null");
+    auto *pool_layer = engine_->AddPlugin(&input1, 1, plugin);
+    layer = pool_layer;
   }
 
   auto output_name = op_desc.Output("Out")[0];
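Note on the converter change above: TensorRT's built-in IPoolingLayer has no adaptive mode, so the new `!adaptive && pool_type == "max"` guard keeps only plain max pooling on the native layer and routes everything else through plugin::PoolPlugin. Condensed into a standalone sketch (the helper name is hypothetical, not part of the patch):

    #include <string>

    // Sketch only: mirrors the dispatch in the hunk above. Adaptive pooling
    // (and, as before, average pooling) takes the plugin::PoolPlugin path.
    static bool CanUseNativePooling(const std::string &pool_type,
                                    bool adaptive) {
      return pool_type == "max" && !adaptive;
    }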
diff --git a/paddle/fluid/inference/tensorrt/convert/split_op.cc b/paddle/fluid/inference/tensorrt/convert/split_op.cc
index ae5b1b9806..43fdd305fe 100644
--- a/paddle/fluid/inference/tensorrt/convert/split_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/split_op.cc
@@ -35,12 +35,23 @@ class SplitOpConverter : public OpConverter {
     // Get Attrs
     PADDLE_ENFORCE(input_num == 1);
     int axis = boost::get<int>(op_desc.GetAttr("axis"));
-    std::vector<int> output_lengths =
-        boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
     // split on batch is not supported in TensorRT
     PADDLE_ENFORCE(axis != 0);
     axis += (axis < 0) ? input_dims.nbDims : -1;
-
+    std::vector<int> output_lengths =
+        boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
+    output_lengths.reserve(output_num);
+    int num = boost::get<int>(op_desc.GetAttr("num"));
+    if (num > 0) {
+      int64_t in_axis_dim = input_dims.d[axis];
+      PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
+                        "Tensor split does not result"
+                        " in an equal division");
+      size_t out_axis_dim = in_axis_dim / num;
+      for (size_t i = 0; i < output_num; ++i) {
+        output_lengths.push_back(out_axis_dim);
+      }
+    }
     PADDLE_ENFORCE(output_lengths.size() == output_num);
     plugin::SplitPlugin* plugin = new plugin::SplitPlugin(axis, output_lengths);
     nvinfer1::IPluginLayer* layer =
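Note on the split change above: models exported with the "num" attribute leave "sections" empty, which previously left output_lengths empty and tripped the size check. The derivation the converter now performs, as a standalone sketch (helper name is illustrative): splitting an axis of extent 8 with num = 4 yields output_lengths = {2, 2, 2, 2}.

    #include <vector>

    // Sketch only: equal division; the converter enforces
    // in_axis_dim % num == 0 before reaching this point.
    static std::vector<int> SectionsFromNum(int in_axis_dim, int num) {
      return std::vector<int>(static_cast<size_t>(num), in_axis_dim / num);
    }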
diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
index d01c5c823b..b505fa4662 100644
--- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
@@ -1,5 +1,5 @@
 nv_library(tensorrt_plugin
         SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu
         prelu_op_plugin.cu trt_plugin_factory.cc
-        avg_pool_op_plugin.cu swish_op_plugin.cu
+        pool_op_plugin.cu swish_op_plugin.cu
         DEPS enforce tensorrt_engine prelu)
diff --git a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
similarity index 54%
rename from paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu
rename to paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
index f27a838162..17904a4ebc 100644
--- a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
+#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
 #include "paddle/fluid/operators/math/pooling.h"
 
@@ -21,14 +21,14 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
-AvgPoolPlugin* CreateAvgPoolPluginDeserialize(const void* buffer,
-                                              size_t length) {
-  return new AvgPoolPlugin(buffer, length);
+PoolPlugin* CreatePoolPluginDeserialize(const void* buffer, size_t length) {
+  return new PoolPlugin(buffer, length);
 }
-REGISTER_TRT_PLUGIN("avg_pool_plugin", CreateAvgPoolPluginDeserialize);
+REGISTER_TRT_PLUGIN("pool_plugin", CreatePoolPluginDeserialize);
 
-nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
-    int index, const nvinfer1::Dims* inputDims, int nbInputs) {
+nvinfer1::Dims PoolPlugin::getOutputDimensions(int index,
+                                               const nvinfer1::Dims* inputDims,
+                                               int nbInputs) {
   assert(nbInputs == 1);
   assert(index == 0);
   assert(inputDims[0].nbDims == 3);
@@ -41,26 +41,33 @@ nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
   return output_dims;
 }
 
-int AvgPoolPlugin::enqueue(int batchSize, const void* const* inputs,
-                           void** outputs, void* workspace,
-                           cudaStream_t stream) {
+int PoolPlugin::enqueue(int batchSize, const void* const* inputs,
+                        void** outputs, void* workspace, cudaStream_t stream) {
   auto const& input_dims = this->getInputDims(0);
   int input_size = 0;
   float const* idata = reinterpret_cast<float const*>(inputs[0]);
   float** odatas = reinterpret_cast<float**>(outputs);
 
-  paddle::operators::math::AvgPool<float> pool_process;
-  paddle::operators::math::Pool2dDirectCUDAFunctor<
-      paddle::operators::math::AvgPool<float>, float>
-      pool2d_forward;
-
   std::vector<int> input_shape = input_shape_;
   std::vector<int> output_shape = output_shape_;
   input_shape.insert(input_shape.begin(), batchSize);
   output_shape.insert(output_shape.begin(), batchSize);
 
-  pool2d_forward(idata, input_shape, output_shape, ksize_, strides_, paddings_,
-                 pool_process, true, odatas[0], stream);
+  if (pool_type_ == PoolType::max) {
+    paddle::operators::math::MaxPool<float> pool_process;
+    paddle::operators::math::Pool2dDirectCUDAFunctor<
+        paddle::operators::math::MaxPool<float>, float>
+        pool2d_forward;
+    pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
+                   paddings_, pool_process, true, adaptive_, odatas[0], stream);
+  } else if (pool_type_ == PoolType::avg) {
+    paddle::operators::math::AvgPool<float> pool_process;
+    paddle::operators::math::Pool2dDirectCUDAFunctor<
+        paddle::operators::math::AvgPool<float>, float>
+        pool2d_forward;
+    pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
+                   paddings_, pool_process, true, adaptive_, odatas[0], stream);
+  }
 
   return cudaGetLastError() != cudaSuccess;
 }
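Note on the enqueue change above: the max and avg branches differ only in the pooling functor type. A sketch of how the shared call could be folded into one helper template (illustrative only, not part of the patch; written against the Pool2dDirectCUDAFunctor signature as extended by this patch):

    #include <cuda_runtime.h>
    #include <vector>
    #include "paddle/fluid/operators/math/pooling.h"

    // Sketch only: both branches reduce to this helper, parameterized on
    // the pooling functor (MaxPool<float> or AvgPool<float>).
    template <typename PoolProcess>
    static void RunPool2d(const float *idata, const std::vector<int> &in_shape,
                          const std::vector<int> &out_shape,
                          const std::vector<int> &ksize,
                          const std::vector<int> &strides,
                          const std::vector<int> &paddings, bool adaptive,
                          float *odata, cudaStream_t stream) {
      PoolProcess pool_process;
      paddle::operators::math::Pool2dDirectCUDAFunctor<PoolProcess, float>
          pool2d_forward;
      pool2d_forward(idata, in_shape, out_shape, ksize, strides, paddings,
                     pool_process, /*exclusive=*/true, adaptive, odata, stream);
    }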
diff --git a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
similarity index 63%
rename from paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h
rename to paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
index a7c0aa5794..9b0591259a 100644
--- a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
@@ -13,7 +13,9 @@
 // limitations under the License.
 
 #pragma once
+#include <stdio.h>
 #include <cassert>
+#include <string>
 #include <vector>
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
 
@@ -22,18 +24,11 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {
 
-class AvgPoolPlugin : public PluginTensorRT {
- private:
-  bool ceil_mode_;
-  std::vector<int> ksize_;
-  std::vector<int> strides_;
-  std::vector<int> paddings_;
-  std::vector<int> input_shape_;
-  std::vector<int> output_shape_;
-
+class PoolPlugin : public PluginTensorRT {
  protected:
   size_t getSerializationSize() override {
     return SerializedSize(getPluginType()) + SerializedSize(ceil_mode_) +
+           SerializedSize(pool_type_) + SerializedSize(adaptive_) +
            SerializedSize(ksize_) + SerializedSize(strides_) +
            SerializedSize(paddings_) + SerializedSize(input_shape_) +
            SerializedSize(output_shape_) + getBaseSerializationSize();
@@ -45,6 +40,8 @@ class AvgPoolPlugin : public PluginTensorRT {
     SerializeValue(&buffer, getPluginType());
     serializeBase(buffer);
     SerializeValue(&buffer, ceil_mode_);
+    SerializeValue(&buffer, pool_type_);
+    SerializeValue(&buffer, adaptive_);
     SerializeValue(&buffer, ksize_);
     SerializeValue(&buffer, strides_);
     SerializeValue(&buffer, paddings_);
@@ -53,41 +50,54 @@ class AvgPoolPlugin : public PluginTensorRT {
   }
 
  public:
-  AvgPoolPlugin() {}
-  AvgPoolPlugin(bool ceil_mode, std::vector<int> ksize,
-                std::vector<int> strides, std::vector<int> paddings,
-                std::vector<int> input_shape)
+  enum class PoolType {
+    max = 0,
+    avg,
+  };
+  PoolPlugin() {}
+  PoolPlugin(bool ceil_mode, PoolType pool_type, bool adaptive,
+             std::vector<int> ksize, std::vector<int> strides,
+             std::vector<int> paddings, std::vector<int> input_shape)
       : ceil_mode_(ceil_mode),
+        pool_type_(pool_type),
+        adaptive_(adaptive),
         ksize_(ksize),
         strides_(strides),
         paddings_(paddings),
         input_shape_(input_shape) {
-    int output_h, output_w;
     output_shape_ = input_shape_;
-    if (!ceil_mode_) {
-      output_h =
-          (input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
-      output_w =
-          (input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
+    if (adaptive_) {
+      output_shape_[1] = ksize[0];
+      output_shape_[2] = ksize[1];
     } else {
-      output_h =
-          (input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
-              strides_[0] +
-          1;
-      output_w =
-          (input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
-              strides_[1] +
-          1;
+      int output_h, output_w;
+      if (!ceil_mode_) {
+        output_h =
+            (input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
+        output_w =
+            (input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
+      } else {
+        output_h =
+            (input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
+                strides_[0] +
+            1;
+        output_w =
+            (input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
+                strides_[1] +
+            1;
+      }
+      output_shape_[1] = output_h;
+      output_shape_[2] = output_w;
     }
-    output_shape_[1] = output_h;
-    output_shape_[2] = output_w;
   }
 
   // It was used for tensorrt deserialization.
   // It should not be called by users.
-  AvgPoolPlugin(void const *serialData, size_t serialLength) {
+  PoolPlugin(void const *serialData, size_t serialLength) {
     deserializeBase(serialData, serialLength);
     DeserializeValue(&serialData, &serialLength, &ceil_mode_);
+    DeserializeValue(&serialData, &serialLength, &pool_type_);
+    DeserializeValue(&serialData, &serialLength, &adaptive_);
    DeserializeValue(&serialData, &serialLength, &ksize_);
     DeserializeValue(&serialData, &serialLength, &strides_);
     DeserializeValue(&serialData, &serialLength, &paddings_);
@@ -95,18 +105,28 @@ class AvgPoolPlugin : public PluginTensorRT {
     DeserializeValue(&serialData, &serialLength, &output_shape_);
   }
 
-  AvgPoolPlugin *clone() const override {
-    return new AvgPoolPlugin(ceil_mode_, ksize_, strides_, paddings_,
-                             input_shape_);
+  PoolPlugin *clone() const override {
+    return new PoolPlugin(ceil_mode_, pool_type_, adaptive_, ksize_, strides_,
+                          paddings_, input_shape_);
   }
 
-  const char *getPluginType() const override { return "avg_pool_plugin"; }
+  const char *getPluginType() const override { return "pool_plugin"; }
   int getNbOutputs() const override { return 1; }
   nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs,
                                      int nbInputDims) override;
   int initialize() override { return 0; }
   int enqueue(int batchSize, const void *const *inputs, void **outputs,
               void *workspace, cudaStream_t stream) override;
+
+ private:
+  bool ceil_mode_;
+  PoolType pool_type_;
+  bool adaptive_;
+  std::vector<int> ksize_;
+  std::vector<int> strides_;
+  std::vector<int> paddings_;
+  std::vector<int> input_shape_;
+  std::vector<int> output_shape_;
 };
 
 }  // namespace plugin
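Note on the constructor's shape math above, as a worked example with input_shape = {C, H, W} = {1, 8, 8}, ksize = {3, 3}, strides = {2, 2}, paddings = {0, 0}:

    // floor mode: output_h = (8 - 3 + 0) / 2 + 1         = 3
    // ceil mode:  output_h = (8 - 3 + 0 + 2 - 1) / 2 + 1 = 4
    // adaptive:   output_h = ksize[0] = 3; ksize is reinterpreted as the
    //             requested output size, so H does not enter the formula.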
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
index e7f6e22aad..c14ca54862 100644
--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -268,6 +268,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
   if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
     inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
   endif()
+  set(TEST_SPLIT_CONVERTER_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test")
+  if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL})
+    inference_download_and_uncompress(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz")
+  endif()
   inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
@@ -283,6 +287,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
   inference_analysis_test(trt_cascade_rcnn_test SRCS trt_cascade_rcnn_test.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
+  inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc
+          EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+          ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/)
   inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
           EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
           ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
diff --git a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
new file mode 100644
index 0000000000..3c2ff9601f
--- /dev/null
+++ b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
@@ -0,0 +1,52 @@
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
+
+namespace paddle {
+namespace inference {
+
+TEST(TensorRT, split_converter) {
+  std::string model_dir = FLAGS_infer_model + "/split_converter";
+  AnalysisConfig config;
+  int batch_size = 4;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
+  config.SwitchUseFeedFetchOps(false);
+  config.EnableTensorRtEngine(1 << 20, batch_size, 1,
+                              AnalysisConfig::Precision::kFloat32, false);
+
+  auto predictor = CreatePaddlePredictor(config);
+
+  int channels = 4;
+  int height = 4;
+  int width = 4;
+  int input_num = batch_size * channels * height * width;
+  float *input = new float[input_num];
+  memset(input, 1.0, input_num * sizeof(float));
+
+  auto input_names = predictor->GetInputNames();
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape({batch_size, channels, height, width});
+  input_t->copy_from_cpu(input);
+
+  ASSERT_TRUE(predictor->ZeroCopyRun());
+}
+
+}  // namespace inference
+}  // namespace paddle
diff --git a/paddle/fluid/operators/math/pooling.cu b/paddle/fluid/operators/math/pooling.cu
index 29c0a85d40..b967dd2cfd 100644
--- a/paddle/fluid/operators/math/pooling.cu
+++ b/paddle/fluid/operators/math/pooling.cu
@@ -236,7 +236,8 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
     const T* input, const std::vector<int>& input_shape,
     const std::vector<int>& output_shape, const std::vector<int>& ksize,
     const std::vector<int>& strides, const std::vector<int>& paddings,
-    PoolProcess pool_compute, bool exclusive, T* output, cudaStream_t stream) {
+    PoolProcess pool_compute, bool exclusive, bool adaptive, T* output,
+    cudaStream_t stream) {
   const int batch_size = input_shape[0];
   const int input_channels = input_shape[1];
   const int input_height = input_shape[2];
@@ -259,7 +260,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
   KernelPool2D<<<grid, threads, 0, stream>>>(
       nthreads, input, input_channels, input_height, input_width,
       output_height, output_width, ksize_height, ksize_width, stride_height, stride_width,
-      padding_height, padding_width, pool_compute, exclusive, false, output);
+      padding_height, padding_width, pool_compute, exclusive, adaptive, output);
 }
 
 /*
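Note on the adaptive flag now threaded into KernelPool2D above: in adaptive mode the kernel derives each pooling window from the input/output size ratio rather than from ksize and strides, along the lines of the adaptive index helpers in pooling.h. In integer form (standalone sketch, helper names are illustrative):

    // Sketch only: window bounds for output index out_idx in adaptive mode.
    inline int AdaptiveStart(int out_idx, int in_size, int out_size) {
      return (out_idx * in_size) / out_size;  // floor division
    }
    inline int AdaptiveEnd(int out_idx, int in_size, int out_size) {
      return ((out_idx + 1) * in_size + out_size - 1) / out_size;  // ceiling
    }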
diff --git a/paddle/fluid/operators/math/pooling.h b/paddle/fluid/operators/math/pooling.h
index 548612e8de..572295f138 100644
--- a/paddle/fluid/operators/math/pooling.h
+++ b/paddle/fluid/operators/math/pooling.h
@@ -105,7 +105,8 @@ class Pool2dDirectCUDAFunctor {
                   const std::vector<int>& ksize,
                   const std::vector<int>& strides,
                   const std::vector<int>& paddings, PoolProcess pool_compute,
-                  bool exclusive, T* output, cudaStream_t stream);
+                  bool exclusive, bool adaptive, T* output,
+                  cudaStream_t stream);
 };
 
 #endif
--
GitLab