Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d op converter and...

Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d op converter and "num" attribute in split op converter (#20733) (#20902) * fix pool2d trt converter, test=develop * add fix for split op converter, test=develop

Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d op converter and...
Bug Fix: Paddle-TRT cannot handle adaptive pooling in pool2d op converter and "num" attribute in split op converter (#20733) (#20902) * fix pool2d trt converter, test=develop * add fix for split op converter, test=develop
1948210c · Pei Yang · GitHub · 6fb04e8a · 1948210c · 1948210c
11 changed file
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -213,7 +213,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  for (auto *x : node->inputs) {
    if (x->IsVar() && x->Var()) {
      framework::VarDesc *var = x->Var();
-      SetAttr(op_desc->Proto(), var->Name() + "_shape", var->GetShape());
+      op_desc->SetAttr(var->Name() + "_shape", var->GetShape());
    }
  }


--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -507,7 +507,6 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    }
  }
  if (config.glog_info_disabled()) {
-    google::InitGoogleLogging("Init");
    FLAGS_logtostderr = 1;
    FLAGS_minloglevel = google::WARNING;
    LOG(WARNING) << " - GLOG's LOG(INFO) is disabled.";

--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
+#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"

 namespace paddle {
 namespace inference {
@@ -75,12 +75,19 @@ class Pool2dOpConverter : public OpConverter {
    std::vector<int> paddings =
        boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
    bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));
+    bool adaptive = false;
+    if (op_desc.HasAttr("adaptive"))
+      adaptive = boost::get<bool>(op_desc.GetAttr("adaptive"));

    nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
+    plugin::PoolPlugin::PoolType plugin_pool_type =
+        plugin::PoolPlugin::PoolType::max;
    if (pool_type == "max") {
      nv_pool_type = nvinfer1::PoolingType::kMAX;
+      plugin_pool_type = plugin::PoolPlugin::PoolType::max;
    } else if (pool_type == "avg") {
      nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
+      plugin_pool_type = plugin::PoolPlugin::PoolType::avg;
    } else {
      PADDLE_THROW("TensorRT unsupported pooling type!");
    }
@@ -108,7 +115,7 @@ class Pool2dOpConverter : public OpConverter {
      return;
    }

-    if (pool_type == "max") {
+    if (!adaptive && pool_type == "max") {
      // Under ceil mode, the pre_pad and post_pad are used to
      // record the the padding size. In some ceil mode cases,
      // we do not need padding, so we initialize the two vars to 0.
@@ -141,10 +148,13 @@ class Pool2dOpConverter : public OpConverter {
      for (int i = 0; i < input_dims; i++) {
        input_shape_v.push_back(input_shape.d[i]);
      }
-      plugin::AvgPoolPlugin *plugin = new plugin::AvgPoolPlugin(
-          ceil_mode, ksize, strides, paddings, input_shape_v);
-      auto *avg_pool_layer = engine_->AddPlugin(&input1, 1, plugin);
-      layer = avg_pool_layer;
+      plugin::PoolPlugin *plugin =
+          new plugin::PoolPlugin(ceil_mode, plugin_pool_type, adaptive, ksize,
+                                 strides, paddings, input_shape_v);
+      PADDLE_ENFORCE_NOT_NULL(plugin->getPluginType(),
+                              "The plugin used must not be null");
+      auto *pool_layer = engine_->AddPlugin(&input1, 1, plugin);
+      layer = pool_layer;
    }

    auto output_name = op_desc.Output("Out")[0];

--- a/paddle/fluid/inference/tensorrt/convert/split_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/split_op.cc
@@ -35,12 +35,23 @@ class SplitOpConverter : public OpConverter {
    // Get Attrs
    PADDLE_ENFORCE(input_num == 1);
    int axis = boost::get<int>(op_desc.GetAttr("axis"));
-    std::vector<int> output_lengths =
-        boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
    // split on batch is not supported in TensorRT
    PADDLE_ENFORCE(axis != 0);
    axis += (axis < 0) ? input_dims.nbDims : -1;
-
+    std::vector<int> output_lengths =
+        boost::get<std::vector<int>>(op_desc.GetAttr("sections"));
+    output_lengths.reserve(output_num);
+    int num = boost::get<int>(op_desc.GetAttr("num"));
+    if (num > 0) {
+      int64_t in_axis_dim = input_dims.d[axis];
+      PADDLE_ENFORCE_EQ(in_axis_dim % num, 0,
+                        "Tensor split does not result"
+                        " in an equal division");
+      size_t out_axis_dim = in_axis_dim / num;
+      for (size_t i = 0; i < output_num; ++i) {
+        output_lengths.push_back(out_axis_dim);
+      }
+    }
    PADDLE_ENFORCE(output_lengths.size() == output_num);
    plugin::SplitPlugin* plugin = new plugin::SplitPlugin(axis, output_lengths);
    nvinfer1::IPluginLayer* layer =

--- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt
 nv_library(tensorrt_plugin
           SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu
           prelu_op_plugin.cu  trt_plugin_factory.cc
-           avg_pool_op_plugin.cu swish_op_plugin.cu
+           pool_op_plugin.cu swish_op_plugin.cu
           DEPS enforce tensorrt_engine prelu)
--- a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.cu
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h"
+#include "paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h"
 #include "paddle/fluid/operators/math/pooling.h"

@@ -21,14 +21,14 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {

-AvgPoolPlugin* CreateAvgPoolPluginDeserialize(const void* buffer,
-                                              size_t length) {
-  return new AvgPoolPlugin(buffer, length);
+PoolPlugin* CreatePoolPluginDeserialize(const void* buffer, size_t length) {
+  return new PoolPlugin(buffer, length);
 }
-REGISTER_TRT_PLUGIN("avg_pool_plugin", CreateAvgPoolPluginDeserialize);
+REGISTER_TRT_PLUGIN("pool_plugin", CreatePoolPluginDeserialize);

-nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
-    int index, const nvinfer1::Dims* inputDims, int nbInputs) {
+nvinfer1::Dims PoolPlugin::getOutputDimensions(int index,
+                                               const nvinfer1::Dims* inputDims,
+                                               int nbInputs) {
  assert(nbInputs == 1);
  assert(index == 0);
  assert(inputDims[0].nbDims == 3);
@@ -41,26 +41,33 @@ nvinfer1::Dims AvgPoolPlugin::getOutputDimensions(
  return output_dims;
 }

-int AvgPoolPlugin::enqueue(int batchSize, const void* const* inputs,
-                           void** outputs, void* workspace,
-                           cudaStream_t stream) {
+int PoolPlugin::enqueue(int batchSize, const void* const* inputs,
+                        void** outputs, void* workspace, cudaStream_t stream) {
  auto const& input_dims = this->getInputDims(0);
  int input_size = 0;
  float const* idata = reinterpret_cast<float const*>(inputs[0]);
  float** odatas = reinterpret_cast<float**>(outputs);

-  paddle::operators::math::AvgPool<float> pool_process;
-  paddle::operators::math::Pool2dDirectCUDAFunctor<
-      paddle::operators::math::AvgPool<float>, float>
-      pool2d_forward;
-
  std::vector<int> input_shape = input_shape_;
  std::vector<int> output_shape = output_shape_;
  input_shape.insert(input_shape.begin(), batchSize);
  output_shape.insert(output_shape.begin(), batchSize);

-  pool2d_forward(idata, input_shape, output_shape, ksize_, strides_, paddings_,
-                 pool_process, true, odatas[0], stream);
+  if (pool_type_ == PoolType::max) {
+    paddle::operators::math::MaxPool<float> pool_process;
+    paddle::operators::math::Pool2dDirectCUDAFunctor<
+        paddle::operators::math::MaxPool<float>, float>
+        pool2d_forward;
+    pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
+                   paddings_, pool_process, true, adaptive_, odatas[0], stream);
+  } else if (pool_type_ == PoolType::avg) {
+    paddle::operators::math::AvgPool<float> pool_process;
+    paddle::operators::math::Pool2dDirectCUDAFunctor<
+        paddle::operators::math::AvgPool<float>, float>
+        pool2d_forward;
+    pool2d_forward(idata, input_shape, output_shape, ksize_, strides_,
+                   paddings_, pool_process, true, adaptive_, odatas[0], stream);
+  }

  return cudaGetLastError() != cudaSuccess;
 }

--- a/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/avg_pool_op_plugin.h
@@ -13,7 +13,9 @@
 // limitations under the License.

 #pragma once
+#include <stdio.h>
 #include <cassert>
+#include <string>
 #include <vector>
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"

@@ -22,18 +24,11 @@ namespace inference {
 namespace tensorrt {
 namespace plugin {

-class AvgPoolPlugin : public PluginTensorRT {
- private:
-  bool ceil_mode_;
-  std::vector<int> ksize_;
-  std::vector<int> strides_;
-  std::vector<int> paddings_;
-  std::vector<int> input_shape_;
-  std::vector<int> output_shape_;
-
+class PoolPlugin : public PluginTensorRT {
 protected:
  size_t getSerializationSize() override {
    return SerializedSize(getPluginType()) + SerializedSize(ceil_mode_) +
+           SerializedSize(pool_type_) + SerializedSize(adaptive_) +
           SerializedSize(ksize_) + SerializedSize(strides_) +
           SerializedSize(paddings_) + SerializedSize(input_shape_) +
           SerializedSize(output_shape_) + getBaseSerializationSize();
@@ -45,6 +40,8 @@ class AvgPoolPlugin : public PluginTensorRT {
    SerializeValue(&buffer, getPluginType());
    serializeBase(buffer);
    SerializeValue(&buffer, ceil_mode_);
+    SerializeValue(&buffer, pool_type_);
+    SerializeValue(&buffer, adaptive_);
    SerializeValue(&buffer, ksize_);
    SerializeValue(&buffer, strides_);
    SerializeValue(&buffer, paddings_);
@@ -53,41 +50,54 @@ class AvgPoolPlugin : public PluginTensorRT {
  }

 public:
-  AvgPoolPlugin() {}
-  AvgPoolPlugin(bool ceil_mode, std::vector<int> ksize,
-                std::vector<int> strides, std::vector<int> paddings,
-                std::vector<int> input_shape)
+  enum class PoolType {
+    max = 0,
+    avg,
+  };
+  PoolPlugin() {}
+  PoolPlugin(bool ceil_mode, PoolType pool_type, bool adaptive,
+             std::vector<int> ksize, std::vector<int> strides,
+             std::vector<int> paddings, std::vector<int> input_shape)
      : ceil_mode_(ceil_mode),
+        pool_type_(pool_type),
+        adaptive_(adaptive),
        ksize_(ksize),
        strides_(strides),
        paddings_(paddings),
        input_shape_(input_shape) {
-    int output_h, output_w;
    output_shape_ = input_shape_;
-    if (!ceil_mode_) {
-      output_h =
-          (input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
-      output_w =
-          (input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
+    if (adaptive_) {
+      output_shape_[1] = ksize[0];
+      output_shape_[2] = ksize[1];
    } else {
-      output_h =
-          (input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
-              strides_[0] +
-          1;
-      output_w =
-          (input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
-              strides_[1] +
-          1;
+      int output_h, output_w;
+      if (!ceil_mode_) {
+        output_h =
+            (input_shape[1] - ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1;
+        output_w =
+            (input_shape[2] - ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1;
+      } else {
+        output_h =
+            (input_shape[1] - ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) /
+                strides_[0] +
+            1;
+        output_w =
+            (input_shape[2] - ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) /
+                strides_[1] +
+            1;
+      }
+      output_shape_[1] = output_h;
+      output_shape_[2] = output_w;
    }
-    output_shape_[1] = output_h;
-    output_shape_[2] = output_w;
  }

  // It was used for tensorrt deserialization.
  // It should not be called by users.
-  AvgPoolPlugin(void const *serialData, size_t serialLength) {
+  PoolPlugin(void const *serialData, size_t serialLength) {
    deserializeBase(serialData, serialLength);
    DeserializeValue(&serialData, &serialLength, &ceil_mode_);
+    DeserializeValue(&serialData, &serialLength, &pool_type_);
+    DeserializeValue(&serialData, &serialLength, &adaptive_);
    DeserializeValue(&serialData, &serialLength, &ksize_);
    DeserializeValue(&serialData, &serialLength, &strides_);
    DeserializeValue(&serialData, &serialLength, &paddings_);
@@ -95,18 +105,28 @@ class AvgPoolPlugin : public PluginTensorRT {
    DeserializeValue(&serialData, &serialLength, &output_shape_);
  }

-  AvgPoolPlugin *clone() const override {
-    return new AvgPoolPlugin(ceil_mode_, ksize_, strides_, paddings_,
-                             input_shape_);
+  PoolPlugin *clone() const override {
+    return new PoolPlugin(ceil_mode_, pool_type_, adaptive_, ksize_, strides_,
+                          paddings_, input_shape_);
  }

-  const char *getPluginType() const override { return "avg_pool_plugin"; }
+  const char *getPluginType() const override { return "pool_plugin"; }
  int getNbOutputs() const override { return 1; }
  nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs,
                                     int nbInputDims) override;
  int initialize() override { return 0; }
  int enqueue(int batchSize, const void *const *inputs, void **outputs,
              void *workspace, cudaStream_t stream) override;
+
+ private:
+  bool ceil_mode_;
+  PoolType pool_type_;
+  bool adaptive_;
+  std::vector<int> ksize_;
+  std::vector<int> strides_;
+  std::vector<int> paddings_;
+  std::vector<int> input_shape_;
+  std::vector<int> output_shape_;
 };

 }  // namespace plugin

--- a/paddle/fluid/inference/tests/api/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/api/CMakeLists.txt
@@ -268,6 +268,10 @@ if(WITH_GPU AND TENSORRT_FOUND)
    if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
        inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz")
    endif()
+    set(TEST_SPLIT_CONVERTER_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test")
+    if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL})
+        inference_download_and_uncompress(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz")
+    endif()
    inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc
            EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
            ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
@@ -283,6 +287,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
    inference_analysis_test(trt_cascade_rcnn_test SRCS trt_cascade_rcnn_test.cc
            EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
            ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
+    inference_analysis_test(trt_split_converter_test SRCS trt_split_converter_test.cc
+            EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} 
+            ARGS --infer_model=${TEST_SPLIT_CONVERTER_MODEL}/)
    inference_analysis_test(test_analyzer_capi_gpu SRCS analyzer_capi_gpu_tester.cc
            EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
            ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)

--- a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
+++ b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc
+/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
+
+namespace paddle {
+namespace inference {
+
+TEST(TensorRT, split_converter) {
+  std::string model_dir = FLAGS_infer_model + "/split_converter";
+  AnalysisConfig config;
+  int batch_size = 4;
+  config.EnableUseGpu(100, 0);
+  config.SetModel(model_dir);
+  config.SwitchUseFeedFetchOps(false);
+  config.EnableTensorRtEngine(1 << 20, batch_size, 1,
+                              AnalysisConfig::Precision::kFloat32, false);
+
+  auto predictor = CreatePaddlePredictor(config);
+
+  int channels = 4;
+  int height = 4;
+  int width = 4;
+  int input_num = batch_size * channels * height * width;
+  float *input = new float[input_num];
+  memset(input, 1.0, input_num * sizeof(float));
+
+  auto input_names = predictor->GetInputNames();
+  auto input_t = predictor->GetInputTensor(input_names[0]);
+  input_t->Reshape({batch_size, channels, height, width});
+  input_t->copy_from_cpu(input);
+
+  ASSERT_TRUE(predictor->ZeroCopyRun());
+}
+
+}  // namespace inference
+}  // namespace paddle
--- a/paddle/fluid/operators/math/pooling.cu
+++ b/paddle/fluid/operators/math/pooling.cu
@@ -236,7 +236,8 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
    const T* input, const std::vector<int>& input_shape,
    const std::vector<int>& output_shape, const std::vector<int>& ksize,
    const std::vector<int>& strides, const std::vector<int>& paddings,
-    PoolProcess pool_compute, bool exclusive, T* output, cudaStream_t stream) {
+    PoolProcess pool_compute, bool exclusive, bool adaptive, T* output,
+    cudaStream_t stream) {
  const int batch_size = input_shape[0];
  const int input_channels = input_shape[1];
  const int input_height = input_shape[2];
@@ -259,7 +260,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
  KernelPool2D<PoolProcess, T><<<grid, threads, 0, stream>>>(
      nthreads, input, input_channels, input_height, input_width, output_height,
      output_width, ksize_height, ksize_width, stride_height, stride_width,
-      padding_height, padding_width, pool_compute, exclusive, false, output);
+      padding_height, padding_width, pool_compute, exclusive, adaptive, output);
 }

 /*

--- a/paddle/fluid/operators/math/pooling.h
+++ b/paddle/fluid/operators/math/pooling.h
@@ -105,7 +105,8 @@ class Pool2dDirectCUDAFunctor {
                  const std::vector<int>& ksize,
                  const std::vector<int>& strides,
                  const std::vector<int>& paddings, PoolProcess pool_compute,
-                  bool exclusive, T* output, cudaStream_t stream);
+                  bool exclusive, bool adaptive, T* output,
+                  cudaStream_t stream);
 };
 #endif