Merge pull request #12355 from NHZlX/add_tensorrt_pooling_converter

Add tensorrt pooling converter

Merge pull request #12355 from NHZlX/add_tensorrt_pooling_converter
Add tensorrt pooling converter
85c49127 · Zhaolong Xing · GitHub · 5bea9c14 · 4f71a3b1 · 85c49127
8 changed file
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
 # Add TRT tests
 nv_library(tensorrt_converter
-  SRCS mul_op.cc conv2d_op.cc fc_op.cc
+  SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc
  DEPS tensorrt_engine mul_op)
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -13,3 +13,6 @@ nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
        DEPS ${FLUID_CORE_MODULES} tensorrt_engine mul_op SERIAL)
 nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
        DEPS ${FLUID_CORE_MODULES} tensorrt_engine activation_op SERIAL)
+nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
+        DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+/*
+ * Pool2dOp, IPoolingLayer in TRT. This Layer doesn't has weights.
+ */
+class Pool2dOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(4)
+        << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+    PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+    auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
+    std::string pool_type =
+        boost::get<std::string>(op_desc.GetAttr("pooling_type"));
+    std::vector<int> ksize =
+        boost::get<std::vector<int>>(op_desc.GetAttr("ksize"));
+    std::vector<int> strides =
+        boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
+    std::vector<int> paddings =
+        boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
+    const nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
+    const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
+    const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
+    PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);
+    nvinfer1::PoolingType nv_pool_type = nvinfer1::PoolingType::kMAX;
+    if (pool_type == "max") {
+      nv_pool_type = nvinfer1::PoolingType::kMAX;
+    } else if (pool_type == "avg") {
+      nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
+    } else {
+      PADDLE_THROW("TensorRT unsupported pooling type!");
+    }
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
+                                       *const_cast<nvinfer1::ITensor*>(input1),
+                                       nv_pool_type, nv_ksize);
+    PADDLE_ENFORCE_NOT_NULL(layer, "pool layer could not be created.");
+    layer->setStride(nv_strides);
+    layer->setPadding(nv_paddings);
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {
+      engine_->DeclareOutput(output_name);
+    }
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+USE_OP(pool2d);
+REGISTER_TRT_OP_CONVERTER(pool2d, Pool2dOpConverter);
--- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";
-  validator.Execute(1);
+  validator.Execute(5);
 }
 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
@@ -24,9 +24,8 @@ TEST(fc_op, test) {
  std::unordered_set<std::string> parameters({"mul-Y"});
  framework::Scope scope;
  TRTConvertValidation validator(10, parameters, scope, 1000);
-  validator.DeclInputVar("mul-X", nvinfer1::Dims4(1, 10, 1, 1));
+  validator.DeclInputVar("mul-X", nvinfer1::Dims3(10, 1, 1));
  validator.DeclParamVar("mul-Y", nvinfer1::Dims2(10, 2));
-  // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
  validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(1, 2));
  // Prepare Op description
@@ -38,7 +37,7 @@ TEST(fc_op, test) {
  validator.SetOp(*desc.Proto());
-  validator.Execute(1);
+  validator.Execute(10);
 }
 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
@@ -23,7 +23,7 @@ namespace tensorrt {
 TEST(MulOpConverter, main) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
-  TRTConvertValidation validator(10, parameters, scope, 1000);
+  TRTConvertValidation validator(10, parameters, scope, 1000, false);
  validator.DeclInputVar("mul-X", nvinfer1::Dims2(10, 6));
  validator.DeclInputVar("mul-Y", nvinfer1::Dims2(6, 10));
  validator.DeclOutputVar("mul-Out", nvinfer1::Dims2(10, 10));
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
  validator.SetOp(*desc.Proto());
  LOG(INFO) << "execute";
-  validator.Execute(1);
+  validator.Execute(2);
 }
 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include <gtest/gtest.h>
+#include <fstream>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+TEST(Pool2dOpConverter, main) {
+  framework::Scope scope;
+  std::unordered_set<std::string> parameters;
+  TRTConvertValidation validator(5, parameters, scope, 1 << 15);
+  // The ITensor's Dims should not contain the batch size.
+  // So, the ITensor's Dims of input and output should be C * H * W.
+  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
+  validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("pool2d");
+  desc.SetInput("X", {"pool2d-X"});
+  desc.SetOutput("Out", {"pool2d-Out"});
+  std::vector<int> ksize({2, 2});
+  std::vector<int> strides({2, 2});
+  std::vector<int> paddings({0, 0});
+  std::string pooling_t = "max";
+  desc.SetAttr("pooling_type", pooling_t);
+  desc.SetAttr("ksize", ksize);
+  desc.SetAttr("strides", strides);
+  desc.SetAttr("paddings", paddings);
+  LOG(INFO) << "set OP";
+  validator.SetOp(*desc.Proto());
+  LOG(INFO) << "execute";
+  validator.Execute(3);
+}
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+USE_OP(pool2d);
--- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h
+++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h
@@ -63,13 +63,16 @@ class TRTConvertValidation {
 public:
  TRTConvertValidation() = delete;
-  TRTConvertValidation(int batch_size,
+  TRTConvertValidation(int max_batch_size,
                       const std::unordered_set<std::string>& parameters,
                       framework::Scope& scope,  // NOLINT
-                       int workspace_size = 1 << 10)
+                       int workspace_size = 1 << 10, bool if_add_batch = true)
-      : parameters_(parameters), scope_(scope) {
+      : parameters_(parameters),
+        scope_(scope),
+        if_add_batch_(if_add_batch),
+        max_batch_size_(max_batch_size) {
    // create engine.
-    engine_.reset(new TensorRTEngine(batch_size, workspace_size, &stream_));
+    engine_.reset(new TensorRTEngine(max_batch_size, workspace_size, &stream_));
    engine_->InitNetwork();
    PADDLE_ENFORCE_EQ(cudaStreamCreate(&stream_), 0);
@@ -84,7 +87,7 @@ class TRTConvertValidation {
  // Declare a parameter varaible in the scope.
  void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
-    DeclVar(name, dims);
+    DeclVar(name, dims, true);
  }
  void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) {
@@ -92,12 +95,18 @@ class TRTConvertValidation {
  }
  // Declare a variable in a fluid Scope.
-  void DeclVar(const std::string& name, const nvinfer1::Dims& dims) {
+  void DeclVar(const std::string& name, const nvinfer1::Dims& dims,
+               bool is_param = false) {
    platform::CPUPlace place;
    platform::CPUDeviceContext ctx(place);
    // Init Fluid tensor.
    std::vector<int> dim_vec(dims.d, dims.d + dims.nbDims);
+    // There is no batchsize in ITensor's shape, but We should add it to
+    // tensor's shape of fluid. If the variable is not parameter and the
+    // if_add_batch_ flag is true, add the max batchsize to dim_vec.
+    if (is_param != true && if_add_batch_ == true)
+      dim_vec.insert(dim_vec.begin(), max_batch_size_);
    auto* x = scope_.Var(name);
    auto* x_tensor = x->GetMutable<framework::LoDTensor>();
    x_tensor->Resize(framework::make_ddim(dim_vec));
@@ -131,6 +140,7 @@ class TRTConvertValidation {
  void Execute(int batch_size) {
    // Execute Fluid Op
+    PADDLE_ENFORCE_LE(batch_size, max_batch_size_);
    platform::CPUPlace place;
    platform::CPUDeviceContext ctx(place);
    op_->Run(scope_, place);
@@ -149,9 +159,15 @@ class TRTConvertValidation {
      auto* var = scope_.FindVar(output);
      auto tensor = var->GetMutable<framework::LoDTensor>();
      framework::TensorToVector(*tensor, ctx, &fluid_out);
+      size_t fluid_out_size = fluid_out.size();
+      if (if_add_batch_ == true) {
+        fluid_out_size =
+            batch_size * (framework::product(tensor->dims()) / max_batch_size_);
+      }
      // Compare two output
      ASSERT_FALSE(fluid_out.empty());
-      for (size_t i = 0; i < fluid_out.size(); i++) {
+      for (size_t i = 0; i < fluid_out_size; i++) {
        // Loose the threshold for CI in different machine model.
        EXPECT_LT(std::abs(fluid_out[i] - trt_out[i]), 2e-5);
      }
@@ -167,6 +183,12 @@ class TRTConvertValidation {
  std::unique_ptr<framework::OpDesc> op_desc_;
  const std::unordered_set<std::string>& parameters_;
  framework::Scope& scope_;
+  // The ITensor of trt does not cotain the batch size,
+  // bug, in most cases, we need to set batch size for
+  // fluid's tensor shape. This variable indicates
+  // whether to add batch size to tensor shape of fluid.
+  bool if_add_batch_;
+  int max_batch_size_;
 };
 }  // namespace tensorrt

--- a/paddle/fluid/inference/tensorrt/test_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -113,7 +113,7 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
  ASSERT_EQ(y_cpu[1], 14.5);
 }
-TEST_F(TensorRTEngineTest, test_conv2d_temp) {
+TEST_F(TensorRTEngineTest, test_conv2d) {
  // Weight in CPU memory.
  float raw_weight[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
  float raw_bias[1] = {0};
@@ -146,6 +146,37 @@ TEST_F(TensorRTEngineTest, test_conv2d_temp) {
  ASSERT_EQ(y_cpu[1], 6.0);
 }
+TEST_F(TensorRTEngineTest, test_pool2d) {
+  // Weight in CPU memory.
+  auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
+                                  nvinfer1::Dims3{1, 2, 2});
+  nvinfer1::PoolingType pool_t = nvinfer1::PoolingType::kAVERAGE;
+  auto* pool_layer =
+      TRT_ENGINE_ADD_LAYER(engine_, Pooling, *const_cast<nvinfer1::ITensor*>(x),
+                           pool_t, nvinfer1::DimsHW{2, 2});
+  PADDLE_ENFORCE(pool_layer != nullptr);
+  pool_layer->setStride(nvinfer1::DimsHW{1, 1});
+  pool_layer->setPadding(nvinfer1::DimsHW{0, 0});
+  engine_->DeclareOutput(pool_layer, 0, "y");
+  engine_->FreezeNetwork();
+  ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
+  float x_v[8] = {1.0, 2.0, 5.0, 0.0, 2.0, 3.0, 5.0, 10.0};
+  engine_->SetInputFromCPU("x", reinterpret_cast<void*>(&x_v),
+                           8 * sizeof(float));
+  engine_->Execute(2);
+  LOG(INFO) << "to get output";
+  float* y_cpu = new float[2];
+  engine_->GetOutputInCPU("y", &y_cpu[0], 2 * sizeof(float));
+  ASSERT_EQ(y_cpu[0], 2.0);
+  ASSERT_EQ(y_cpu[1], 5.0);
+}
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle