Merge pull request #13938 from NHZlX/ocr_attention_support

ceil pool mode support for ocr attention model.

Merge pull request #13938 from NHZlX/ocr_attention_support
ceil pool mode support for ocr attention model.
2256fae4 · Zhaolong Xing · GitHub · e906c8e5 · 485ab5b3 · 2256fae4
2 changed files
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -42,16 +42,22 @@ class Pool2dOpConverter : public OpConverter {
        boost::get<std::vector<int>>(op_desc.GetAttr("strides"));
    std::vector<int> paddings =
        boost::get<std::vector<int>>(op_desc.GetAttr("paddings"));
+    bool ceil_mode = boost::get<bool>(op_desc.GetAttr("ceil_mode"));

-    nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
-    if (global_pooling == true) {
    nvinfer1::Dims input_shape = input1->getDimensions();
    int nbDims = input_shape.nbDims;
+    nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
+    nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
+    nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
+
+    if (global_pooling == true) {
      nv_ksize.d[0] = input_shape.d[nbDims - 2];
      nv_ksize.d[1] = input_shape.d[nbDims - 1];
+      nv_strides.h() = 1;
+      nv_strides.w() = 1;
+      nv_paddings.h() = 0;
+      nv_paddings.w() = 0;
    }
-    const nvinfer1::DimsHW nv_strides(strides[0], strides[1]);
-    const nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);

    PADDLE_ENFORCE_EQ(input1->getDimensions().nbDims, 3UL);

@@ -64,6 +70,36 @@ class Pool2dOpConverter : public OpConverter {
      PADDLE_THROW("TensorRT unsupported pooling type!");
    }

+    if (ceil_mode) {
+      nvinfer1::DimsHW pre_pad(0, 0);
+      nvinfer1::DimsHW post_pad(0, 0);
+      int input_height = input_shape.d[nbDims - 2];
+      int input_width = input_shape.d[nbDims - 1];
+      int floor_h_output_size =
+          (input_height - ksize[0] + 2 * paddings[0]) / strides[0] + 1;
+      int ceil_h_output_size =
+          (input_height - ksize[0] + 2 * paddings[0] + strides[0] - 1) /
+              strides[0] +
+          1;
+
+      int floor_w_output_size =
+          (input_width - ksize[1] + 2 * paddings[1]) / strides[1] + 1;
+      int ceil_w_output_size =
+          (input_width - ksize[1] + 2 * paddings[1] + strides[1] - 1) /
+              strides[1] +
+          1;
+      if (floor_h_output_size != ceil_h_output_size) {
+        post_pad.h() = strides[0] - 1;
+      }
+
+      if (floor_w_output_size != ceil_w_output_size) {
+        post_pad.w() = strides[1] - 1;
+      }
+      auto* layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Padding, *const_cast<nvinfer1::ITensor*>(input1), pre_pad,
+          post_pad);
+      input1 = layer->getOutput(0);
+    }
    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling,
                                       *const_cast<nvinfer1::ITensor*>(input1),
                                       nv_pool_type, nv_ksize);

--- a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
@@ -20,18 +20,20 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {

-void test_pool2d(bool global_pooling) {
+void test_pool2d(bool global_pooling, bool ceil_mode) {
  framework::Scope scope;
  std::unordered_set<std::string> parameters;
  TRTConvertValidation validator(5, parameters, scope, 1 << 15);

  // The ITensor's Dims should not contain the batch size.
  // So, the ITensor's Dims of input and output should be C * H * W.
-  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 4, 4));
+  validator.DeclInputVar("pool2d-X", nvinfer1::Dims3(3, 13, 14));
  if (global_pooling)
    validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 1, 1));
+  else if (ceil_mode)
+    validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 7));
  else
-    validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 2, 2));
+    validator.DeclOutputVar("pool2d-Out", nvinfer1::Dims3(3, 6, 6));

  // Prepare Op description
  framework::OpDesc desc;
@@ -39,7 +41,7 @@ void test_pool2d(bool global_pooling) {
  desc.SetInput("X", {"pool2d-X"});
  desc.SetOutput("Out", {"pool2d-Out"});

-  std::vector<int> ksize({2, 2});
+  std::vector<int> ksize({3, 3});
  std::vector<int> strides({2, 2});
  std::vector<int> paddings({0, 0});
  std::string pooling_t = "max";
@@ -49,6 +51,7 @@ void test_pool2d(bool global_pooling) {
  desc.SetAttr("strides", strides);
  desc.SetAttr("paddings", paddings);
  desc.SetAttr("global_pooling", global_pooling);
+  desc.SetAttr("ceil_mode", ceil_mode);

  LOG(INFO) << "set OP";
  validator.SetOp(*desc.Proto());
@@ -57,9 +60,10 @@ void test_pool2d(bool global_pooling) {
  validator.Execute(3);
 }

-TEST(Pool2dOpConverter, normal) { test_pool2d(false); }
+TEST(Pool2dOpConverter, normal) { test_pool2d(false, false); }
+TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true, false); }

-TEST(Pool2dOpConverter, test_global_pooling) { test_pool2d(true); }
+TEST(Pool2dOpConverter, test_ceil_mode) { test_pool2d(false, true); }

 }  // namespace tensorrt
 }  // namespace inference