From 13288621dfce0f2b14341715ab6ce5982c318ccf Mon Sep 17 00:00:00 2001
From: zlsh80826 <zlsh80826@gmail.com>
Date: Wed, 26 Aug 2020 11:01:54 +0800
Subject: [PATCH] change preprocessing mask position

---
 .../tensorrt/convert/emb_eltwise_layernorm.cc | 27 ++++++++++-
 .../tensorrt/convert/multihead_matmul_op.cc   | 46 ++++++++++---------
 .../inference/tensorrt/convert/slice_op.cc    | 14 ++++--
 3 files changed, 60 insertions(+), 27 deletions(-)
diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
index cdc0e415d46..cb789a8cd35 100644
--- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
+++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
@@ -11,6 +11,7 @@ limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"
 
 namespace paddle {
@@ -80,11 +81,35 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
     nvinfer1::ILayer* layer = nullptr;
 
     if (engine_->with_dynamic_shape()) {
+      auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
+      plugin::CastIntPluginDynamic* cast_plugin =
+          new plugin::CastIntPluginDynamic();
+      auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
+
+      auto casted_pos_tensor = cast_layer->getOutput(0);
+      auto reshape_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
+
+      nvinfer1::Dims2 reshape_dim(0, 0);
+      nvinfer1::Permutation perm{1, 0, 2};
+      reshape_layer->setFirstTranspose(perm);
+      reshape_layer->setReshapeDimensions(reshape_dim);
+      auto imask_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0),
+                               nvinfer1::ReduceOperation::kMAX, 1, false);
+      engine_->SetITensor("imask_tensor", imask_layer->getOutput(0));
+
       plugin::DynamicPluginTensorRT* plugin = nullptr;
       plugin = new plugin::EmbEltwiseLayernormPluginDynamic<float>(
           input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
           eps);
-      layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin);
+      auto plugin_layer =
+          engine_->AddPluginV2(input_ids.data(), input_num, plugin);
+      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      auto trans_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *plugin_layer->getOutput(0));
+      trans_layer->setFirstTranspose(permutation);
+      layer = trans_layer;
     } else {
       PADDLE_THROW(platform::errors::Fatal(
           "You are running the Ernie(Bert) model in static"
diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
index d71a4f23374..e6903d05d38 100644
--- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h"
 
 namespace paddle {
@@ -114,29 +113,32 @@ class MultiheadMatMulOpConverter : public OpConverter {
                              static_cast<void*>(bias_data),
                              static_cast<int32_t>(bias_t->numel())};
 
-      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      nvinfer1::Permutation permutation{0, 1, 2, 3, 4};
       auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
       trans_layer->setFirstTranspose(permutation);
 
       auto* fc_layer = TRT_ENGINE_ADD_LAYER(
           engine_, FullyConnected, *trans_layer->getOutput(0), n, weight, bias);
-
-      auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
-      plugin::CastIntPluginDynamic* cast_plugin =
-          new plugin::CastIntPluginDynamic();
-      auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
-
-      auto casted_pos_tensor = cast_layer->getOutput(0);
-      auto reshape_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
-
-      nvinfer1::Dims2 reshape_dim(0, 0);
-      nvinfer1::Permutation perm{1, 0, 2};
-      reshape_layer->setFirstTranspose(perm);
-      reshape_layer->setReshapeDimensions(reshape_dim);
-      auto reduce_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0),
-                               nvinfer1::ReduceOperation::kMAX, 1, false);
+      /*
+            auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
+            plugin::CastIntPluginDynamic* cast_plugin =
+                new plugin::CastIntPluginDynamic();
+            auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
+
+            auto casted_pos_tensor = cast_layer->getOutput(0);
+            auto reshape_layer =
+                TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
+
+            nvinfer1::Dims2 reshape_dim(0, 0);
+            nvinfer1::Permutation perm{1, 0, 2};
+            reshape_layer->setFirstTranspose(perm);
+            reshape_layer->setReshapeDimensions(reshape_dim);
+            auto reduce_layer =
+                TRT_ENGINE_ADD_LAYER(engine_, Reduce,
+         *reshape_layer->getOutput(0),
+                                     nvinfer1::ReduceOperation::kMAX, 1, false);
+      */
+      auto imask_tensor = engine_->GetITensor("imask_tensor");
 
       auto creator = GetPluginRegistry()->getPluginCreator(
           "CustomQKVToContextPluginDynamic", "1");
@@ -149,8 +151,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
           {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
           {"hidden_size", &hidden, nvinfer1::PluginFieldType::kINT32, 1},
           {"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1},
-          {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32,
-           1},  // no bool type
+          {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, 1},
       };
       nvinfer1::PluginFieldCollection* pluginPtr =
           static_cast<nvinfer1::PluginFieldCollection*>(
@@ -164,7 +165,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
           creator->createPlugin("CustomQKVToContextPluginDynamic", pluginPtr);
       std::vector<nvinfer1::ITensor*> plugin_inputs;
       plugin_inputs.push_back(fc_layer->getOutput(0));
-      plugin_inputs.push_back(reduce_layer->getOutput(0));
+      // plugin_inputs.push_back(reduce_layer->getOutput(0));
+      plugin_inputs.push_back(imask_tensor);
       auto plugin_layer = engine_->network()->addPluginV2(
           plugin_inputs.data(), plugin_inputs.size(), *pluginObj);
       assert(plugin_layer != nullptr);
diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
index ed75d7b1583..a8784ba5138 100644
--- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    // This OP is implemented by trt dynamic shpae plugin.
-    // Dynamic shape plugin requires TRT version greater than 6.0.
-    std::cerr << "slice op converter\n" << std::endl;
+// This OP is implemented by trt dynamic shape plugin.
+// Dynamic shape plugin requires TRT version greater than 6.0.
 #if IS_TRT_VERSION_GE(6000)
     VLOG(4) << "convert slice op to tensorrt layer";
     framework::OpDesc op_desc(op, nullptr);
@@ -41,10 +40,17 @@ class SliceOpConverter : public OpConverter {
 
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
+      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      trans_layer->setFirstTranspose(permutation);
+      std::vector<nvinfer1::ITensor*> plugin_inputs;
+      plugin_inputs.emplace_back(trans_layer->getOutput(0));
+
       bool ban_fp16 = engine_->disable_trt_plugin_fp16();
       plugin::SlicePluginDynamic* plugin =
           new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
-      layer = engine_->AddPluginV2(&input, 1, plugin);
+      layer = engine_->AddPluginV2(plugin_inputs.data(), plugin_inputs.size(),
+                                   plugin);
     } else {
       PADDLE_THROW(platform::errors::Fatal(
           "You are running the Ernie(Bert) model in static"
-- 
GitLab