change preprocessing mask position

13288621 · zlsh80826 · 2ca3fe5d · 13288621 · 13288621 · 13288621
3 changed files
--- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
+++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc
@@ -11,6 +11,7 @@ limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"

 namespace paddle {
@@ -80,11 +81,35 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
    nvinfer1::ILayer* layer = nullptr;

    if (engine_->with_dynamic_shape()) {
+      auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
+      plugin::CastIntPluginDynamic* cast_plugin =
+          new plugin::CastIntPluginDynamic();
+      auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
+
+      auto casted_pos_tensor = cast_layer->getOutput(0);
+      auto reshape_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
+
+      nvinfer1::Dims2 reshape_dim(0, 0);
+      nvinfer1::Permutation perm{1, 0, 2};
+      reshape_layer->setFirstTranspose(perm);
+      reshape_layer->setReshapeDimensions(reshape_dim);
+      auto imask_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0),
+                               nvinfer1::ReduceOperation::kMAX, 1, false);
+      engine_->SetITensor("imask_tensor", imask_layer->getOutput(0));
+
      plugin::DynamicPluginTensorRT* plugin = nullptr;
      plugin = new plugin::EmbEltwiseLayernormPluginDynamic<float>(
          input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
          eps);
-      layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin);
+      auto plugin_layer =
+          engine_->AddPluginV2(input_ids.data(), input_num, plugin);
+      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      auto trans_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *plugin_layer->getOutput(0));
+      trans_layer->setFirstTranspose(permutation);
+      layer = trans_layer;
    } else {
      PADDLE_THROW(platform::errors::Fatal(
          "You are running the Ernie(Bert) model in static"

--- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc
@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
 #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h"

 namespace paddle {
@@ -114,29 +113,32 @@ class MultiheadMatMulOpConverter : public OpConverter {
                             static_cast<void*>(bias_data),
                             static_cast<int32_t>(bias_t->numel())};

-      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      nvinfer1::Permutation permutation{0, 1, 2, 3, 4};
      auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
      trans_layer->setFirstTranspose(permutation);

      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
          engine_, FullyConnected, *trans_layer->getOutput(0), n, weight, bias);
-
-      auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
-      plugin::CastIntPluginDynamic* cast_plugin =
-          new plugin::CastIntPluginDynamic();
-      auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
-
-      auto casted_pos_tensor = cast_layer->getOutput(0);
-      auto reshape_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
-
-      nvinfer1::Dims2 reshape_dim(0, 0);
-      nvinfer1::Permutation perm{1, 0, 2};
-      reshape_layer->setFirstTranspose(perm);
-      reshape_layer->setReshapeDimensions(reshape_dim);
-      auto reduce_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0),
-                               nvinfer1::ReduceOperation::kMAX, 1, false);
+      /*
+            auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
+            plugin::CastIntPluginDynamic* cast_plugin =
+                new plugin::CastIntPluginDynamic();
+            auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
+
+            auto casted_pos_tensor = cast_layer->getOutput(0);
+            auto reshape_layer =
+                TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
+
+            nvinfer1::Dims2 reshape_dim(0, 0);
+            nvinfer1::Permutation perm{1, 0, 2};
+            reshape_layer->setFirstTranspose(perm);
+            reshape_layer->setReshapeDimensions(reshape_dim);
+            auto reduce_layer =
+                TRT_ENGINE_ADD_LAYER(engine_, Reduce,
+         *reshape_layer->getOutput(0),
+                                     nvinfer1::ReduceOperation::kMAX, 1, false);
+      */
+      auto imask_tensor = engine_->GetITensor("imask_tensor");

      auto creator = GetPluginRegistry()->getPluginCreator(
          "CustomQKVToContextPluginDynamic", "1");
@@ -149,8 +151,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
          {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
          {"hidden_size", &hidden, nvinfer1::PluginFieldType::kINT32, 1},
          {"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1},
-          {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32,
-           1},  // no bool type
+          {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, 1},
      };
      nvinfer1::PluginFieldCollection* pluginPtr =
          static_cast<nvinfer1::PluginFieldCollection*>(
@@ -164,7 +165,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
          creator->createPlugin("CustomQKVToContextPluginDynamic", pluginPtr);
      std::vector<nvinfer1::ITensor*> plugin_inputs;
      plugin_inputs.push_back(fc_layer->getOutput(0));
-      plugin_inputs.push_back(reduce_layer->getOutput(0));
+      // plugin_inputs.push_back(reduce_layer->getOutput(0));
+      plugin_inputs.push_back(imask_tensor);
      auto plugin_layer = engine_->network()->addPluginV2(
          plugin_inputs.data(), plugin_inputs.size(), *pluginObj);
      assert(plugin_layer != nullptr);

--- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
 public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
-    // This OP is implemented by trt dynamic shpae plugin.
-    // Dynamic shape plugin requires TRT version greater than 6.0.
-    std::cerr << "slice op converter\n" << std::endl;
+// This OP is implemented by trt dynamic shape plugin.
+// Dynamic shape plugin requires TRT version greater than 6.0.
 #if IS_TRT_VERSION_GE(6000)
    VLOG(4) << "convert slice op to tensorrt layer";
    framework::OpDesc op_desc(op, nullptr);
@@ -41,10 +40,17 @@ class SliceOpConverter : public OpConverter {

    nvinfer1::ILayer* layer = nullptr;
    if (engine_->with_dynamic_shape()) {
+      nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
+      auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      trans_layer->setFirstTranspose(permutation);
+      std::vector<nvinfer1::ITensor*> plugin_inputs;
+      plugin_inputs.emplace_back(trans_layer->getOutput(0));
+
      bool ban_fp16 = engine_->disable_trt_plugin_fp16();
      plugin::SlicePluginDynamic* plugin =
          new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
-      layer = engine_->AddPluginV2(&input, 1, plugin);
+      layer = engine_->AddPluginV2(plugin_inputs.data(), plugin_inputs.size(),
+                                   plugin);
    } else {
      PADDLE_THROW(platform::errors::Fatal(
          "You are running the Ernie(Bert) model in static"