From 13288621dfce0f2b14341715ab6ce5982c318ccf Mon Sep 17 00:00:00 2001 From: zlsh80826 Date: Wed, 26 Aug 2020 11:01:54 +0800 Subject: [PATCH] change preprocessing mask position --- .../tensorrt/convert/emb_eltwise_layernorm.cc | 27 ++++++++++- .../tensorrt/convert/multihead_matmul_op.cc | 46 ++++++++++--------- .../inference/tensorrt/convert/slice_op.cc | 14 ++++-- 3 files changed, 60 insertions(+), 27 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc index cdc0e415d46..cb789a8cd35 100644 --- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/helper.h" +#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h" namespace paddle { @@ -80,11 +81,35 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { nvinfer1::ILayer* layer = nullptr; if (engine_->with_dynamic_shape()) { + auto pos_tensor = engine_->GetITensor("eval_placeholder_2"); + plugin::CastIntPluginDynamic* cast_plugin = + new plugin::CastIntPluginDynamic(); + auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin); + + auto casted_pos_tensor = cast_layer->getOutput(0); + auto reshape_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor); + + nvinfer1::Dims2 reshape_dim(0, 0); + nvinfer1::Permutation perm{1, 0, 2}; + reshape_layer->setFirstTranspose(perm); + reshape_layer->setReshapeDimensions(reshape_dim); + auto imask_layer = + TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0), + nvinfer1::ReduceOperation::kMAX, 1, false); + engine_->SetITensor("imask_tensor", imask_layer->getOutput(0)); + 
plugin::DynamicPluginTensorRT* plugin = nullptr; plugin = new plugin::EmbEltwiseLayernormPluginDynamic( input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden, eps); - layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin); + auto plugin_layer = + engine_->AddPluginV2(input_ids.data(), input_num, plugin); + nvinfer1::Permutation permutation{1, 0, 2, 3, 4}; + auto trans_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *plugin_layer->getOutput(0)); + trans_layer->setFirstTranspose(permutation); + layer = trans_layer; } else { PADDLE_THROW(platform::errors::Fatal( "You are running the Ernie(Bert) model in static" diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc index d71a4f23374..e6903d05d38 100644 --- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h" namespace paddle { @@ -114,29 +113,32 @@ class MultiheadMatMulOpConverter : public OpConverter { static_cast(bias_data), static_cast(bias_t->numel())}; - nvinfer1::Permutation permutation{1, 0, 2, 3, 4}; + nvinfer1::Permutation permutation{0, 1, 2, 3, 4}; auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); trans_layer->setFirstTranspose(permutation); auto* fc_layer = TRT_ENGINE_ADD_LAYER( engine_, FullyConnected, *trans_layer->getOutput(0), n, weight, bias); - - auto pos_tensor = engine_->GetITensor("eval_placeholder_2"); - plugin::CastIntPluginDynamic* cast_plugin = - new plugin::CastIntPluginDynamic(); - auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin); - - auto casted_pos_tensor = cast_layer->getOutput(0); - auto reshape_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor); - - nvinfer1::Dims2 reshape_dim(0, 0); - nvinfer1::Permutation perm{1, 0, 2}; - reshape_layer->setFirstTranspose(perm); - reshape_layer->setReshapeDimensions(reshape_dim); - auto reduce_layer = - TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0), - nvinfer1::ReduceOperation::kMAX, 1, false); + /* + auto pos_tensor = engine_->GetITensor("eval_placeholder_2"); + plugin::CastIntPluginDynamic* cast_plugin = + new plugin::CastIntPluginDynamic(); + auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin); + + auto casted_pos_tensor = cast_layer->getOutput(0); + auto reshape_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor); + + nvinfer1::Dims2 reshape_dim(0, 0); + nvinfer1::Permutation perm{1, 0, 2}; + reshape_layer->setFirstTranspose(perm); + reshape_layer->setReshapeDimensions(reshape_dim); + auto reduce_layer = + TRT_ENGINE_ADD_LAYER(engine_, Reduce, + *reshape_layer->getOutput(0), + 
nvinfer1::ReduceOperation::kMAX, 1, false); + */ + auto imask_tensor = engine_->GetITensor("imask_tensor"); auto creator = GetPluginRegistry()->getPluginCreator( "CustomQKVToContextPluginDynamic", "1"); @@ -149,8 +151,7 @@ class MultiheadMatMulOpConverter : public OpConverter { {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1}, {"hidden_size", &hidden, nvinfer1::PluginFieldType::kINT32, 1}, {"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1}, - {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, - 1}, // no bool type + {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, 1}, }; nvinfer1::PluginFieldCollection* pluginPtr = static_cast( @@ -164,7 +165,8 @@ class MultiheadMatMulOpConverter : public OpConverter { creator->createPlugin("CustomQKVToContextPluginDynamic", pluginPtr); std::vector plugin_inputs; plugin_inputs.push_back(fc_layer->getOutput(0)); - plugin_inputs.push_back(reduce_layer->getOutput(0)); + // plugin_inputs.push_back(reduce_layer->getOutput(0)); + plugin_inputs.push_back(imask_tensor); auto plugin_layer = engine_->network()->addPluginV2( plugin_inputs.data(), plugin_inputs.size(), *pluginObj); assert(plugin_layer != nullptr); diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc index ed75d7b1583..a8784ba5138 100644 --- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc @@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter { public: void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - // This OP is implemented by trt dynamic shpae plugin. - // Dynamic shape plugin requires TRT version greater than 6.0. - std::cerr << "slice op converter\n" << std::endl; +// This OP is implemented by trt dynamic shape plugin. +// Dynamic shape plugin requires TRT version greater than 6.0.
#if IS_TRT_VERSION_GE(6000) VLOG(4) << "convert slice op to tensorrt layer"; framework::OpDesc op_desc(op, nullptr); @@ -41,10 +40,17 @@ class SliceOpConverter : public OpConverter { nvinfer1::ILayer* layer = nullptr; if (engine_->with_dynamic_shape()) { + nvinfer1::Permutation permutation{1, 0, 2, 3, 4}; + auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + trans_layer->setFirstTranspose(permutation); + std::vector plugin_inputs; + plugin_inputs.emplace_back(trans_layer->getOutput(0)); + bool ban_fp16 = engine_->disable_trt_plugin_fp16(); plugin::SlicePluginDynamic* plugin = new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16); - layer = engine_->AddPluginV2(&input, 1, plugin); + layer = engine_->AddPluginV2(plugin_inputs.data(), plugin_inputs.size(), + plugin); } else { PADDLE_THROW(platform::errors::Fatal( "You are running the Ernie(Bert) model in static" -- GitLab