From fba46ea3820beb2412f2f3e069616210b8b64891 Mon Sep 17 00:00:00 2001
From: Pei Yang
Date: Sun, 25 Apr 2021 18:43:36 +0800
Subject: [PATCH] [Paddle-TRT] Fix AI-Rank BERT emb_eltwise_layernorm input
 order (#32482)

* fix airank bert emb order
* move input num check to converter
* add input num check
* add unused var check white list
---
 .../embedding_eltwise_layernorm_fuse_pass.cc  | 10 +++++
 paddle/fluid/framework/pipeline_trainer.cc    |  2 +-
 paddle/fluid/framework/unused_var_check.cc    | 43 ++++++++++---------
 .../tensorrt/convert/emb_eltwise_layernorm.cc | 37 +++++++++++-----
 4 files changed, 59 insertions(+), 33 deletions(-)

diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc
index 59d071e1034..48f79e63b4f 100644
--- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.cc
@@ -290,10 +290,20 @@ static int BuildFusion(Graph* graph, const std::string& name_scope
       ids.push_back(inner_pattern_ins[js[iter]].first->Name());
       embs.push_back(inner_pattern_ins[js[iter]].second->Name());
     }
+
     OpDesc new_op_desc;
     new_op_desc.SetType("fused_embedding_eltwise_layernorm");
     new_op_desc.SetInput("Ids", ids);
     new_op_desc.SetInput("Embs", embs);
+
+    new_op_desc.SetInput("WordId", {ids[0]});
+    new_op_desc.SetInput("PosId", {ids[1]});
+    new_op_desc.SetInput("SentId", {ids[2]});
+
+    new_op_desc.SetInput("WordEmbedding", {embs[0]});
+    new_op_desc.SetInput("PosEmbedding", {embs[1]});
+    new_op_desc.SetInput("SentEmbedding", {embs[2]});
+
     new_op_desc.SetInput("Bias", {end_pattern_biases[k]->Name()});
     new_op_desc.SetInput("Scale", {end_pattern_scales[k]->Name()});
     new_op_desc.SetOutput("Out", {end_pattern_out[k]->Name()});
diff --git a/paddle/fluid/framework/pipeline_trainer.cc b/paddle/fluid/framework/pipeline_trainer.cc
index 3649e00e7c9..cdd2dbd5b1d 100644
--- a/paddle/fluid/framework/pipeline_trainer.cc
+++ b/paddle/fluid/framework/pipeline_trainer.cc
@@ -37,7 +37,7 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
   int place_id = section_config.place_id();
 #if (defined PADDLE_WITH_NCCL)
   place_ = platform::CUDAPlace(place_id);
-#elif (defined WITH_ASCEND_CL)
+#elif (defined WITH_ASCEND_CL)  // NOLINT
   place_ = platform::NPUPlace(place_id);
 #endif
   worker_ = DeviceWorkerFactory::CreateDeviceWorker(
diff --git a/paddle/fluid/framework/unused_var_check.cc b/paddle/fluid/framework/unused_var_check.cc
index d2adbdd3451..0f8465ab894 100644
--- a/paddle/fluid/framework/unused_var_check.cc
+++ b/paddle/fluid/framework/unused_var_check.cc
@@ -53,27 +53,28 @@ static const std::unordered_set<std::string> &GetOpWithUnusedVarAllowSet() {
   // Use pointer here for safe static deinitialization
   static auto *allow_set = new std::unordered_set<std::string>({
       // called once
-      "batch_norm",                      // 0
-      "batch_norm_grad",                 // 0
-      "sync_batch_norm",                 // 0
-      "sync_batch_norm_grad",            // 0
-      "inplace_abn",                     // 0
-      "inplace_abn_grad",                // 0
-      "dgc_momentum",                    // 0
-      "fake_quantize_range_abs_max",     // 0
-      "rmsprop",                         // 0
-      "sequence_conv_grad",              // 0
-      "roi_perspective_transform_grad",  // 0
-      "fill_zeros_like",                 // 1
-      "fill_any_like",                   // 1
-      "nce_grad",                        // 1
-      "precision_recall",                // 1
-      "fusion_seqpool_cvm_concat",       // 2
-      "fused_batch_norm_act",            // 2
-      "fused_batch_norm_act_grad",       // 2
-      "data_norm",                       // 0
-      "data_norm_grad",                  // 0
-      "update_loss_scaling",             // 0
+      "batch_norm",                         // 0
+      "batch_norm_grad",                    // 0
+      "sync_batch_norm",                    // 0
+      "sync_batch_norm_grad",               // 0
+      "inplace_abn",                        // 0
"inplace_abn_grad", // 0 + "dgc_momentum", // 0 + "fake_quantize_range_abs_max", // 0 + "rmsprop", // 0 + "sequence_conv_grad", // 0 + "roi_perspective_transform_grad", // 0 + "fill_zeros_like", // 1 + "fill_any_like", // 1 + "nce_grad", // 1 + "precision_recall", // 1 + "fusion_seqpool_cvm_concat", // 2 + "fused_batch_norm_act", // 2 + "fused_batch_norm_act_grad", // 2 + "data_norm", // 0 + "data_norm_grad", // 0 + "update_loss_scaling", // 0 + "fused_embedding_eltwise_layernorm", // 0 }); return *allow_set; } diff --git a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc index 57ac30b5f6b..66a682db07b 100644 --- a/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc +++ b/paddle/fluid/inference/tensorrt/convert/emb_eltwise_layernorm.cc @@ -34,8 +34,17 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { VLOG(4) << "convert fluid EmbEltwiseLayerNorm op to tensorrt layer"; framework::OpDesc op_desc(op, nullptr); - auto id_names = op_desc.Input("Ids"); - auto emb_names = op_desc.Input("Embs"); + auto word_id_name = op_desc.Input("WordId").front(); + auto pos_id_name = op_desc.Input("PosId").front(); + auto sent_id_name = op_desc.Input("SentId").front(); + auto word_emb_name = op_desc.Input("WordEmbedding").front(); + auto pos_emb_name = op_desc.Input("PosEmbedding").front(); + auto sent_emb_name = op_desc.Input("SentEmbedding").front(); + std::vector id_names = {word_id_name, pos_id_name, + sent_id_name}; + std::vector emb_names = {word_emb_name, pos_emb_name, + sent_emb_name}; + int input_num = id_names.size(); // Declare inputs @@ -91,6 +100,12 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { if (enable_int8) { output_fp16 = 1; } + PADDLE_ENFORCE_EQ( + input_num, 3, + platform::errors::InvalidArgument( + "When using oss and var-len, embedding_eltwise_layernorm op" + "should have 3 inputs only, but got %d.", + input_num)); PADDLE_ENFORCE_EQ( output_fp16, 1, platform::errors::InvalidArgument( @@ -125,15 +140,15 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { plugin_ptr->fields = fields.data(); std::vector plugin_inputs; - plugin_inputs.emplace_back(engine_->GetITensor( - engine_->network()->getInput(0)->getName())); // word_embedding, - // eval_placeholder_0 - plugin_inputs.emplace_back(engine_->GetITensor( - engine_->network()->getInput(1)->getName())); // sent_embedding, - // eval_placeholder_1 - plugin_inputs.emplace_back(engine_->GetITensor( - engine_->network()->getInput(2)->getName())); // cu_seqlens, - // eval_placeholder_2 + plugin_inputs.emplace_back( + engine_->GetITensor(word_id_name)); // word_embedding, + // eval_placeholder_0 + plugin_inputs.emplace_back( + engine_->GetITensor(sent_id_name)); // sent_embedding, + // eval_placeholder_1 + plugin_inputs.emplace_back( + engine_->GetITensor(pos_id_name)); // cu_seqlens, + // eval_placeholder_2 auto max_seqlen_tensor = engine_->GetITensor(engine_->network()->getInput(3)->getName()); auto* shuffle_layer = -- GitLab