提交 13288621 编写于 作者: Z zlsh80826

change preprocessing mask position

上级 2ca3fe5d
...@@ -11,6 +11,7 @@ limitations under the License. */ ...@@ -11,6 +11,7 @@ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.h"
namespace paddle { namespace paddle {
...@@ -80,11 +81,35 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter { ...@@ -80,11 +81,35 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) { if (engine_->with_dynamic_shape()) {
auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
plugin::CastIntPluginDynamic* cast_plugin =
new plugin::CastIntPluginDynamic();
auto cast_layer = engine_->AddPluginV2(&pos_tensor, 1, cast_plugin);
auto casted_pos_tensor = cast_layer->getOutput(0);
auto reshape_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *casted_pos_tensor);
nvinfer1::Dims2 reshape_dim(0, 0);
nvinfer1::Permutation perm{1, 0, 2};
reshape_layer->setFirstTranspose(perm);
reshape_layer->setReshapeDimensions(reshape_dim);
auto imask_layer =
TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0),
nvinfer1::ReduceOperation::kMAX, 1, false);
engine_->SetITensor("imask_tensor", imask_layer->getOutput(0));
plugin::DynamicPluginTensorRT* plugin = nullptr; plugin::DynamicPluginTensorRT* plugin = nullptr;
plugin = new plugin::EmbEltwiseLayernormPluginDynamic<float>( plugin = new plugin::EmbEltwiseLayernormPluginDynamic<float>(
input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden, input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
eps); eps);
layer = engine_->AddPluginV2(input_ids.data(), input_num, plugin); auto plugin_layer =
engine_->AddPluginV2(input_ids.data(), input_num, plugin);
nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
auto trans_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *plugin_layer->getOutput(0));
trans_layer->setFirstTranspose(permutation);
layer = trans_layer;
} else { } else {
PADDLE_THROW(platform::errors::Fatal( PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static" "You are running the Ernie(Bert) model in static"
......
...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and ...@@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/plugin/cast_int_plugin.h"
#include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.h"
namespace paddle { namespace paddle {
...@@ -114,13 +113,13 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -114,13 +113,13 @@ class MultiheadMatMulOpConverter : public OpConverter {
static_cast<void*>(bias_data), static_cast<void*>(bias_data),
static_cast<int32_t>(bias_t->numel())}; static_cast<int32_t>(bias_t->numel())};
nvinfer1::Permutation permutation{1, 0, 2, 3, 4}; nvinfer1::Permutation permutation{0, 1, 2, 3, 4};
auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
trans_layer->setFirstTranspose(permutation); trans_layer->setFirstTranspose(permutation);
auto* fc_layer = TRT_ENGINE_ADD_LAYER( auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *trans_layer->getOutput(0), n, weight, bias); engine_, FullyConnected, *trans_layer->getOutput(0), n, weight, bias);
/*
auto pos_tensor = engine_->GetITensor("eval_placeholder_2"); auto pos_tensor = engine_->GetITensor("eval_placeholder_2");
plugin::CastIntPluginDynamic* cast_plugin = plugin::CastIntPluginDynamic* cast_plugin =
new plugin::CastIntPluginDynamic(); new plugin::CastIntPluginDynamic();
...@@ -135,8 +134,11 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -135,8 +134,11 @@ class MultiheadMatMulOpConverter : public OpConverter {
reshape_layer->setFirstTranspose(perm); reshape_layer->setFirstTranspose(perm);
reshape_layer->setReshapeDimensions(reshape_dim); reshape_layer->setReshapeDimensions(reshape_dim);
auto reduce_layer = auto reduce_layer =
TRT_ENGINE_ADD_LAYER(engine_, Reduce, *reshape_layer->getOutput(0), TRT_ENGINE_ADD_LAYER(engine_, Reduce,
*reshape_layer->getOutput(0),
nvinfer1::ReduceOperation::kMAX, 1, false); nvinfer1::ReduceOperation::kMAX, 1, false);
*/
auto imask_tensor = engine_->GetITensor("imask_tensor");
auto creator = GetPluginRegistry()->getPluginCreator( auto creator = GetPluginRegistry()->getPluginCreator(
"CustomQKVToContextPluginDynamic", "1"); "CustomQKVToContextPluginDynamic", "1");
...@@ -149,8 +151,7 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -149,8 +151,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
{"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1}, {"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
{"hidden_size", &hidden, nvinfer1::PluginFieldType::kINT32, 1}, {"hidden_size", &hidden, nvinfer1::PluginFieldType::kINT32, 1},
{"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1}, {"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1},
{"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, {"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, 1},
1}, // no bool type
}; };
nvinfer1::PluginFieldCollection* pluginPtr = nvinfer1::PluginFieldCollection* pluginPtr =
static_cast<nvinfer1::PluginFieldCollection*>( static_cast<nvinfer1::PluginFieldCollection*>(
...@@ -164,7 +165,8 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -164,7 +165,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
creator->createPlugin("CustomQKVToContextPluginDynamic", pluginPtr); creator->createPlugin("CustomQKVToContextPluginDynamic", pluginPtr);
std::vector<nvinfer1::ITensor*> plugin_inputs; std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.push_back(fc_layer->getOutput(0)); plugin_inputs.push_back(fc_layer->getOutput(0));
plugin_inputs.push_back(reduce_layer->getOutput(0)); // plugin_inputs.push_back(reduce_layer->getOutput(0));
plugin_inputs.push_back(imask_tensor);
auto plugin_layer = engine_->network()->addPluginV2( auto plugin_layer = engine_->network()->addPluginV2(
plugin_inputs.data(), plugin_inputs.size(), *pluginObj); plugin_inputs.data(), plugin_inputs.size(), *pluginObj);
assert(plugin_layer != nullptr); assert(plugin_layer != nullptr);
......
...@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter { ...@@ -23,9 +23,8 @@ class SliceOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
// This OP is implemented by trt dynamic shape plugin. // This OP is implemented by trt dynamic shape plugin.
// Dynamic shape plugin requires TRT version greater than 6.0. // Dynamic shape plugin requires TRT version greater than 6.0.
std::cerr << "slice op converter\n" << std::endl;
#if IS_TRT_VERSION_GE(6000) #if IS_TRT_VERSION_GE(6000)
VLOG(4) << "convert slice op to tensorrt layer"; VLOG(4) << "convert slice op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
...@@ -41,10 +40,17 @@ class SliceOpConverter : public OpConverter { ...@@ -41,10 +40,17 @@ class SliceOpConverter : public OpConverter {
nvinfer1::ILayer* layer = nullptr; nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) { if (engine_->with_dynamic_shape()) {
nvinfer1::Permutation permutation{1, 0, 2, 3, 4};
auto trans_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
trans_layer->setFirstTranspose(permutation);
std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.emplace_back(trans_layer->getOutput(0));
bool ban_fp16 = engine_->disable_trt_plugin_fp16(); bool ban_fp16 = engine_->disable_trt_plugin_fp16();
plugin::SlicePluginDynamic* plugin = plugin::SlicePluginDynamic* plugin =
new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16); new plugin::SlicePluginDynamic(starts, ends, axes, ban_fp16);
layer = engine_->AddPluginV2(&input, 1, plugin); layer = engine_->AddPluginV2(plugin_inputs.data(), plugin_inputs.size(),
plugin);
} else { } else {
PADDLE_THROW(platform::errors::Fatal( PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static" "You are running the Ernie(Bert) model in static"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册