Unverified commit 7c38114f, authored by: W wenbin, committed by: GitHub

move semantic checks to op_teller (#32279)

* move semantic checks to op_teller

* more ops

* more ops

* revert block related change

* part1

* revert activation

* remove if

* remove const_cast

* resolve conflict

* remove const_cast

* delete useless var

* replace vlog(1) with vlog(3), replace assert with PADDLE_ENFORCE

* down to 19 files
Parent 1b83de2e
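For orientation, below is a minimal, self-contained sketch of the pattern this commit applies. It is not Paddle code: `OpDesc` here is a hypothetical stand-in for `framework::OpDesc`, and `TellGelu` condenses one of the new checks added to `OpTeller::Tell`. The point it illustrates is that a hard failure (`PADDLE_ENFORCE_*` inside a converter) becomes a soft rejection (`VLOG(3)` plus `return false` in the teller), so an unsupported op falls back to the regular Paddle executor instead of aborting the TensorRT engine build.

```cpp
// Minimal standalone sketch (simplified types, NOT the actual Paddle classes)
// of the check-migration pattern: validate the op description up front and
// reject it with a log message instead of throwing inside the converter.
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for framework::OpDesc.
struct OpDesc {
  std::map<std::string, std::vector<std::string>> inputs;
  std::vector<std::string> Input(const std::string& name) const {
    auto it = inputs.find(name);
    return it == inputs.end() ? std::vector<std::string>{} : it->second;
  }
};

// Teller-style check: report the reason and reject, rather than throwing.
bool TellGelu(const OpDesc& desc) {
  if (desc.Input("X").size() != 1) {
    std::cerr << "gelu op has only 1 input, but got "
              << desc.Input("X").size() << "\n";
    return false;  // op is not offloaded to TensorRT; Paddle runs it instead
  }
  return true;
}

int main() {
  OpDesc ok{{{"X", {"x0"}}}};
  OpDesc bad{{{"X", {"x0", "x1"}}}};
  std::cout << std::boolalpha << TellGelu(ok) << " " << TellGelu(bad) << "\n";
  return 0;
}
```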
......@@ -38,38 +38,6 @@ class BatchNormOpConverter : public OpConverter {
VLOG(3) << "convert a fluid batch norm op to tensorrt batch_norm";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"Invalid input X's size of batch_norm TRT converter. "
"Expected 1, received %d.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1,
platform::errors::InvalidArgument(
"Invalid input Bias's size of batch_norm TRT "
"converter. Expected 1, received %d.",
op_desc.Input("Bias").size())); // Bias is a weight
PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1,
platform::errors::InvalidArgument(
"Invalid input Mean's size of batch_norm TRT "
"converter. Expected 1, received %d.",
op_desc.Input("Mean").size())); // Mean is a weight
PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1,
platform::errors::InvalidArgument(
"Invalid input Scale's size of batch_norm TRT "
"converter. Expected 1, received %d.",
op_desc.Input("Scale").size())); // Scale is a weight
PADDLE_ENFORCE_EQ(
op_desc.Input("Variance").size(), 1,
platform::errors::InvalidArgument(
"Invalid input Variance's size of batch_norm TRT converter. "
"Expected 1, received %d.",
op_desc.Input("Variance").size())); // Variance is a weight
PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1,
platform::errors::InvalidArgument(
"Invalid output Y's size of batch_norm TRT "
"converter. Expected 1, received %d.",
op_desc.Output("Y").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
// Declare weights
auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
......
......@@ -36,18 +36,6 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
VLOG(3) << "convert a fluid " << name << " op to tensorrt layer without bias";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("Input").size(), 1UL,
platform::errors::InvalidArgument(
"TRT Conv2d expect 1 input, but got %d input.",
op_desc.Input("Input").size()));
PADDLE_ENFORCE_EQ(op_desc.Input("Filter").size(), 1UL,
platform::errors::InvalidArgument(
"TRT Conv2d expect 1 filter, but got %d filter.",
op_desc.Input("Filter").size()));
PADDLE_ENFORCE_EQ(op_desc.Output("Output").size(), 1UL,
platform::errors::InvalidArgument(
"TRT Conv2d expect 1 output, but got %d output.",
op_desc.Output("Output").size()));
auto* X = engine->GetITensor(op_desc.Input("Input").front());
std::string filter_var_name = op_desc.Input("Filter").front();
......@@ -61,13 +49,6 @@ void ConvertConv2d(TensorRTEngine* engine, const framework::proto::OpDesc& op,
if (enable_int8) {
#if IS_TRT_VERSION_GE(5000)
if (op_desc.Type() != "conv2d_transpose") {
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("Input_scale"), true,
platform::errors::InvalidArgument("Input scale not found. TRT int8"
" requires conv/deconv to have "
"input quantization scales."));
}
float in_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale")) * 127;
auto weight_scale =
......@@ -184,14 +165,6 @@ class Deconv2dOpConverter : public OpConverter {
return layer;
},
[](nvinfer1::IDeconvolutionLayer* layer, nvinfer1::DimsHW& dilations) {
// In trt Deconv, dilation should be 1, ohter values are not
// supported.
bool condition = (dilations.d[0] == 1 && dilations.d[1] == 1);
PADDLE_ENFORCE_EQ(condition, true,
platform::errors::InvalidArgument(
"In Deconv, Dilations must be (1, 1) for "
"tensorRT, but given (%d, %d)",
dilations.d[0], dilations.d[1]));
},
"conv2d_transpose");
}
......
......@@ -43,25 +43,6 @@ class ElementwiseWeightOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(
......@@ -193,25 +174,6 @@ class ElementwiseTensorOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = %u.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(
op_desc.Input("Y").size(), 1,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = %u.",
op_desc.Input("Y").size())); // Y is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Out").size(), 1,
platform::errors::InvalidArgument(
"The input op's Output(\"Out\").size() "
"should equal to 1, but received Output(\"Out\").size() = %u.",
op_desc.Output("Out").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
std::vector<nvinfer1::ITensor*> itensors;
......
......@@ -36,11 +36,6 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
auto id_names = op_desc.Input("Ids");
auto emb_names = op_desc.Input("Embs");
PADDLE_ENFORCE_EQ(id_names.size(), emb_names.size(),
platform::errors::InvalidArgument(
"The id and emb size of fused EmbEltwiseLayerNormOp "
"should be same "));
int input_num = id_names.size();
// Declare inputs
......@@ -91,99 +86,90 @@ class EmbEltwiseLayerNormOpConverter : public OpConverter {
nvinfer1::ILayer* layer = nullptr;
bool enable_int8 = op_desc.HasAttr("enable_int8");
if (engine_->with_dynamic_shape()) {
if (engine_->use_oss()) {
int output_fp16 = static_cast<int>((engine_->WithFp16() == 1) ? 1 : 0);
if (enable_int8) {
output_fp16 = 1;
}
PADDLE_ENFORCE_EQ(
output_fp16, 1,
platform::errors::InvalidArgument(
"Only Precision::KHalf(fp16) is supported when infering "
"ernie(bert) model with config.EnableTensorRtOSS(). "
"But Precision::KFloat32 is setted."));
const std::vector<nvinfer1::PluginField> fields{
{"bert_embeddings_layernorm_beta", bias,
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(bias_size)},
{"bert_embeddings_layernorm_gamma", scale,
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(scale_size)},
{"bert_embeddings_word_embeddings", input_embs[0],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[0])},
{"bert_embeddings_token_type_embeddings", input_embs[2],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[2])},
{"bert_embeddings_position_embeddings", input_embs[1],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[1])},
{"output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1},
};
// remember to free
nvinfer1::PluginFieldCollection* plugin_ptr =
static_cast<nvinfer1::PluginFieldCollection*>(
malloc(sizeof(*plugin_ptr) +
fields.size() * sizeof(nvinfer1::PluginField)));
plugin_ptr->nbFields = static_cast<int>(fields.size());
plugin_ptr->fields = fields.data();
std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(0)->getName())); // word_embedding,
// eval_placeholder_0
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(1)->getName())); // sent_embedding,
// eval_placeholder_1
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(2)->getName())); // cu_seqlens,
// eval_placeholder_2
auto max_seqlen_tensor =
engine_->GetITensor(engine_->network()->getInput(3)->getName());
auto* shuffle_layer = TRT_ENGINE_ADD_LAYER(
engine_, Shuffle,
*const_cast<nvinfer1::ITensor*>(max_seqlen_tensor));
nvinfer1::Dims shape_dim;
shape_dim.nbDims = 1;
shape_dim.d[0] = -1;
shuffle_layer->setReshapeDimensions(shape_dim);
plugin_inputs.emplace_back(
shuffle_layer->getOutput(0)); // max_seqlen, eval_placeholder_3
auto creator = GetPluginRegistry()->getPluginCreator(
"CustomEmbLayerNormPluginDynamic", "2");
auto plugin_obj = creator->createPlugin(
"CustomEmbLayerNormPluginDynamic", plugin_ptr);
auto plugin_layer = engine_->network()->addPluginV2(
plugin_inputs.data(), plugin_inputs.size(), *plugin_obj);
layer = plugin_layer;
free(plugin_ptr);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "emb_eltwise_layernorm",
{output_name, std::string("qkv_plugin_mask")},
test_mode);
} else {
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
plugin::DynamicPluginTensorRT* plugin = nullptr;
plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
eps, with_fp16);
layer = engine_->AddDynamicPlugin(input_ids.data(), input_num, plugin);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "emb_eltwise_layernorm", {output_name},
test_mode);
if (engine_->use_oss()) {
int output_fp16 = static_cast<int>((engine_->WithFp16() == 1) ? 1 : 0);
if (enable_int8) {
output_fp16 = 1;
}
PADDLE_ENFORCE_EQ(
output_fp16, 1,
platform::errors::InvalidArgument(
"Only Precision::KHalf(fp16) is supported when infering "
"ernie(bert) model with config.EnableTensorRtOSS(). "
"But Precision::KFloat32 is setted."));
const std::vector<nvinfer1::PluginField> fields{
{"bert_embeddings_layernorm_beta", bias,
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(bias_size)},
{"bert_embeddings_layernorm_gamma", scale,
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(scale_size)},
{"bert_embeddings_word_embeddings", input_embs[0],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[0])},
{"bert_embeddings_token_type_embeddings", input_embs[2],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[2])},
{"bert_embeddings_position_embeddings", input_embs[1],
nvinfer1::PluginFieldType::kFLOAT32,
static_cast<int32_t>(emb_sizes[1])},
{"output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1},
};
// remember to free
nvinfer1::PluginFieldCollection* plugin_ptr =
static_cast<nvinfer1::PluginFieldCollection*>(
malloc(sizeof(*plugin_ptr) +
fields.size() * sizeof(nvinfer1::PluginField)));
plugin_ptr->nbFields = static_cast<int>(fields.size());
plugin_ptr->fields = fields.data();
std::vector<nvinfer1::ITensor*> plugin_inputs;
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(0)->getName())); // word_embedding,
// eval_placeholder_0
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(1)->getName())); // sent_embedding,
// eval_placeholder_1
plugin_inputs.emplace_back(engine_->GetITensor(
engine_->network()->getInput(2)->getName())); // cu_seqlens,
// eval_placeholder_2
auto max_seqlen_tensor =
engine_->GetITensor(engine_->network()->getInput(3)->getName());
auto* shuffle_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *max_seqlen_tensor);
nvinfer1::Dims shape_dim;
shape_dim.nbDims = 1;
shape_dim.d[0] = -1;
shuffle_layer->setReshapeDimensions(shape_dim);
plugin_inputs.emplace_back(
shuffle_layer->getOutput(0)); // max_seqlen, eval_placeholder_3
auto creator = GetPluginRegistry()->getPluginCreator(
"CustomEmbLayerNormPluginDynamic", "2");
auto plugin_obj =
creator->createPlugin("CustomEmbLayerNormPluginDynamic", plugin_ptr);
auto plugin_layer = engine_->network()->addPluginV2(
plugin_inputs.data(), plugin_inputs.size(), *plugin_obj);
layer = plugin_layer;
free(plugin_ptr);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "emb_eltwise_layernorm",
{output_name, std::string("qkv_plugin_mask")},
test_mode);
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
plugin::DynamicPluginTensorRT* plugin = nullptr;
plugin = new plugin::EmbEltwiseLayernormPluginDynamic(
input_embs, bias, scale, emb_sizes, bias_size, scale_size, hidden,
eps, with_fp16);
layer = engine_->AddDynamicPlugin(input_ids.data(), input_num, plugin);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "emb_eltwise_layernorm", {output_name},
test_mode);
}
#else
......
......@@ -47,15 +47,7 @@ class GeluOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE_EQ(input_num, 1,
platform::errors::InvalidArgument(
"gelu op has only 1 input, but got %d", input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE_EQ(output_num, 1,
platform::errors::InvalidArgument(
"gelu op has only 1 output, but got %d", output_num));
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
......
......@@ -41,17 +41,7 @@ class HardSwishOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
int input_num = op_desc.Input("X").size();
PADDLE_ENFORCE_EQ(
input_num, 1,
platform::errors::InvalidArgument(
"HardSwish op has only 1 input, but got %d", input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE_EQ(
output_num, 1,
platform::errors::InvalidArgument(
"HardSwish op has only 1 output, but got %d", output_num));
const float threshold =
op_desc.HasAttr("threshold")
......
......@@ -25,25 +25,6 @@ class LayerNormOpConverter : public OpConverter {
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid layer_norm op to tensorrt layer_norm plugin";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(
op_desc.Input("X").size(), 1,
platform::errors::InvalidArgument(
"input of layer_norm op converter should be 1, got %d",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1,
platform::errors::InvalidArgument(
"Bias of layer_norm op converter should be 1, got %d",
op_desc.Input("Bias").size())); // Bias is a weight
PADDLE_ENFORCE_EQ(
op_desc.Input("Scale").size(), 1,
platform::errors::InvalidArgument(
"Scale of layer_norm op converter should be 1, got %d",
op_desc.Input("Scale").size())); // Scale is a weight
PADDLE_ENFORCE_EQ(
op_desc.Output("Y").size(), 1,
platform::errors::InvalidArgument(
"output of layer_norm op converter should be 1, got %d",
op_desc.Input("Y").size()));
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front());
......
......@@ -36,21 +36,7 @@ class LeakyReluOpConverter : public OpConverter {
VLOG(4) << "convert fluid leaky_relu op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
size_t input_num = op_desc.Input("X").size();
PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid number of TRT leaky_relu op converter "
"inputs. Expected 1, but received %d",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE_EQ(output_num, 1UL,
platform::errors::InvalidArgument(
"Invalid number of TRT leaky_relu op converter "
"outputs. Expected 1, but received %d",
output_num));
// Get attrs
float alpha = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
nvinfer1::ILayer* output_layer = nullptr;
......
......@@ -65,13 +65,6 @@ class NearestInterpolateOpConverter : public OpConverter {
scale_w = scale;
} else {
// axis are different in static/dynamic mode
PADDLE_ENFORCE_GT(
out_h, 0, platform::errors::InvalidArgument(
"out_h must be greater than 0 if scale is not set."));
PADDLE_ENFORCE_GT(
out_w, 0, platform::errors::InvalidArgument(
"out_w must be greater than 0 if scale is not set."));
bool with_dynamic = engine_->with_dynamic_shape();
int h_axis = (data_layout == framework::DataLayout::kNCHW) + with_dynamic;
......
......@@ -43,8 +43,6 @@ class PadOpConverter : public OpConverter {
const std::vector<int> paddings =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("paddings"));
const float pad_value =
BOOST_GET_CONST(float, op_desc.GetAttr("pad_value"));
nvinfer1::Dims input_shape = input->getDimensions();
int nbDims = input_shape.nbDims;
......@@ -62,9 +60,6 @@ class PadOpConverter : public OpConverter {
"(nbDims + 1) * 2 == pad_size. But "
"received nbDims:%d, pad_size:%d.",
nbDims, pad_size));
PADDLE_ENFORCE_EQ(pad_value, 0.0,
platform::errors::InvalidArgument(
"The pad layer of TRT only support zero."));
nvinfer1::DimsHW pre_pad(paddings[pad_size - 4], paddings[pad_size - 2]);
nvinfer1::DimsHW post_pad(paddings[pad_size - 3], paddings[pad_size - 1]);
......
......@@ -66,15 +66,6 @@ class Pool2dOpConverter : public OpConverter {
VLOG(4)
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1UL,
platform::errors::InvalidArgument(
"TRT Pool2d expect 1 input, but got %d input.",
op_desc.Input("X").size()));
PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1UL,
platform::errors::InvalidArgument(
"TRT Pool2d expect 1 Output, but got %d output.",
op_desc.Output("Out").size()));
auto *input1 = engine_->GetITensor(op_desc.Input("X")[0]);
nvinfer1::Dims input_shape = input1->getDimensions();
int input_dims = input_shape.nbDims;
......@@ -110,10 +101,6 @@ class Pool2dOpConverter : public OpConverter {
nv_pool_type = nvinfer1::PoolingType::kAVERAGE;
reduce_operation = nvinfer1::ReduceOperation::kAVG;
plugin_pool_type = plugin::PoolPlugin::PoolType::avg;
} else {
PADDLE_THROW(platform::errors::Fatal(
"Wrong pool op type, the trt do not support the %s pool type.",
pool_type));
}
nvinfer1::DimsHW nv_ksize(ksize[0], ksize[1]);
......
......@@ -31,19 +31,7 @@ class PReluOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
size_t input_num = op_desc.Input("X").size();
PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid input X's size of prelu TRT converter. "
"Expected 1, received %d.",
input_num));
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
// Get output
size_t output_num = op_desc.Output("Out").size();
PADDLE_ENFORCE_EQ(output_num, 1UL,
platform::errors::InvalidArgument(
"Invalid output Out's size of prelu TRT converter. "
"Expected 1, received %d.",
output_num));
// Get attrs
std::string mode = BOOST_GET_CONST(std::string, op_desc.GetAttr("mode"));
//
......
......@@ -62,12 +62,6 @@ class RoiAlignOpConverter : public OpConverter {
std::vector<nvinfer1::ITensor*> inputs{input_tensor, rois_tensor};
nvinfer1::ILayer* layer = nullptr;
PADDLE_ENFORCE_EQ(
engine_->with_dynamic_shape(), true,
platform::errors::InvalidArgument(
"TRT roi align plugin only accept the dynamic shape, because that "
"the roi_align will change the batch size."));
auto* roi_align_plugin = new plugin::RoiAlignPluginDynamic(
data_type_, pooled_height, pooled_width, spatial_scale, sampling_ratio);
auto roi_align_layer = engine_->network()->addPluginV2(
......
......@@ -50,12 +50,6 @@ class ShuffleChannelOpConverter : public OpConverter {
int w = input_dims.d[2];
int group = BOOST_GET_CONST(int, op_desc.GetAttr("group"));
if (engine_->with_dynamic_shape()) {
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, "
"the shuffle_channel op does not support dynamic shape yet"));
}
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
nvinfer1::Dims4 reshape_dim(group, c / group, h, w);
layer->setReshapeDimensions(reshape_dim);
......
......@@ -52,57 +52,57 @@ class SkipLayerNormOpConverter : public OpConverter {
bool enable_int8 = op_desc.HasAttr("enable_int8");
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
if (engine_->use_oss()) {
auto creator = GetPluginRegistry()->getPluginCreator(
"CustomSkipLayerNormPluginDynamic", "2");
assert(creator != nullptr);
int type = static_cast<int>((engine_->WithFp16() == 1)
? nvinfer1::DataType::kHALF
: nvinfer1::DataType::kFLOAT);
int ld = input1->getDimensions().d[2]; // hidden dimension
assert(ld > 0);
if (enable_int8) {
type = static_cast<int>(nvinfer1::DataType::kHALF);
}
const std::vector<nvinfer1::PluginField> fields{
{"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
{"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1},
{"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
{"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size},
};
nvinfer1::PluginFieldCollection* pluginPtr =
static_cast<nvinfer1::PluginFieldCollection*>(
malloc(sizeof(*pluginPtr) +
fields.size() *
sizeof(nvinfer1::PluginField))); // remember to free
pluginPtr->nbFields = static_cast<int>(fields.size());
pluginPtr->fields = fields.data();
auto pluginObj = creator->createPlugin(
"CustomSkipLayerNormPluginDynamic", pluginPtr);
auto plugin_layer = engine_->network()->addPluginV2(
inputs.data(), inputs.size(), *pluginObj);
assert(plugin_layer != nullptr);
layer = plugin_layer;
} else {
float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::SkipLayerNormPluginDynamic* plugin =
new plugin::SkipLayerNormPluginDynamic(bias, scale, bias_size,
scale_size, eps, with_fp16);
layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin);
if (engine_->use_oss()) {
auto creator = GetPluginRegistry()->getPluginCreator(
"CustomSkipLayerNormPluginDynamic", "2");
PADDLE_ENFORCE_NE(
creator, nullptr,
platform::errors::InvalidArgument(
"fail to get creator of CustomSkipLayerNormPluginDynamic"));
int type = static_cast<int>((engine_->WithFp16() == 1)
? nvinfer1::DataType::kHALF
: nvinfer1::DataType::kFLOAT);
int ld = input1->getDimensions().d[2]; // hidden dimension
PADDLE_ENFORCE_GT(ld, 0, platform::errors::InvalidArgument(
"in CustomSkipLayerNormPluginDynamic hidden "
"dimension should > 0"));
if (enable_int8) {
type = static_cast<int>(nvinfer1::DataType::kHALF);
}
const std::vector<nvinfer1::PluginField> fields{
{"type_id", &type, nvinfer1::PluginFieldType::kINT32, 1},
{"ld", &ld, nvinfer1::PluginFieldType::kINT32, 1},
{"beta", bias, nvinfer1::PluginFieldType::kFLOAT32, bias_size},
{"gamma", scale, nvinfer1::PluginFieldType::kFLOAT32, scale_size},
};
nvinfer1::PluginFieldCollection* pluginPtr =
static_cast<nvinfer1::PluginFieldCollection*>(
malloc(sizeof(*pluginPtr) +
fields.size() *
sizeof(nvinfer1::PluginField))); // remember to free
pluginPtr->nbFields = static_cast<int>(fields.size());
pluginPtr->fields = fields.data();
auto pluginObj =
creator->createPlugin("CustomSkipLayerNormPluginDynamic", pluginPtr);
auto plugin_layer = engine_->network()->addPluginV2(
inputs.data(), inputs.size(), *pluginObj);
PADDLE_ENFORCE_NE(
plugin_layer, nullptr,
platform::errors::InvalidArgument(
"fail to add CustomSkipLayerNormPluginDynamic layer"));
layer = plugin_layer;
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::SkipLayerNormPluginDynamic* plugin =
new plugin::SkipLayerNormPluginDynamic(bias, scale, bias_size,
scale_size, eps, with_fp16);
layer = engine_->AddDynamicPlugin(inputs.data(), 2, plugin);
}
auto output_name = op_desc.Output("Out")[0];
......
......@@ -44,15 +44,6 @@ class SliceOpConverter : public OpConverter {
std::vector<int> ends =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("ends"));
PADDLE_ENFORCE_EQ(
starts.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of starts must be equal to the size of axes."));
PADDLE_ENFORCE_EQ(
ends.size(), axes.size(),
platform::errors::InvalidArgument(
"The size of ends must be equal to the size of axes."));
auto input_dims = input->getDimensions();
if (!engine_->with_dynamic_shape()) {
// notice that input shape is [CHW] without batch axis when input has
......@@ -62,10 +53,6 @@ class SliceOpConverter : public OpConverter {
}
input_dims.d[0] = 1; // fake batchsize, not useful here
for (size_t i = 0; i < axes.size(); i++) {
// split on batch is not supported in TensorRT
PADDLE_ENFORCE_NE(axes[i], 0, platform::errors::InvalidArgument(
"Invalid slice axis. Slice on batch "
"axis is not supported in TensorRT"));
if (starts[i] < 0) {
starts[i] = std::max(starts[i] + input_dims.d[axes[i]], 0);
}
......
......@@ -33,17 +33,7 @@ class SplitOpConverter : public OpConverter {
size_t output_num = op_desc.Output("Out").size();
// Get Attrs
PADDLE_ENFORCE_EQ(input_num, 1UL,
platform::errors::InvalidArgument(
"Invalid input X's size of split TRT converter. "
"Expected 1, received %d.",
input_num));
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
// split on batch is not supported in TensorRT
PADDLE_ENFORCE_NE(
axis, 0,
platform::errors::InvalidArgument(
"Invalid split axis. Split on batch is not supported in TensorRT"));
std::vector<int> output_lengths =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("sections"));
......
......@@ -58,26 +58,19 @@ class StackOpConverter : public OpConverter {
}
nvinfer1::ILayer* layer = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::StackPluginDynamic* plugin =
new plugin::StackPluginDynamic(axis, input_num, with_fp16);
layer = engine_->AddDynamicPlugin(inputs, input_num, plugin);
assert(layer != nullptr);
bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::StackPluginDynamic* plugin =
new plugin::StackPluginDynamic(axis, input_num, with_fp16);
layer = engine_->AddDynamicPlugin(inputs, input_num, plugin);
PADDLE_ENFORCE_NOT_NULL(
layer, platform::errors::InvalidArgument(
"trt stack layer in converter could not be created."));
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
PADDLE_THROW(platform::errors::Fatal(
"You are running the Ernie(Bert) model in static"
"shape mode, which is not supported for the time being.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape mode."));
}
auto output_name = op_desc.Output("Y").front();
RreplenishLayerAndOutput(layer, "stack", {output_name}, test_mode);
free(inputs);
......
......@@ -137,13 +137,93 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return false;
for (auto& teller : tellers_) {
if (op_type == "pool2d" || op_type == "conv2d" ||
op_type == "depthwise_conv2d" || op_type == "conv2d_transpose") {
if (op_type == "depthwise_conv2d") {
std::vector<int> paddings =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
if (paddings.size() > 2) return false;
}
if (op_type == "pool2d") {
std::vector<int> paddings =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
if (paddings.size() > 2) return false;
if (desc.Input("X").size() != 1) {
VLOG(3) << "TRT Pool2d expect 1 input, but got "
<< desc.Input("X").size();
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "TRT Pool2d has only 1 output, but got "
<< desc.Output("Out").size();
return false;
}
if (!desc.HasAttr("pooling_type")) {
return false;
} else {
std::string pool_type =
BOOST_GET_CONST(std::string, desc.GetAttr("pooling_type"));
if (pool_type != "max" && pool_type != "avg") {
VLOG(3) << "Wrong pool op type, the trt do not support the "
<< pool_type << " pool type.";
return false;
}
}
}
if (op_type == "conv2d" || op_type == "conv2d_transpose" ||
op_type == "conv2d_fusion") {
std::vector<int> paddings =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("paddings"));
// conv2d and conv2d_transpose need padding check
if (paddings.size() > 2 && op_type != "conv2d_fusion") return false;
if (desc.Input("Input").size() != 1) {
VLOG(3) << "TRT Conv2d expect 1 input, but got "
<< desc.Input("Input").size() << " input.";
return false;
}
if (desc.Input("Filter").size() != 1) {
VLOG(3) << "TRT Conv2d expect 1 filter, but got "
<< desc.Input("Filter").size() << " filter.";
return false;
}
if (desc.HasAttr("enable_int8")) {
if (op_type == "conv2d" || op_type == "conv2d_fusion") {
if (!desc.HasAttr("Input_scale")) {
VLOG(3) << "Input scale not found. TRT int8"
" requires conv/deconv to have "
"input quantization scales.";
return false;
}
}
}
if (op_type == "conv2d_transpose") {
if (!desc.HasAttr("dilations")) {
return false;
} else {
const std::vector<int> dilations =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("dilations"));
if (dilations[0] != 1 || dilations[1] != 1) {
VLOG(3) << "In conv2d_transpose, Dilations must be (1, 1) for "
"tensorRT, but given ("
<< dilations[0] << ", " << dilations[1] << ")";
return false;
}
}
}
if (desc.Output("Output").size() != 1) {
VLOG(3) << "TRT Conv2d expect 1 output, but got "
<< desc.Output("Output").size() << " output.";
return false;
}
}
if (op_type == "matmul") {
auto* block = desc.Block();
for (auto& param_name : desc.Inputs()) {
......@@ -151,7 +231,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
auto* var_desc = block->FindVar(var_name);
const auto shape = var_desc->GetShape();
if (shape.size() < 3) {
VLOG(1)
VLOG(3)
<< "matmul op dims < 3 not supported in tensorrt, but got dims "
<< shape.size() << ", so jump it.";
return false;
......@@ -189,7 +269,18 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
}
}
if (op_type == "flatten2" || op_type == "flatten") {
if (op_type == "flatten2") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) {
return false;
} else {
if (with_dynamic_shape) return false;
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false;
}
}
if (op_type == "flatten") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) {
return false;
......@@ -229,7 +320,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
auto* var_desc = block->FindVar(var_name);
const auto shape = var_desc->GetShape();
if (shape.size() != 3) {
VLOG(1) << "multiclass_nms op dims != 3 not supported in tensorrt, "
VLOG(3) << "multiclass_nms op dims != 3 not supported in tensorrt, "
"but got dims "
<< shape.size() << ", so jump it.";
return false;
......@@ -252,7 +343,19 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (registry == nullptr) return false;
}
if (op_type == "fc" || op_type == "mul") {
if (op_type == "mul") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims != 1 && x_num_col_dims != 2) {
return false;
}
}
if (op_type == "fc") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
......@@ -279,6 +382,25 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
auto interp_method =
BOOST_GET_CONST(std::string, desc.GetAttr("interp_method"));
if (interp_method != "nearest") return false;
if (!desc.HasAttr("scale") || !desc.HasAttr("out_h") ||
!desc.HasAttr("out_w")) {
return false;
} else {
auto scale = BOOST_GET_CONST(float, desc.GetAttr("scale"));
auto out_h = BOOST_GET_CONST(int, desc.GetAttr("out_h"));
auto out_w = BOOST_GET_CONST(int, desc.GetAttr("out_w"));
if (!(scale > 0.f && (out_h <= 0 && out_w <= 0))) {
if (out_h <= 0) {
VLOG(3) << "out_h must be greater than 0 if scale is not set.";
return false;
}
if (out_w <= 0) {
VLOG(3) << "out_w must be greater than 0 if scale is not set.";
return false;
}
}
}
}
if (op_type == "roi_align") {
......@@ -303,6 +425,235 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (spatial_scale <= 0.f) return false;
}
if (op_type == "hard_swish") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "HardSwish op has only 1 input, but got "
<< desc.Input("X").size();
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "HardSwish op has only 1 output, but got "
<< desc.Output("Out").size();
return false;
}
}
if (op_type == "batch_norm") {
const std::vector<std::string> bn_inputs = {"X", "Bias", "Mean", "Scale",
"Variance"};
for (unsigned int i = 0; i < bn_inputs.size(); i++) {
if (desc.Input(bn_inputs[i]).size() != 1) {
VLOG(3) << "Invalid " << bn_inputs[i]
<< "'s size of batch_norm TRT "
"converter. Expected 1, received "
<< desc.Input(bn_inputs[i]).size() << ".";
return false;
}
}
if (desc.Output("Y").size() != 1) {
VLOG(3) << "Invalid output Y's size of batch_norm TRT "
"converter. Expected 1, received "
<< desc.Output("Y").size() << ".";
return false;
}
}
if (op_type == "split") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "Invalid input X's size of split TRT converter. "
"Expected 1, received "
<< desc.Input("X").size() << ".";
return false;
}
if (!desc.HasAttr("axis")) {
return false;
} else {
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis == 0) {
VLOG(3) << "Invalid split axis. Split on batch is not supported in "
"TensorRT";
return false;
}
}
}
if (op_type == "slice") {
if (!desc.HasAttr("axes") || !desc.HasAttr("starts") ||
!desc.HasAttr("ends")) {
return false;
} else {
std::vector<int> axes =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("axes"));
std::vector<int> starts =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("starts"));
std::vector<int> ends =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("ends"));
if (axes.size() != starts.size() || axes.size() != ends.size()) {
return false;
}
if (!with_dynamic_shape) {
for (size_t i = 0; i < axes.size(); i++) {
if (axes[i] == 0) {
VLOG(3) << "Invalid slice axis. Slice on batch axis is not "
"supported in TensorRT";
return false;
}
}
}
}
}
if (op_type == "elementwise_add" || op_type == "elementwise_mul") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "The input op's Input(\"X\").size() "
"should equal to 1, but received Input(\"X\").size() = "
<< desc.Input("X").size() << ".";
return false;
}
if (desc.Input("Y").size() != 1) {
VLOG(3) << "The input op's Input(\"Y\").size() "
"should equal to 1, but received Input(\"Y\").size() = "
<< desc.Input("Y").size() << ".";
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "The input op's Output(\"Out\").size() "
"should equal to 1, but reveceid Output(\"Out\").size() = "
<< desc.Output("Out").size() << ".";
return false;
}
}
if (op_type == "stack") {
if (!with_dynamic_shape) {
VLOG(3)
<< "static shape mode is not supported for TRT stack.\n"
"You can use the config.SetTRTDynamicShapeInfo(...) interface"
" to set the shape information to run the dynamic shape "
"mode.";
return false;
}
}
if (op_type == "fused_embedding_eltwise_layernorm") {
if (!with_dynamic_shape) {
VLOG(3) << "fused_embedding_eltwise_layernorm should run on dynamic "
"shape mode.";
return false;
}
if (desc.Input("Ids").size() != desc.Input("Embs").size()) {
VLOG(3) << "The id and emb size of fused EmbEltwiseLayerNormOp "
"should be same ";
return false;
}
}
if (op_type == "gelu") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "gelu op has only 1 input, but got "
<< desc.Input("X").size();
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "gelu op has only 1 output, but got "
<< desc.Output("Out").size();
return false;
}
}
if (op_type == "layer_norm") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "input of layer_norm op converter should be 1, got "
<< desc.Input("X").size();
return false;
}
if (desc.Input("Bias").size() != 1) {
VLOG(3) << "Bias of layer_norm op converter should be 1, got "
<< desc.Input("Bias").size();
return false;
}
if (desc.Input("Scale").size() != 1) {
VLOG(3) << "Scale of layer_norm op converter should be 1, got "
<< desc.Input("Scale").size();
return false;
}
if (desc.Output("Y").size() != 1) {
VLOG(3) << "output of layer_norm op converter should be 1, got "
<< desc.Output("Y").size();
return false;
}
}
if (op_type == "leaky_relu") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "Invalid number of TRT leaky_relu op converter "
"inputs. Expected 1, but received "
<< desc.Input("X").size();
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "output of leaky_relu op converter should be 1, got "
<< desc.Output("Out").size();
return false;
}
}
if (op_type == "pad") {
const float pad_value = BOOST_GET_CONST(float, desc.GetAttr("pad_value"));
if (pad_value != 0.0f) {
VLOG(3) << "The pad layer of TRT only support zero.";
return false;
}
}
if (op_type == "prelu") {
if (desc.Input("X").size() != 1) {
VLOG(3) << "Invalid input X's size of prelu TRT converter. "
"Expected 1, received "
<< desc.Input("X").size() << ".";
return false;
}
if (desc.Output("Out").size() != 1) {
VLOG(3) << "Invalid output Out's size of prelu TRT converter. "
"Expected 1, received "
<< desc.Output("Out").size() << ".";
return false;
}
}
if (op_type == "roi_align") {
if (!with_dynamic_shape) {
VLOG(3) << "TRT roi align plugin only accept the dynamic shape, "
"because that "
"the roi_align will change the batch size.";
return false;
}
}
if (op_type == "shuffle_channel") {
if (with_dynamic_shape) {
VLOG(3) << "You are running the TRT Dynamic Shape mode, "
"the shuffle_channel op does not support dynamic shape yet";
return false;
}
}
if (op_type == "skip_layernorm") {
if (!with_dynamic_shape) {
VLOG(3) << "the skip_layernorm does not support static shape yet";
return false;
}
}
if (op_type == "multihead_matmul") {
if (!with_dynamic_shape) {
VLOG(3) << "the multihead_matmul does not support static shape yet";
return false;
}
}
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
}
return false;
......