Unverified commit 9d04ef73, authored by Shang Zhizhou, committed by GitHub

fix tensorrt output variable reshape (#31733)

* fix tensorrt output variable reshape

* move padding shape x 1 x 1 in ernie to qkv and fc (see the sketch below)

* update layer name

* fix softmax when input is dynamic, fc not padding any more

* fix varlen

* move fc x_dim assert to op_teller
Parent 46dd1d4a
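The "x 1 x 1" padding mentioned in the commit message exists because TensorRT's IFullyConnectedLayer operates on the innermost three dimensions of its input, so a 2-D or 3-D NLP-shaped tensor has to be expanded with two trailing size-1 dimensions before the layer and squeezed back afterwards; this commit moves that padding out of the plugins and into the fc/qkv converters. Below is a minimal sketch of the pattern against the raw TensorRT C++ API, assuming a 3-D input; the helper name is illustrative and this is not the Paddle converter itself, which wraps the same calls in TRT_ENGINE_ADD_LAYER.

// Sketch only: pad an NLP-shaped tensor to "... x 1 x 1", run FullyConnected,
// then squeeze the padding off again. Assumes a 3-D input [d0, d1, C]; the
// function name and arguments are illustrative, not Paddle's converter API.
#include <NvInfer.h>

nvinfer1::ITensor* FullyConnectedWithPadding(nvinfer1::INetworkDefinition* network,
                                             nvinfer1::ITensor* x, int n_output,
                                             nvinfer1::Weights w, nvinfer1::Weights b) {
  // Reshape [d0, d1, C] -> [d0, d1, C, 1, 1]; a 0 keeps the corresponding input dim.
  nvinfer1::Dims pad_dims;
  pad_dims.nbDims = 5;
  pad_dims.d[0] = 0;
  pad_dims.d[1] = 0;
  pad_dims.d[2] = 0;
  pad_dims.d[3] = 1;
  pad_dims.d[4] = 1;
  nvinfer1::IShuffleLayer* pre = network->addShuffle(*x);
  pre->setReshapeDimensions(pad_dims);

  // FullyConnected consumes the innermost C x 1 x 1 block and emits n_output channels.
  nvinfer1::IFullyConnectedLayer* fc =
      network->addFullyConnected(*pre->getOutput(0), n_output, w, b);

  // Squeeze the two trailing 1s: [d0, d1, n_output, 1, 1] -> [d0, d1, n_output].
  nvinfer1::Dims squeeze_dims;
  squeeze_dims.nbDims = 3;
  squeeze_dims.d[0] = 0;
  squeeze_dims.d[1] = 0;
  squeeze_dims.d[2] = 0;
  nvinfer1::IShuffleLayer* post = network->addShuffle(*fc->getOutput(0));
  post->setReshapeDimensions(squeeze_dims);
  return post->getOutput(0);
}

The fc and multihead_matmul converter hunks below follow this shape, while the plugins' getOutputDimensions drop the trailing 1s so downstream ops see plain 3-D tensors.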
@@ -168,11 +168,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   std::set<std::string> output_names;
   std::set<std::string> output_names_with_id;
-  std::vector<int> origin_output_dims;
+  std::map<std::string, int> origin_name_output_dims;
   for (auto *x : node->outputs) {
     output_names.insert(x->Name());
     output_names_with_id.insert(x->Name() + std::to_string(x->id()));
-    origin_output_dims.push_back(x->Var()->GetShape().size());
+    origin_name_output_dims[x->Name()] = x->Var()->GetShape().size();
   }
   std::unordered_map<std::string, std::string> output_name_map;
@@ -216,11 +216,13 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   // output_mapping help us copy the data from the renamed ITensor
   // to Tensor.
   std::vector<std::string> output_mapping;
+  std::vector<int> renamed_output_dims;
   for (auto name : output_names) {
     PADDLE_ENFORCE_NE(output_name_map.count(name), 0,
                       platform::errors::PreconditionNotMet(
                           "The output_name_map should have %s", name));
     output_mapping.push_back(output_name_map[name]);
+    renamed_output_dims.push_back(origin_name_output_dims[name]);
   }
   PADDLE_ENFORCE_EQ(output_mapping.empty(), false,
                     platform::errors::PreconditionNotMet(
@@ -243,7 +245,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("workspace_size", Get<int>("workspace_size"));
   op_desc->SetAttr("gpu_id", Get<int>("gpu_device_id"));
   op_desc->SetAttr("output_name_mapping", output_mapping);
-  op_desc->SetAttr("origin_output_dims", origin_output_dims);
+  op_desc->SetAttr("origin_output_dims", renamed_output_dims);
   op_desc->SetAttr("parameters", params);
   // we record all inputs' shapes in attr to check if they are consistent
......
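The point of keying the output ranks by name in the hunk above is ordering: output_names is a std::set and iterates in sorted order, while node->outputs keeps graph order, and the "origin_output_dims" and "output_name_mapping" attributes are presumably read side by side by the engine op. Rebuilding the rank list inside the same loop that builds output_mapping keeps the two attributes index-aligned. A tiny standalone illustration of the hazard, with made-up names:

// Minimal, self-contained illustration (names are made up) of why the pass
// keys the output ranks by name: a std::set iterates in sorted order, while
// the graph's output list keeps insertion order, so two lists built from the
// two different containers can pair the wrong rank with an output name.
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Graph order: "scale_out" first (rank 3), then "fc_out" (rank 2).
  std::vector<std::pair<std::string, int>> graph_outputs = {
      {"scale_out", 3}, {"fc_out", 2}};

  std::set<std::string> output_names;       // iterates alphabetically
  std::map<std::string, int> name_to_rank;  // rank looked up by name
  for (const auto& out : graph_outputs) {
    output_names.insert(out.first);
    name_to_rank[out.first] = out.second;
  }

  // Building the rank list while walking output_names keeps it aligned with
  // any other per-output list built in the same loop (e.g. output_mapping).
  for (const auto& name : output_names) {
    std::cout << name << " -> rank " << name_to_rank[name] << "\n";
  }
  return 0;
}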
@@ -144,7 +144,69 @@ class FcOpConverter : public OpConverter {
                                   static_cast<size_t>(bias_num)};
     if (engine_->with_dynamic_shape()) {
-      regist_fc(X, n_output, weight, bias);
+      // not NCHW layout, but NLP layout with added 'x 1 x 1'
+      auto x_dim = X->getDimensions();
+      if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
+        auto output_name = op_desc.Output("Out").front();
+        // add shuffle before fc
+        nvinfer1::Dims reshape_before_fc_dim;
+        reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
+        for (int i = 0; i < x_dim.nbDims; i++) {
+          reshape_before_fc_dim.d[i] = 0;
+        }
+        reshape_before_fc_dim.d[x_dim.nbDims] = 1;
+        reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
+        auto* reshape_before_fc_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+        reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+        reshape_before_fc_layer->setName(
+            ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+        // add fc layer
+        auto* fc_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
+            n_output, weight.get(), bias.get());
+        fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
+        // add shuffle after fc
+        nvinfer1::Dims reshape_after_fc_dim;
+        if (x_dim.nbDims == 3) {
+          if (x_num_col_dims == 2) {
+            reshape_after_fc_dim.nbDims = 3;
+            reshape_after_fc_dim.d[0] = 0;
+            reshape_after_fc_dim.d[1] = 0;
+            reshape_after_fc_dim.d[2] = 0;
+          } else {
+            reshape_after_fc_dim.nbDims = 2;
+            reshape_after_fc_dim.d[0] = 0;
+            auto dim = fc_layer->getOutput(0)->getDimensions();
+            reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
+          }
+          // x_dim.nbDims == 2
+        } else {
+          reshape_after_fc_dim.nbDims = 2;
+          reshape_after_fc_dim.d[0] = 0;
+          reshape_after_fc_dim.d[1] = 0;
+        }
+        auto* reshape_after_fc_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
+        reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+        if (activation_type == "relu") {
+          reshape_after_fc_layer->setName(
+              ("shuffle_after_fc(Output: " + output_name + ")").c_str());
+          nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
+              engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
+              nvinfer1::ActivationType::kRELU);
+          RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
+                                   {output_name}, test_mode);
+        } else {
+          RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
+                                   {output_name}, test_mode);
+        }
+      } else {
+        regist_fc(X, n_output, weight, bias);
+      }
       return;
     }
     // in order to handle situations in NLP models(input dims < 3,
@@ -154,12 +216,6 @@ class FcOpConverter : public OpConverter {
     auto input_d = X->getDimensions().d;
     int reshape_dim3[3] = {0};
     int reshape_dim4[4] = {0};
-    PADDLE_ENFORCE_EQ(
-        x_num_col_dims == 1 || x_num_col_dims == 2, true,
-        platform::errors::InvalidArgument(
-            "Wrong x_num_col_dims param of op mul. Paddle-TRT FC converter "
-            "expects x_num_col_dims is either 1 or 2, but got %d",
-            x_num_col_dims));
     PADDLE_ENFORCE_LE(x_num_col_dims, input_dims,
                       platform::errors::InvalidArgument(
                           "Params and input dims mismatch. Paddle-TRT FC "
......
@@ -8,8 +8,8 @@ http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See
+the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
@@ -28,7 +28,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
                "network structure";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
-    // Shouble be a 5 dims tensor.
     auto* input = engine_->GetITensor(op_desc.Input("Input").front());
     // fc weights and fc bias
@@ -69,6 +68,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
     int head_number = BOOST_GET_CONST(int, op_desc.GetAttr("head_number"));
     nvinfer1::ILayer* layer = nullptr;
+    auto output_name = op_desc.Output("Out")[0];
     if (engine_->with_dynamic_shape()) {
       if (engine_->use_oss()) {
@@ -171,6 +171,12 @@ class MultiheadMatMulOpConverter : public OpConverter {
             plugin_inputs.data(), plugin_inputs.size(), *plugin);
         layer = plugin_layer;
       } else {
+        PADDLE_ENFORCE_EQ(
+            input->getDimensions().nbDims, 3,
+            platform::errors::InvalidArgument(
+                "The Input dim of the MultiheadMatMul should be 3, "
+                "but it's (%d) now.",
+                input->getDimensions().nbDims));
         // transpose weight_data from m * n to n * m
         auto* input_bias_qk =
            engine_->GetITensor(op_desc.Input("BiasQK").front());
@@ -184,15 +190,37 @@ class MultiheadMatMulOpConverter : public OpConverter {
                                      static_cast<void*>(bias_data),
                                      static_cast<size_t>(bias_t->numel())};
-        auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *input,
-                                              n, weight.get(), bias.get());
-        auto* fc_out = fc_layer->getOutput(0);
+        // add shuffle before fc
+        nvinfer1::Dims reshape_before_fc_dim;
+        reshape_before_fc_dim.nbDims = 5;
+        reshape_before_fc_dim.d[0] = 0;
+        reshape_before_fc_dim.d[1] = 0;
+        reshape_before_fc_dim.d[2] = 0;
+        reshape_before_fc_dim.d[3] = 1;
+        reshape_before_fc_dim.d[4] = 1;
+        auto* reshape_before_fc_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+        reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+        reshape_before_fc_layer->setName(
+            ("shuffle_before_multihead_mamul(Output: " + output_name + ")")
+                .c_str());
+        // add layer fc
+        auto* fc_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0), n,
+            weight.get(), bias.get());
+        fc_layer->setName(
+            ("multihead_mamul_fc(Output: " + output_name + ")").c_str());
+        // no need to add shuffle after fc, just change it in
+        // QkvToContextPluginDynamic
         // add qkv to context
         int head_size = hidden_out / head_number;
         float scale = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
         std::vector<nvinfer1::ITensor*> plugin_inputs;
-        plugin_inputs.push_back(fc_out);
+        plugin_inputs.push_back(fc_layer->getOutput(0));
         plugin_inputs.push_back(input_bias_qk);
         bool with_fp16 =
             engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -208,7 +236,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
           "You can use the config.SetTRTDynamicShapeInfo(...) interface to set "
           "the shape information to run the dynamic shape mode."));
     }
-    auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "multihead_matmul", {output_name},
                              test_mode);
 #else
......
@@ -51,6 +51,7 @@ class SoftMaxOpConverter : public OpConverter {
     uint32_t axes = std::max(0, input_dims - 3);
     // TODO(cryoco): Poor workaround. Fix padded dims problem when TRT layers
     // support Nd.
+    // Tips: dynamic shape already fixes this.
     int padded_dims = 0;
     int explicit_batch = 0;
     if (engine_->with_dynamic_shape()) explicit_batch = 1;
@@ -62,16 +63,16 @@ class SoftMaxOpConverter : public OpConverter {
       }
     }
     if (!engine_->with_dynamic_shape()) {
-      if (axis == -1) {
-        axes = input_dims - 1 - padded_dims;
+      if (axis < 0) {
+        axes = input_dims + axis - padded_dims;
       } else {
-        axes = axis;
+        axes = axis - 1;
       }
     } else {
-      if (axis == -1) {
-        axes = input_dims - 1 - padded_dims;
+      if (axis < 0) {
+        axes = input_dims + axis;
       } else {
-        axes = axis + 1;
+        axes = axis;
       }
     }
     layer->setAxes(1 << axes);
......
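The softmax hunk above generalizes the axis handling from the special case axis == -1 to any negative axis, and drops the +1 / padded-dims adjustments that explicit-batch (dynamic shape) mode does not need, since there the tensor rank TensorRT sees matches the framework rank. A hedged sketch of the same mapping as a standalone helper; the function name is made up, and the padded-trailing-1 correction that the static-shape branch also applies is omitted here.

// Sketch: convert a Paddle softmax axis into the axes bitmask expected by
// nvinfer1::ISoftMaxLayer::setAxes(). `trt_rank` is the rank of the tensor as
// TensorRT sees it. With an implicit batch, TensorRT drops the batch dim, so a
// positive framework axis shifts down by one; with dynamic shape (explicit
// batch) the ranks match and the axis maps through directly.
#include <cassert>
#include <cstdint>

uint32_t SoftmaxAxesBitmask(int axis, int trt_rank, bool dynamic_shape) {
  int trt_axis;
  if (dynamic_shape) {
    trt_axis = axis < 0 ? trt_rank + axis : axis;
  } else {
    trt_axis = axis < 0 ? trt_rank + axis : axis - 1;
  }
  assert(trt_axis >= 0 && trt_axis < trt_rank);
  return 1u << trt_axis;  // e.g. a [B, S, S] score tensor with axis = -1 -> 1 << 2
}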
@@ -195,7 +195,17 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
      // current not support axis from input, use default 0
      if (!with_dynamic_shape || desc.Input("Axis").size() > 0) return false;
    }
+    if (op_type == "fc" || op_type == "mul") {
+      const int x_num_col_dims =
+          desc.HasAttr("x_num_col_dims")
+              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
+              : (desc.HasAttr("in_num_col_dims")
+                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
+                     : 1);
+      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
+        return false;
+      }
+    }
    if (op_type == "nearest_interp") {
      std::vector<std::string> attrs{"data_layout", "interp_method",
                                     "align_corners", "scale",
......
@@ -200,12 +200,10 @@ nvinfer1::DimsExprs EmbEltwiseLayernormPluginDynamic::getOutputDimensions(
                         "but it's (%d)",
                         output_index));
   nvinfer1::DimsExprs ret;
-  ret.nbDims = 5;
+  ret.nbDims = 3;
   ret.d[0] = inputs[0].d[0];
   ret.d[1] = inputs[0].d[1];
   ret.d[2] = expr_builder.constant(hidden_size_);
-  ret.d[3] = expr_builder.constant(1);
-  ret.d[4] = expr_builder.constant(1);
   return ret;
 }
......
@@ -169,12 +169,10 @@ nvinfer1::DimsExprs QkvToContextPluginDynamic::getOutputDimensions(
                         "it has (%d) inputs",
                         nb_inputs));
   nvinfer1::DimsExprs ret;
-  ret.nbDims = 5;
+  ret.nbDims = 3;
   ret.d[0] = inputs[0].d[0];
   ret.d[1] = inputs[0].d[1];
   ret.d[2] = expr_builder.constant(head_size_ * head_number_);
-  ret.d[3] = expr_builder.constant(1);
-  ret.d[4] = expr_builder.constant(1);
   return ret;
 }
......
@@ -54,11 +54,6 @@ void SkipLayerNormPluginDynamic::terminate() {
 nvinfer1::DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions(
     int output_index, const nvinfer1::DimsExprs *inputs, int nb_inputs,
     nvinfer1::IExprBuilder &expr_builder) {
-  PADDLE_ENFORCE_EQ(
-      inputs[0].nbDims, 5,
-      platform::errors::InvalidArgument(
-          "The Input dim of the SkipLayernorm should be 5, but it's (%d) now.",
-          inputs[0].nbDims));
   return inputs[0];
 }
......
@@ -62,6 +62,8 @@ nvinfer1::DimsExprs SpecialSlicePluginDynamic::getOutputDimensions(
   output.d[1] = one;
   output.d[0] = expr_builder.operation(nvinfer1::DimensionOperation::kSUB,
                                        *inputs[1].d[0], *one);
+  // remove padding 1
+  output.nbDims -= 2;
   return output;
 }
......