未验证 提交 fcaa64b3 编写于 作者: B baoachun 提交者: GitHub

add multihead_matmul trt converter test case (#36023)

* add multihead_matmul trt converter test case

* move attribute check to op_teller
上级 8e19d1ba
......@@ -62,7 +62,7 @@ static int BuildFusion(Graph* graph, const std::string& name_scope) {
// BOOST_GET_CONST(bool, scale->Op()->GetAttr("bias_after_scale"));
// create multihead
OpDesc multihead_op_desc;
OpDesc multihead_op_desc(mul0->Op()->Block());
// create tmp tensor
VarDesc k_var_desc(*mul1_out->Var());
......@@ -847,7 +847,7 @@ int MultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph,
int head_number =
BOOST_GET_CONST(std::vector<int>, reshape_desc->GetAttr("shape")).at(2);
OpDesc multihead_op_desc;
OpDesc multihead_op_desc(mul0->Op()->Block());
multihead_op_desc.SetType("multihead_matmul");
multihead_op_desc.SetInput("Input", {input0->Name()});
......@@ -1287,7 +1287,7 @@ int MultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph,
int head_number =
BOOST_GET_CONST(std::vector<int>, reshape_desc->GetAttr("shape")).at(2);
OpDesc multihead_op_desc;
OpDesc multihead_op_desc(mul0->Op()->Block());
multihead_op_desc.SetType("multihead_matmul");
multihead_op_desc.SetInput("Input", {input0->Name()});
......
......@@ -23,7 +23,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
#if IS_TRT_VERSION_GE(6000)
VLOG(3) << "convert a fluid multihead_mamul op to a corresponding tensorrt "
"network structure";
framework::OpDesc op_desc(op, nullptr);
......@@ -46,10 +45,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
float in_scale = 0.;
if (enable_int8) {
PADDLE_ENFORCE_EQ(
op_desc.HasAttr("Input_scale"), true,
platform::errors::InvalidArgument(
"must have input scale in multihead layers in int8 mode"));
in_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Input_scale")) * 127;
auto weight_scale =
BOOST_GET_CONST(std::vector<float>, op_desc.GetAttr("weight_scale"));
......@@ -181,10 +176,7 @@ class MultiheadMatMulOpConverter : public OpConverter {
{"hidden_size", &hidden_out, nvinfer1::PluginFieldType::kINT32, 1},
{"num_heads", &head_number, nvinfer1::PluginFieldType::kINT32, 1},
{"has_mask", &has_mask, nvinfer1::PluginFieldType::kINT32, 1},
{ "var_seqlen",
&var_seqlen,
nvinfer1::PluginFieldType::kINT32,
1 }};
{"var_seqlen", &var_seqlen, nvinfer1::PluginFieldType::kINT32, 1}};
if (qkv2context_plugin_int8) {
fields.push_back(
{"dq_probs", &dp_probs, nvinfer1::PluginFieldType::kFLOAT32, 1});
......@@ -296,11 +288,6 @@ class MultiheadMatMulOpConverter : public OpConverter {
}
RreplenishLayerAndOutput(layer, "multihead_matmul", {output_name},
test_mode);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
}
};
......
......@@ -1085,6 +1085,42 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
VLOG(3) << "the multihead_matmul does not support static shape yet";
return false;
}
if (desc.HasAttr("enable_int8") && !desc.HasAttr("Input_scale")) {
VLOG(3) << "Multihead layers must have input scale in int8 mode.";
return false;
}
auto* block = desc.Block();
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto* input_desc = block->FindVar(desc.Input("Input").front());
const auto input_shape = input_desc->GetShape();
const auto head_number =
BOOST_GET_CONST(int, desc.GetAttr("head_number"));
auto* biasqk_desc = block->FindVar(desc.Input("BiasQK").front());
const auto biasqk_shape = biasqk_desc->GetShape();
// The BiasQK's shape requires to be
// [batch, 1, 1, length] or [batch, head, length, length].
bool has_same_shape = head_number == biasqk_shape[1] &&
input_shape[1] == biasqk_shape[2] &&
input_shape[1] == biasqk_shape[3];
bool is_broadcastable = biasqk_shape[1] == 1 && biasqk_shape[2] == 1 &&
input_shape[1] == biasqk_shape[3];
if (!(has_same_shape || is_broadcastable)) {
VLOG(3) << "The BiasQK's shape is invalid, expect [" << input_shape[0]
<< ", 1, 1, " << input_shape[1] << "] or [" << input_shape[0]
<< ", " << head_number << ", " << input_shape[1] << ", "
<< input_shape[1] << "] but [" << biasqk_shape[0] << ", "
<< biasqk_shape[1] << ", " << biasqk_shape[2] << ", "
<< biasqk_shape[3] << "].";
return false;
}
}
if (op_type == "fc") {
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
from program_config import TensorConfig, ProgramConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool:
return True
def sample_program_configs(self):
def generate_input1(batch, dim1):
return np.random.randn(batch, dim1, 768).astype(np.float32)
def generate_input2(shape):
return np.random.random(shape).astype(np.float32)
def generate_weight1():
return np.random.randn(768, 768).astype(np.float32)
def generate_weight2():
return np.random.randn(768).astype(np.float32)
for batch in [1, 2, 4]:
for reshape_shape in [[0, 0, 12, 64]]:
for dim1 in [128]:
input2_shapes = [[batch, reshape_shape[2], dim1, dim1],
[batch, 1, 1, dim1]]
for input2_shape in input2_shapes:
for axis in [0]:
dics = [{
"x_num_col_dims": 2,
"y_num_col_dims": 1
}, {
"axis": 2
}, {
"shape": reshape_shape
}, {
"axis": [0, 2, 1, 3]
}, {
"x_num_col_dims": 2,
"y_num_col_dims": 1
}, {
"axis": 2
}, {
"shape": reshape_shape
}, {
"axis": [0, 2, 1, 3]
}, {
"x_num_col_dims": 2,
"y_num_col_dims": 1
}, {
"axis": 2
}, {
"shape": reshape_shape
}, {
"axis": [0, 2, 1, 3]
}, {
"scale": 0.125,
"bias": 0.0,
"bias_after_scale": True
}, {
"alpha": 1.0,
"transpose_X": False,
"transpose_Y": True,
"fused_reshape_X": [],
"fused_reshape_Y": [],
"fused_transpose_X": [],
"fused_transpose_Y": [],
"fused_reshape_Out": [],
"fused_transpose_Out": []
}, {
"axis": axis
}, {
"axis": -1,
"is_test": True
}, {
"seed": 0,
"dropout_prob": 0.10000000149011612,
"dropout_implementation": "upscale_in_train",
"fix_seed": False,
"is_test": True
}, {
"alpha": 1.0,
"transpose_X": False,
"transpose_Y": False,
"fused_reshape_X": [],
"fused_reshape_Y": [],
"fused_transpose_X": [],
"fused_transpose_Y": [],
"fused_reshape_Out": [],
"fused_transpose_Out": []
}, {
"axis": [0, 2, 1, 3]
}, {
"shape": [0, 0, 768]
}, {
"x_num_col_dims": 2,
"y_num_col_dims": 1
}]
ops_config = [
{
"op_type": "mul",
"op_inputs": {
"X": ["input_data1"],
"Y": ["mul1_weight"]
},
"op_outputs": {
"Out": ["mul1_output"]
},
"op_attrs": dics[0]
},
{
"op_type": "elementwise_add",
"op_inputs": {
"X": ["mul1_output"],
"Y": ["elementwise_add1_weight"]
},
"op_outputs": {
"Out": ["elementwise_add1_output"]
},
"op_attrs": dics[1]
},
{
"op_type": "reshape2",
"op_inputs": {
"X": ["elementwise_add1_output"],
},
"op_outputs": {
"Out": ["reshape21_output"],
"XShape": ["reshape21_output_xshape"]
},
"op_attrs": dics[2]
},
{
"op_type": "transpose2",
"op_inputs": {
"X": ["reshape21_output"]
},
"op_outputs": {
"Out": ["transpose21_output"],
"XShape":
["transpose21_output_xshape"]
},
"op_attrs": dics[3]
},
{
"op_type": "mul",
"op_inputs": {
"X": ["input_data1"],
"Y": ["mul2_weight"]
},
"op_outputs": {
"Out": ["mul2_output"]
},
"op_attrs": dics[4]
},
{
"op_type": "elementwise_add",
"op_inputs": {
"X": ["mul2_output"],
"Y": ["elementwise_add2_weight"]
},
"op_outputs": {
"Out": ["elementwise_add2_output"]
},
"op_attrs": dics[5]
},
{
"op_type": "reshape2",
"op_inputs": {
"X": ["elementwise_add2_output"]
},
"op_outputs": {
"Out": ["reshape22_output"],
"XShape": ["reshape22_output_xshape"]
},
"op_attrs": dics[6]
},
{
"op_type": "transpose2",
"op_inputs": {
"X": ["reshape22_output"]
},
"op_outputs": {
"Out": ["transpose22_output"],
"XShape":
["transpose22_output_xshape"]
},
"op_attrs": dics[7]
},
{
"op_type": "mul",
"op_inputs": {
"X": ["input_data1"],
"Y": ["mul3_weight"]
},
"op_outputs": {
"Out": ["mul3_output"]
},
"op_attrs": dics[8]
},
{
"op_type": "elementwise_add",
"op_inputs": {
"X": ["mul3_output"],
"Y": ["elementwise_add3_weight"]
},
"op_outputs": {
"Out": ["elementwise_add3_output"]
},
"op_attrs": dics[9]
},
{
"op_type": "reshape2",
"op_inputs": {
"X": ["elementwise_add3_output"]
},
"op_outputs": {
"Out": ["reshape23_output"],
"XShape": ["reshape23_output_xshape"]
},
"op_attrs": dics[10]
},
{
"op_type": "transpose2",
"op_inputs": {
"X": ["reshape23_output"]
},
"op_outputs": {
"Out": ["transpose23_output"],
"XShape":
["transpose23_output_xshape"]
},
"op_attrs": dics[11]
},
{
"op_type": "scale",
"op_inputs": {
"X": ["transpose23_output"],
},
"op_outputs": {
"Out": ["scale_output"]
},
"op_attrs": dics[12]
},
{
"op_type": "matmul",
"op_inputs": {
"X": ["scale_output"],
"Y": ["transpose22_output"],
},
"op_outputs": {
"Out": ["matmul1_output"]
},
"op_attrs": dics[13]
},
{
"op_type": "elementwise_add",
"op_inputs": {
"X": ["matmul1_output"],
"Y": ["input_data2"]
},
"op_outputs": {
"Out": ["elementwise_add4_output"]
},
"op_attrs": dics[14]
},
{
"op_type": "softmax",
"op_inputs": {
"X": ["elementwise_add4_output"]
},
"op_outputs": {
"Out": ["softmax_output"]
},
"op_attrs": dics[15]
},
{
"op_type": "dropout",
"op_inputs": {
"X": ["softmax_output"],
},
"op_outputs": {
"Out": ["dropout3_output"]
},
"op_attrs": dics[16]
},
{
"op_type": "matmul",
"op_inputs": {
"X": ["dropout3_output"],
"Y": ["transpose21_output"],
},
"op_outputs": {
"Out": ["matmul2_output"]
},
"op_attrs": dics[17]
},
{
"op_type": "transpose2",
"op_inputs": {
"X": ["matmul2_output"]
},
"op_outputs": {
"Out": ["transpose24_output"],
"XShape":
["transpose24_output_xshape"]
},
"op_attrs": dics[18]
},
{
"op_type": "reshape2",
"op_inputs": {
"X": ["transpose24_output"]
},
"op_outputs": {
"Out": ["reshape24_output"],
"XShape": ["reshape24_output_xshape"]
},
"op_attrs": dics[19]
},
# In order to fuse ops with
# multihead_matmul_fuse_pass_v2, the last op
# must be mul.
{
"op_type": "mul",
"op_inputs": {
"X": ["reshape24_output"],
"Y": ["mul4_weight"]
},
"op_outputs": {
"Out": ["mul4_output"]
},
"op_attrs": dics[20]
}
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={
"mul1_weight": TensorConfig(
data_gen=partial(generate_weight1)),
"mul2_weight": TensorConfig(
data_gen=partial(generate_weight1)),
"mul3_weight": TensorConfig(
data_gen=partial(generate_weight1)),
"mul4_weight": TensorConfig(
data_gen=partial(generate_weight1)),
"elementwise_add1_weight": TensorConfig(
data_gen=partial(generate_weight2)),
"elementwise_add2_weight": TensorConfig(
data_gen=partial(generate_weight2)),
"elementwise_add3_weight": TensorConfig(
data_gen=partial(generate_weight2)),
},
inputs={
"input_data1": TensorConfig(
data_gen=partial(generate_input1, batch,
dim1)),
"input_data2": TensorConfig(
data_gen=partial(generate_input2,
input2_shape)),
},
outputs=["mul4_output"])
yield program_config
def sample_predictor_configs(
self, program_config) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
# The last dim of input1 and input2 should be static.
self.dynamic_shape.min_input_shape = {
"input_data1": [1, 8, 768],
"input_data2": [1, 1, 1, 128],
"reshape24_output": [1, 128, 768]
}
self.dynamic_shape.max_input_shape = {
"input_data1": [16, 512, 768],
"input_data2": [16, 256, 512, 128],
"reshape24_output": [1, 128, 768]
}
self.dynamic_shape.opt_input_shape = {
"input_data1": [8, 128, 768],
"input_data2": [8, 32, 64, 128],
"reshape24_output": [1, 128, 768]
}
def clear_dynamic_shape():
self.dynamic_shape.max_input_shape = {}
self.dynamic_shape.min_input_shape = {}
self.dynamic_shape.opt_input_shape = {}
attrs = [
program_config.ops[i].attrs
for i in range(len(program_config.ops))
]
# for static_shape
clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), (1, 4), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), (1, 4), 1e-5
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), (1, 3), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), (1, 3), 1e-5
def test(self):
self.run_test()
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册