# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
from functools import partial
from typing import List

import numpy as np
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest

import paddle.inference as paddle_infer


class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):
    """Auto-scan test built around a 21-op attention-style subgraph.

    The sampled program consists of three parallel
    ``mul -> elementwise_add -> reshape2 -> transpose2`` branches fed by
    ``input_data1``, followed by
    ``scale -> matmul -> elementwise_add -> softmax -> dropout -> matmul ->
    transpose2 -> reshape2 -> mul``.  The trailing ``mul`` is there so that
    ``multihead_matmul_fuse_pass_v2`` can fuse the graph.
    """

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        """Accept every sampled program; nothing is filtered out."""
        return True

    def sample_program_configs(self):
        """Yield one ``ProgramConfig`` per (batch, ``input_data2`` shape) pair.

        For each batch in ``[1, 4]`` two programs are produced, differing only
        in the shape of ``input_data2`` (the tensor added to ``matmul1_output``
        before the softmax): ``[batch, 12, 128, 128]`` and
        ``[batch, 1, 1, 128]``.  ``self.batch`` is updated to the batch size
        currently being sampled.
        """
        # Width of every projection; equals 12 * 64 for the [0, 0, 12, 64]
        # reshape layout used below.
        hidden_size = 768

        def generate_input(shape):
            # Constant input filled with 0.1.
            return np.full(shape, 0.1).astype(np.float32)

        def generate_weight(shape):
            # Random values rounded to one decimal, then mapped from [0, 1]
            # to [-0.1, 0.1].
            return (
                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
                - 0.1
            )

        def op(op_type, inputs, outputs, attrs):
            # One entry in the format consumed by ``self.generate_op_config``.
            return {
                "op_type": op_type,
                "op_inputs": inputs,
                "op_outputs": outputs,
                "op_attrs": attrs,
            }

        def projection_ops(index, reshape_shape):
            # mul -> elementwise_add -> reshape2 -> transpose2 for branch
            # number ``index``.  Fresh attribute dicts are created on every
            # call so that no two ops share one.
            mul_out = "mul{}_output".format(index)
            add_out = "elementwise_add{}_output".format(index)
            reshape_out = "reshape2{}_output".format(index)
            transpose_out = "transpose2{}_output".format(index)
            return [
                op(
                    "mul",
                    {
                        "X": ["input_data1"],
                        "Y": ["mul{}_weight".format(index)],
                    },
                    {"Out": [mul_out]},
                    {"x_num_col_dims": 2, "y_num_col_dims": 1},
                ),
                op(
                    "elementwise_add",
                    {
                        "X": [mul_out],
                        "Y": ["elementwise_add{}_weight".format(index)],
                    },
                    {"Out": [add_out]},
                    {"axis": 2},
                ),
                op(
                    "reshape2",
                    {"X": [add_out]},
                    {
                        "Out": [reshape_out],
                        "XShape": [reshape_out + "_xshape"],
                    },
                    {"shape": reshape_shape},
                ),
                op(
                    "transpose2",
                    {"X": [reshape_out]},
                    {
                        "Out": [transpose_out],
                        "XShape": [transpose_out + "_xshape"],
                    },
                    {"axis": [0, 2, 1, 3]},
                ),
            ]

        def attention_ops(axis):
            # Everything downstream of the three projection branches.
            return [
                op(
                    "scale",
                    {"X": ["transpose23_output"]},
                    {"Out": ["scale_output"]},
                    {
                        "scale": 0.125,
                        "bias": 0.0,
                        "bias_after_scale": True,
                    },
                ),
                op(
                    "matmul",
                    {
                        "X": ["scale_output"],
                        "Y": ["transpose22_output"],
                    },
                    {"Out": ["matmul1_output"]},
                    {
                        "alpha": 1.0,
                        "transpose_X": False,
                        "transpose_Y": True,
                    },
                ),
                op(
                    "elementwise_add",
                    {
                        "X": ["matmul1_output"],
                        "Y": ["input_data2"],
                    },
                    {"Out": ["elementwise_add4_output"]},
                    {"axis": axis},
                ),
                op(
                    "softmax",
                    {"X": ["elementwise_add4_output"]},
                    {"Out": ["softmax_output"]},
                    {"axis": -1, "is_test": True},
                ),
                op(
                    "dropout",
                    {"X": ["softmax_output"]},
                    {"Out": ["dropout3_output"]},
                    {
                        "seed": 0,
                        "dropout_prob": 0.10000000149011612,
                        "dropout_implementation": "upscale_in_train",
                        "fix_seed": False,
                        "is_test": True,
                    },
                ),
                op(
                    "matmul",
                    {
                        "X": ["dropout3_output"],
                        "Y": ["transpose21_output"],
                    },
                    {"Out": ["matmul2_output"]},
                    {
                        "alpha": 1.0,
                        "transpose_X": False,
                        "transpose_Y": False,
                    },
                ),
                op(
                    "transpose2",
                    {"X": ["matmul2_output"]},
                    {
                        "Out": ["transpose24_output"],
                        "XShape": ["transpose24_output_xshape"],
                    },
                    {"axis": [0, 2, 1, 3]},
                ),
                op(
                    "reshape2",
                    {"X": ["transpose24_output"]},
                    {
                        "Out": ["reshape24_output"],
                        "XShape": ["reshape24_output_xshape"],
                    },
                    {"shape": [0, 0, hidden_size]},
                ),
                # In order to fuse ops with
                # multihead_matmul_fuse_pass_v2, the last op
                # must be mul.
                op(
                    "mul",
                    {
                        "X": ["reshape24_output"],
                        "Y": ["mul4_weight"],
                    },
                    {"Out": ["mul4_output"]},
                    {"x_num_col_dims": 2, "y_num_col_dims": 1},
                ),
            ]

        def build_program(batch, reshape_shape, dim1, input2_shape, axis):
            # Assemble the full op list: three projection branches, then the
            # attention tail.
            ops_config = []
            for index in (1, 2, 3):
                ops_config.extend(projection_ops(index, reshape_shape))
            ops_config.extend(attention_ops(axis))
            ops = self.generate_op_config(ops_config)

            # Weights are created in a fixed order (mul1..4, then
            # elementwise_add1..3) so the sequence of random draws is stable.
            weights = {}
            for index in (1, 2, 3, 4):
                weights["mul{}_weight".format(index)] = TensorConfig(
                    data_gen=partial(
                        generate_weight, (hidden_size, hidden_size)
                    )
                )
            for index in (1, 2, 3):
                weights[
                    "elementwise_add{}_weight".format(index)
                ] = TensorConfig(
                    data_gen=partial(generate_weight, (hidden_size,))
                )

            inputs = {
                "input_data1": TensorConfig(
                    data_gen=partial(
                        generate_input, (batch, dim1, hidden_size)
                    )
                ),
                "input_data2": TensorConfig(
                    data_gen=partial(generate_input, input2_shape)
                ),
            }

            return ProgramConfig(
                ops=ops,
                weights=weights,
                inputs=inputs,
                outputs=["mul4_output"],
            )

        for batch in [1, 4]:
            self.batch = batch
            for reshape_shape in [[0, 0, 12, 64]]:
                for dim1 in [128]:
                    input2_shapes = [
                        [batch, reshape_shape[2], dim1, dim1],
                        [batch, 1, 1, dim1],
                    ]
                    for input2_shape in input2_shapes:
                        for axis in [0]:
                            yield build_program(
                                batch, reshape_shape, dim1, input2_shape, axis
                            )

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        """Yield the predictor configurations to run ``program_config`` with.

        Two configurations are produced, both using the same dynamic-shape
        ranges: first Float32 precision with tolerances ``(1e-5, 1e-5)``, then
        Half precision with tolerances ``(1e-2, 1e-2)``.  Each yielded item is
        ``(inference_config, (1, 3), tolerances)``.

        NOTE(review): the meaning of ``(1, 3)`` and of the tolerance pair is
        defined by ``TrtLayerAutoScanTest`` -- presumably expected
        (TensorRT engine count, remaining op count) and (atol, rtol); confirm
        against the base class.
        """

        def generate_dynamic_shape():
            # The last dim of input1 and input2 should be static.
            # NOTE(review): reshape24_output is pinned to batch 1 in all three
            # tables although sample_program_configs also samples batch 4 --
            # confirm this is intended.
            self.dynamic_shape.min_input_shape = {
                "input_data1": [1, 8, 768],
                "input_data2": [1, 1, 1, 128],
                "reshape24_output": [1, 128, 768],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data1": [16, 512, 768],
                "input_data2": [16, 256, 512, 128],
                "reshape24_output": [1, 128, 768],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data1": [8, 128, 768],
                "input_data2": [8, 32, 64, 128],
                "reshape24_output": [1, 128, 768],
            }

        # for dynamic_shape
        generate_dynamic_shape()

        # Float32 run.
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        program_config.set_input_type(np.float32)
        self.trt_param.workspace_size = 2013265920  # 1920 * 1024 * 1024
        yield self.create_inference_config(), (1, 3), (1e-5, 1e-5)

        # Half run; the workspace size set above stays in effect.
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        program_config.set_input_type(np.float16)
        yield self.create_inference_config(), (1, 3), (1e-2, 1e-2)

    def test(self):
        """Entry point picked up by unittest; delegates to ``run_test``."""
        self.run_test()


class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest):
    def sample_program_configs(self):
        def generate_input(shape):
            return np.full(shape, 0.1).astype(np.float32)

        def generate_weight(shape):
            return (
                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
                - 0.1
            )

        for batch in [4]:
            self.batch = batch
            for reshape_shape in [[0, 0, 12, 64]]:
                for dim1 in [128]:
                    input2_shapes = [
                        [batch, reshape_shape[2], dim1, dim1],
                        [batch, 1, 1, dim1],
                    ]
                    for input2_shape in input2_shapes:
                        for axis in [0]:
                            dics = [
                                {
                                    "x_num_col_dims": 2,
                                    "y_num_col_dims": 1,
                                    "enable_int8": True,
                                    "Input_scale": 1.0,
                                },
                                {
                                    "axis": 2,
                                    "out_threshold": 1.0,
                                },
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                {
                                    "x_num_col_dims": 2,
                                    "y_num_col_dims": 1,
                                    "enable_int8": True,
                                    "Input_scale": 1.0,
                                },
                                {
                                    "axis": 2,
                                    "out_threshold": 1.0,
                                },
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                {
                                    "x_num_col_dims": 2,
                                    "y_num_col_dims": 1,
                                    "enable_int8": True,
                                    "Input_scale": 1.0,
                                },
                                {
                                    "axis": 2,
                                    "out_threshold": 1.0,
                                },
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                {
                                    "scale": 0.125,
                                    "bias": 0.0,
                                    "bias_after_scale": True,
                                },
                                {
                                    "alpha": 1.0,
                                    "transpose_X": False,
                                    "transpose_Y": True,
                                },
                                {"axis": axis},
                                {"axis": -1, "is_test": True},
                                {
                                    "seed": 0,
                                    "dropout_prob": 0.10000000149011612,
                                    "dropout_implementation": "upscale_in_train",
                                    "fix_seed": False,
                                    "is_test": True,
                                },
                                {
                                    "alpha": 1.0,
                                    "transpose_X": False,
                                    "transpose_Y": False,
                                },
                                {"axis": [0, 2, 1, 3]},
                                {"shape": [0, 0, 768]},
                                {"x_num_col_dims": 2, "y_num_col_dims": 1},
                            ]

                            ops_config = [
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul1_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul1_output"]},
                                    "op_attrs": dics[0],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul1_output"],
                                        "Y": ["elementwise_add1_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add1_output"]
                                    },
                                    "op_attrs": dics[1],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add1_output"],
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape21_output"],
                                        "XShape": ["reshape21_output_xshape"],
                                    },
                                    "op_attrs": dics[2],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape21_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose21_output"],
                                        "XShape": ["transpose21_output_xshape"],
                                    },
                                    "op_attrs": dics[3],
                                },
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul2_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul2_output"]},
                                    "op_attrs": dics[4],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul2_output"],
                                        "Y": ["elementwise_add2_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add2_output"]
                                    },
                                    "op_attrs": dics[5],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add2_output"]
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape22_output"],
                                        "XShape": ["reshape22_output_xshape"],
                                    },
                                    "op_attrs": dics[6],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape22_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose22_output"],
                                        "XShape": ["transpose22_output_xshape"],
                                    },
                                    "op_attrs": dics[7],
                                },
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul3_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul3_output"]},
                                    "op_attrs": dics[8],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul3_output"],
                                        "Y": ["elementwise_add3_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add3_output"]
                                    },
                                    "op_attrs": dics[9],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add3_output"]
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape23_output"],
                                        "XShape": ["reshape23_output_xshape"],
                                    },
                                    "op_attrs": dics[10],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape23_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose23_output"],
                                        "XShape": ["transpose23_output_xshape"],
                                    },
                                    "op_attrs": dics[11],
                                },
                                {
                                    "op_type": "scale",
                                    "op_inputs": {
                                        "X": ["transpose23_output"],
                                    },
                                    "op_outputs": {"Out": ["scale_output"]},
                                    "op_attrs": dics[12],
                                },
                                {
                                    "op_type": "matmul",
                                    "op_inputs": {
                                        "X": ["scale_output"],
                                        "Y": ["transpose22_output"],
                                    },
                                    "op_outputs": {"Out": ["matmul1_output"]},
                                    "op_attrs": dics[13],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["matmul1_output"],
                                        "Y": ["input_data2"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add4_output"]
                                    },
                                    "op_attrs": dics[14],
                                },
                                {
                                    "op_type": "softmax",
                                    "op_inputs": {
                                        "X": ["elementwise_add4_output"]
                                    },
                                    "op_outputs": {"Out": ["softmax_output"]},
                                    "op_attrs": dics[15],
                                },
                                {
                                    "op_type": "dropout",
                                    "op_inputs": {
                                        "X": ["softmax_output"],
                                    },
                                    "op_outputs": {"Out": ["dropout3_output"]},
                                    "op_attrs": dics[16],
                                },
                                {
                                    "op_type": "matmul",
                                    "op_inputs": {
                                        "X": ["dropout3_output"],
                                        "Y": ["transpose21_output"],
                                    },
                                    "op_outputs": {"Out": ["matmul2_output"]},
                                    "op_attrs": dics[17],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["matmul2_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose24_output"],
                                        "XShape": ["transpose24_output_xshape"],
                                    },
                                    "op_attrs": dics[18],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {"X": ["transpose24_output"]},
                                    "op_outputs": {
                                        "Out": ["reshape24_output"],
                                        "XShape": ["reshape24_output_xshape"],
                                    },
                                    "op_attrs": dics[19],
                                },
                                # In order to fuse ops with
                                # multihead_matmul_fuse_pass_v2, the last op
                                # must be mul.
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["reshape24_output"],
                                        "Y": ["mul4_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul4_output"]},
                                    "op_attrs": dics[20],
                                },
                            ]
                            ops = self.generate_op_config(ops_config)

                            program_config = ProgramConfig(
                                ops=ops,
                                weights={
                                    "mul1_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul2_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul3_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul4_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "elementwise_add1_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                    "elementwise_add2_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                    "elementwise_add3_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                },
                                inputs={
                                    "input_data1": TensorConfig(
                                        data_gen=partial(
                                            generate_input, (batch, dim1, 768)
                                        )
                                    ),
                                    "input_data2": TensorConfig(
                                        data_gen=partial(
                                            generate_input, input2_shape
                                        )
                                    ),
                                },
                                outputs=["mul4_output"],
                            )

                            yield program_config


class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
    """Auto-scan test for a ViT-style attention subgraph under TensorRT.

    The sampled program projects a ``(batch, length, 768)`` input with a
    ``(768, 2304)`` weight, splits the result into three ``12 x 64`` tensors
    via reshape2/transpose2/slice, and combines them with
    matmul_v2 -> scale -> softmax -> matmul_v2 before reshaping back to
    ``(-1, length, 768)``.
    """

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        """Report every sampled program as valid; nothing is filtered out."""
        return True

    def sample_program_configs(self):
        """Yield one ``ProgramConfig`` per sampled (batch, length) pair.

        The sampled batch and length are also stored on ``self.batch`` and
        ``self.length``; ``self.length`` is read while building the reshape2
        target shapes.
        """

        def generate_input1(batch, length):
            # Constant 0.1-filled float32 activation of shape
            # (batch, length, 768).
            return np.full((batch, length, 768), 0.1).astype(np.float32)

        def generate_weight(shape):
            # Uniform samples rounded to one decimal, then divided by 5 and
            # shifted by -0.1, i.e. values in [-0.1, 0.1].
            return (
                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
                - 0.1
            )

        def slice_op(index):
            # Select entry `index` along axis 0 of transpose1_output and drop
            # that axis; the outputs are named slice1/2/3_output.
            return {
                "op_type": "slice",
                "op_inputs": {
                    "Input": ["transpose1_output"],
                },
                "op_outputs": {"Out": [f"slice{index + 1}_output"]},
                "op_attrs": {
                    "axes": [0],
                    "starts": [index],
                    "ends": [index + 1],
                    "decrease_axis": [0],
                    "infer_flags": [1],
                },
            }

        for batch in [4]:
            self.batch = batch
            for length in [197]:
                self.length = length
                ops_config = [
                    # Joint projection: 768 -> 2304 (= 3 * 12 * 64) features.
                    {
                        "op_type": "matmul_v2",
                        "op_inputs": {
                            "X": ["input_data1"],
                            "Y": ["matmul1_weight"],
                        },
                        "op_outputs": {"Out": ["matmul1_output"]},
                        "op_attrs": {
                            "trans_x": False,
                            "trans_y": False,
                            "Input_scale_layer": 1.0,
                        },
                    },
                    {
                        "op_type": "elementwise_add",
                        "op_inputs": {
                            "X": ["matmul1_output"],
                            "Y": ["elementwise_add1_weight"],
                        },
                        "op_outputs": {"Out": ["elementwise_add1_output"]},
                        "op_attrs": {
                            "scale_out": 1.0,
                            "scale_x": 1.0,
                            "scale_y": 1.0,
                            "axis": 2,
                            "Out": 1.0,
                        },
                    },
                    # Split the 2304 features into (3, 12, 64) and move the
                    # size-3 axis to the front so it can be sliced apart.
                    {
                        "op_type": "reshape2",
                        "op_inputs": {
                            "X": ["elementwise_add1_output"],
                        },
                        "op_outputs": {
                            "Out": ["reshape1_output"],
                            "XShape": ["reshape1_output_xshape"],
                        },
                        "op_attrs": {"shape": [-1, self.length, 3, 12, 64]},
                    },
                    {
                        "op_type": "transpose2",
                        "op_inputs": {"X": ["reshape1_output"]},
                        "op_outputs": {
                            "Out": ["transpose1_output"],
                            "XShape": ["transpose1_output_xshape"],
                        },
                        "op_attrs": {
                            "axis": [2, 0, 3, 1, 4],
                            "data_format": "AnyLayout",
                        },
                    },
                    slice_op(0),
                    slice_op(1),
                    slice_op(2),
                    # slice2 is transposed on its last two axes before being
                    # multiplied with slice1.
                    {
                        "op_type": "transpose2",
                        "op_inputs": {"X": ["slice2_output"]},
                        "op_outputs": {
                            "Out": ["transpose2_output"],
                        },
                        "op_attrs": {
                            "axis": [0, 1, 3, 2],
                            "data_format": "AnyLayout",
                        },
                    },
                    {
                        "op_type": "matmul_v2",
                        "op_inputs": {
                            "X": ["slice1_output"],
                            "Y": ["transpose2_output"],
                        },
                        "op_outputs": {"Out": ["matmul2_output"]},
                        "op_attrs": {"trans_x": False, "trans_y": False},
                    },
                    # 0.125 == 1 / sqrt(64), 64 being the last reshape dim.
                    {
                        "op_type": "scale",
                        "op_inputs": {
                            "X": ["matmul2_output"],
                        },
                        "op_outputs": {"Out": ["scale_output"]},
                        "op_attrs": {
                            "scale": 0.125,
                            "bias": 0.0,
                            "bias_after_scale": True,
                        },
                    },
                    {
                        "op_type": "softmax",
                        "op_inputs": {"X": ["scale_output"]},
                        "op_outputs": {"Out": ["softmax_output"]},
                        "op_attrs": {"axis": -1, "data_format": "AnyLayout"},
                    },
                    {
                        "op_type": "matmul_v2",
                        "op_inputs": {
                            "X": ["softmax_output"],
                            "Y": ["slice3_output"],
                        },
                        "op_outputs": {"Out": ["matmul3_output"]},
                        "op_attrs": {"trans_x": False, "trans_y": False},
                    },
                    # Swap axes 1 and 2 back and merge the trailing 12 x 64
                    # axes into 768 features.
                    {
                        "op_type": "transpose2",
                        "op_inputs": {"X": ["matmul3_output"]},
                        "op_outputs": {
                            "Out": ["transpose3_output"],
                            "XShape": ["transpose3_output_xshape"],
                        },
                        "op_attrs": {
                            "axis": [0, 2, 1, 3],
                            "data_format": "AnyLayout",
                        },
                    },
                    {
                        "op_type": "reshape2",
                        "op_inputs": {"X": ["transpose3_output"]},
                        "op_outputs": {
                            "Out": ["reshape2_output"],
                            "XShape": ["reshape2_output_xshape"],
                        },
                        "op_attrs": {"shape": [-1, self.length, 768]},
                    },
                ]

                ops = self.generate_op_config(ops_config)

                program_config = ProgramConfig(
                    ops=ops,
                    weights={
                        "matmul1_weight": TensorConfig(
                            data_gen=partial(generate_weight, (768, 2304))
                        ),
                        "elementwise_add1_weight": TensorConfig(
                            data_gen=partial(generate_weight, (2304,))
                        ),
                    },
                    inputs={
                        "input_data1": TensorConfig(
                            data_gen=partial(generate_input1, batch, length)
                        )
                    },
                    outputs=["reshape2_output"],
                )

                yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        """Yield an inference config per precision (Int8, Half, Float32).

        Each yielded item is a 3-tuple: the config returned by
        ``self.create_inference_config()``, the pair returned by
        ``generate_trt_nodes_num()``, and a pair of tolerances.
        NOTE(review): the node-count pair is presumably (TensorRT engines,
        remaining Paddle ops) and the tolerances presumably (atol, rtol);
        confirm against ``TrtLayerAutoScanTest``.
        """

        def generate_dynamic_shape(attrs):
            # This program has a single feed, input_data1. Its last dim is
            # 768 in min/max/opt alike; only batch and length vary.
            # `attrs` is accepted but not used.
            self.dynamic_shape.min_input_shape = {
                "input_data1": [1, 8, 768],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data1": [16, 512, 768],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data1": [1, 197, 768],
            }

        attrs = [op.attrs for op in program_config.ops]

        def generate_trt_nodes_num():
            # The expected counts differ for TensorRT builds older than 8.0.
            ver = paddle_infer.get_trt_compile_version()
            if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8000:
                return 0, 3
            return 1, 2

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.workspace_size = 2013265920  # 1920 * 1024 * 1024

        # (TensorRT precision, numpy input dtype, tolerance pair), in the
        # order the configurations are yielded.
        precision_runs = (
            (paddle_infer.PrecisionType.Int8, np.int8, (1e-3, 1e-3)),
            (paddle_infer.PrecisionType.Half, np.float16, (1e-3, 2e-2)),
            (paddle_infer.PrecisionType.Float32, np.float32, (1e-5, 1e-5)),
        )
        for precision, input_dtype, tolerance in precision_runs:
            # Mutate the shared state first: create_inference_config() is
            # called after precision and input type have been updated.
            self.trt_param.precision = precision
            program_config.set_input_type(input_dtype)
            yield (
                self.create_inference_config(),
                generate_trt_nodes_num(),
                tolerance,
            )

    def test(self):
        """Run the auto-scan test via ``self.run_test()``."""
        self.run_test()


class TrtConvertMultiHeadMatmulTest_biasqk_seqseq(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        """Report every sampled program as valid.

        ``program_config`` is not inspected; the method always returns True.
        """
        return True

    def sample_program_configs(self):
        """Yield one multi-head attention ``ProgramConfig`` per bias shape.

        Three mul -> elementwise_add -> reshape2 -> transpose2 branches are
        built from ``input_data1``. The third branch is scaled and multiplied
        with the second (``transpose_Y=True``), ``input_data2`` is added, and
        softmax and dropout are applied. The result is multiplied with the
        first branch, transposed, reshaped to ``[0, 0, 768]`` and passed
        through a final mul.

        ``input_data2`` is sampled with two shapes:
        ``[batch, 12, dim1, dim1]`` and ``[batch, 1, 1, dim1]``.
        The sampled batch is also stored on ``self.batch``.
        """

        def generate_input(shape):
            # Constant 0.1-filled float32 tensor of the requested shape.
            return np.full(shape, 0.1).astype(np.float32)

        def generate_weight(shape):
            # Uniform samples rounded to one decimal, then divided by 5 and
            # shifted by -0.1, i.e. values in [-0.1, 0.1].
            return (
                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
                - 0.1
            )

        for batch in [2]:
            self.batch = batch
            for reshape_shape in [[0, 0, 12, 64]]:
                for dim1 in [128]:
                    input2_shapes = [
                        [batch, reshape_shape[2], dim1, dim1],
                        [batch, 1, 1, dim1],
                    ]
                    for input2_shape in input2_shapes:
                        for axis in [0]:
                            # Attribute dicts, indexed in the same order as
                            # the entries of ops_config below.
                            dics = [
                                # 0-3: first mul/add/reshape2/transpose2.
                                {"x_num_col_dims": 2, "y_num_col_dims": 1},
                                {"axis": 2},
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                # 4-7: second mul/add/reshape2/transpose2.
                                {"x_num_col_dims": 2, "y_num_col_dims": 1},
                                {"axis": 2},
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                # 8-11: third mul/add/reshape2/transpose2.
                                {"x_num_col_dims": 2, "y_num_col_dims": 1},
                                {"axis": 2},
                                {"shape": reshape_shape},
                                {"axis": [0, 2, 1, 3]},
                                # 12: scale of the third branch.
                                {
                                    "scale": 0.125,
                                    "bias": 0.0,
                                    "bias_after_scale": True,
                                },
                                # 13: matmul of scaled third and second branch.
                                {
                                    "alpha": 1.0,
                                    "transpose_X": False,
                                    "transpose_Y": True,
                                },
                                # 14: elementwise_add with input_data2.
                                {"axis": axis},
                                # 15: softmax.
                                {"axis": -1, "is_test": True},
                                # 16: dropout.
                                {
                                    "seed": 0,
                                    "dropout_prob": 0.10000000149011612,
                                    "dropout_implementation": "upscale_in_train",
                                    "fix_seed": False,
                                    "is_test": True,
                                },
                                # 17: matmul with the first branch.
                                {
                                    "alpha": 1.0,
                                    "transpose_X": False,
                                    "transpose_Y": False,
                                },
                                # 18-20: transpose2, reshape2 and final mul.
                                {"axis": [0, 2, 1, 3]},
                                {"shape": [0, 0, 768]},
                                {"x_num_col_dims": 2, "y_num_col_dims": 1},
                            ]

                            ops_config = [
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul1_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul1_output"]},
                                    "op_attrs": dics[0],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul1_output"],
                                        "Y": ["elementwise_add1_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add1_output"]
                                    },
                                    "op_attrs": dics[1],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add1_output"],
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape21_output"],
                                        "XShape": ["reshape21_output_xshape"],
                                    },
                                    "op_attrs": dics[2],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape21_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose21_output"],
                                        "XShape": ["transpose21_output_xshape"],
                                    },
                                    "op_attrs": dics[3],
                                },
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul2_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul2_output"]},
                                    "op_attrs": dics[4],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul2_output"],
                                        "Y": ["elementwise_add2_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add2_output"]
                                    },
                                    "op_attrs": dics[5],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add2_output"]
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape22_output"],
                                        "XShape": ["reshape22_output_xshape"],
                                    },
                                    "op_attrs": dics[6],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape22_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose22_output"],
                                        "XShape": ["transpose22_output_xshape"],
                                    },
                                    "op_attrs": dics[7],
                                },
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["input_data1"],
                                        "Y": ["mul3_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul3_output"]},
                                    "op_attrs": dics[8],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["mul3_output"],
                                        "Y": ["elementwise_add3_weight"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add3_output"]
                                    },
                                    "op_attrs": dics[9],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {
                                        "X": ["elementwise_add3_output"]
                                    },
                                    "op_outputs": {
                                        "Out": ["reshape23_output"],
                                        "XShape": ["reshape23_output_xshape"],
                                    },
                                    "op_attrs": dics[10],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["reshape23_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose23_output"],
                                        "XShape": ["transpose23_output_xshape"],
                                    },
                                    "op_attrs": dics[11],
                                },
                                {
                                    "op_type": "scale",
                                    "op_inputs": {
                                        "X": ["transpose23_output"],
                                    },
                                    "op_outputs": {"Out": ["scale_output"]},
                                    "op_attrs": dics[12],
                                },
                                {
                                    "op_type": "matmul",
                                    "op_inputs": {
                                        "X": ["scale_output"],
                                        "Y": ["transpose22_output"],
                                    },
                                    "op_outputs": {"Out": ["matmul1_output"]},
                                    "op_attrs": dics[13],
                                },
                                {
                                    "op_type": "elementwise_add",
                                    "op_inputs": {
                                        "X": ["matmul1_output"],
                                        "Y": ["input_data2"],
                                    },
                                    "op_outputs": {
                                        "Out": ["elementwise_add4_output"]
                                    },
                                    "op_attrs": dics[14],
                                },
                                {
                                    "op_type": "softmax",
                                    "op_inputs": {
                                        "X": ["elementwise_add4_output"]
                                    },
                                    "op_outputs": {"Out": ["softmax_output"]},
                                    "op_attrs": dics[15],
                                },
                                {
                                    "op_type": "dropout",
                                    "op_inputs": {
                                        "X": ["softmax_output"],
                                    },
                                    "op_outputs": {"Out": ["dropout3_output"]},
                                    "op_attrs": dics[16],
                                },
                                {
                                    "op_type": "matmul",
                                    "op_inputs": {
                                        "X": ["dropout3_output"],
                                        "Y": ["transpose21_output"],
                                    },
                                    "op_outputs": {"Out": ["matmul2_output"]},
                                    "op_attrs": dics[17],
                                },
                                {
                                    "op_type": "transpose2",
                                    "op_inputs": {"X": ["matmul2_output"]},
                                    "op_outputs": {
                                        "Out": ["transpose24_output"],
                                        "XShape": ["transpose24_output_xshape"],
                                    },
                                    "op_attrs": dics[18],
                                },
                                {
                                    "op_type": "reshape2",
                                    "op_inputs": {"X": ["transpose24_output"]},
                                    "op_outputs": {
                                        "Out": ["reshape24_output"],
                                        "XShape": ["reshape24_output_xshape"],
                                    },
                                    "op_attrs": dics[19],
                                },
                                # In order to fuse ops with
                                # multihead_matmul_fuse_pass_v2, the last op
                                # must be mul.
                                {
                                    "op_type": "mul",
                                    "op_inputs": {
                                        "X": ["reshape24_output"],
                                        "Y": ["mul4_weight"],
                                    },
                                    "op_outputs": {"Out": ["mul4_output"]},
                                    "op_attrs": dics[20],
                                },
                            ]
                            ops = self.generate_op_config(ops_config)

                            program_config = ProgramConfig(
                                ops=ops,
                                weights={
                                    "mul1_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul2_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul3_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    "mul4_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768, 768)
                                        )
                                    ),
                                    # The three biases are added with axis=2
                                    # to rank-3 mul outputs whose last dim is
                                    # 768, so each one is a (768,) vector.
                                    "elementwise_add1_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                    "elementwise_add2_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                    "elementwise_add3_weight": TensorConfig(
                                        data_gen=partial(
                                            generate_weight, (768,)
                                        )
                                    ),
                                },
                                inputs={
                                    "input_data1": TensorConfig(
                                        data_gen=partial(
                                            generate_input, (batch, dim1, 768)
                                        )
                                    ),
                                    "input_data2": TensorConfig(
                                        data_gen=partial(
                                            generate_input, input2_shape
                                        )
                                    ),
                                },
                                outputs=["mul4_output"],
                            )

                            yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        """Yield the dynamic-shape TensorRT predictor configs for this test.

        Two configurations are produced, both using the same dynamic shape
        ranges and the same workspace size:

        1. FP32 precision with float32 inputs, tolerance ``(1e-5, 1e-5)``.
        2. FP16 precision with float16 inputs, tolerance ``(1e-2, 1e-2)``.

        Args:
            program_config: The sampled program; its input dtype is switched
                in place via ``set_input_type`` before each yield.

        Yields:
            A 3-tuple ``(config, (1, 3), tolerance)`` where ``config`` is the
            result of ``self.create_inference_config()``.
            NOTE(review): ``(1, 3)`` is presumably the expected
            (TensorRT engine count, remaining op count) pair checked by the
            test harness -- confirm against the base class.
        """

        def generate_dynamic_shape(attrs):
            # ``attrs`` is accepted but not consulted: the shape ranges below
            # are fixed for this test.
            # The last dim of input1 and input2 should be static.
            # NOTE(review): "reshape24_output" is pinned to the same shape in
            # min/max/opt, i.e. it is not allowed to vary -- confirm intended.
            self.dynamic_shape.min_input_shape = {
                "input_data1": [1, 8, 768],
                "input_data2": [1, 1, 1, 128],
                "reshape24_output": [1, 128, 768],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data1": [16, 512, 768],
                "input_data2": [16, 256, 512, 128],
                "reshape24_output": [1, 128, 768],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data1": [8, 128, 768],
                "input_data2": [8, 32, 64, 128],
                "reshape24_output": [1, 128, 768],
            }

        attrs = [op.attrs for op in program_config.ops]

        # for dynamic_shape
        generate_dynamic_shape(attrs)

        # FP32 run. The workspace size is set once here and stays in effect
        # for the FP16 run below as well.
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        program_config.set_input_type(np.float32)
        # 2013265920 == 1920 * 1024 * 1024 (presumably bytes, i.e. 1920 MiB).
        self.trt_param.workspace_size = 2013265920
        yield self.create_inference_config(), (1, 3), (1e-5, 1e-5)

        # FP16 run, with a looser tolerance than the FP32 run.
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        program_config.set_input_type(np.float16)
        yield self.create_inference_config(), (1, 3), (1e-2, 1e-2)

    def test(self):
        # unittest entry point for this case: delegates to run_test(), which
        # is not defined in this part of the file (presumably inherited from
        # the auto-scan base test class -- confirm).
        self.run_test()


# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()