Unverified · Commit 7eeff7b1 authored by Leo Chen, committed by GitHub

Fix UT failure (#55360)

* Fix TRT multihead matmul UT failure
Parent fa084e5e
@@ -20,7 +20,10 @@ cc_library(
   paddle_infer_contrib
   SRCS paddle_infer_contrib.cc
   DEPS zero_copy_tensor)
-cc_library(paddle_pass_builder SRCS paddle_pass_builder.cc)
+cc_library(
+  paddle_pass_builder
+  SRCS paddle_pass_builder.cc
+  DEPS framework_proto)

 set(paddle_inference_api_deps
     reset_tensor_array
@@ -54,8 +57,8 @@ endif()
 cc_library(
   analysis_config
   SRCS analysis_config.cc
-  DEPS ${mkldnn_quantizer_cfg} paddle_inference_api lod_tensor
-       paddle_pass_builder table_printer utf8proc)
+  DEPS ${mkldnn_quantizer_cfg} paddle_inference_api paddle_pass_builder
+       table_printer utf8proc)

 if(WIN32)
   target_link_libraries(paddle_inference_api phi)
......
@@ -19,6 +19,9 @@
 #ifdef PADDLE_WITH_HIP
 #include <miopen/miopen.h>
 #endif
+#ifdef PADDLE_WITH_TENSORRT
+#include "paddle/fluid/inference/tensorrt/helper.h"
+#endif

 #include <glog/logging.h>
@@ -103,12 +106,16 @@ const std::vector<std::string> kTRTSubgraphPasses({
       "trt_multihead_matmul_fuse_pass_v3",     //
       "multihead_matmul_roformer_fuse_pass",   //
       "constant_folding_pass",                 //
+#ifdef PADDLE_WITH_TENSORRT
+#if !IS_TRT_VERSION_GE(8610)
       "trt_flash_multihead_matmul_fuse_pass",  //
       "trt_cross_multihead_matmul_fuse_pass",  //
+#endif
+#endif
       "vit_attention_fuse_pass",               //
       "trt_qk_multihead_matmul_fuse_pass",     //
       "layernorm_shift_partition_fuse_pass",   //
       "merge_layernorm_fuse_pass",             //
 #if !defined _WIN32
       "split_layernorm_to_math_ops_pass",      //
 #endif
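Note: IS_TRT_VERSION_GE comes from the newly included "paddle/fluid/inference/tensorrt/helper.h". Assuming Paddle's usual version encoding (major*1000 + minor*100 + patch*10 + build), the literal 8610 corresponds to TensorRT 8.6.1.0, so the flash/cross multihead-matmul fuse passes stay registered only for TensorRT builds older than 8.6.1. A minimal Python sketch of that assumed encoding (trt_version_code is a hypothetical helper, not Paddle API):

    def trt_version_code(major, minor, patch, build=0):
        # Assumed encoding behind IS_TRT_VERSION_GE(8610).
        return major * 1000 + minor * 100 + patch * 10 + build

    assert trt_version_code(8, 6, 1) == 8610    # the guard threshold above
    assert trt_version_code(8, 5, 3, 1) < 8610  # older TRT keeps both passes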
......
@@ -168,7 +168,6 @@ endif()
 if(WITH_GPU AND TENSORRT_FOUND)
   set_tests_properties(test_trt_subgraph_pass PROPERTIES TIMEOUT 120)
   # longer timeout for trt_activation_pass for longer trt optimization time in trt 8
-  set_tests_properties(test_trt_activation_pass PROPERTIES TIMEOUT 250)
   set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
   #set_tests_properties(test_trt_multiclass_nms_op PROPERTIES TIMEOUT 200)
   set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
import unittest

import numpy as np
from inference_pass_test import InferencePassTest

import paddle
import paddle.nn.functional as F
from paddle import fluid
from paddle.fluid import core
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
from paddle.static import nn


class TensorRTSubgraphPassActivationTest(InferencePassTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False
        )

    def setUp(self):
        self.setUpTensorRTParam()
        with fluid.program_guard(self.main_program, self.startup_program):
            data = paddle.static.data(
                name="data", shape=[-1, 6, 32, 32], dtype="float32"
            )
            act_out = self.append_act(data)
            out = nn.batch_norm(act_out, is_test=True)
        self.feeds = {
            "data": np.random.random([1, 6, 32, 32]).astype("float32"),
        }
        self.fetch_list = [out]

    def append_act(self, x):
        return F.relu(x)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            opt_path = os.path.join(self.path, '_opt_cache')
            if os.path.exists(opt_path):
                shutil.rmtree(opt_path)
            if (
                self.trt_parameters.precision
                == AnalysisConfig.Precision.Float32
            ):
                self.check_output_with_option(use_gpu)
            else:
                self.check_output_with_option(use_gpu, 1e-3)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')
            )


class TensorRTSubgraphPassLeakyReluTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.leaky_relu(x)


class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.relu6(x)


class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.softmax(x)


class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.sigmoid(x)


class TensorRTSubgraphPassHardSwishTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.hardswish(x)


class TensorRTSubgraphPassHardSigmoidTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.hardsigmoid(x)


class TensorRTSubgraphPassHardSwishPluginTest(
    TensorRTSubgraphPassActivationTest
):
    def append_act(self, x):
        return paddle.nn.functional.hardswish(x)


class TensorRTSubgraphPassClipTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.clip(x, 0, 1)


class TensorRTSubgraphPassTanhTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.tanh(x)


class TensorRTSubgraphPassSwishTest(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False
        )

    def append_act(self, x):
        return paddle.nn.functional.swish(x)


class TensorRTSubgraphPassSwishFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )

    def append_act(self, x):
        return paddle.nn.functional.swish(x)


class TensorRTSubgraphPassDynamicSwishFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.nn.functional.swish(x)


class TensorRTSubgraphPassMishTest(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, True, False
        )

    def append_act(self, x):
        return paddle.nn.functional.mish(x)


class TensorRTSubgraphPassMishFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )

    def append_act(self, x):
        return paddle.nn.functional.mish(x)


class TensorRTSubgraphPassDynamicMishFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.nn.functional.mish(x)


class TensorRTSubgraphPassPreluAllTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassPreluChannelTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='channel')


class TensorRTSubgraphPassPreluElementTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='element')


class TensorRTSubgraphPassPreluDynamicTest(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassPreluFp16Test(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False
        )

    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassPreluFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )

    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassPreluFp16DynamicTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassPreluFp16DynamicSerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.static.nn.prelu(x, mode='all')


class TensorRTSubgraphPassGeluTest(TensorRTSubgraphPassActivationTest):
    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


class TensorRTSubgraphPassGeluDynamicTest(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


class TensorRTSubgraphPassGeluFp16Test(TensorRTSubgraphPassActivationTest):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False
        )

    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


class TensorRTSubgraphPassGeluFp16SerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )

    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


class TensorRTSubgraphPassGeluFp16DynamicTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, False, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


class TensorRTSubgraphPassGeluFp16DynamicSerializeTest(
    TensorRTSubgraphPassActivationTest
):
    def setUpTensorRTParam(self):
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassActivationTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Half, True, False
        )
        self.dynamic_shape_params = (
            TensorRTSubgraphPassActivationTest.DynamicShapeParam(
                {'data': [1, 6, 8, 8]},
                {'data': [1, 6, 128, 128]},
                {'data': [1, 6, 64, 64]},
                False,
            )
        )

    def append_act(self, x):
        return paddle.nn.functional.gelu(x)


if __name__ == "__main__":
    unittest.main()
@@ -28,17 +28,14 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):
         return True

     def sample_program_configs(self):
-        def generate_input1(batch, dim1):
-            return np.full((batch, dim1, 768), 1).astype(np.float32)
-
-        def generate_input2(shape):
-            return np.full(shape, 1).astype(np.float32)
-
-        def generate_weight1():
-            return np.full((768, 768), 0.1).astype(np.float32)
-
-        def generate_weight2():
-            return np.full((768), 0.1).astype(np.float32)
+        def generate_input(shape):
+            return np.full(shape, 0.1).astype(np.float32)
+
+        def generate_weight(shape):
+            return (
+                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
+                - 0.1
+            )

         for batch in [1, 4]:
             self.batch = batch
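The rewritten helpers replace the old constant fills (all-ones inputs, all-0.1 weights) with small, bounded random values, presumably so the fused multihead-matmul output is better conditioned numerically under TRT. A standalone sketch of the value set the new generate_weight draws from:

    import numpy as np

    def generate_weight(shape):
        # As in the hunk above: uniform draws rounded to one decimal,
        # then scaled and shifted into roughly [-0.1, 0.1].
        return (
            np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
            - 0.1
        )

    w = generate_weight((768, 768))
    # round(decimals=1) quantizes the draws to {0.0, 0.1, ..., 1.0}, so the
    # weights take at most 11 distinct values evenly spaced across the range.
    assert len(np.unique(w)) <= 11
    assert w.min() >= -0.1 - 1e-6 and w.max() <= 0.1 + 1e-6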
@@ -303,36 +300,50 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):
                    ops=ops,
                    weights={
                        "mul1_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul2_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul3_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul4_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "elementwise_add1_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                        "elementwise_add2_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                        "elementwise_add3_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                    },
                    inputs={
                        "input_data1": TensorConfig(
                            data_gen=partial(
-                                generate_input1, batch, dim1
+                                generate_input, (batch, dim1, 768)
                            )
                        ),
                        "input_data2": TensorConfig(
                            data_gen=partial(
-                                generate_input2, input2_shape
+                                generate_input, input2_shape
                            )
                        ),
                    },
@@ -375,9 +386,9 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
         self.trt_param.workspace_size = 2013265920
-        yield self.create_inference_config(), (1, 3), (1e-5, 1e-4)
+        yield self.create_inference_config(), (1, 3), (1e-5, 1e-5)
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 3), (1e-3, 1e-2)
+        yield self.create_inference_config(), (1, 3), (1e-2, 1e-2)

     def test(self):
         self.run_test()
@@ -385,17 +396,14 @@ class TrtConvertMultiHeadMatmulTest(TrtLayerAutoScanTest):

 class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest):
     def sample_program_configs(self):
-        def generate_input1(batch, dim1):
-            return np.full((batch, dim1, 768), 1).astype(np.float32)
-
-        def generate_input2(shape):
-            return np.full(shape, 1).astype(np.float32)
-
-        def generate_weight1():
-            return np.full((768, 768), 0.1).astype(np.float32)
-
-        def generate_weight2():
-            return np.full((768), 0.1).astype(np.float32)
+        def generate_input(shape):
+            return np.full(shape, 0.1).astype(np.float32)
+
+        def generate_weight(shape):
+            return (
+                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
+                - 0.1
+            )

         for batch in [4]:
             self.batch = batch
@@ -684,36 +692,50 @@ class TrtConvertMultiHeadMatmulTestInt8(TrtConvertMultiHeadMatmulTest):
                    ops=ops,
                    weights={
                        "mul1_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul2_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul3_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul4_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "elementwise_add1_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                        "elementwise_add2_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                        "elementwise_add3_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                    },
                    inputs={
                        "input_data1": TensorConfig(
                            data_gen=partial(
-                                generate_input1, batch, dim1
+                                generate_input, (batch, dim1, 768)
                            )
                        ),
                        "input_data2": TensorConfig(
                            data_gen=partial(
-                                generate_input2, input2_shape
+                                generate_input, input2_shape
                            )
                        ),
                    },
@@ -731,11 +753,11 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
         def generate_input1(batch, length):
             return np.full((batch, length, 768), 0.1).astype(np.float32)

-        def generate_weight1():
-            return np.full((768, 2304), 0.1).astype(np.float32)
-
-        def generate_weight2():
-            return np.full((2304), 0.1).astype(np.float32)
+        def generate_weight(shape):
+            return (
+                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
+                - 0.1
+            )

         for batch in [4]:
             self.batch = batch
@@ -911,10 +933,10 @@ class TrtConvertVitToMultiHeadMatmulTest(TrtLayerAutoScanTest):
                    ops=ops,
                    weights={
                        "matmul1_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(generate_weight, (768, 2304))
                        ),
                        "elementwise_add1_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(generate_weight, (2304,))
                        ),
                    },
                    inputs={
@@ -985,20 +1007,14 @@ class TrtConvertMultiHeadMatmulTest_biasqk_seqseq(TrtLayerAutoScanTest):
         return True

     def sample_program_configs(self):
-        def generate_input1(batch, dim1):
-            return np.full((batch, dim1, 768), 1).astype(np.float32)
-
-        def generate_input2(shape):
-            return np.full(shape, 1).astype(np.float32)
-
-        def generate_weight1():
-            return np.full((768, 768), 0.1).astype(np.float32)
-
-        def generate_weight2():
-            return np.full((768), 0.1).astype(np.float32)
-
-        def generate_weight3():
-            return np.full((768, 768), 0.1).astype(np.float32)
+        def generate_input(shape):
+            return np.full(shape, 0.1).astype(np.float32)
+
+        def generate_weight(shape):
+            return (
+                np.random.rand(*shape).astype(np.float32).round(decimals=1) / 5
+                - 0.1
+            )

         for batch in [2]:
             self.batch = batch
@@ -1263,36 +1279,50 @@ class TrtConvertMultiHeadMatmulTest_biasqk_seqseq(TrtLayerAutoScanTest):
                    ops=ops,
                    weights={
                        "mul1_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul2_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul3_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "mul4_weight": TensorConfig(
-                            data_gen=partial(generate_weight1)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "elementwise_add1_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                        "elementwise_add2_weight": TensorConfig(
-                            data_gen=partial(generate_weight3)
+                            data_gen=partial(
+                                generate_weight, (768, 768)
+                            )
                        ),
                        "elementwise_add3_weight": TensorConfig(
-                            data_gen=partial(generate_weight2)
+                            data_gen=partial(
+                                generate_weight, (768,)
+                            )
                        ),
                    },
                    inputs={
                        "input_data1": TensorConfig(
                            data_gen=partial(
-                                generate_input1, batch, dim1
+                                generate_input, (batch, dim1, 768)
                            )
                        ),
                        "input_data2": TensorConfig(
                            data_gen=partial(
-                                generate_input2, input2_shape
+                                generate_input, input2_shape
                            )
                        ),
                    },
@@ -1335,9 +1365,9 @@ class TrtConvertMultiHeadMatmulTest_biasqk_seqseq(TrtLayerAutoScanTest):
         generate_dynamic_shape(attrs)
         self.trt_param.precision = paddle_infer.PrecisionType.Float32
         self.trt_param.workspace_size = 2013265920
-        yield self.create_inference_config(), (1, 3), (1e-5, 1e-4)
+        yield self.create_inference_config(), (1, 3), (1e-5, 1e-5)
         self.trt_param.precision = paddle_infer.PrecisionType.Half
-        yield self.create_inference_config(), (1, 3), (1e-3, 1e-2)
+        yield self.create_inference_config(), (1, 3), (1e-2, 1e-2)

     def test(self):
         self.run_test()
......
@@ -23,7 +23,9 @@ from paddle import fluid

 class TestMathOpPatches(unittest.TestCase):
+    @classmethod
     def setUp(self):
+        np.random.seed(1024)
         paddle.enable_static()

     @prog_scope()
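Seeding NumPy's global RNG in setUp makes every subsequent np.random draw in the test deterministic across runs, removing flakiness from randomly generated operands. A minimal sketch of the effect:

    import numpy as np

    np.random.seed(1024)
    a = np.random.rand(3)
    np.random.seed(1024)
    b = np.random.rand(3)
    assert np.array_equal(a, b)  # identical seed, identical draws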
......
@@ -591,7 +591,7 @@ class TestPostTrainingAvgONNXFormatForMobilenetv1TensorRT(
 ):
     def test_post_training_onnx_format_mobilenetv1_tensorrt(self):
         model = "MobileNet-V1"
-        algo = "avg"
+        algo = "KL"
         round_type = "round"
         data_urls = [
             'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileNetV1_infer.tar'
......