未验证 提交 cb7f736f 编写于 作者: W Wangzheee 提交者: GitHub

[Paddle Inference] Fix some transformer unit tests (#48929)

* Fix some transformer unit tests
上级 0db36aca
...@@ -67,7 +67,8 @@ class MatMulOpConverter : public OpConverter { ...@@ -67,7 +67,8 @@ class MatMulOpConverter : public OpConverter {
if (op_desc.HasAttr("support_int8") && if (op_desc.HasAttr("support_int8") &&
PADDLE_GET_CONST(bool, op_desc.GetAttr("support_int8")) && PADDLE_GET_CONST(bool, op_desc.GetAttr("support_int8")) &&
engine_->precision() == AnalysisConfig::Precision::kInt8 && engine_->precision() == AnalysisConfig::Precision::kInt8 &&
platform::GetGPUComputeCapability(0) >= 75) { platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
75) {
if (engine_->with_dynamic_shape()) { if (engine_->with_dynamic_shape()) {
VLOG(3) << "Convert a fluid matmul_op_int8_dynamic to TensorRT " VLOG(3) << "Convert a fluid matmul_op_int8_dynamic to TensorRT "
"MatmulPluginLayer"; "MatmulPluginLayer";
......
...@@ -88,11 +88,10 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -88,11 +88,10 @@ class MultiheadMatMulOpConverter : public OpConverter {
engine_->tensorrt_transformer_posid() != "" && engine_->tensorrt_transformer_posid() != "" &&
engine_->tensorrt_transformer_maskid() != ""; engine_->tensorrt_transformer_maskid() != "";
if (engine_->with_dynamic_shape()) { if (engine_->with_dynamic_shape()) {
if (engine_->tensorrt_transformer_maskid() != "") { if (engine_->tensorrt_transformer_maskid() != "" &&
if (engine_->precision() == AnalysisConfig::Precision::kFloat32) { engine_->precision() != AnalysisConfig::Precision::kFloat32 &&
PADDLE_THROW(platform::errors::Fatal( platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
"use use_varseqlen must be int8 or half, not float32.")); 75) {
}
nvinfer1::Weights weight{nvinfer1::DataType::kFLOAT, nvinfer1::Weights weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data), static_cast<void*>(weight_data),
static_cast<int32_t>(weight_t->numel())}; static_cast<int32_t>(weight_t->numel())};
...@@ -401,7 +400,8 @@ class MultiheadMatMulOpConverter : public OpConverter { ...@@ -401,7 +400,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
} else { } else {
if (input_dims.d[1] <= 384 && !bias_qk_attr && if (input_dims.d[1] <= 384 && !bias_qk_attr &&
engine_->precision() != AnalysisConfig::Precision::kFloat32 && engine_->precision() != AnalysisConfig::Precision::kFloat32 &&
platform::GetGPUComputeCapability(0) >= 75) { platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
75) {
/* /*
* input_dims.d[0]: batch(-1) * input_dims.d[0]: batch(-1)
* input_dims.d[1]: length:256 * input_dims.d[1]: length:256
......
...@@ -39,12 +39,12 @@ list( ...@@ -39,12 +39,12 @@ list(
generic_plugin.cu generic_plugin.cu
lookup_table.cu lookup_table.cu
many_emb_layernorm_plugin.cu many_emb_layernorm_plugin.cu
many_emb_Layernorm_kernel.cu) many_emb_layernorm_kernel.cu)
if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7) if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
list(APPEND TRT_FILES many_emb_layernorm_varseqlen_plugin.cu list(APPEND TRT_FILES many_emb_layernorm_varseqlen_plugin.cu
many_emb_Layernorm_varseqlen_kernel_mtron.cu many_emb_layernorm_varseqlen_kernel_mtron.cu
many_emb_Layernorm_varseqlen_kernel_hface.cu) many_emb_layernorm_varseqlen_kernel_hface.cu)
endif() endif()
if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8) if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
......
...@@ -139,6 +139,8 @@ static void trt_ernie(bool with_fp16, std::vector<float> result) { ...@@ -139,6 +139,8 @@ static void trt_ernie(bool with_fp16, std::vector<float> result) {
config.EnableTensorRtEngine(1 << 30, 1, 5, precision, true, false); config.EnableTensorRtEngine(1 << 30, 1, 5, precision, true, false);
config.SetTRTDynamicShapeInfo( config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape); min_input_shape, max_input_shape, opt_input_shape);
paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
&config, "read_file_0.tmp_4");
AnalysisConfig* config_deser = new AnalysisConfig(config); AnalysisConfig* config_deser = new AnalysisConfig(config);
std::vector<float> out_data; std::vector<float> out_data;
......
...@@ -133,6 +133,8 @@ void trt_ernie(bool with_fp16, ...@@ -133,6 +133,8 @@ void trt_ernie(bool with_fp16,
config.EnableTensorRtEngine(1 << 30, 1, 5, precision, false, false); config.EnableTensorRtEngine(1 << 30, 1, 5, precision, false, false);
config.SetTRTDynamicShapeInfo( config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape); min_input_shape, max_input_shape, opt_input_shape);
paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
&config, "read_file_0.tmp_4");
std::vector<float> out_data; std::vector<float> out_data;
run(config, &out_data, batch_size); run(config, &out_data, batch_size);
...@@ -423,7 +425,7 @@ void run(paddle_infer::Predictor* predictor, std::vector<float>* out_data) { ...@@ -423,7 +425,7 @@ void run(paddle_infer::Predictor* predictor, std::vector<float>* out_data) {
TEST(AnalysisPredictor, ernie_varlen) { TEST(AnalysisPredictor, ernie_varlen) {
#if IS_TRT_VERSION_GE(7234) #if IS_TRT_VERSION_GE(7234)
if (platform::GetGPUComputeCapability(0) >= 75) { if (platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >= 75) {
auto predictor = InitPredictor(); auto predictor = InitPredictor();
std::vector<float> out_data; std::vector<float> out_data;
run(predictor.get(), &out_data); run(predictor.get(), &out_data);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册