diff --git a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc index 0a238eadd95c9b7a09e1eecd5bb959804547ba7f..2ae972729f5e166c3a698fdca241fdbc6dac3c39 100644 --- a/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multihead_matmul_op.cc @@ -400,7 +400,8 @@ class MultiheadMatMulOpConverter : public OpConverter { } } else { if (input_dims.d[1] <= 384 && !bias_qk_attr && - engine_->precision() != AnalysisConfig::Precision::kFloat32) { + engine_->precision() != AnalysisConfig::Precision::kFloat32 && + platform::GetGPUComputeCapability(0) >= 75) { /* * input_dims.d[0]: batch(-1) * input_dims.d[1]: length:256