未验证 提交 cb7f736f 编写于 作者: W Wangzheee 提交者: GitHub

[Paddle Inference] fix some transformer unit tests (#48929)

* fix some transformer unit tests
上级 0db36aca
......@@ -67,7 +67,8 @@ class MatMulOpConverter : public OpConverter {
if (op_desc.HasAttr("support_int8") &&
PADDLE_GET_CONST(bool, op_desc.GetAttr("support_int8")) &&
engine_->precision() == AnalysisConfig::Precision::kInt8 &&
platform::GetGPUComputeCapability(0) >= 75) {
platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
75) {
if (engine_->with_dynamic_shape()) {
VLOG(3) << "Convert a fluid matmul_op_int8_dynamic to TensorRT "
"MatmulPluginLayer";
......
......@@ -88,11 +88,10 @@ class MultiheadMatMulOpConverter : public OpConverter {
engine_->tensorrt_transformer_posid() != "" &&
engine_->tensorrt_transformer_maskid() != "";
if (engine_->with_dynamic_shape()) {
if (engine_->tensorrt_transformer_maskid() != "") {
if (engine_->precision() == AnalysisConfig::Precision::kFloat32) {
PADDLE_THROW(platform::errors::Fatal(
"use use_varseqlen must be int8 or half, not float32."));
}
if (engine_->tensorrt_transformer_maskid() != "" &&
engine_->precision() != AnalysisConfig::Precision::kFloat32 &&
platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
75) {
nvinfer1::Weights weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
static_cast<int32_t>(weight_t->numel())};
......@@ -401,7 +400,8 @@ class MultiheadMatMulOpConverter : public OpConverter {
} else {
if (input_dims.d[1] <= 384 && !bias_qk_attr &&
engine_->precision() != AnalysisConfig::Precision::kFloat32 &&
platform::GetGPUComputeCapability(0) >= 75) {
platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >=
75) {
/*
* input_dims.d[0]: batch(-1)
* input_dims.d[1]: length:256
......
......@@ -39,12 +39,12 @@ list(
generic_plugin.cu
lookup_table.cu
many_emb_layernorm_plugin.cu
many_emb_Layernorm_kernel.cu)
many_emb_layernorm_kernel.cu)
if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
list(APPEND TRT_FILES many_emb_layernorm_varseqlen_plugin.cu
many_emb_Layernorm_varseqlen_kernel_mtron.cu
many_emb_Layernorm_varseqlen_kernel_hface.cu)
many_emb_layernorm_varseqlen_kernel_mtron.cu
many_emb_layernorm_varseqlen_kernel_hface.cu)
endif()
if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
......
......@@ -139,6 +139,8 @@ static void trt_ernie(bool with_fp16, std::vector<float> result) {
config.EnableTensorRtEngine(1 << 30, 1, 5, precision, true, false);
config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
&config, "read_file_0.tmp_4");
AnalysisConfig* config_deser = new AnalysisConfig(config);
std::vector<float> out_data;
......
......@@ -133,6 +133,8 @@ void trt_ernie(bool with_fp16,
config.EnableTensorRtEngine(1 << 30, 1, 5, precision, false, false);
config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
paddle_infer::experimental::InternalUtils::SetTransformerMaskid(
&config, "read_file_0.tmp_4");
std::vector<float> out_data;
run(config, &out_data, batch_size);
......@@ -423,7 +425,7 @@ void run(paddle_infer::Predictor* predictor, std::vector<float>* out_data) {
TEST(AnalysisPredictor, ernie_varlen) {
#if IS_TRT_VERSION_GE(7234)
if (platform::GetGPUComputeCapability(0) >= 75) {
if (platform::GetGPUComputeCapability(platform::GetCurrentDeviceId()) >= 75) {
auto predictor = InitPredictor();
std::vector<float> out_data;
run(predictor.get(), &out_data);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册