Unverified commit e8789c11, authored by Peihan, committed by GitHub

support ernie-int8 test and prune op attribute test (#35890)

* support ernie-int8 test and prune op attribute test

* remove using and use namespace

* remove macro and use shell instead

* Revert "remove macro and use shell instead"

This reverts commit 615964b149d7de7825b341936b42be22a4bc0091.

* fix grammar error

* fix shell error
Parent 7ebbcbbc
...@@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "") if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS) file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}") "${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
"${TENSORRT_VERSION_FILE_CONTENTS}")
endif()
if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
message(SEND_ERROR "Failed to detect TensorRT version.")
endif()
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. " message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ") "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ")
include_directories("${TENSORRT_INCLUDE_DIR}") include_directories("${TENSORRT_INCLUDE_DIR}")
link_directories("${TENSORRT_LIB_DIR}") link_directories("${TENSORRT_LIB_DIR}")
add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION})
add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION})
add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION})
add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION})
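# Example (illustrative values): with TensorRT 7.2.3.4 installed, the detected
# values would be TENSORRT_MAJOR_VERSION=7, TENSORRT_MINOR_VERSION=2,
# TENSORRT_PATCH_VERSION=3 and TENSORRT_BUILD_VERSION=4, so the test targets are
# built with -DNV_TENSORRT_MAJOR=7 -DNV_TENSORRT_MINOR=2 -DNV_TENSORRT_PATCH=3
# -DNV_TENSORRT_BUILD=4, which the IS_TRT_VERSION_* macros in test_suite.h consume.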
endif()
if(WITH_MKL)
...
...@@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do
download $url_prefix $model_name
done
# ernie int8 quant with matmul
unknown_nlp_download_list='quant_post_model_xnli_predict_matmul'
for model_name in $unknown_nlp_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp"
download $url_prefix $model_name
done
# MobileNetV1 with pruned op attributes
dev_class_download_list='MobileNetV1'
for model_name in $dev_class_download_list; do
url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class"
download $url_prefix $model_name
done
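# NOTE: judging from the --modeldir flags passed below, each downloaded model is
# expected to unpack to $DATA_DIR/<model_name>/<model_name>/.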
function compile_test() {
mkdir -p ${build_dir}
cd ${build_dir}
...@@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then
EXIT_CODE=8
fi
printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n";
compile_test "test_ernie_xnli_int8"
ernie_qat_model="quant_post_model_xnli_predict_matmul"
${exe_dir}/test_ernie_xnli_int8 \
--modeldir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model \
--datadir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/xnli_var_len \
--truth_data=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/truth_data \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_ernie_xnli_int8.xml
if [ $? -ne 0 ]; then
echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
printf "${YELLOW} start test_mobilnetv1 ${NC} \n";
compile_test "test_mobilnetv1"
${exe_dir}/test_mobilnetv1 \
--modeldir=$DATA_DIR/MobileNetV1/MobileNetV1 \
--gtest_filter=${test_suite_list} \
--gtest_output=xml:${log_dir}/test_mobilnetv1.xml
if [ $? -ne 0 ]; then
echo "${RED} test_mobilnetv1 runs failed ${NC}" >> ${exe_dir}/test_summary.txt
EXIT_CODE=8
fi
set +x
test_suites=$(echo ${test_suite_list} | sed 's/:/ /g')
...
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
DEFINE_string(datadir, "", "Path to the dataset file.");
DEFINE_string(truth_data, "", "Path to the ground-truth result file.");
namespace paddle_infer {
std::shared_ptr<Predictor> InitPredictor() {
Config config;
config.SetModel(FLAGS_modeldir + "/__model__",
FLAGS_modeldir + "/__params__");
config.EnableUseGpu(1000, 0);
// Enable memory optimization.
config.EnableMemoryOptim();
int max_batch = 32;
int max_single_seq_len = 128;
int opt_single_seq_len = 64;
int min_batch_seq_len = 1;
int max_batch_seq_len = 512;
int opt_batch_seq_len = 256;
std::string input_name0 = "eval_placeholder_0";
std::string input_name1 = "eval_placeholder_1";
std::string input_name2 = "eval_placeholder_2";
std::string input_name3 = "eval_placeholder_3";
std::vector<int> min_shape = {min_batch_seq_len};
std::vector<int> max_shape = {max_batch_seq_len};
std::vector<int> opt_shape = {opt_batch_seq_len};
// Set the input's min, max, opt shape
std::map<std::string, std::vector<int>> min_input_shape = {
{input_name0, min_shape},
{input_name1, min_shape},
{input_name2, {1}},
{input_name3, {1, min_batch_seq_len, 1}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{input_name0, max_shape},
{input_name1, max_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, max_single_seq_len, 1}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input_name0, opt_shape},
{input_name1, opt_shape},
{input_name2, {max_batch + 1}},
{input_name3, {1, opt_single_seq_len, 1}}};
// build the TensorRT engine with int8 precision
config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
false);
// ernie varlen must be used with dynamic shape
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
// ernie varlen must be used with oss
config.EnableTensorRtOSS();
return CreatePredictor(config);
}
// Parse tensor from string
template <typename T>
std::vector<T> ParseTensor(const std::string &field) {
std::string mat_str = field;
std::vector<T> mat;
paddle::test::Split(mat_str, ' ', &mat);
return mat;
}
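// Example: ParseTensor<int32_t>("1 12 45") returns {1, 12, 45}; each input
// line below is split on ';' into such space-separated fields.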
void run(Predictor *predictor, std::vector<float> *out_data) {
clock_t start, end;
start = clock();
CHECK(predictor->Run());
end = clock();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
out_data->resize(out_num);
output_t->CopyToCpu(out_data->data());
return;
}
auto PrepareOutput(std::string input_file) -> std::deque<float> {
std::ifstream fin(input_file);
std::string line;
std::vector<std::string> buffer;
while (std::getline(fin, line)) {
buffer.emplace_back(line);
}
std::deque<float> resDeque(buffer.size());
std::transform(buffer.begin(), buffer.end(), resDeque.begin(),
[](const std::string &val) { return std::stof(val); });
return resDeque;
} // PrepareOutput
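// The truth_data file is assumed to hold one ground-truth label per line
// (parsed with std::stof) and is consumed front-to-back as the samples run.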
TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
auto resDeque = PrepareOutput(FLAGS_truth_data);
auto predictor = InitPredictor();
ASSERT_FALSE(FLAGS_datadir.empty());
std::ifstream fin(FLAGS_datadir);
std::string line;
int lineno = 0;
const int max_seq_len = 128;
const int run_batch = 1;
int correct_num = 0;
while (std::getline(fin, line)) {
std::vector<std::string> fields;
paddle::test::Split(line, ';', &fields);
auto src_ids = ParseTensor<int32_t>(fields[0]);
auto sent_ids = ParseTensor<int32_t>(fields[1]);
auto pos_ids = ParseTensor<int64_t>(fields[2]);
int run_seq_len = src_ids.size();
int32_t i3[2] = {0, run_seq_len};
int32_t i4[max_seq_len] = {0};
auto input_names = predictor->GetInputNames();
// first input
auto input_t1 = predictor->GetInputHandle(input_names[0]);
input_t1->Reshape({run_seq_len});
input_t1->CopyFromCpu(src_ids.data());
// second input
auto input_t2 = predictor->GetInputHandle(input_names[1]);
input_t2->Reshape({run_seq_len});
input_t2->CopyFromCpu(sent_ids.data());
// third input
auto input_t3 = predictor->GetInputHandle(input_names[2]);
input_t3->Reshape({run_batch + 1});
input_t3->CopyFromCpu(i3);
// fourth input
auto input_t4 = predictor->GetInputHandle(input_names[3]);
input_t4->Reshape({1, max_seq_len, 1});
input_t4->CopyFromCpu(i4);
std::vector<float> out_data;
run(predictor.get(), &out_data);
lineno++;
int maxPosition =
max_element(out_data.begin(), out_data.end()) - out_data.begin();
if (maxPosition == resDeque[0]) {
correct_num += 1;
}
resDeque.pop_front();
VLOG(2) << "predict result: " << maxPosition;
for (auto r : out_data) {
VLOG(2) << r;
}
}
ASSERT_GT(correct_num,
4741);  // 5010 inputs in total; the int8 result should exceed 4741
LOG(INFO) << "=== finish oss test ===";
}
} // namespace paddle_infer
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
#if IS_TRT_VERSION_GE(7200)
return RUN_ALL_TESTS();
#endif
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <sstream>
#include <string>
#include <vector>
namespace paddle {
namespace test {
// split string to vector<string> by sep
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces, bool ignore_null = true) {
pieces->clear();
if (str.empty()) {
if (!ignore_null) {
pieces->push_back(str);
}
return;
}
size_t pos = 0;
size_t next = str.find(sep, pos);
while (next != std::string::npos) {
pieces->push_back(str.substr(pos, next - pos));
pos = next + 1;
next = str.find(sep, pos);
}
if (!str.substr(pos).empty()) {
pieces->push_back(str.substr(pos));
}
}
template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
(*ss) >> (*t);
}
template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
*t = ss->str();
}
// Split a string into a vector of T, using sep as the delimiter
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
std::stringstream ss;
T t;
for (auto c : line) {
if (c != sep) {
ss << c;
} else {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
if (!ss.str().empty()) {
GetValueFromStream<T>(&ss, &t);
v->push_back(std::move(t));
ss.str({});
ss.clear();
}
}
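// Example: Split<int32_t>("1 2 3", ' ', &v) yields v == {1, 2, 3}, and
// Split<std::string>("a;b", ';', &v) yields v == {"a", "b"}.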
} // namespace test
} // namespace paddle
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "test_helper.h" // NOLINT
#include "test_suite.h" // NOLINT
DEFINE_string(modeldir, "", "Directory of the inference model.");
namespace paddle_infer {
paddle::test::Record PrepareInput(int batch_size, int shape_size = 224) {
// init input data
int channel = 3;
int width = shape_size; // w = 224
int height = shape_size; // h = 224
paddle::test::Record image_Record;
int input_num = batch_size * channel * width * height;
std::vector<float> input_data(input_num, 1);
image_Record.data = input_data;
image_Record.shape = std::vector<int>{batch_size, channel, width, height};
image_Record.type = paddle::PaddleDType::FLOAT32;
return image_Record;
}
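// Example: PrepareInput(2, 448) returns a Record with shape {2, 3, 448, 448}
// filled with 1.0f, matching the input fed to the "x" tensor below.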
TEST(tensorrt_tester_mobilenetv1, tuned_dynamic_trt_fp32_bz2) {
bool tuned_shape = true;
std::string shape_range_info = FLAGS_modeldir + "/shape_range_info.pbtxt";
LOG(INFO) << "tensorrt tuned info saved to " << shape_range_info;
// init input data
std::map<std::string, paddle::test::Record> my_input_data_map;
my_input_data_map["x"] = PrepareInput(2, 448);
// init output data
std::map<std::string, paddle::test::Record> infer_output_data,
truth_output_data;
if (tuned_shape) {
// NOTE: shape_range_info is written out when the tuning predictor is
// destructed.
// prepare ground truth config
paddle_infer::Config tune_config;
tune_config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
tune_config.SwitchIrOptim(false);
tune_config.EnableUseGpu(1000, 0);
tune_config.CollectShapeRangeInfo(shape_range_info);
auto predictor_tune = paddle_infer::CreatePredictor(tune_config);
SingleThreadPrediction(predictor_tune.get(), &my_input_data_map,
&truth_output_data, 1);
}
// prepare inference config
paddle_infer::Config config;
config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
FLAGS_modeldir + "/inference.pdiparams");
config.EnableUseGpu(1000, 0);
config.EnableTensorRtEngine(
1 << 20, 2, 5, paddle_infer::PrecisionType::kFloat32, false, false);
config.EnableTunedTensorRtDynamicShape(shape_range_info, true);
LOG(INFO) << config.Summary();
paddle_infer::services::PredictorPool pred_pool(config, 1);
SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
&infer_output_data);
// check outputs
CompareRecord(&truth_output_data, &infer_output_data);
VLOG(1) << "finish test";
}
} // namespace paddle_infer
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
::google::ParseCommandLineFlags(&argc, &argv, true);
return RUN_ALL_TESTS();
}
...@@ -14,6 +14,7 @@
#pragma once
#include <math.h>
#include <algorithm>
#include <deque>
#include <fstream>
#include <future>
#include <iostream>
...@@ -31,6 +32,18 @@
namespace paddle {
namespace test {
#define IS_TRT_VERSION_GE(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)
#define IS_TRT_VERSION_LT(version) \
((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) < version)
#define TRT_VERSION \
NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD
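// Example (illustrative values): TensorRT 7.2.3.4 gives
// TRT_VERSION = 7 * 1000 + 2 * 100 + 3 * 10 + 4 = 7234, so
// IS_TRT_VERSION_GE(7200) is true and IS_TRT_VERSION_LT(7200) is false.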
class Record {
public:
std::vector<float> data;
...@@ -96,7 +109,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
switch (output_tensor->type()) {
case paddle::PaddleDType::INT64: {
VLOG(1) << "output_tensor dtype: int64";
std::vector<int64_t> out_data;
output_Record.type = paddle::PaddleDType::INT64;
out_data.resize(out_num);
...@@ -108,7 +121,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
break;
}
case paddle::PaddleDType::FLOAT32: {
VLOG(1) << "output_tensor dtype: float32";
std::vector<float> out_data;
output_Record.type = paddle::PaddleDType::FLOAT32;
out_data.resize(out_num);
...@@ -119,7 +132,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
break;
}
case paddle::PaddleDType::INT32: {
VLOG(1) << "output_tensor dtype: int32";
std::vector<int32_t> out_data;
output_Record.type = paddle::PaddleDType::INT32;
out_data.resize(out_num);
...@@ -139,10 +152,12 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
float epislon = 1e-5) {
for (const auto & [ key, value ] : *infer_output_data) {
auto truth_record = (*truth_output_data)[key];
VLOG(1) << "output name: " << key;
size_t numel = value.data.size() / sizeof(float);
EXPECT_EQ(value.data.size(), truth_record.data.size());
for (size_t i = 0; i < numel; ++i) {
VLOG(1) << "compare: " << value.data.data()[i] << ",\t"
<< truth_record.data.data()[i];
ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
epislon);
}
...