diff --git a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
index 50d56b3e59ae7f70ac889d1d9cee0ba3cbe801da..9d590509a1eb6bea24d47c6d0d40e8711395cfac 100644
--- a/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/infer_ut/CMakeLists.txt
@@ -93,20 +93,42 @@ if (USE_TENSORRT AND WITH_GPU)
   file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
   string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
     "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
+  string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+    "${TENSORRT_VERSION_FILE_CONTENTS}")
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
     file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h TENSORRT_VERSION_FILE_CONTENTS)
     string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
       "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_MINOR +([0-9]+)" TENSORRT_MINOR_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_PATCH +([0-9]+)" TENSORRT_PATCH_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
+    string(REGEX MATCH "define NV_TENSORRT_BUILD +([0-9]+)" TENSORRT_BUILD_VERSION
+      "${TENSORRT_VERSION_FILE_CONTENTS}")
   endif()
   if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
     message(SEND_ERROR "Failed to detect TensorRT version.")
   endif()
   string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
     TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_MINOR +([0-9]+)" "\\1"
+    TENSORRT_MINOR_VERSION "${TENSORRT_MINOR_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_PATCH +([0-9]+)" "\\1"
+    TENSORRT_PATCH_VERSION "${TENSORRT_PATCH_VERSION}")
+  string(REGEX REPLACE "define NV_TENSORRT_BUILD +([0-9]+)" "\\1"
+    TENSORRT_BUILD_VERSION "${TENSORRT_BUILD_VERSION}")
   message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
-    "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
") + "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} ") include_directories("${TENSORRT_INCLUDE_DIR}") link_directories("${TENSORRT_LIB_DIR}") + add_compile_definitions(NV_TENSORRT_MAJOR=${TENSORRT_MAJOR_VERSION}) + add_compile_definitions(NV_TENSORRT_MINOR=${TENSORRT_MINOR_VERSION}) + add_compile_definitions(NV_TENSORRT_PATCH=${TENSORRT_PATCH_VERSION}) + add_compile_definitions(NV_TENSORRT_BUILD=${TENSORRT_BUILD_VERSION}) endif() if(WITH_MKL) diff --git a/paddle/fluid/inference/tests/infer_ut/run.sh b/paddle/fluid/inference/tests/infer_ut/run.sh index 1547071e75d4976f7725578b86d67f775dbdd5a7..dd4b64f28d739776ee750205d41b4dce35a97640 100755 --- a/paddle/fluid/inference/tests/infer_ut/run.sh +++ b/paddle/fluid/inference/tests/infer_ut/run.sh @@ -115,6 +115,20 @@ for model_name in $unknown_download_list; do download $url_prefix $model_name done +# ernie int8 quant with matmul +unknown_nlp_download_list='quant_post_model_xnli_predict_matmul' +for model_name in $unknown_nlp_download_list; do + url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/unknown/nlp" + download $url_prefix $model_name +done + +# mobilnetv1 with prune op attribute +dev_class_download_list='MobileNetV1' +for model_name in $dev_class_download_list; do + url_prefix="https://paddle-qa.bj.bcebos.com/inference_model/2021-09-16/class" + download $url_prefix $model_name +done + function compile_test() { mkdir -p ${build_dir} cd ${build_dir} @@ -255,6 +269,31 @@ if [ $? -ne 0 ]; then EXIT_CODE=8 fi +printf "${YELLOW} start test_ernie_xnli_int8 ${NC} \n"; +compile_test "test_ernie_xnli_int8" +ernie_qat_model="quant_post_model_xnli_predict_matmul" +${exe_dir}/test_ernie_xnli_int8 \ + --modeldir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model \ + --datadir=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/xnli_var_len \ + --truth_data=$DATA_DIR/$ernie_qat_model/$ernie_qat_model/truth_data \ + --gtest_filter=${test_suite_list} \ + --gtest_output=xml:${log_dir}/test_ernie_xnli_int8.xml +if [ $? -ne 0 ]; then + echo "${RED} test_ernie_xnli_int8 runs failed ${NC}" >> ${exe_dir}/test_summary.txt + EXIT_CODE=8 +fi + +printf "${YELLOW} start test_mobilnetv1 ${NC} \n"; +compile_test "test_mobilnetv1" +${exe_dir}/test_mobilnetv1 \ + --modeldir=$DATA_DIR/MobileNetV1/MobileNetV1 \ + --gtest_filter=${test_suite_list} \ + --gtest_output=xml:${log_dir}/test_mobilnetv1.xml +if [ $? -ne 0 ]; then + echo "${RED} test_mobilnetv1 runs failed ${NC}" >> ${exe_dir}/test_summary.txt + EXIT_CODE=8 +fi + set +x test_suites=$(echo ${test_suite_list} | sed 's/:/ /g') diff --git a/paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc b/paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..1e57103d74bfdc25d59dde62742fbb178fd0a309 --- /dev/null +++ b/paddle/fluid/inference/tests/infer_ut/test_ernie_xnli_int8.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_helper.h"  // NOLINT
+#include "test_suite.h"   // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+DEFINE_string(datadir, "", "dataset.");
+DEFINE_string(truth_data, "", "Directory of the inference data truth result");
+
+namespace paddle_infer {
+
+std::shared_ptr<Predictor> InitPredictor() {
+  Config config;
+  config.SetModel(FLAGS_modeldir + "/__model__",
+                  FLAGS_modeldir + "/__params__");
+  config.EnableUseGpu(1000, 0);
+  // Enable memory optimization.
+  config.EnableMemoryOptim();
+
+  int max_batch = 32;
+  int max_single_seq_len = 128;
+  int opt_single_seq_len = 64;
+  int min_batch_seq_len = 1;
+  int max_batch_seq_len = 512;
+  int opt_batch_seq_len = 256;
+
+  std::string input_name0 = "eval_placeholder_0";
+  std::string input_name1 = "eval_placeholder_1";
+  std::string input_name2 = "eval_placeholder_2";
+  std::string input_name3 = "eval_placeholder_3";
+
+  std::vector<int> min_shape = {min_batch_seq_len};
+  std::vector<int> max_shape = {max_batch_seq_len};
+  std::vector<int> opt_shape = {opt_batch_seq_len};
+  // Set the input's min, max, opt shape
+  std::map<std::string, std::vector<int>> min_input_shape = {
+      {input_name0, min_shape},
+      {input_name1, min_shape},
+      {input_name2, {1}},
+      {input_name3, {1, min_batch_seq_len, 1}}};
+  std::map<std::string, std::vector<int>> max_input_shape = {
+      {input_name0, max_shape},
+      {input_name1, max_shape},
+      {input_name2, {max_batch + 1}},
+      {input_name3, {1, max_single_seq_len, 1}}};
+  std::map<std::string, std::vector<int>> opt_input_shape = {
+      {input_name0, opt_shape},
+      {input_name1, opt_shape},
+      {input_name2, {max_batch + 1}},
+      {input_name3, {1, opt_single_seq_len, 1}}};
+
+  // use int8 precision for the quantized ernie model
+  config.EnableTensorRtEngine(1 << 30, 1, 5, Config::Precision::kInt8, false,
+                              false);
+  // ernie varlen must be used with dynamic shape
+  config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
+                                opt_input_shape);
+  // ernie varlen must be used with oss
+  config.EnableTensorRtOSS();
+
+  return CreatePredictor(config);
+}
+
+// Parse tensor from string
+template <typename T>
+std::vector<T> ParseTensor(const std::string &field) {
+  std::string mat_str = field;
+
+  std::vector<T> mat;
+  paddle::test::Split(mat_str, ' ', &mat);
+
+  return mat;
+}
+
+void run(Predictor *predictor, std::vector<float> *out_data) {
+  clock_t start, end;
+  start = clock();
+  CHECK(predictor->Run());
+  end = clock();
+
+  auto output_names = predictor->GetOutputNames();
+  auto output_t = predictor->GetOutputHandle(output_names[0]);
+  std::vector<int> output_shape = output_t->shape();
+  int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
+                                std::multiplies<int>());
+  out_data->resize(out_num);
+  output_t->CopyToCpu(out_data->data());
+  return;
+}
+
+auto PrepareOutput(std::string input_file) -> std::deque<float> {
+  std::ifstream fin(input_file);
+  std::string line;
+  std::vector<std::string> buffer;
+  while (std::getline(fin, line)) {
+    buffer.emplace_back(line);
+  }
+  std::deque<float> resDeque(buffer.size());
+  std::transform(buffer.begin(), buffer.end(), resDeque.begin(),
+                 [](const std::string &val) { return std::stof(val); });
+
+  return resDeque;
+}  // PrepareOutput
+
+TEST(tensorrt_tester_ernie_xnli, oss_varlen_truth_data_int8) {
+  auto resDeque = PrepareOutput(FLAGS_truth_data);
+  auto predictor = InitPredictor();
+
+  ASSERT_FALSE(FLAGS_datadir.empty());
+  std::ifstream fin(FLAGS_datadir);
+  std::string line;
+
+  int lineno = 0;
+  const int max_seq_len = 128;
+  const int run_batch = 1;
+  int correct_num = 0;
+  while (std::getline(fin, line)) {
+    std::vector<std::string> fields;
+    paddle::test::Split(line, ';', &fields);
+
+    auto src_ids = ParseTensor<int32_t>(fields[0]);
+    auto sent_ids = ParseTensor<int32_t>(fields[1]);
+    auto pos_ids = ParseTensor<int32_t>(fields[2]);
+
+    int run_seq_len = src_ids.size();
+    int32_t i3[2] = {0, run_seq_len};
+    int32_t i4[max_seq_len] = {0};
+
+    auto input_names = predictor->GetInputNames();
+
+    // first input
+    auto input_t1 = predictor->GetInputHandle(input_names[0]);
+    input_t1->Reshape({run_seq_len});
+    input_t1->CopyFromCpu(src_ids.data());
+
+    // second input
+    auto input_t2 = predictor->GetInputHandle(input_names[1]);
+    input_t2->Reshape({run_seq_len});
+    input_t2->CopyFromCpu(sent_ids.data());
+
+    // third input
+    auto input_t3 = predictor->GetInputHandle(input_names[2]);
+    input_t3->Reshape({run_batch + 1});
+    input_t3->CopyFromCpu(i3);
+
+    // fourth input
+    auto input_t4 = predictor->GetInputHandle(input_names[3]);
+    input_t4->Reshape({1, max_seq_len, 1});
+    input_t4->CopyFromCpu(i4);
+
+    std::vector<float> out_data;
+    run(predictor.get(), &out_data);
+
+    lineno++;
+    int maxPosition =
+        std::max_element(out_data.begin(), out_data.end()) - out_data.begin();
+
+    if (maxPosition == resDeque[0]) {
+      correct_num += 1;
+    }
+    resDeque.pop_front();
+
+    VLOG(2) << "predict result: " << maxPosition;
+    for (auto r : out_data) {
+      VLOG(2) << r;
+    }
+  }
+  ASSERT_GT(correct_num,
+            4741);  // total input 5010, int8 result should be greater than 4741
+  LOG(INFO) << "=== finish oss test ===";
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+#if IS_TRT_VERSION_GE(7200)
+  return RUN_ALL_TESTS();
+#endif
+  return 0;
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_helper.h b/paddle/fluid/inference/tests/infer_ut/test_helper.h
new file mode 100644
index 0000000000000000000000000000000000000000..4732f95543bc4b0380ea28a3505107c61f010c48
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_helper.h
@@ -0,0 +1,79 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace paddle {
+namespace test {
+
+// split string to vector by sep
+static void split(const std::string &str, char sep,
+                  std::vector<std::string> *pieces, bool ignore_null = true) {
+  pieces->clear();
+  if (str.empty()) {
+    if (!ignore_null) {
+      pieces->push_back(str);
+    }
+    return;
+  }
+  size_t pos = 0;
+  size_t next = str.find(sep, pos);
+  while (next != std::string::npos) {
+    pieces->push_back(str.substr(pos, next - pos));
+    pos = next + 1;
+    next = str.find(sep, pos);
+  }
+  if (!str.substr(pos).empty()) {
+    pieces->push_back(str.substr(pos));
+  }
+}
+
+template <typename T>
+void GetValueFromStream(std::stringstream *ss, T *t) {
+  (*ss) >> (*t);
+}
+
+template <>
+void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
+  *t = ss->str();
+}
+
+// Split a string into a vector of T by sep
+template <typename T>
+void Split(const std::string &line, char sep, std::vector<T> *v) {
+  std::stringstream ss;
+  T t;
+  for (auto c : line) {
+    if (c != sep) {
+      ss << c;
+    } else {
+      GetValueFromStream<T>(&ss, &t);
+      v->push_back(std::move(t));
+      ss.str({});
+      ss.clear();
+    }
+  }
+
+  if (!ss.str().empty()) {
+    GetValueFromStream<T>(&ss, &t);
+    v->push_back(std::move(t));
+    ss.str({});
+    ss.clear();
+  }
+}
+
+}  // namespace test
+}  // namespace paddle
diff --git a/paddle/fluid/inference/tests/infer_ut/test_mobilnetv1.cc b/paddle/fluid/inference/tests/infer_ut/test_mobilnetv1.cc
new file mode 100644
index 0000000000000000000000000000000000000000..21991d0da06a17efb97a2b18b4d6b0ed7a53b886
--- /dev/null
+++ b/paddle/fluid/inference/tests/infer_ut/test_mobilnetv1.cc
@@ -0,0 +1,86 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "test_helper.h"  // NOLINT
+#include "test_suite.h"   // NOLINT
+
+DEFINE_string(modeldir, "", "Directory of the inference model.");
+
+namespace paddle_infer {
+
+paddle::test::Record PrepareInput(int batch_size, int shape_size = 224) {
+  // init input data
+  int channel = 3;
+  int width = shape_size;   // w = 224
+  int height = shape_size;  // h = 224
+  paddle::test::Record image_Record;
+  int input_num = batch_size * channel * width * height;
+  std::vector<float> input_data(input_num, 1);
+  image_Record.data = input_data;
+  image_Record.shape = std::vector<int>{batch_size, channel, width, height};
+  image_Record.type = paddle::PaddleDType::FLOAT32;
+  return image_Record;
+}
+
+TEST(tensorrt_tester_mobilenetv1, tuned_dynamic_trt_fp32_bz2) {
+  bool tuned_shape = true;
+  std::string shape_range_info = FLAGS_modeldir + "/shape_range_info.pbtxt";
+  LOG(INFO) << "tensorrt tuned info saved to " << shape_range_info;
+
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2, 448);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  if (tuned_shape) {
+    // NOTE: shape_range_info is saved when the tuning predictor is destroyed
+    // prepare groundtruth config
+    paddle_infer::Config tune_config;
+    tune_config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                         FLAGS_modeldir + "/inference.pdiparams");
+    tune_config.SwitchIrOptim(false);
+    tune_config.EnableUseGpu(1000, 0);
+    tune_config.CollectShapeRangeInfo(shape_range_info);
+
+    auto predictor_tune = paddle_infer::CreatePredictor(tune_config);
+    SingleThreadPrediction(predictor_tune.get(), &my_input_data_map,
+                           &truth_output_data, 1);
+  }
+
+  // prepare inference config
+  paddle_infer::Config config;
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(1000, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 5, paddle_infer::PrecisionType::kFloat32, false, false);
+  config.EnableTunedTensorRtDynamicShape(shape_range_info, true);
+  LOG(INFO) << config.Summary();
+  paddle_infer::services::PredictorPool pred_pool(config, 1);
+  SingleThreadPrediction(pred_pool.Retrive(0), &my_input_data_map,
+                         &infer_output_data);
+  // check outputs
+  CompareRecord(&truth_output_data, &infer_output_data);
+  VLOG(1) << "finish test";
+}
+
+}  // namespace paddle_infer
+
+int main(int argc, char** argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  ::google::ParseCommandLineFlags(&argc, &argv, true);
+  return RUN_ALL_TESTS();
+}
diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h
index 0b580cd7c7e8624f49ab9f869af2b66328e33d59..a5c8c52402180adafd25255cf754e1af3a5a5498 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_suite.h
+++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h
@@ -14,6 +14,7 @@
 #pragma once
 #include
 #include
+#include
 #include
 #include
 #include
@@ -31,6 +32,18 @@
 namespace paddle {
 namespace test {
 
+#define IS_TRT_VERSION_GE(version)                       \
+  ((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
+    NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) >= version)
+
+#define IS_TRT_VERSION_LT(version)                       \
+  ((NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + \
+    NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD) < version)
+
+#define TRT_VERSION                                      \
+  NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 +   \
+      NV_TENSORRT_PATCH * 10 + NV_TENSORRT_BUILD
+
 class Record {
  public:
   std::vector<float> data;
@@ -96,7 +109,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
   switch (output_tensor->type()) {
     case paddle::PaddleDType::INT64: {
-      std::cout << "int64" << std::endl;
+      VLOG(1) << "output_tensor dtype: int64";
       std::vector<int64_t> out_data;
       output_Record.type = paddle::PaddleDType::INT64;
       out_data.resize(out_num);
@@ -108,7 +121,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
       break;
     }
     case paddle::PaddleDType::FLOAT32: {
-      std::cout << "float32" << std::endl;
+      VLOG(1) << "output_tensor dtype: float32";
       std::vector<float> out_data;
       output_Record.type = paddle::PaddleDType::FLOAT32;
       out_data.resize(out_num);
@@ -119,7 +132,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor,
       break;
     }
     case paddle::PaddleDType::INT32: {
-      std::cout << "int32" << std::endl;
+      VLOG(1) << "output_tensor dtype: int32";
       std::vector<int32_t> out_data;
       output_Record.type = paddle::PaddleDType::INT32;
       out_data.resize(out_num);
@@ -139,10 +152,12 @@ void CompareRecord(std::map<std::string, Record> *truth_output_data,
                    float epislon = 1e-5) {
   for (const auto & [ key, value ] : *infer_output_data) {
     auto truth_record = (*truth_output_data)[key];
-    LOG(INFO) << "output name: " << key;
+    VLOG(1) << "output name: " << key;
     size_t numel = value.data.size() / sizeof(float);
     EXPECT_EQ(value.data.size(), truth_record.data.size());
     for (size_t i = 0; i < numel; ++i) {
+      VLOG(1) << "compare: " << value.data.data()[i] << ",\t"
+              << truth_record.data.data()[i];
       ASSERT_LT(fabs(value.data.data()[i] - truth_record.data.data()[i]),
                 epislon);
     }