Unverified · Commit 9e6b1c5f authored by Yiqun Liu, committed by GitHub

Refine tester of TensorRT engine (#14390)

* Refine the tester for MixedRTPredictor.
test=develop

* Enable the profiler in TensorRT engine.

* Support the use of combined inference model in TensorRT unittest, and print the shape of feed targets.
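The central change in the hunks below is that the shared tester helpers (TestPrediction, CompareNativeAndAnalysis, TestOneThreadPrediction) now take a generic `const PaddlePredictor::Config *` instead of an `AnalysisConfig` reference, so the same helpers drive both the native and the analysis/TensorRT predictors. A minimal sketch of the new calling pattern, assuming the Paddle test headers; RunRefinedTester is a made-up name for illustration:

#include <vector>
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

// Hypothetical caller: shows how the refactored helpers are invoked after
// this commit -- the concrete config is passed through a reinterpret_cast
// to the generic PaddlePredictor::Config pointer.
void RunRefinedTester(const contrib::AnalysisConfig &cfg,
                      const std::vector<std::vector<PaddleTensor>> &inputs) {
  std::vector<PaddleTensor> outputs;
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                 inputs, &outputs, /*num_threads=*/1);
}

}  // namespace inference
}  // namespace paddle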
Parent d3e63e6e
@@ -15,7 +15,7 @@
 #include "paddle/fluid/inference/api/analysis_predictor.h"
 #include <glog/logging.h>
 #include <gtest/gtest.h>
-#include <thread>
+#include <thread>  // NOLINT
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
...
@@ -23,7 +23,7 @@ limitations under the License. */
 #include <memory>
 #include <thread>  //NOLINT
-#include "utils.h"
+#include "utils.h"  // NOLINT
 DEFINE_string(dirname, "", "Directory of the inference model.");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
...
@@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
...
@@ -49,6 +49,8 @@ struct AnalysisConfig : public NativeConfig {
   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1);
+  bool use_tensorrt() const { return use_tensorrt_; }
+
   // NOTE this is just for internal development, please not use it.
   // NOT stable yet.
   void EnableMKLDNN();
...
@@ -91,7 +91,7 @@ class CpuPassStrategy : public PassStrategy {
   virtual ~CpuPassStrategy() = default;
-  virtual void EnableMKLDNN() override {
+  void EnableMKLDNN() override {
     // TODO(Superjomn) Consider the way to mix CPU with GPU.
 #ifdef PADDLE_WITH_MKLDNN
     passes_.insert(passes_.begin(), "mkldnn_placement_pass");
@@ -123,7 +123,7 @@ class GpuPassStrategy : public PassStrategy {
   GpuPassStrategy(const GpuPassStrategy &other)
       : PassStrategy(other.AllPasses()) {}
-  virtual void EnableMKLDNN() override;
+  void EnableMKLDNN() override;
   virtual ~GpuPassStrategy() = default;
 };
...
@@ -108,8 +108,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
   if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR})
     inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_test_models.tar.gz")
   endif()
   inference_analysis_test(test_trt_models SRCS trt_models_tester.cc
     EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor
-    ARGS --dirname=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL)
+    ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_test_models SERIAL)
 endif()
@@ -178,7 +178,8 @@ TEST(Analyzer_dam, profile) {
   std::vector<PaddleTensor> outputs;
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     PADDLE_ENFORCE_GT(outputs.size(), 0);
@@ -216,7 +217,9 @@ TEST(Analyzer_dam, compare) {
   SetInput(&input_slots_all);
   if (FLAGS_use_analysis) {
-    CompareNativeAndAnalysis(cfg, input_slots_all);
+    CompareNativeAndAnalysis(
+        reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+        input_slots_all);
   }
 }
...
@@ -133,7 +133,8 @@ TEST(Analyzer_LAC, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -175,7 +176,8 @@ TEST(Analyzer_LAC, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 }  // namespace analysis
...
@@ -121,7 +121,8 @@ TEST(Analyzer_Chinese_ner, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -160,7 +161,8 @@ TEST(Analyzer_Chinese_ner, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 }  // namespace inference
...
@@ -45,7 +45,8 @@ void profile(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 }
 TEST(Analyzer_resnet50, profile) { profile(); }
@@ -74,7 +75,8 @@ void compare(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 TEST(Analyzer_resnet50, compare) { compare(); }
...
@@ -233,8 +233,8 @@ TEST(Analyzer_rnn1, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  LOG(INFO) << "to test prediction";
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
 }
 // Check the fuse status
@@ -261,7 +261,8 @@ TEST(Analyzer_rnn1, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 // Test Multi-Thread.
@@ -272,7 +273,8 @@ TEST(Analyzer_rnn1, multi_thread) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, 4 /* multi_thread */);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, 4 /* multi_thread */);
 }
 // Validate that the AnalysisPredictor + ZeroCopyTensor really works by testing
...
@@ -132,7 +132,8 @@ TEST(Analyzer_rnn2, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -153,7 +154,8 @@ TEST(Analyzer_rnn2, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 }  // namespace inference
...
@@ -161,7 +161,8 @@ TEST(Analyzer_seq_conv1, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     // the first inference result
@@ -199,7 +200,8 @@ TEST(Analyzer_seq_conv1, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 }  // namespace inference
...
@@ -74,7 +74,8 @@ TEST(Analyzer_Text_Classification, profile) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1) {
     // Get output
@@ -101,7 +102,8 @@ TEST(Analyzer_Text_Classification, compare) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
@@ -112,7 +114,8 @@ TEST(Analyzer_Text_Classification, compare_against_embedding_fc_lstm_fused) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 }  // namespace inference
...
@@ -94,7 +94,8 @@ void profile(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
+                 input_slots_all, &outputs, FLAGS_num_threads);
   if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
     const float ocr_result_data[] = {
@@ -136,7 +137,8 @@ void compare(bool use_mkldnn = false) {
   std::vector<std::vector<PaddleTensor>> input_slots_all;
   SetInput(&input_slots_all);
-  CompareNativeAndAnalysis(cfg, input_slots_all);
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
 }
 TEST(Analyzer_vis, compare) { compare(); }
...
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <ostream>
#include <string>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace paddle {
namespace inference {
thread_local int num_spaces = 0;
static std::string GenSpaces(int num_spaces) {
std::ostringstream os;
for (int i = 0; i < num_spaces; ++i) {
os << " ";
}
return os.str();
}
std::ostream &operator<<(std::ostream &os,
const PaddlePredictor::Config &config) {
os << GenSpaces(num_spaces) << "PaddlePredictor::Config {\n";
num_spaces++;
os << GenSpaces(num_spaces) << "model_dir: " << config.model_dir << "\n";
num_spaces--;
os << GenSpaces(num_spaces) << "}\n";
return os;
}
std::ostream &operator<<(std::ostream &os, const NativeConfig &config) {
os << GenSpaces(num_spaces) << "NativeConfig {\n";
num_spaces++;
os << *reinterpret_cast<const PaddlePredictor::Config *>(&config);
os << GenSpaces(num_spaces) << "use_gpu: " << config.use_gpu << "\n";
os << GenSpaces(num_spaces) << "device: " << config.device << "\n";
os << GenSpaces(num_spaces)
<< "fraction_of_gpu_memory: " << config.fraction_of_gpu_memory << "\n";
os << GenSpaces(num_spaces) << "prog_file: " << config.prog_file << "\n";
os << GenSpaces(num_spaces) << "param_file: " << config.param_file << "\n";
os << GenSpaces(num_spaces)
<< "specify_input_name: " << config.specify_input_name << "\n";
num_spaces--;
os << GenSpaces(num_spaces) << "}\n";
return os;
}
std::ostream &operator<<(std::ostream &os,
const contrib::AnalysisConfig &config) {
os << GenSpaces(num_spaces) << "contrib::AnalysisConfig {\n";
num_spaces++;
os << *reinterpret_cast<const NativeConfig *>(&config);
os << GenSpaces(num_spaces) << "enable_ir_optim: " << config.enable_ir_optim
<< "\n";
os << GenSpaces(num_spaces)
<< "use_feed_fetch_ops: " << config.use_feed_fetch_ops << "\n";
os << GenSpaces(num_spaces) << "use_tensorrt: " << config.use_tensorrt()
<< "\n";
os << GenSpaces(num_spaces) << "use_mkldnn: " << config.use_mkldnn() << "\n";
num_spaces--;
os << GenSpaces(num_spaces) << "}\n";
return os;
}
} // namespace inference
} // namespace paddle
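The new config_printer.h above only gives the three config types a streaming operator so that any tester can dump the effective settings before running. A small usage sketch (not part of the commit; LogAnalysisConfig is a hypothetical helper):

#include <glog/logging.h>
#include "paddle/fluid/inference/tests/api/config_printer.h"

// Hypothetical helper: prints the nested NativeConfig fields plus
// enable_ir_optim, use_feed_fetch_ops, use_tensorrt and use_mkldnn,
// indented according to the thread-local num_spaces counter.
void LogAnalysisConfig(const paddle::contrib::AnalysisConfig &config) {
  LOG(INFO) << config;
}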
@@ -19,13 +19,16 @@
 #include <string>
 #include <thread>  // NOLINT
 #include <vector>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 #include "paddle/fluid/inference/api/analysis_predictor.h"
-#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
+#include "paddle/fluid/inference/api/helper.h"
+#include "paddle/fluid/inference/tests/api/config_printer.h"
 #include "paddle/fluid/inference/tests/test_helper.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -38,10 +41,18 @@ DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
 DEFINE_bool(use_analysis, true,
             "Running the inference program in analysis mode.");
+DECLARE_bool(profile);
 namespace paddle {
 namespace inference {
-using contrib::AnalysisConfig;
+void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) {
+  if (use_analysis) {
+    LOG(INFO) << *reinterpret_cast<const contrib::AnalysisConfig *>(config);
+    return;
+  }
+  LOG(INFO) << *config;
+}
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {
@@ -77,12 +88,13 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
 }
 std::unique_ptr<PaddlePredictor> CreateTestPredictor(
-    const AnalysisConfig &config, bool use_analysis = true) {
+    const PaddlePredictor::Config *config, bool use_analysis = true) {
   if (use_analysis) {
-    return CreatePaddlePredictor<contrib::AnalysisConfig>(config);
-  } else {
-    return CreatePaddlePredictor<NativeConfig>(config);
+    return CreatePaddlePredictor<contrib::AnalysisConfig>(
+        *(reinterpret_cast<const contrib::AnalysisConfig *>(config)));
   }
+  return CreatePaddlePredictor<NativeConfig>(
+      *(reinterpret_cast<const NativeConfig *>(config)));
 }
 size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
@@ -111,11 +123,23 @@ std::unordered_map<std::string, int> GetFuseStatis(PaddlePredictor *predictor,
 }
 void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
-                       const std::string &dirname) {
+                       const std::string &dirname, bool is_combined = true,
+                       std::string model_filename = "model",
+                       std::string params_filename = "params") {
   // Set fake_image_data
   PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
-  std::vector<std::vector<int64_t>> feed_target_shapes =
-      GetFeedTargetShapes(dirname, true, "model", "params");
+  std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
+      dirname, is_combined, model_filename, params_filename);
+  std::ostringstream os;
+  for (size_t i = 0; i < feed_target_shapes.size(); ++i) {
+    os << "feed target " << i << ": {" << feed_target_shapes[i][0];
+    for (size_t j = 1; j < feed_target_shapes[i].size(); ++j) {
+      os << ", " << feed_target_shapes[i][j];
+    }
+    os << "}\n";
+  }
+  LOG(INFO) << os.str();
   int dim1 = feed_target_shapes[0][1];
   int dim2 = feed_target_shapes[0][2];
   int dim3 = feed_target_shapes[0][3];
@@ -139,25 +163,43 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
 }
 void TestOneThreadPrediction(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
   int batch_size = FLAGS_batch_size;
   int num_times = FLAGS_repeat;
   auto predictor = CreateTestPredictor(config, use_analysis);
-  Timer timer;
-  timer.tic();
-  for (int i = 0; i < num_times; i++) {
-    for (size_t j = 0; j < inputs.size(); j++) {
-      predictor->Run(inputs[j], outputs);
-    }
-  }
-  PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times,
-            inputs.size());
+
+  // warmup run
+  LOG(INFO) << "Warm up run...";
+  {
+    Timer warmup_timer;
+    warmup_timer.tic();
+    predictor->Run(inputs[0], outputs, batch_size);
+    PrintTime(batch_size, 1, 1, 0, warmup_timer.toc(), 1);
+#if !defined(_WIN32)
+    if (FLAGS_profile) {
+      paddle::platform::ResetProfiler();
+    }
+#endif
+  }
+
+  LOG(INFO) << "Run " << num_times << " times...";
+  {
+    Timer run_timer;
+    run_timer.tic();
+    for (int i = 0; i < num_times; i++) {
+      for (size_t j = 0; j < inputs.size(); j++) {
+        predictor->Run(inputs[j], outputs, batch_size);
+      }
    }
+    PrintTime(batch_size, num_times, 1, 0, run_timer.toc() / num_times,
+              inputs.size());
+  }
 }
 void TestMultiThreadPrediction(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<PaddleTensor> *outputs, int num_threads,
     bool use_analysis = true) {
@@ -200,12 +242,11 @@ void TestMultiThreadPrediction(
   }
 }
-void TestPrediction(const AnalysisConfig &config,
+void TestPrediction(const PaddlePredictor::Config *config,
                     const std::vector<std::vector<PaddleTensor>> &inputs,
                     std::vector<PaddleTensor> *outputs, int num_threads,
                     bool use_analysis = FLAGS_use_analysis) {
-  LOG(INFO) << "use_analysis: " << use_analysis
-            << ", use_mkldnn: " << config.use_mkldnn();
+  PrintConfig(config, use_analysis);
   if (num_threads == 1) {
     TestOneThreadPrediction(config, inputs, outputs, use_analysis);
   } else {
@@ -215,9 +256,9 @@ void TestPrediction(const AnalysisConfig &config,
 }
 void CompareNativeAndAnalysis(
-    const AnalysisConfig &config,
+    const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs) {
-  LOG(INFO) << "use_mkldnn: " << config.use_mkldnn();
+  PrintConfig(config, true);
   std::vector<PaddleTensor> native_outputs, analysis_outputs;
   TestOneThreadPrediction(config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
...
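SetFakeImageInput above now accepts the combined-model layout directly. A hedged usage sketch (not in the diff; model_dir is assumed to hold a combined model whose program and parameters are stored in single files named "model" and "params"):

// Assumed caller code: builds one batch of fake image inputs from a
// combined inference model, mirroring the new default arguments above.
std::vector<std::vector<paddle::PaddleTensor>> inputs_all;
paddle::inference::SetFakeImageInput(&inputs_all, model_dir,
                                     /*is_combined=*/true,
                                     /*model_filename=*/"model",
                                     /*params_filename=*/"params");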
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
 #include <gflags/gflags.h>
 #include <glog/logging.h>
 #include <gtest/gtest.h>
-#include "paddle/fluid/inference/analysis/analyzer.h"
-#include "paddle/fluid/inference/api/helper.h"
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 namespace paddle {
-using paddle::contrib::AnalysisConfig;
-
-DEFINE_string(dirname, "", "Directory of the inference model.");
-
-NativeConfig GetConfigNative() {
-  NativeConfig config;
-  config.model_dir = FLAGS_dirname;
-  // LOG(INFO) << "dirname " << config.model_dir;
-  config.fraction_of_gpu_memory = 0.15;
-  config.use_gpu = true;
-  config.device = 0;
-  return config;
-}
-
-void PrepareTRTConfig(AnalysisConfig *config) {
-  config->model_dir = FLAGS_dirname + "/" + "mobilenet";
-  config->fraction_of_gpu_memory = 0.15;
-  config->EnableTensorRtEngine(1 << 10, 5);
-  config->pass_builder()->DeletePass("conv_bn_fuse_pass");
-  config->pass_builder()->DeletePass("fc_fuse_pass");
-  config->pass_builder()->TurnOnDebug();
-}
-
-void PrepareInputs(std::vector<PaddleTensor> *tensors, int batch_size) {
-  PADDLE_ENFORCE_EQ(tensors->size(), 1UL);
-  auto &tensor = tensors->front();
-  int height = 224;
-  int width = 224;
-  float *data = new float[batch_size * 3 * height * width];
-  memset(data, 0, sizeof(float) * (batch_size * 3 * height * width));
-  data[0] = 1.0f;
-
-  // Prepare inputs
-  tensor.name = "input_0";
-  tensor.shape = std::vector<int>({batch_size, 3, height, width});
-  tensor.data = PaddleBuf(static_cast<void *>(data),
-                          sizeof(float) * (batch_size * 3 * height * width));
-  tensor.dtype = PaddleDType::FLOAT32;
-}
-
-void CompareTensorRTWithFluid(int batch_size, std::string model_dirname) {
-  auto config0 = GetConfigNative();
-  config0.model_dir = model_dirname;
-
-  AnalysisConfig config1(true);
-  PrepareTRTConfig(&config1);
-  config1.model_dir = model_dirname;
-
-  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
-  auto predictor1 = CreatePaddlePredictor(config1);
-
-  // Prepare inputs
-  std::vector<PaddleTensor> paddle_tensor_feeds(1);
-  PrepareInputs(&paddle_tensor_feeds, batch_size);
-
-  // Prepare outputs
-  std::vector<PaddleTensor> outputs0;
-  std::vector<PaddleTensor> outputs1;
-  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
-  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));
-
-  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
-  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
-  EXPECT_EQ(num_elements, num_elements1);
-
-  auto *data0 = static_cast<float *>(outputs0.front().data.data());
-  auto *data1 = static_cast<float *>(outputs1.front().data.data());
-  ASSERT_GT(num_elements, 0UL);
-  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
-    EXPECT_NEAR(data0[i], data1[i], 1e-3);
-  }
-}
-
-TEST(trt_models_test, mobilenet) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "mobilenet");
-}
-
-TEST(trt_models_test, resnet50) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnet50");
-}
-
-TEST(trt_models_test, resnext50) {
-  CompareTensorRTWithFluid(1, FLAGS_dirname + "/" + "resnext50");
-}
-
-TEST(trt_models_test, raw_gpu) {
-  std::string model_dir = FLAGS_dirname + "/" + "mobilenet";
-  auto config0 = GetConfigNative();
-  config0.model_dir = model_dir;
-  int batch_size = 2;
-
-  AnalysisConfig config1(true);
-  config1.fraction_of_gpu_memory = 0.1;
-  config1.enable_ir_optim = true;
-  config1.model_dir = model_dir;
-
-  auto predictor0 = CreatePaddlePredictor<NativeConfig>(config0);
-  auto predictor1 = CreatePaddlePredictor(config1);
-
-  // Prepare inputs
-  std::vector<PaddleTensor> paddle_tensor_feeds(1);
-  PrepareInputs(&paddle_tensor_feeds, batch_size);
-
-  // Prepare outputs
-  std::vector<PaddleTensor> outputs0;
-  std::vector<PaddleTensor> outputs1;
-  CHECK(predictor0->Run(paddle_tensor_feeds, &outputs0));
-  CHECK(predictor1->Run(paddle_tensor_feeds, &outputs1, batch_size));
-
-  const size_t num_elements = outputs0.front().data.length() / sizeof(float);
-  const size_t num_elements1 = outputs1.front().data.length() / sizeof(float);
-  EXPECT_EQ(num_elements, num_elements1);
-
-  auto *data0 = static_cast<float *>(outputs0.front().data.data());
-  auto *data1 = static_cast<float *>(outputs1.front().data.data());
-
-  ASSERT_GT(num_elements, 0UL);
-  for (size_t i = 0; i < std::min(num_elements, num_elements1); i++) {
-    EXPECT_NEAR(data0[i], data1[i], 1e-3);
-  }
-}
+namespace inference {
+
+DEFINE_bool(use_tensorrt, true, "Test the performance of TensorRT engine.");
+DEFINE_string(prog_filename, "", "Name of model file.");
+DEFINE_string(param_filename, "", "Name of parameters file.");
+
+template <typename ConfigType>
+void SetConfig(ConfigType* config, std::string model_dir, bool use_gpu,
+               bool use_tensorrt = false, int batch_size = -1) {
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
+    config->param_file = model_dir + "/" + FLAGS_param_filename;
+  } else {
+    config->model_dir = model_dir;
+  }
+  if (use_gpu) {
+    config->use_gpu = true;
+    config->device = 0;
+    config->fraction_of_gpu_memory = 0.15;
+  }
+}
+
+template <>
+void SetConfig<contrib::AnalysisConfig>(contrib::AnalysisConfig* config,
+                                        std::string model_dir, bool use_gpu,
+                                        bool use_tensorrt, int batch_size) {
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    config->prog_file = model_dir + "/" + FLAGS_prog_filename;
+    config->param_file = model_dir + "/" + FLAGS_param_filename;
+  } else {
+    config->model_dir = model_dir;
+  }
+  if (use_gpu) {
+    config->use_gpu = true;
+    config->device = 0;
+    config->fraction_of_gpu_memory = 0.15;
+    if (use_tensorrt) {
+      config->EnableTensorRtEngine(1 << 10, batch_size);
+      config->pass_builder()->DeletePass("conv_bn_fuse_pass");
+      config->pass_builder()->DeletePass("fc_fuse_pass");
+      config->pass_builder()->TurnOnDebug();
+    } else {
+      config->enable_ir_optim = true;
+    }
+  }
+}
+
+void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
+  std::vector<std::vector<PaddleTensor>> inputs_all;
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
+                      FLAGS_param_filename);
+  } else {
+    SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
+  }
+
+  std::vector<PaddleTensor> outputs;
+  if (use_analysis || use_tensorrt) {
+    contrib::AnalysisConfig config(true);
+    SetConfig<contrib::AnalysisConfig>(&config, model_dir, true, use_tensorrt,
+                                       FLAGS_batch_size);
+    TestPrediction(reinterpret_cast<PaddlePredictor::Config*>(&config),
+                   inputs_all, &outputs, FLAGS_num_threads, true);
+  } else {
+    NativeConfig config;
+    SetConfig<NativeConfig>(&config, model_dir, true, false);
+    TestPrediction(reinterpret_cast<PaddlePredictor::Config*>(&config),
+                   inputs_all, &outputs, FLAGS_num_threads, false);
+  }
+}
+
+void compare(std::string model_dir, bool use_tensorrt) {
+  std::vector<std::vector<PaddleTensor>> inputs_all;
+  if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) {
+    SetFakeImageInput(&inputs_all, model_dir, true, FLAGS_prog_filename,
+                      FLAGS_param_filename);
+  } else {
+    SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
+  }
+
+  std::vector<PaddleTensor> native_outputs;
+  NativeConfig native_config;
+  SetConfig<NativeConfig>(&native_config, model_dir, true, false,
+                          FLAGS_batch_size);
+  TestOneThreadPrediction(
+      reinterpret_cast<PaddlePredictor::Config*>(&native_config), inputs_all,
+      &native_outputs, false);
+
+  std::vector<PaddleTensor> analysis_outputs;
+  contrib::AnalysisConfig analysis_config(true);
+  SetConfig<contrib::AnalysisConfig>(&analysis_config, model_dir, true,
+                                     use_tensorrt, FLAGS_batch_size);
+  TestOneThreadPrediction(
+      reinterpret_cast<PaddlePredictor::Config*>(&analysis_config), inputs_all,
+      &analysis_outputs, true);
+
+  CompareResult(native_outputs, analysis_outputs);
+}
+
+TEST(TensorRT_mobilenet, compare) {
+  std::string model_dir = FLAGS_infer_model + "/mobilenet";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnet50, compare) {
+  std::string model_dir = FLAGS_infer_model + "/resnet50";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnext50, compare) {
+  std::string model_dir = FLAGS_infer_model + "/resnext50";
+  compare(model_dir, /* use_tensorrt */ true);
+}
+
+TEST(TensorRT_resnext50, profile) {
+  std::string model_dir = FLAGS_infer_model + "/resnext50";
+  profile(model_dir, /* use_analysis */ true, FLAGS_use_tensorrt);
+}
+
+TEST(TensorRT_mobilenet, analysis) {
+  std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
+  compare(model_dir, /* use_tensorrt */ false);
+}
+
+}  // namespace inference
 }  // namespace paddle
 USE_PASS(tensorrt_subgraph_pass);
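The rewritten tester derives every model directory from the --infer_model flag and reuses the same compare()/profile() helpers, so adding coverage for another exported model is mechanical. A sketch (not part of the commit; "some_model" is a hypothetical subdirectory under the --infer_model path):

TEST(TensorRT_some_model, compare) {
  std::string model_dir = FLAGS_infer_model + "/some_model";  // hypothetical
  compare(model_dir, /* use_tensorrt */ true);
}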