未验证 提交 322bb8d5 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #14825 from NHZlX/add_benchmark_for_trt

A sample of recording benchmark results to a file for TensorRT (trt)
set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor)
set(INFERENCE_EXTRA_DEPS paddle_inference_api paddle_fluid_api ir_pass_manager analysis_predictor benchmark)
if(WITH_GPU AND TENSORRT_FOUND)
set(INFERENCE_EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} analysis ${analysis_deps} ir_pass_manager analysis_predictor)
......
......@@ -30,8 +30,10 @@
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/tests/api/config_printer.h"
#include "paddle/fluid/inference/tests/test_helper.h"
#include "paddle/fluid/inference/utils/benchmark.h"
#include "paddle/fluid/platform/profiler.h"
// Command-line flags (gflags) describing the model under test and its input.
// model_name is the label written into the benchmark record via
// Benchmark::SetName(FLAGS_model_name).
DEFINE_string(model_name, "", "model name");
// Base path of the model; tests append a model-specific sub-directory to it
// (e.g. FLAGS_infer_model + "/resnext50").
DEFINE_string(infer_model, "", "model path");
DEFINE_string(infer_data, "", "data file");
DEFINE_int32(batch_size, 1, "batch size.");
......@@ -40,6 +42,8 @@ DEFINE_bool(test_all_data, false, "Test the all dataset in data file.");
// Command-line flags (gflags) controlling how the inference tests are run.
DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
DEFINE_bool(use_analysis, true,
"Running the inference program in analysis mode.");
// When true, the single-thread prediction test stores the model name, batch
// size and measured latency in a Benchmark object and persists it to
// "benchmark_record.txt". Disabled by default.
DEFINE_bool(record_benchmark, false,
"Record benchmark after profiling the model");
// These two flags are only declared here; their definitions are not in this
// file.
DECLARE_bool(profile);
DECLARE_int32(paddle_num_threads);
......@@ -192,8 +196,16 @@ void TestOneThreadPrediction(
predictor->Run(inputs[j], outputs, batch_size);
}
}
PrintTime(batch_size, num_times, 1, 0, run_timer.toc() / num_times,
inputs.size());
double latency = run_timer.toc() / num_times;
PrintTime(batch_size, num_times, 1, 0, latency, inputs.size());
if (FLAGS_record_benchmark) {
Benchmark benchmark;
benchmark.SetName(FLAGS_model_name);
benchmark.SetBatchSize(batch_size);
benchmark.SetLatency(latency);
benchmark.PersistToFile("benchmark_record.txt");
}
}
}
......
......@@ -135,6 +135,9 @@ TEST(TensorRT_resnext50, compare) {
// Profiles the resnext50 model found under FLAGS_infer_model. The model name
// is published through FLAGS_model_name before profile() is invoked.
TEST(TensorRT_resnext50, profile) {
  // To write the benchmark to a file, enable FLAGS_record_benchmark here:
  // FLAGS_record_benchmark=true;
  FLAGS_model_name = "resnext50";

  // Build "<infer_model>/resnext50" step by step.
  std::string resnext50_dir = FLAGS_infer_model;
  resnext50_dir += "/resnext50";

  const bool use_analysis = true;
  profile(resnext50_dir, use_analysis, FLAGS_use_tensorrt);
}
......
......@@ -30,7 +30,7 @@ std::string Benchmark::SerializeToString() const {
ss << '\n';
ss << name_ << "\t";
ss << batch_size_ << "\t";
ss << batch_size_ << "\t\t";
ss << num_threads_ << "\t";
ss << latency_ << "\t";
ss << 1000.0 / latency_;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册