diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index c89dd41e0a6283e0723e2925f28c0372cda6a2b2..ab7f55337488f9e4c953210124e47c12e26ed6b1 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -26,14 +26,20 @@
 #include <sstream>
 #include <string>
 #include <vector>
+#include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"
 
+extern std::string paddle::framework::DataTypeToString(
+    const framework::proto::VarType::Type type);
+
 namespace paddle {
 namespace inference {
 
+using paddle::framework::DataTypeToString;
+
 // Timer for timer
 class Timer {
  public:
@@ -267,17 +273,20 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
 }
 
 static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-                      double batch_latency, int epoch = 1) {
+                      double batch_latency, int epoch = 1,
+                      const framework::proto::VarType::Type data_type =
+                          framework::proto::VarType::FP32) {
   PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
   double sample_latency = batch_latency / batch_size;
   LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
             << " ======";
-  LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
+  LOG(INFO) << "====== batch size: " << batch_size << ", iterations: " << epoch
             << ", repetitions: " << repeat << " ======";
   LOG(INFO) << "====== batch latency: " << batch_latency
             << "ms, number of samples: " << batch_size * epoch
             << ", sample latency: " << sample_latency
-            << "ms, fps: " << 1000.f / sample_latency << " ======";
+            << "ms, fps: " << 1000.f / sample_latency
+            << ", data type: " << DataTypeToString(data_type) << " ======";
 }
 
 static bool IsFileExists(const std::string &path) {
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 10fc7556994b93776ed15184ba17820cebae07a0..a50810948ff8cb9e0bb92c287a7ab3945d39e089 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -65,6 +65,8 @@ DECLARE_int32(paddle_num_threads);
 
 namespace paddle {
 namespace inference {
 
+using paddle::framework::proto::VarType;
+
 template <typename T>
 constexpr paddle::PaddleDType GetPaddleDType();
@@ -293,7 +295,8 @@ void ConvertPaddleTensorToZeroCopyTensor(
 void PredictionWarmUp(PaddlePredictor *predictor,
                       const std::vector<std::vector<PaddleTensor>> &inputs,
                       std::vector<std::vector<PaddleTensor>> *outputs,
-                      int num_threads, int tid) {
+                      int num_threads, int tid,
+                      const VarType::Type data_type = VarType::FP32) {
   int batch_size = FLAGS_batch_size;
   LOG(INFO) << "Running thread " << tid << ", warm up run...";
   if (FLAGS_zero_copy) {
@@ -307,7 +310,7 @@
   } else {
     predictor->ZeroCopyRun();
   }
-  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
+  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1, data_type);
   if (FLAGS_profile) {
     paddle::platform::ResetProfiler();
   }
@@ -316,7 +319,8 @@
 void PredictionRun(PaddlePredictor *predictor,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
                    std::vector<std::vector<PaddleTensor>> *outputs,
-                   int num_threads, int tid) {
+                   int num_threads, int tid,
+                   const VarType::Type data_type = VarType::FP32) {
   int num_times = FLAGS_repeat;
   int iterations = inputs.size();  // process the whole dataset ...
   if (FLAGS_iterations > 0 &&
@@ -355,7 +359,7 @@ void PredictionRun(PaddlePredictor *predictor,
 
   auto batch_latency = elapsed_time / (iterations * num_times);
   PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
-            iterations);
+            iterations, data_type);
   if (FLAGS_record_benchmark) {
     Benchmark benchmark;
     benchmark.SetName(FLAGS_model_name);
@@ -368,12 +372,13 @@
 void TestOneThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
+    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
+    const VarType::Type data_type = VarType::FP32) {
   auto predictor = CreateTestPredictor(config, use_analysis);
   if (FLAGS_warmup) {
-    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
   }
-  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
 }
 
 void TestMultiThreadPrediction(
@@ -505,13 +510,14 @@ void CompareQuantizedAndAnalysis(
   auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
   PrintConfig(cfg, true);
   std::vector<std::vector<PaddleTensor>> analysis_outputs;
-  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
+  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);
 
   LOG(INFO) << "--- INT8 prediction start ---";
   auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
   PrintConfig(qcfg, true);
   std::vector<std::vector<PaddleTensor>> quantized_outputs;
-  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
+  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
+                          VarType::INT8);
 
   LOG(INFO) << "--- comparing outputs --- ";
   CompareTopAccuracy(quantized_outputs, analysis_outputs);
@@ -640,7 +646,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
   }
 
   for (size_t i = 0; i < a_size; i++) {
-    if (a.type() == framework::proto::VarType::FP32) {
+    if (a.type() == VarType::FP32) {
       const auto *a_data = a.data<float>();
       const auto *b_data = b.data<float>();
       if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
@@ -649,7 +655,7 @@
             b_data[i]);
         return false;
       }
-    } else if (a.type() == framework::proto::VarType::INT64) {
+    } else if (a.type() == VarType::INT64) {
       const auto *a_data = a.data<int64_t>();
       const auto *b_data = b.data<int64_t>();
       if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
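
Note on the pattern introduced above: an optional data_type argument with an FP32 default is threaded through PrintTime, PredictionWarmUp, PredictionRun and TestOneThreadPrediction, so every pre-existing call site keeps compiling unchanged and only the INT8 path in CompareQuantizedAndAnalysis passes the type explicitly. The standalone C++ sketch below illustrates that defaulted-trailing-parameter approach; the Type enum and the DataTypeToString mapping are simplified stand-ins for Paddle's framework::proto::VarType and paddle::framework::DataTypeToString, not the real definitions.

// Minimal sketch of the defaulted data-type parameter used by the patch.
// Everything here is a stand-in; only the calling pattern mirrors the diff.
#include <iostream>
#include <string>

namespace sketch {

// Stand-in for framework::proto::VarType::Type.
enum class Type { FP32, INT8, INT64 };

// Stand-in for paddle::framework::DataTypeToString.
std::string DataTypeToString(Type type) {
  switch (type) {
    case Type::FP32:
      return "float";
    case Type::INT8:
      return "int8";
    case Type::INT64:
      return "int64";
  }
  return "unknown";
}

// Mirrors the new PrintTime signature: the trailing defaulted argument
// keeps callers written before the change source-compatible.
void PrintTime(int batch_size, double batch_latency,
               Type data_type = Type::FP32) {
  double sample_latency = batch_latency / batch_size;
  std::cout << "====== batch size: " << batch_size
            << ", sample latency: " << sample_latency
            << "ms, data type: " << DataTypeToString(data_type)
            << " ======\n";
}

}  // namespace sketch

int main() {
  sketch::PrintTime(8, 12.0);                     // old-style call: labeled FP32
  sketch::PrintTime(8, 4.5, sketch::Type::INT8);  // quantized run: labeled INT8
  return 0;
}

Using a defaulted trailing parameter rather than a second PrintTime overload keeps the benchmark log format in one place; callers opt into the data-type label only where it matters, i.e. when timing the quantized INT8 predictor against the FP32 baseline.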