Commit 984aa905 authored by Wojciech Uss, committed by Tao Luo

improved unit test output (#17266)

added printing of the data type to differentiate INT8 and FP32 latency results

test=develop
Parent 65541d83
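The diff below threads an optional data-type argument (defaulting to FP32) from the test entry points down to PrintTime, so the latency summaries printed for the FP32 baseline and the quantized INT8 run can be told apart in the log. A minimal sketch of the intended use, mirroring the updated CompareQuantizedAndAnalysis shown further down (variable names are illustrative):

```cpp
// Illustrative sketch only; mirrors the change to CompareQuantizedAndAnalysis.
std::vector<std::vector<PaddleTensor>> analysis_outputs, quantized_outputs;
// FP32 baseline run: its latency summary is now labelled with FP32.
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, /*use_analysis=*/true,
                        VarType::FP32);
// Quantized run: its latency summary is now labelled with INT8.
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, /*use_analysis=*/true,
                        VarType::INT8);
```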
@@ -26,14 +26,20 @@
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
extern std::string paddle::framework::DataTypeToString(
const framework::proto::VarType::Type type);
namespace paddle {
namespace inference {
using paddle::framework::DataTypeToString;
// Timer for timer
class Timer {
public:
@@ -267,17 +273,20 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
}
static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double batch_latency, int epoch = 1) {
double batch_latency, int epoch = 1,
const framework::proto::VarType::Type data_type =
framework::proto::VarType::FP32) {
PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
double sample_latency = batch_latency / batch_size;
LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
<< " ======";
LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
LOG(INFO) << "====== batch size: " << batch_size << ", iterations: " << epoch
<< ", repetitions: " << repeat << " ======";
LOG(INFO) << "====== batch latency: " << batch_latency
<< "ms, number of samples: " << batch_size * epoch
<< ", sample latency: " << sample_latency
<< "ms, fps: " << 1000.f / sample_latency << " ======";
<< "ms, fps: " << 1000.f / sample_latency
<< ", data type: " << DataTypeToString(data_type) << " ======";
}
static bool IsFileExists(const std::string &path) {
......
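To illustrate the extra field, here is a hypothetical call to the updated PrintTime and the rough shape of the final summary line it logs (values made up; the exact type name is whatever DataTypeToString returns for the given VarType):

```cpp
// Hypothetical values, for illustration only.
PrintTime(/*batch_size=*/50, /*repeat=*/1, /*num_threads=*/1, /*tid=*/0,
          /*batch_latency=*/12.5, /*epoch=*/1,
          paddle::framework::proto::VarType::INT8);
// Final summary line: sample latency = 12.5 / 50 = 0.25 ms, fps = 1000 / 0.25:
// ====== batch latency: 12.5ms, number of samples: 50, sample latency: 0.25ms,
//        fps: 4000, data type: <DataTypeToString(INT8)> ======
```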
@@ -65,6 +65,8 @@ DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace inference {
using paddle::framework::proto::VarType;
template <typename T>
constexpr paddle::PaddleDType GetPaddleDType();
@@ -293,7 +295,8 @@ void ConvertPaddleTensorToZeroCopyTensor(
void PredictionWarmUp(PaddlePredictor *predictor,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<std::vector<PaddleTensor>> *outputs,
int num_threads, int tid) {
int num_threads, int tid,
const VarType::Type data_type = VarType::FP32) {
int batch_size = FLAGS_batch_size;
LOG(INFO) << "Running thread " << tid << ", warm up run...";
if (FLAGS_zero_copy) {
@@ -307,7 +310,7 @@ void PredictionWarmUp(PaddlePredictor *predictor,
} else {
predictor->ZeroCopyRun();
}
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1, data_type);
if (FLAGS_profile) {
paddle::platform::ResetProfiler();
}
@@ -316,7 +319,8 @@ void PredictionWarmUp(PaddlePredictor *predictor,
void PredictionRun(PaddlePredictor *predictor,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<std::vector<PaddleTensor>> *outputs,
int num_threads, int tid) {
int num_threads, int tid,
const VarType::Type data_type = VarType::FP32) {
int num_times = FLAGS_repeat;
int iterations = inputs.size(); // process the whole dataset ...
if (FLAGS_iterations > 0 &&
@@ -355,7 +359,7 @@ void PredictionRun(PaddlePredictor *predictor,
auto batch_latency = elapsed_time / (iterations * num_times);
PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
iterations);
iterations, data_type);
if (FLAGS_record_benchmark) {
Benchmark benchmark;
benchmark.SetName(FLAGS_model_name);
@@ -368,12 +372,13 @@ void PredictionRun(PaddlePredictor *predictor,
void TestOneThreadPrediction(
const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
const VarType::Type data_type = VarType::FP32) {
auto predictor = CreateTestPredictor(config, use_analysis);
if (FLAGS_warmup) {
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
}
PredictionRun(predictor.get(), inputs, outputs, 1, 0);
PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
}
void TestMultiThreadPrediction(
@@ -505,13 +510,14 @@ void CompareQuantizedAndAnalysis(
auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
PrintConfig(cfg, true);
std::vector<std::vector<PaddleTensor>> analysis_outputs;
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);
LOG(INFO) << "--- INT8 prediction start ---";
auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
PrintConfig(qcfg, true);
std::vector<std::vector<PaddleTensor>> quantized_outputs;
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
VarType::INT8);
LOG(INFO) << "--- comparing outputs --- ";
CompareTopAccuracy(quantized_outputs, analysis_outputs);
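Because the new parameter defaults to VarType::FP32 at every level (PrintTime, PredictionWarmUp, PredictionRun, TestOneThreadPrediction), existing call sites keep compiling unchanged; only the quantization comparison passes VarType::INT8 explicitly. A minimal sketch of hypothetical caller code:

```cpp
std::vector<std::vector<PaddleTensor>> outputs;
// Unchanged call sites still work and are reported as FP32 by default ...
TestOneThreadPrediction(cfg, inputs, &outputs, /*use_analysis=*/true);
// ... which is equivalent to passing the data type explicitly.
TestOneThreadPrediction(cfg, inputs, &outputs, /*use_analysis=*/true,
                        VarType::FP32);
```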
@@ -640,7 +646,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
}
for (size_t i = 0; i < a_size; i++) {
if (a.type() == framework::proto::VarType::FP32) {
if (a.type() == VarType::FP32) {
const auto *a_data = a.data<float>();
const auto *b_data = b.data<float>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
@@ -649,7 +655,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
b_data[i]);
return false;
}
} else if (a.type() == framework::proto::VarType::INT64) {
} else if (a.type() == VarType::INT64) {
const auto *a_data = a.data<int64_t>();
const auto *b_data = b.data<int64_t>();
if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......