Commit 984aa905 authored by Wojciech Uss, committed by Tao Luo

improved unit test output (#17266)

Added printing of the data type to differentiate INT8 and FP32 latency results.

test=develop
Parent 65541d83
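The change below threads an optional data-type argument through the timing helpers so that INT8 and FP32 runs can be told apart in the logs. As a minimal, self-contained sketch of that behaviour (the enum, strings, and numbers here are illustrative stand-ins, not Paddle's own types or output):

```cpp
// Simplified stand-in for the extended PrintTime helper in this commit:
// an optional data-type argument, defaulting to FP32, ends up in the log line.
#include <iostream>
#include <string>

enum class DType { FP32, INT8 };  // stand-in for framework::proto::VarType::Type

static std::string DTypeToString(DType t) {
  return t == DType::INT8 ? "int8" : "float32";
}

static void PrintTimeSketch(int batch_size, double batch_latency_ms,
                            DType data_type = DType::FP32) {
  double sample_latency = batch_latency_ms / batch_size;
  std::cout << "batch latency: " << batch_latency_ms << "ms"
            << ", sample latency: " << sample_latency << "ms"
            << ", fps: " << 1000.0 / sample_latency
            << ", data type: " << DTypeToString(data_type) << "\n";
}

int main() {
  PrintTimeSketch(50, 12.5);              // reported with the FP32 default, as before
  PrintTimeSketch(50, 4.0, DType::INT8);  // an INT8 run is now labelled as such
  return 0;
}
```

Because the argument defaults to FP32, existing call sites keep producing their old output unchanged.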
@@ -26,14 +26,20 @@
 #include <sstream>
 #include <string>
 #include <vector>
+#include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"

+extern std::string paddle::framework::DataTypeToString(
+    const framework::proto::VarType::Type type);
+
 namespace paddle {
 namespace inference {
+using paddle::framework::DataTypeToString;

 // Timer for timer
 class Timer {
  public:
@@ -267,17 +273,20 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
 }

 static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-                      double batch_latency, int epoch = 1) {
+                      double batch_latency, int epoch = 1,
+                      const framework::proto::VarType::Type data_type =
+                          framework::proto::VarType::FP32) {
   PADDLE_ENFORCE(batch_size > 0, "Non-positive batch size.");
   double sample_latency = batch_latency / batch_size;
   LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid
             << " ======";
-  LOG(INFO) << "====== batch_size: " << batch_size << ", iterations: " << epoch
+  LOG(INFO) << "====== batch size: " << batch_size << ", iterations: " << epoch
             << ", repetitions: " << repeat << " ======";
   LOG(INFO) << "====== batch latency: " << batch_latency
             << "ms, number of samples: " << batch_size * epoch
             << ", sample latency: " << sample_latency
-            << "ms, fps: " << 1000.f / sample_latency << " ======";
+            << "ms, fps: " << 1000.f / sample_latency
+            << ", data type: " << DataTypeToString(data_type) << " ======";
 }

 static bool IsFileExists(const std::string &path) {
......
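For illustration only, a hypothetical call site of the extended PrintTime declared above; the numeric values are made up, and the snippet assumes the translation unit already includes the modified header:

```cpp
// Hypothetical call site, not part of the commit. Assumes the header modified
// above is already included, so PrintTime and the VarType proto enum are
// visible here. The numeric values are made up for the example.
void LogInt8LatencyExample() {
  namespace proto = paddle::framework::proto;
  // batch_size, repeat, num_threads, tid, batch_latency (ms), epoch, data_type
  paddle::inference::PrintTime(50, 1, 1, 0, 12.5, 1, proto::VarType::INT8);
  // Leaving the last argument out keeps the old behaviour: the line is
  // reported with the default FP32 data type.
}
```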
@@ -65,6 +65,8 @@ DECLARE_int32(paddle_num_threads);

 namespace paddle {
 namespace inference {

+using paddle::framework::proto::VarType;
+
 template <typename T>
 constexpr paddle::PaddleDType GetPaddleDType();
@@ -293,7 +295,8 @@ void ConvertPaddleTensorToZeroCopyTensor(
 void PredictionWarmUp(PaddlePredictor *predictor,
                       const std::vector<std::vector<PaddleTensor>> &inputs,
                       std::vector<std::vector<PaddleTensor>> *outputs,
-                      int num_threads, int tid) {
+                      int num_threads, int tid,
+                      const VarType::Type data_type = VarType::FP32) {
   int batch_size = FLAGS_batch_size;
   LOG(INFO) << "Running thread " << tid << ", warm up run...";
   if (FLAGS_zero_copy) {
@@ -307,7 +310,7 @@ void PredictionWarmUp(PaddlePredictor *predictor,
   } else {
     predictor->ZeroCopyRun();
   }
-  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1);
+  PrintTime(batch_size, 1, num_threads, tid, warmup_timer.toc(), 1, data_type);
   if (FLAGS_profile) {
     paddle::platform::ResetProfiler();
   }
@@ -316,7 +319,8 @@ void PredictionWarmUp(PaddlePredictor *predictor,
 void PredictionRun(PaddlePredictor *predictor,
                    const std::vector<std::vector<PaddleTensor>> &inputs,
                    std::vector<std::vector<PaddleTensor>> *outputs,
-                   int num_threads, int tid) {
+                   int num_threads, int tid,
+                   const VarType::Type data_type = VarType::FP32) {
   int num_times = FLAGS_repeat;
   int iterations = inputs.size();  // process the whole dataset ...
   if (FLAGS_iterations > 0 &&
@@ -355,7 +359,7 @@ void PredictionRun(PaddlePredictor *predictor,
   auto batch_latency = elapsed_time / (iterations * num_times);
   PrintTime(FLAGS_batch_size, num_times, num_threads, tid, batch_latency,
-            iterations);
+            iterations, data_type);
   if (FLAGS_record_benchmark) {
     Benchmark benchmark;
     benchmark.SetName(FLAGS_model_name);
@@ -368,12 +372,13 @@ void PredictionRun(PaddlePredictor *predictor,
 void TestOneThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
-    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
+    std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true,
+    const VarType::Type data_type = VarType::FP32) {
   auto predictor = CreateTestPredictor(config, use_analysis);
   if (FLAGS_warmup) {
-    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0, data_type);
   }
-  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0, data_type);
 }

 void TestMultiThreadPrediction(
@@ -505,13 +510,14 @@ void CompareQuantizedAndAnalysis(
   auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
   PrintConfig(cfg, true);
   std::vector<std::vector<PaddleTensor>> analysis_outputs;
-  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true);
+  TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32);

   LOG(INFO) << "--- INT8 prediction start ---";
   auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
   PrintConfig(qcfg, true);
   std::vector<std::vector<PaddleTensor>> quantized_outputs;
-  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true);
+  TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
+                          VarType::INT8);

   LOG(INFO) << "--- comparing outputs --- ";
   CompareTopAccuracy(quantized_outputs, analysis_outputs);
@@ -640,7 +646,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
   }

   for (size_t i = 0; i < a_size; i++) {
-    if (a.type() == framework::proto::VarType::FP32) {
+    if (a.type() == VarType::FP32) {
       const auto *a_data = a.data<float>();
       const auto *b_data = b.data<float>();
       if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
@@ -649,7 +655,7 @@ static bool CompareTensorData(const framework::LoDTensor &a,
             b_data[i]);
         return false;
       }
-    } else if (a.type() == framework::proto::VarType::INT64) {
+    } else if (a.type() == VarType::INT64) {
       const auto *a_data = a.data<int64_t>();
       const auto *b_data = b.data<int64_t>();
       if (std::abs(a_data[i] - b_data[i]) > 1e-3) {
......
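Likewise, a hypothetical INT8 test body (not part of this commit) showing how the new argument reaches the timing log through TestOneThreadPrediction; it assumes the modified test helper above is included and that qcfg and inputs are prepared as in the existing tests:

```cpp
// Hypothetical INT8 test body, not part of the commit. Assumes the test
// helper modified above is already included; qcfg and inputs are prepared
// the same way the existing tests prepare them.
#include <vector>

void RunInt8PredictionExample(
    const paddle::PaddlePredictor::Config *qcfg,
    const std::vector<std::vector<paddle::PaddleTensor>> &inputs) {
  std::vector<std::vector<paddle::PaddleTensor>> outputs;
  paddle::inference::TestOneThreadPrediction(
      qcfg, inputs, &outputs, /*use_analysis=*/true,
      paddle::framework::proto::VarType::INT8);
  // The batch-latency line printed via PrintTime is now tagged with the INT8
  // data type (exact spelling comes from DataTypeToString), so it can be told
  // apart from the FP32 baseline run.
}
```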