diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 5cc54ed299c50b48c83de2742b715b16cf1f8cd0..d13469a8482304d04b99c96e70bac5c8b90e4043 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -55,6 +55,9 @@ DEFINE_bool(record_benchmark, false,
 DEFINE_double(accuracy, 1e-3, "Result Accuracy.");
 DEFINE_double(quantized_accuracy, 1e-2, "Result Quantized Accuracy.");
 DEFINE_bool(zero_copy, false, "Use ZeroCopy to speedup Feed/Fetch.");
+DEFINE_bool(warmup, false,
+            "Use warmup to calculate elapsed_time more accurately. "
+            "It is disabled by default to reduce CI time.");
 
 DECLARE_bool(profile);
 DECLARE_int32(paddle_num_threads);
@@ -367,7 +370,9 @@ void TestOneThreadPrediction(
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
   auto predictor = CreateTestPredictor(config, use_analysis);
-  PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+  if (FLAGS_warmup) {
+    PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+  }
   PredictionRun(predictor.get(), inputs, outputs, 1, 0);
 }
 
@@ -395,7 +400,10 @@ void TestMultiThreadPrediction(
             ->SetMkldnnThreadID(static_cast<int>(tid) + 1);
       }
 #endif
-      PredictionWarmUp(predictor.get(), inputs, &outputs_tid, num_threads, tid);
+      if (FLAGS_warmup) {
+        PredictionWarmUp(predictor.get(), inputs, &outputs_tid, num_threads,
+                         tid);
+      }
       PredictionRun(predictor.get(), inputs, &outputs_tid, num_threads, tid);
     });
   }
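
Note for reviewers: a minimal, self-contained sketch of the gflags pattern this patch relies on (a boolean flag, false by default, gating an optional warm-up pass before the timed run). This is not part of the patch; RunWarmUp and RunBenchmark are hypothetical stand-ins for PredictionWarmUp/PredictionRun.

// sketch.cc -- build with: g++ sketch.cc -lgflags
// Illustrates the flag-gated warm-up pattern; names are placeholders.
#include <gflags/gflags.h>
#include <iostream>

DEFINE_bool(warmup, false,
            "Run a warm-up pass before timing so elapsed_time excludes "
            "one-time initialization. Disabled by default to keep CI fast.");

void RunWarmUp() { std::cout << "warm-up pass (untimed)\n"; }   // placeholder
void RunBenchmark() { std::cout << "timed pass\n"; }            // placeholder

int main(int argc, char *argv[]) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  if (FLAGS_warmup) {  // same guard the patch adds around PredictionWarmUp
    RunWarmUp();
  }
  RunBenchmark();
  return 0;
}

Running the binary with --warmup=true enables the extra pass; without it the behavior matches the new CI default, where only the timed run executes.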