diff --git a/paddle/fluid/inference/analysis/analyzer_lac_tester.cc b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
index 1df1ade25f2ee0eeb7ece727bdd3c7fcb5d2be4a..5740faa746a3161965f525753b893918c3fd4d2e 100644
--- a/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
@@ -118,13 +118,6 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   input_slots->assign({input_tensor});
 }
 
-static void PrintTime(const double latency, const int bs, const int repeat) {
-  LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat
-            << ", avg latency: " << latency / repeat << "ms";
-  LOG(INFO) << "=====================================";
-}
-
 void BenchAllData(const std::string &model_path, const std::string &data_file,
                   const int batch_size, const int repeat) {
   NativeConfig config;
@@ -150,7 +143,7 @@ void BenchAllData(const std::string &model_path, const std::string &data_file,
       sum += timer.toc();
     }
   }
-  PrintTime(sum, batch_size, repeat);
+  PrintTime(batch_size, repeat, 1, 0, sum / repeat);
 }
 
 const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25,
@@ -206,7 +199,7 @@ void TestLACPrediction(const std::string &model_path,
   for (int i = 0; i < repeat; i++) {
     predictor->Run(input_slots, &outputs_slots);
   }
-  PrintTime(timer.toc(), batch_size, repeat);
+  PrintTime(batch_size, repeat, 1, 0, timer.toc() / repeat);
 
   // check result
   EXPECT_EQ(outputs_slots.size(), 1UL);
diff --git a/paddle/fluid/inference/analysis/analyzer_ner_tester.cc b/paddle/fluid/inference/analysis/analyzer_ner_tester.cc
index 720a8811db75a91a5774a29dd95285eceabadf83..eaae09b051f6d2d6c90b25312a07c50c4019e120 100644
--- a/paddle/fluid/inference/analysis/analyzer_ner_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_ner_tester.cc
@@ -25,6 +25,7 @@ DEFINE_string(infer_model, "", "model path");
 DEFINE_string(infer_data, "", "data path");
 DEFINE_int32(batch_size, 10, "batch size.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_bool(test_all_data, false, "Test all the data in the data file.");
 
 namespace paddle {
 namespace inference {
@@ -35,6 +36,7 @@ struct DataRecord {
   std::vector<size_t> lod;  // two inputs have the same lod info.
   size_t batch_iter{0};
   size_t batch_size{1};
+  size_t num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
@@ -81,6 +83,7 @@ struct DataRecord {
       word_data_all.push_back(std::move(word_data));
       mention_data_all.push_back(std::move(mention_data));
     }
+    num_samples = num_lines;
   }
 };
 
@@ -120,21 +123,38 @@ void TestChineseNERPrediction() {
   auto predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
 
   std::vector<PaddleTensor> input_slots;
-  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+  std::vector<PaddleTensor> outputs;
+  Timer timer;
+
+  if (FLAGS_test_all_data) {
+    LOG(INFO) << "test all data";
+    double sum = 0;
+    size_t num_samples;
+    for (int i = 0; i < FLAGS_repeat; i++) {
+      DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
+      num_samples = data.num_samples;
+      for (size_t bid = 0; bid < num_samples; ++bid) {
+        PrepareInputs(&input_slots, &data, FLAGS_batch_size);
+        timer.tic();
+        predictor->Run(input_slots, &outputs);
+        sum += timer.toc();
+      }
+    }
+    LOG(INFO) << "total number of samples: " << num_samples;
+    PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, sum / FLAGS_repeat);
+    LOG(INFO) << "average latency of each sample: "
+              << sum / FLAGS_repeat / num_samples;
+    return;
+  }
   // Prepare inputs.
+  DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
   PrepareInputs(&input_slots, &data, FLAGS_batch_size);
-  std::vector<PaddleTensor> outputs;
-  Timer timer;
   timer.tic();
   for (int i = 0; i < FLAGS_repeat; i++) {
     predictor->Run(input_slots, &outputs);
   }
-  LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << FLAGS_batch_size
-            << ", repeat: " << FLAGS_repeat
-            << ", latency: " << timer.toc() / FLAGS_repeat << "ms";
-  LOG(INFO) << "=====================================";
+  PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, timer.toc() / FLAGS_repeat);
 
   PADDLE_ENFORCE(outputs.size(), 1UL);
   auto &out = outputs[0];
diff --git a/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc b/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
index 0e493176c4bfb154de0d079868f9f396813ec48f..65169f8cfcc5bf1e989609666f6e0ba03e42e5ba 100644
--- a/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc
@@ -31,25 +31,11 @@ DEFINE_int32(repeat, 1, "How many times to repeat run.");
 DEFINE_int32(topn, -1, "Run top n batches of data to save time");
 
 namespace paddle {
-
-template <typename T>
-std::string to_string(const std::vector<T> &vec) {
-  std::stringstream ss;
-  for (const auto &c : vec) {
-    ss << c << " ";
-  }
-  return ss.str();
-}
-
-void PrintTime(const double latency, const int bs, const int repeat) {
-  LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << bs << ", repeat: " << repeat
-            << ", avg latency: " << latency / repeat << "ms";
-  LOG(INFO) << "=====================================";
-}
+namespace inference {
 
 struct DataReader {
-  DataReader(const std::string &path) : file(new std::ifstream(path)) {}
+  explicit DataReader(const std::string &path)
+      : file(new std::ifstream(path)) {}
 
   bool NextBatch(PaddleTensor *tensor, int batch_size) {
     PADDLE_ENFORCE_EQ(batch_size, 1);
@@ -107,8 +93,7 @@ void Main(int batch_size) {
       ++num_batches;
     }
   }
-
-  PrintTime(sum, batch_size, num_batches);
+  PrintTime(batch_size, FLAGS_repeat, 1, 0, sum / FLAGS_repeat);
 
   // Get output
   LOG(INFO) << "get outputs " << output_slots.size();
@@ -129,4 +114,5 @@ void Main(int batch_size) {
 
 TEST(text_classification, basic) { Main(FLAGS_batch_size); }
 
+}  // namespace inference
 }  // namespace paddle
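Note on the profiling change: all three testers drop their local PrintTime helpers and call a shared five-argument PrintTime instead. The sketch below is only an illustration of what such a helper might look like; the parameter names num_threads and tid are inferred from the literal 1, 0 arguments at the call sites and are not taken from this diff, and the real helper's location and output format may differ.

// Illustrative sketch, not the actual shared helper. Assumes glog is
// available, as in the testers above.
#include <glog/logging.h>

namespace paddle {
namespace inference {

// Prints one profiling record. The call sites above pass
// (batch_size, repeat, 1, 0, latency), so the third and fourth parameters
// are assumed to be a thread count and a thread id.
static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
                      double latency) {
  LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
            << ", threads: " << num_threads << ", thread id: " << tid
            << ", latency: " << latency << "ms ======";
}

}  // namespace inference
}  // namespace paddle

With this shape, the caller is responsible for averaging: each updated call site divides the accumulated time by the repeat count (sum / repeat or timer.toc() / repeat) before passing it as the latency argument.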