diff --git a/paddle/fluid/inference/analysis/analyzer_lac_tester.cc b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
index a6e8351c4f92a9591d6af5f4c8916a4a7f2cef0a..1df1ade25f2ee0eeb7ece727bdd3c7fcb5d2be4a 100644
--- a/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_lac_tester.cc
@@ -178,6 +178,7 @@ void TestLACPrediction(const std::string &model_path,
     cfg.device = 0;
     cfg.specify_input_name = true;
     cfg.enable_ir_optim = true;
+    cfg.ir_passes.push_back("fc_gru_fuse_pass");
     predictor =
         CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(cfg);
   } else {
@@ -208,13 +209,6 @@ void TestLACPrediction(const std::string &model_path,
   PrintTime(timer.toc(), batch_size, repeat);
 
   // check result
-  if (use_analysis) {
-    // run once for comparion as reference
-    auto ref_predictor =
-        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
-    ref_predictor->Run(input_slots, &ref_outputs_slots);
-  }
-
   EXPECT_EQ(outputs_slots.size(), 1UL);
   auto &out = outputs_slots[0];
   size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
@@ -228,6 +222,10 @@ void TestLACPrediction(const std::string &model_path,
   }
 
   if (use_analysis) {
+    // run once for comparison as reference
+    auto ref_predictor =
+        CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
+    ref_predictor->Run(input_slots, &ref_outputs_slots);
     EXPECT_EQ(ref_outputs_slots.size(), outputs_slots.size());
     auto &ref_out = ref_outputs_slots[0];
     size_t ref_size =
@@ -256,12 +254,9 @@ void TestLACPrediction(const std::string &model_path,
     }
     LOG(INFO) << "has num ops: " << num_ops;
     ASSERT_TRUE(fuse_statis.count("fc_fuse"));
-    ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-    LOG(INFO) << "fc fuse num:" << fuse_statis.at("fc_fuse");
-    LOG(INFO) << "fc gru fuse num:" << fuse_statis.at("fc_gru_fuse");
-    // ASSERT_TRUE(fuse_statis.count("fc_gru_fuse"));
-    // LOG(INFO) << fuse_statis.at("fc_gru_fuse");
+    LOG(INFO) << "fc fuse num:" << fuse_statis.at("fc_fuse");
+    // LOG(INFO) << "fc gru fuse num:" << fuse_statis.at("fc_gru_fuse");
   }
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index a8fa677202d8429c274a6e3fdfd18ef5d48620c2..82d673fd1512412d850e09200046fdfa351ebede 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -22,12 +22,25 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/platform/profiler.h"
+
+DECLARE_bool(profile);
 
 namespace paddle {
 
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope>& parent_scope) {
   VLOG(3) << "Predictor::init()";
+#if !defined(_WIN32)
+  if (FLAGS_profile) {
+    LOG(WARNING) << "Profiler is activated, which might affect the performance";
+    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";
+    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
+                                           : platform::ProfilerState::kCPU;
+    platform::EnableProfiler(tracking_device);
+  }
+#endif
+
   if (config_.use_gpu) {
     place_ = paddle::platform::CUDAPlace(config_.device);
     LOG(WARNING) << "ir optimize only supports CPU currently";
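For context on the profiling hook added above: `DECLARE_bool(profile)` only declares the flag; a matching `DEFINE_bool(profile, ...)` must exist in exactly one translation unit elsewhere in the library. Below is a minimal standalone sketch of the same pattern, pairing the flag definition with the profiler enable/disable lifecycle. The file name, flag help text, and report path are illustrative, and the `DisableProfiler` call follows the `paddle::platform` profiler API of this era, so treat it as an assumption rather than the exact call sites of this PR:

```cpp
// profile_flag_sketch.cc -- hypothetical, minimal illustration of the
// DEFINE/DECLARE gflags pairing plus the profiler enable/disable lifecycle.
#include <gflags/gflags.h>
#include "paddle/fluid/platform/profiler.h"

// The definition lives in exactly one .cc file; any other file that needs
// the flag uses DECLARE_bool(profile), as analysis_predictor.cc does above.
DEFINE_bool(profile, false, "Turn on profiler for fluid inference");

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, true);

  if (FLAGS_profile) {
    // Mirror the predictor: profile CPU only; kAll would also track GPU.
    auto state = paddle::platform::ProfilerState::kCPU;
    paddle::platform::EnableProfiler(state);
  }

  // ... run inference here ...

  if (FLAGS_profile) {
    // Flush results sorted by total time; the output path is illustrative.
    paddle::platform::DisableProfiler(
        paddle::platform::EventSortingKey::kTotal, "/tmp/profiler_report");
  }
  return 0;
}
```

Invoked as `./a.out -profile` (gflags accepts both `-profile` and `--profile`), this emits a profiler report on exit; `-profile false` keeps profiling off, matching the hint logged in `AnalysisPredictor::Init`.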