From 0c3227a523f816239bb16de0dce9d6413d3c0e42 Mon Sep 17 00:00:00 2001
From: minqiyang <minqiyang@baidu.com>
Date: Thu, 8 Nov 2018 16:00:07 +0800
Subject: [PATCH] Multiply the original VLOG levels by 10

Fix code to pass the cpplint style check

test=develop
---
 .../fluid/framework/data_device_transform.cc  |   4 +-
 .../framework/data_device_transform_test.cu   |   6 +-
 .../framework/details/broadcast_op_handle.cc  |   2 +-
 .../modify_op_lock_and_record_event_pass.cc   |   4 +-
 .../details/multi_devices_graph_pass.cc       |  12 +-
 .../framework/details/reference_count_pass.cc |   4 +-
 .../details/scale_loss_grad_op_handle.cc      |   2 +-
 .../details/sequential_execution_pass.cc      |   4 +-
 .../details/threaded_ssa_graph_executor.cc    |   8 +-
 paddle/fluid/framework/executor.cc            |  34 +-
 paddle/fluid/framework/feed_fetch_method.cc   |   6 +-
 .../framework/ir/attention_lstm_fuse_pass.cc  |  28 +-
 .../ir/conv_bias_mkldnn_fuse_pass.cc          |   4 +-
 .../fluid/framework/ir/conv_bn_fuse_pass.cc   |   6 +-
 .../ir/conv_relu_mkldnn_fuse_pass.cc          |   4 +-
 .../ir/depthwise_conv_mkldnn_pass.cc          |   2 +-
 paddle/fluid/framework/ir/fc_fuse_pass.cc     |   2 +-
 .../framework/ir/fuse_elewise_add_act_pass.cc |  28 +-
 paddle/fluid/framework/ir/graph.cc            |   4 +-
 paddle/fluid/framework/ir/graph.h             |   2 +-
 paddle/fluid/framework/ir/graph_helper.cc     |  19 +-
 .../framework/ir/graph_pattern_detector.cc    |  22 +-
 paddle/fluid/framework/ir/graph_viz_pass.cc   |   2 +-
 .../framework/ir/mkldnn_placement_pass.cc     |   2 +-
 .../framework/ir/multi_batch_merge_pass.cc    |   8 +-
 paddle/fluid/framework/ir/pass.h              |   2 +-
 .../framework/ir/seq_concat_fc_fuse_pass.cc   |  12 +-
 .../ir/seqconv_eltadd_relu_fuse_pass.cc       |   2 +-
 paddle/fluid/framework/lod_rank_table.cc      |   2 +-
 paddle/fluid/framework/mixed_vector_test.cc   |   2 +-
 paddle/fluid/framework/naive_executor.cc      |  14 +-
 paddle/fluid/framework/op_desc.cc             |  32 +-
 paddle/fluid/framework/op_registry.cc         |   6 +-
 paddle/fluid/framework/operator.cc            |  15 +-
 paddle/fluid/framework/parallel_executor.cc   |   2 +-
 paddle/fluid/framework/scope.cc               |   2 +-
 paddle/fluid/framework/selected_rows.cc       |   2 +-
 paddle/fluid/framework/tensor_util.cc         |  24 +-
 paddle/fluid/framework/tensor_util.cu         | 491 +++++++++++++++++-
 paddle/fluid/framework/threadpool.cc          |   2 +-
 paddle/fluid/framework/var_desc.cc            |  28 +-
 paddle/fluid/inference/analysis/analyzer.cc   |   4 +-
 paddle/fluid/inference/analysis/argument.h    |   4 +-
 .../inference/analysis/data_flow_graph.cc     |  10 +-
 .../analysis/data_flow_graph_to_fluid_pass.cc |   7 +-
 .../analysis/dfg_graphviz_draw_pass.cc        |   2 +-
 .../inference/analysis/fluid_to_ir_pass.cc    |   2 +-
 .../inference/analysis/model_store_pass.cc    |   8 +-
 .../fluid/inference/analysis/pass_manager.cc  |   4 +-
 .../inference/analysis/subgraph_splitter.cc   |   2 +-
 .../analysis/tensorrt_subgraph_pass.cc        |   6 +-
 .../fluid/inference/api/analysis_predictor.cc |  16 +-
 .../fluid/inference/api/api_anakin_engine.cc  |  36 +-
 paddle/fluid/inference/api/api_impl.cc        |  20 +-
 .../api/api_tensorrt_subgraph_engine.cc       |  14 +-
 .../api/demo_ci/trt_mobilenet_demo.cc         |   8 +-
 paddle/fluid/inference/api/demo_ci/utils.h    |  10 +-
 .../fluid/inference/api/demo_ci/vis_demo.cc   |  10 +-
 .../api/details/reset_tensor_array.cc         |   4 +-
 paddle/fluid/inference/io.cc                  |   4 +-
 .../inference/tensorrt/convert/concat_op.cc   |   2 +-
 .../inference/tensorrt/convert/dropout_op.cc  |   2 +-
 .../fluid/inference/tensorrt/convert/fc_op.cc |   2 +-
 .../inference/tensorrt/convert/mul_op.cc      |   2 +-
 .../inference/tensorrt/convert/pad_op.cc      |   2 +-
 .../inference/tensorrt/convert/pool2d_op.cc   |   2 +-
 .../inference/tensorrt/convert/softmax_op.cc  |   2 +-
 .../inference/tests/api/anakin_rnn1_tester.cc |   4 +-
 .../tests/api/analyzer_vis_tester.cc          |   6 +-
 paddle/fluid/memory/detail/buddy_allocator.cc |  56 +-
 paddle/fluid/memory/detail/meta_cache.cc      |   2 +-
 paddle/fluid/memory/malloc.cc                 |  18 +-
 paddle/fluid/operators/activation_op.h        |   2 +-
 paddle/fluid/operators/adam_op.h              |   2 +-
 paddle/fluid/operators/array_operator.h       |   2 +-
 .../fluid/operators/array_to_lod_tensor_op.cc |   4 +-
 paddle/fluid/operators/batch_norm_op.cu.cc    |   2 +-
 paddle/fluid/operators/beam_search_op.cc      |  12 +-
 .../fluid/operators/checkpoint_notify_op.cc   |   4 +-
 paddle/fluid/operators/concat_op.cc           |   2 +-
 paddle/fluid/operators/conv_cudnn_op.cu.cc    |   4 +-
 .../operators/distributed/brpc_server.cc      |   4 +-
 .../operators/distributed/grpc_client.cc      |  14 +-
 .../operators/distributed/grpc_server.cc      |  45 +-
 .../operators/distributed/request_handler.h   |   4 +-
 .../distributed/request_handler_impl.cc       |  25 +-
 .../fluid/operators/distributed/rpc_server.cc |  20 +-
 .../distributed/variable_response.cc          |   8 +-
 paddle/fluid/operators/feed_op.cc             |   4 +-
 paddle/fluid/operators/fetch_barrier_op.cc    |   2 +-
 paddle/fluid/operators/fetch_op.cc            |   2 +-
 paddle/fluid/operators/gen_nccl_id_op.cc      |  10 +-
 paddle/fluid/operators/listen_and_serv_op.cc  |  34 +-
 paddle/fluid/operators/lod_rank_table_op.cc   |   4 +-
 paddle/fluid/operators/lookup_table_op.cc     |   8 +-
 paddle/fluid/operators/math/cpu_vec_test.cc   |   4 +-
 .../fluid/operators/math/jit_kernel_test.cc   |  89 ++--
 .../operators/math/selected_rows_functor.cc   |   4 +-
 .../operators/math/selected_rows_functor.cu   |   4 +-
 paddle/fluid/operators/momentum_op.h          |   2 +-
 paddle/fluid/operators/mul_op.cc              |   6 +-
 paddle/fluid/operators/nccl_op.cu.cc          |  31 +-
 paddle/fluid/operators/nccl_op_test.cu.cc     |  14 +-
 paddle/fluid/operators/parallel_do_op.cc      |  10 +-
 paddle/fluid/operators/prefetch_op.cc         |   6 +-
 paddle/fluid/operators/random_crop_op.h       |   4 +-
 .../fluid/operators/reader/blocking_queue.h   |   4 +-
 .../reader/create_shuffle_reader_op.cc        |   6 +-
 paddle/fluid/operators/recurrent_op.cc        |  26 +-
 paddle/fluid/operators/recv_op.cc             |   2 +-
 .../fluid/operators/rnn_memory_helper_op.cc   |   2 +-
 paddle/fluid/operators/save_op.cc             |   2 +-
 paddle/fluid/operators/send_barrier_op.cc     |   4 +-
 paddle/fluid/operators/send_op.cc             |   4 +-
 paddle/fluid/operators/send_recv_op_test.cc   |   4 +-
 paddle/fluid/operators/sequence_mask_op.h     |   2 +-
 paddle/fluid/operators/sgd_op.h               |   8 +-
 paddle/fluid/operators/split_byref_op.h       |   2 +-
 paddle/fluid/operators/split_ids_op.h         |   2 +-
 paddle/fluid/operators/sum_mkldnn_op.cc       |   2 +-
 paddle/fluid/operators/sum_op.cc              |   6 +-
 .../operators/tensor_array_read_write_op.cc   |  14 +-
 paddle/fluid/operators/tensorrt_engine_op.h   |  16 +-
 paddle/fluid/operators/while_op.cc            |  18 +-
 paddle/fluid/platform/device_tracer.cc        |   8 +-
 .../fluid/platform/dynload/dynamic_loader.cc  |   4 +-
 paddle/fluid/platform/gpu_info.cc             |   4 +-
 paddle/fluid/platform/init.cc                 |   2 +-
 paddle/fluid/platform/nccl_helper.h           |   2 +-
 paddle/fluid/pybind/protobuf.cc               |   6 +-
 paddle/fluid/train/demo/demo_trainer.cc       |   2 +-
 paddle/testing/TestUtil.cpp                   |   2 +-
 132 files changed, 1091 insertions(+), 583 deletions(-)
 mode change 120000 => 100644 paddle/fluid/framework/tensor_util.cu

diff --git a/paddle/fluid/framework/data_device_transform.cc b/paddle/fluid/framework/data_device_transform.cc
index fee6ba400..57ff061fe 100644
--- a/paddle/fluid/framework/data_device_transform.cc
+++ b/paddle/fluid/framework/data_device_transform.cc
@@ -18,8 +18,8 @@ namespace framework {
 
 void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
                      Tensor *out) {
-  VLOG(3) << "DeviceTransform in, src_place " << in.place()
-          << " dst_place: " << dst_place;
+  VLOG(30) << "DeviceTransform in, src_place " << in.place()
+           << " dst_place: " << dst_place;
 
   PADDLE_ENFORCE_NE(
       in.place().which(), dst_place.which(),
diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu
index f2c55e533..21e0cb3f9 100644
--- a/paddle/fluid/framework/data_device_transform_test.cu
+++ b/paddle/fluid/framework/data_device_transform_test.cu
@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
   OpKernelType GetExpectedKernelType(
       const ExecutionContext& ctx) const override {
     if (Attr<bool>("use_gpu")) {
-      VLOG(3) << "force use gpu kernel";
+      VLOG(30) << "force use gpu kernel";
       return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
     } else {
-      VLOG(3) << "use default kernel";
+      VLOG(30) << "use default kernel";
       return OpKernelType(proto::VarType::FP32,
                           ctx.Input<Tensor>("input")->place());
     }
@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
   // get output
   auto* output2 = scope.Var("OUT2");
   gpu_op->Run(scope, cuda_place);
-  VLOG(3) << "after gpu_op run";
+  VLOG(30) << "after gpu_op run";
 
   // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
   paddle::platform::DeviceContextPool& pool =
diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc
index 7f0d06c89..8e5e54276 100644
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
   PADDLE_ENFORCE_NOT_NULL(in_var);
   Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
   if (UNLIKELY(!in_tensor.IsInitialized())) {
-    VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
+    VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
     return;
   }
 
diff --git a/paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc b/paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc
index 169ce3ae7..a3ecd589a 100644
--- a/paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc
+++ b/paddle/fluid/framework/details/modify_op_lock_and_record_event_pass.cc
@@ -44,8 +44,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
         IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
     compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
     if (is_lock_and_record_event_free) {
-      VLOG(10) << "Set is_lock_and_record_event_free be true in op "
-               << compute_op->DebugString();
+      VLOG(100) << "Set is_lock_and_record_event_free be true in op "
+                << compute_op->DebugString();
     }
   }
   return ir_graph;
diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index f3819887a..2ead651c6 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -392,7 +392,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
               for (size_t i = 0; i < backward_vars.size(); i += 2) {
                 auto &p_name = backward_vars[i];
                 auto &g_name = backward_vars[i + 1];
-                VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
+                VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
 
                 switch (strategy_.reduce_) {
                   case BuildStrategy::ReduceStrategy::kReduce:
@@ -794,8 +794,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
           node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
       PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
       op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
-      VLOG(10) << "send grad " << input_var_names[0] << " origin "
-               << send_param_grad[1] << " place: " << op_dev_id;
+      VLOG(100) << "send grad " << input_var_names[0] << " origin "
+                << send_param_grad[1] << " place: " << op_dev_id;
       for (auto &varname : input_var_names) {
         result->Get<ShardedVarDevice>(kShardedVarDevice)
             .emplace(varname, op_dev_id);
@@ -812,9 +812,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
         node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
     if (recv_param_grad.size() == 2U) {
       op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]);
-      VLOG(10) << "recv param " << recv_param_grad[0]
-               << " get grad place: " << recv_param_grad[1]
-               << " place: " << op_dev_id;
+      VLOG(100) << "recv param " << recv_param_grad[0]
+                << " get grad place: " << recv_param_grad[1]
+                << " place: " << op_dev_id;
     } else {
       op_dev_id = GetAppropriateDeviceID(output_var_names);
     }
diff --git a/paddle/fluid/framework/details/reference_count_pass.cc b/paddle/fluid/framework/details/reference_count_pass.cc
index 0b994ced7..955f075ed 100644
--- a/paddle/fluid/framework/details/reference_count_pass.cc
+++ b/paddle/fluid/framework/details/reference_count_pass.cc
@@ -141,8 +141,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
         if (next_compute_op != nullptr) {
           if (compute_ref_cnt_map.count(next_compute_op)) {
             compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
-            VLOG(5) << "Add reference count of " << var_name << " to Operator "
-                    << next_compute_op->Name();
+            VLOG(50) << "Add reference count of " << var_name << " to Operator "
+                     << next_compute_op->Name();
           } else {
             // Create new reference_count_op_handle
             ir::Node *ref_cnt_node = graph->CreateEmptyNode(
diff --git a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
index ef1626599..6ab6cb233 100644
--- a/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
+++ b/paddle/fluid/framework/details/scale_loss_grad_op_handle.cc
@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
                         ->stream();
       memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
                    platform::CPUPlace(), &coeff_, sizeof(float), stream);
-      VLOG(10) << place_ << "RUN Scale loss grad op";
+      VLOG(100) << place_ << "RUN Scale loss grad op";
     });
 #endif
   }
diff --git a/paddle/fluid/framework/details/sequential_execution_pass.cc b/paddle/fluid/framework/details/sequential_execution_pass.cc
index cc2c8bfef..f78a47bb7 100644
--- a/paddle/fluid/framework/details/sequential_execution_pass.cc
+++ b/paddle/fluid/framework/details/sequential_execution_pass.cc
@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
     op_node_list[i - 1]->outputs.push_back(dep_var);
     dep_var->outputs.push_back(op_node_list[i]);
     dep_var->inputs.push_back(op_node_list[i - 1]);
-    VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
-             << " and " << op_node_list[i]->Name();
+    VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
+              << " and " << op_node_list[i]->Name();
   }
   return graph;
 }
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index 2d2bdb604..de22191c5 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -208,16 +208,16 @@ void ThreadedSSAGraphExecutor::RunOp(
     details::OpHandleBase *op) {
   auto op_run = [ready_var_q, op, this] {
     try {
-      if (VLOG_IS_ON(10)) {
-        VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
+      if (VLOG_IS_ON(100)) {
+        VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
       }
       if (LIKELY(!strategy_.dry_run_)) {
         op->Run(strategy_.use_cuda_);
       }
-      VLOG(10) << op << " " << op->Name() << " Done ";
+      VLOG(100) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());
-      VLOG(10) << op << " " << op->Name() << "Signal posted";
+      VLOG(100) << op << " " << op->Name() << "Signal posted";
     } catch (...) {
       exception_holder_.Catch(std::current_exception());
     }
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index 8ed0ba1df..fc6b32528 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -43,7 +43,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
 }
 
 ExecutorPrepareContext::~ExecutorPrepareContext() {
-  VLOG(5) << "destroy ExecutorPrepareContext";
+  VLOG(50) << "destroy ExecutorPrepareContext";
 }
 
 template <typename RefCntMap>
@@ -60,7 +60,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
         if ((it->second)-- == 1) {
           auto* var = scope.FindVar(name);
           if (var != nullptr) {
-            VLOG(10) << "Erase tensor \'" << name << "\'";
+            VLOG(100) << "Erase tensor \'" << name << "\'";
             if (var->IsType<LoDTensor>()) {
               erase_tensors.insert(var->GetMutable<LoDTensor>());
             } else if (var->IsType<SelectedRows>()) {
@@ -141,21 +141,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
       if (var->Persistable()) {
         auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " global, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " global, which pointer is " << ptr;
       } else {
         auto* ptr = scope->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " locally, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " locally, which pointer is " << ptr;
       }
     }
   } else {
     for (auto& var : global_block.AllVars()) {
       auto* ptr = scope->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
-      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
-              << ptr;
+      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
+               << ptr;
     }
   }
 }
@@ -286,7 +286,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& feed_target : (*feed_targets)) {
       std::string var_name = feed_target.first;
-      VLOG(3) << "feed target's name: " << var_name;
+      VLOG(30) << "feed target's name: " << var_name;
 
       // prepend feed op
       auto* op = global_block->PrependOp();
@@ -309,7 +309,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& fetch_target : (*fetch_targets)) {
       std::string var_name = fetch_target.first;
-      VLOG(3) << "fetch target's name: " << var_name;
+      VLOG(30) << "fetch target's name: " << var_name;
 
       // append fetch op
       auto* op = global_block->AppendOp();
@@ -398,8 +398,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     }
 
     if (FLAGS_benchmark) {
-      VLOG(2) << "Memory used after operator " + op->Type() + " running: "
-              << memory::memory_usage(place_);
+      VLOG(20) << "Memory used after operator " + op->Type() + " running: "
+               << memory::memory_usage(place_);
     }
   }
 
@@ -424,10 +424,10 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
   }
 
   if (FLAGS_benchmark) {
-    VLOG(2) << "-------------------------------------------------------";
-    VLOG(2) << "Memory used after deleting local scope: "
-            << memory::memory_usage(place_);
-    VLOG(2) << "-------------------------------------------------------";
+    VLOG(20) << "-------------------------------------------------------";
+    VLOG(20) << "Memory used after deleting local scope: "
+             << memory::memory_usage(place_);
+    VLOG(20) << "-------------------------------------------------------";
   }
 }
 
@@ -471,7 +471,7 @@ void Executor::RunPreparedContext(
 
 void Executor::EnableMKLDNN(const ProgramDesc& program) {
 #ifdef PADDLE_WITH_MKLDNN
-  VLOG(3) << "use_mkldnn=True";
+  VLOG(30) << "use_mkldnn=True";
   for (size_t bid = 0; bid < program.Size(); ++bid) {
     auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
     for (auto* op : block->AllOps()) {
diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc
index 3e9353f5c..1f3c19c0d 100644
--- a/paddle/fluid/framework/feed_fetch_method.cc
+++ b/paddle/fluid/framework/feed_fetch_method.cc
@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
                      const std::string& var_name, size_t index) {
   // If var_name Variable is not found in GlobalScope, a new variable will
   // be created.
-  VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
+  VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
   Variable* g_feed_value = scope->Var(var_name);
   auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
   if (index >= feed_inputs.size()) {
@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
                  typeid(FeedFetchList).name());
   auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
   auto& tensor = fetch_outputs[index];
-  VLOG(3) << "Fetch " << var_name << " with index " << index
-          << " shape= " << tensor.dims();
+  VLOG(30) << "Fetch " << var_name << " with index " << index
+           << " shape= " << tensor.dims();
   PADDLE_ENFORCE_LT(index, fetch_outputs.size());
   return tensor;
 }
diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
index 6090f1fe7..6b284b1c1 100644
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) {
   scope->Var(param.LSTMX)->GetMutable<LoDTensor>();
   scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>();
 
-#define GATE_W(name__)                                               \
-  auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0");            \
-  auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1");            \
-  auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0");            \
-  CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0);       \
-  VLOG(4) << #name__ "_w0"                                           \
-          << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
-  VLOG(4) << #name__ "_w1"                                           \
-          << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
-  VLOG(4) << #name__ "_b0"                                           \
-          << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
-  auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>();       \
-  auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>();       \
+#define GATE_W(name__)                                                \
+  auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0");             \
+  auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1");             \
+  auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0");             \
+  CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0);        \
+  VLOG(40) << #name__ "_w0"                                           \
+           << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
+  VLOG(40) << #name__ "_w1"                                           \
+           << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
+  VLOG(40) << #name__ "_b0"                                           \
+           << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
+  auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>();        \
+  auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>();        \
   auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>();
 
   GATE_W(forget);
@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
   int D = W_forget_w0.dims()[0];
   int M = W_forget_w1.dims()[0];
   out->Resize(make_ddim({D + M, 4 * D}));
-  VLOG(3) << "LSTMWeight resized to " << out->dims();
+  VLOG(30) << "LSTMWeight resized to " << out->dims();
 
   float* out_data = out->mutable_data<float>(platform::CPUPlace());
   std::array<const float*, 4> tensors(
diff --git a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
index 449cc78be..c9c4d5afe 100644
--- a/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bias_mkldnn_fuse_pass.cc
@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
   int found_conv_bias_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBias fuse";
+    VLOG(40) << "handle ConvBias fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_bias_pattern);                      // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern);  // tmp
@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
     if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
-      VLOG(3) << "do not perform conv+bias fuse";
+      VLOG(30) << "do not perform conv+bias fuse";
       return;
     }
 
diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
index 846a14e36..34b4c26ae 100644
--- a/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass.cc
@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBN fuse";
+    VLOG(40) << "handle ConvBN fuse";
 
     // conv, batch_norm,
     // conv_weight, conv_out,
@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(3) << "do not perform conv+bn fuse";
+      VLOG(30) << "do not perform conv+bn fuse";
       return;
     }
 
@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBN fuse";
+    VLOG(40) << "handle ConvBN fuse";
 
     // conv, batch_norm,
     // conv_weight, conv_out,
diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
index e359a3832..048868e1f 100644
--- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc
@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
   int found_conv_relu_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvReLU fuse";
+    VLOG(40) << "handle ConvReLU fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_relu_pattern);                      // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern);  // tmp
@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
 
     FuseOptions fuse_option = FindFuseOption(*conv, *relu);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(3) << "do not perform conv+relu fuse";
+      VLOG(30) << "do not perform conv+relu fuse";
       return;
     }
 
diff --git a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc b/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc
index 19056e18a..5f3334578 100644
--- a/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc
+++ b/paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.cc
@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
   int found_depthwise_conv_mkldnn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
+    VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
     GET_NODE(depthwise_conv, (*pattern));
     depthwise_conv->Op()->SetType("conv2d");
     found_depthwise_conv_mkldnn_count++;
diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc
index ca704c7f5..3348abb19 100644
--- a/paddle/fluid/framework/ir/fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc
@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
   int found_fc_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle FC fuse";
+    VLOG(40) << "handle FC fuse";
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc
index 648acc4a7..8ed68905b 100644
--- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc
+++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc
@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
 
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddAct fuse";
+    VLOG(40) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
                               elewise_add_act_pattern);
@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
 
-    VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
-            << ele_add->Name() << " -> " << ele_out_n << "\n"
-            << "\t " << ele_out_n << " -> " << act->Name() << " -> "
-            << act_out_n;
+    VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
+             << ele_add->Name() << " -> " << ele_out_n << "\n"
+             << "\t " << ele_out_n << " -> " << act->Name() << " -> "
+             << act_out_n;
 
     ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node);
     found_elewise_add_act_count++;
@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
 
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddAct fuse";
+    VLOG(40) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
 
-    VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
-            << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
-            << ele_add->Name() << " -> " << elewise_add_out_n;
+    VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
+             << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
+             << ele_add->Name() << " -> " << elewise_add_out_n;
 
     ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node);
     found_elewise_add_act_count++;
@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
 
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
+    VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
 
     auto fused_node = g->CreateOpNode(&desc);
 
-    VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
-            << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
-            << d_itermediate_out_n << " and " << act_out_n << " -> "
-            << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
+    VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
+             << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
+             << d_itermediate_out_n << " and " << act_out_n << " -> "
+             << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
 
     ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node);
     found_elewise_add_act_count++;
diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc
index 132159b8b..a2a8baa5e 100644
--- a/paddle/fluid/framework/ir/graph.cc
+++ b/paddle/fluid/framework/ir/graph.cc
@@ -92,7 +92,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
 
 std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
     const ProgramDesc &program) {
-  VLOG(3) << "block in program:" << program_.Size();
+  VLOG(30) << "block in program:" << program_.Size();
   std::unordered_map<std::string, VarDesc *> all_vars;
   // var nodes for each var name, will have multiple versions in SSA
   std::map<std::string, std::vector<ir::Node *>> var_nodes;
@@ -160,7 +160,7 @@ void Graph::ResolveHazard(
     auto it_old = versions.rbegin();
     ++it_old;
     for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
-      VLOG(3) << "deal with var: " << (*it_new)->Name();
+      VLOG(30) << "deal with var: " << (*it_new)->Name();
       ir::Node *write_op =
           (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
       const auto &read_ops = (*it_old)->outputs;
diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h
index 9d7aa5d32..46501f8d5 100644
--- a/paddle/fluid/framework/ir/graph.h
+++ b/paddle/fluid/framework/ir/graph.h
@@ -89,7 +89,7 @@ class Graph {
                    attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(3) << "deleting " << attr_name;
+      VLOG(30) << "deleting " << attr_name;
       delete attr;
     };
   }
diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc
index 01e878089..98112c1ed 100644
--- a/paddle/fluid/framework/ir/graph_helper.cc
+++ b/paddle/fluid/framework/ir/graph_helper.cc
@@ -33,8 +33,9 @@ void SortHelper(
     }
   }
 
-  VLOG(3) << "topology sort insert: " << node->Name()
-          << reinterpret_cast<void *>(node) << " input " << node->inputs.size();
+  VLOG(30) << "topology sort insert: " << node->Name()
+           << reinterpret_cast<void *>(node) << " input "
+           << node->inputs.size();
   ret->push_back(node);
 }
 
@@ -103,9 +104,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
     for (auto &var : n->inputs) {
       for (auto &adj_n : var->inputs) {
         PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
-        VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
-                << " -> " << n->Name() << reinterpret_cast<void *>(n)
-                << "  via " << var->Name() << reinterpret_cast<void *>(var);
+        VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
+                 << " -> " << n->Name() << reinterpret_cast<void *>(n)
+                 << "  via " << var->Name() << reinterpret_cast<void *>(var);
         adj_list[n].insert(adj_n);
       }
     }
@@ -163,10 +164,10 @@ size_t GraphNum(const Graph &graph) {
     graph_nodes.emplace_back(g_nodes);
   }
 
-  if (VLOG_IS_ON(10)) {
-    VLOG(10) << "graph_num: " << graph_nodes.size();
+  if (VLOG_IS_ON(100)) {
+    VLOG(100) << "graph_num: " << graph_nodes.size();
     for (auto &g_n : graph_nodes) {
-      VLOG(10) << "graph_nodes: " << g_n.size();
+      VLOG(100) << "graph_nodes: " << g_n.size();
       if (g_n.size() < 10) {
         std::stringstream out;
         for (auto &node : g_n) {
@@ -180,7 +181,7 @@ size_t GraphNum(const Graph &graph) {
           }
           out << "]";
         }
-        VLOG(10) << out.str();
+        VLOG(100) << out.str();
       }
     }
   }
diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc
index b20d70132..0a3c8a6cb 100644
--- a/paddle/fluid/framework/ir/graph_pattern_detector.cc
+++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <algorithm>
 #include <array>
 #include <string>
 #include <vector>
@@ -91,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
   PrettyLogEndl(Style::detail(), "---  detect %d subgraphs", subgraphs.size());
   int id = 0;
   for (auto &g : subgraphs) {
-    VLOG(3) << "optimizing #" << id++ << " subgraph";
+    VLOG(30) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }
 
 bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
-  VLOG(3) << "mark pdnodes in graph";
+  VLOG(30) << "mark pdnodes in graph";
   if (graph.Nodes().empty()) return false;
 
   for (auto &node : GraphTraits::DFS(graph)) {
     for (const auto &pdnode : pattern_.nodes()) {
       if (pdnode->Tell(&node)) {
-        VLOG(4) << "pdnode " << pdnode->name() << " marked";
+        VLOG(40) << "pdnode " << pdnode->name() << " marked";
         pdnodes2nodes_[pdnode.get()].insert(&node);
       }
     }
@@ -111,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
   // Check to early stop if some PDNode can't find matched Node.
   for (auto &pdnode : pattern_.nodes()) {
     if (!pdnodes2nodes_.count(pdnode.get())) {
-      VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
+      VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
       // return false;
     }
   }
@@ -120,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
       GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
     }
   }
-  VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
+  VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
 
   return !pdnodes2nodes_.empty();
 }
@@ -213,7 +214,7 @@ GraphPatternDetector::DetectPatterns() {
   // Extend a PDNode to subgraphs by deducing the connection relations defined
   // in edges of PDNodes.
   for (const auto &edge : pattern_.edges()) {
-    VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
+    VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
     // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
     // Each role has two PDNodes, which indicates two roles.
     // Detect two Nodes that can match these two roles and they are connected.
@@ -224,7 +225,7 @@ GraphPatternDetector::DetectPatterns() {
     // source -> target
     for (Node *source : pdnodes2nodes_[edge.first]) {
       for (Node *target : pdnodes2nodes_[edge.second]) {
-        VLOG(8) << "check " << source->id() << " -- " << target->id();
+        VLOG(80) << "check " << source->id() << " -- " << target->id();
         // TODO(Superjomn) add some prune strategies.
         for (const auto &group : pre_groups) {
           HitGroup new_group = group;
@@ -240,12 +241,13 @@ GraphPatternDetector::DetectPatterns() {
         }
       }
     }
-    VLOG(3) << "step " << step << " get records: " << cur_groups.size();
+    VLOG(30) << "step " << step << " get records: " << cur_groups.size();
     for (auto &group : cur_groups) {
       for (auto &item : group.roles) {
-        VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
+        VLOG(40) << "node " << item.second->id() << " as "
+                 << item.first->name();
       }
-      VLOG(4) << "=========================================================";
+      VLOG(40) << "=========================================================";
     }
   }
 
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.cc b/paddle/fluid/framework/ir/graph_viz_pass.cc
index 31ed98db7..13dd354dc 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.cc
+++ b/paddle/fluid/framework/ir/graph_viz_pass.cc
@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
 std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
-  VLOG(3) << "draw IR graph viz to " << graph_viz_path;
+  VLOG(30) << "draw IR graph viz to " << graph_viz_path;
   std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
   PADDLE_ENFORCE(fout->good());
   std::ostream& sout = *fout;
diff --git a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
index 65be69b7f..145a3a455 100644
--- a/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn_placement_pass.cc
@@ -20,7 +20,7 @@ namespace ir {
 
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
-  VLOG(3) << "Aplies MKL-DNN placement strategy.";
+  VLOG(30) << "Applies MKL-DNN placement strategy.";
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
       n->Op()->SetAttr("use_mkldnn", true);
diff --git a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc
index bd5b76426..532961e4d 100644
--- a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc
+++ b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc
@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
         string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
     VarDesc repeated_var = CopyVarDesc(var_desc);
     repeated_var.SetName(new_gname);
-    VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
+    VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
     return repeated_var;
   }
   return *var_desc;
@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
 
   std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
   auto origin_nodes = graph->ReleaseNodes();
-  VLOG(3) << "origin nodes count: " << origin_nodes.size();
+  VLOG(30) << "origin nodes count: " << origin_nodes.size();
   ir::Graph& result = *graph;
 
   // 1. record op nodes of different roles
@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
             "%s.repeat.%d", repeated_op.Input("Variance")[0], i);
         bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
         bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
-        VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
-                << new_mean_name;
+        VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
+                 << new_mean_name;
         repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
         repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
         repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0],
diff --git a/paddle/fluid/framework/ir/pass.h b/paddle/fluid/framework/ir/pass.h
index 9570c59cf..8ac8d7677 100644
--- a/paddle/fluid/framework/ir/pass.h
+++ b/paddle/fluid/framework/ir/pass.h
@@ -76,7 +76,7 @@ class Pass {
                    attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(3) << "deleting " << attr_name;
+      VLOG(30) << "deleting " << attr_name;
       delete attr;
     };
   }
diff --git a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
index a7d5161c3..b7687d61d 100644
--- a/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc
@@ -12,10 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
+#include <set>
+#include <string>
+
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
+#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 
 namespace paddle {
@@ -159,10 +162,7 @@ PDNode* BuildFCPattern(PDPattern* pattern, PDNode* fc_x) {
 
   std::set<std::string> acts({"sigmoid", "tanh", "relu", "identity"});
   PDNode* act = pattern->NewNode(
-      [=](Node* x) {
-        return x && x->IsOp() && acts.count(x->Op()->Type());
-
-      },
+      [=](Node* x) { return x && x->IsOp() && acts.count(x->Op()->Type()); },
       "act");
 
   PDNode* fc_out = pattern->NewNode(
@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
 
   detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
                             Graph* graph) {
-    VLOG(4) << "get one concat pattern";
+    VLOG(40) << "get one concat pattern";
     // fc
     GET_NODE(fc_w, detector.pattern());
     GET_NODE(fc_bias, detector.pattern());
diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
index 0a1f65d27..015b5e3c6 100644
--- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc
@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
 
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
+    VLOG(40) << "handle SeqConv EltAdd Relu fuse";
     GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
diff --git a/paddle/fluid/framework/lod_rank_table.cc b/paddle/fluid/framework/lod_rank_table.cc
index 6bc795b64..660ce2ec8 100644
--- a/paddle/fluid/framework/lod_rank_table.cc
+++ b/paddle/fluid/framework/lod_rank_table.cc
@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
     TableItem item;
     item.index = i;
     item.length = vec[i + 1] - vec[i];
-    VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
+    VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
     items_.emplace_back(item);
   }
   // NOTE(yuyang18):
diff --git a/paddle/fluid/framework/mixed_vector_test.cc b/paddle/fluid/framework/mixed_vector_test.cc
index 0599c8d38..0330cae37 100644
--- a/paddle/fluid/framework/mixed_vector_test.cc
+++ b/paddle/fluid/framework/mixed_vector_test.cc
@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
 TEST(mixed_vector, ForEach) {
   vec<int> tmp;
   for (auto& v : tmp) {
-    VLOG(3) << v;
+    VLOG(30) << v;
   }
 }
 
diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index 7fb42feb9..8e660f97f 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -71,7 +71,7 @@ void NaiveExecutor::Prepare(Scope *parent_scope,
 
 void NaiveExecutor::Run() {
   for (auto &op : ops_) {
-    VLOG(4) << "run " << op->Type();
+    VLOG(40) << "run " << op->Type();
     op->Run(*scope_, place_);
   }
 }
@@ -95,21 +95,21 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
       if (var->Persistable()) {
         auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " global, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " global, which pointer is " << ptr;
       } else {  // Create temporary variables in local scope.
         auto *ptr = scope->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " locally, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " locally, which pointer is " << ptr;
       }
     }
   } else {
     for (auto &var : global_block.AllVars()) {
       auto *ptr = scope->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
-      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
-              << ptr;
+      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
+               << ptr;
     }
   }
 }
diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc
index 8ece618f3..fbaa169df 100644
--- a/paddle/fluid/framework/op_desc.cc
+++ b/paddle/fluid/framework/op_desc.cc
@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
     auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
     auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
     if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
-      VLOG(3) << "input " << in << " is not LodTensor";
+      VLOG(30) << "input " << in << " is not LodTensor";
       return;
     }
     out_var->SetLoDLevel(in_var->GetLoDLevel());
@@ -241,32 +241,32 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
     const proto::OpProto::Attr &attr = GetProtoAttr(name);
     switch (attr.type()) {
       case proto::AttrType::BOOLEANS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to BOOLEANS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to BOOLEANS";
         this->attrs_[name] = std::vector<bool>();
         break;
       }
       case proto::AttrType::INTS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to INTS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to INTS";
         this->attrs_[name] = std::vector<int>();
         break;
       }
       case proto::AttrType::FLOATS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to FLOATS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to FLOATS";
         this->attrs_[name] = std::vector<float>();
         break;
       }
       case proto::AttrType::STRINGS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to STRINGS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to STRINGS";
         this->attrs_[name] = std::vector<std::string>();
         break;
       }
       case proto::AttrType::BLOCKS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to BLOCKS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to BLOCKS";
         this->SetBlocksAttr(name, std::vector<BlockDesc *>());
         return;
       }
@@ -499,13 +499,13 @@ void OpDesc::CheckAttrs() {
 }
 
 void OpDesc::InferShape(const BlockDesc &block) const {
-  VLOG(3) << "CompileTime infer shape on " << Type();
+  VLOG(30) << "CompileTime infer shape on " << Type();
   InitInferShapeFuncs();
   auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
   PADDLE_ENFORCE(static_cast<bool>(infer_shape),
                  "%s's infer_shape has not been registered", this->Type());
   CompileTimeInferShapeContext ctx(*this, block);
-  if (VLOG_IS_ON(10)) {
+  if (VLOG_IS_ON(100)) {
     std::ostringstream sout;
     auto inames = this->InputArgumentNames();
     sout << " From [";
@@ -516,7 +516,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
     std::copy(onames.begin(), onames.end(),
               std::ostream_iterator<std::string>(sout, ", "));
     sout << "]";
-    VLOG(10) << sout.str();
+    VLOG(100) << sout.str();
   }
   infer_shape(&ctx);
 }
@@ -607,7 +607,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
     auto shape = var->GetShape();
     res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
   } catch (...) {
-    VLOG(5) << "GetDim of variable " << name << " error";
+    VLOG(50) << "GetDim of variable " << name << " error";
     std::rethrow_exception(std::current_exception());
   }
   return res;
@@ -624,7 +624,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
       res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
     }
   } catch (...) {
-    VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
+    VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
     std::rethrow_exception(std::current_exception());
   }
   return res;
diff --git a/paddle/fluid/framework/op_registry.cc b/paddle/fluid/framework/op_registry.cc
index bfc411ca2..4a841bae8 100644
--- a/paddle/fluid/framework/op_registry.cc
+++ b/paddle/fluid/framework/op_registry.cc
@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
 
 std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const proto::OpDesc& op_desc) {
-  VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
-             "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
-             "instead.";
+  VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
+              " used in unit tests. Use CreateOp(const OpDesc& op_desc) "
+              "instead.";
   VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
   VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
   AttributeMap attrs;
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 45fc36c70..c17daaac0 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -140,7 +140,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
 }
 
 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(4) << place << " " << DebugStringEx(&scope);
+  VLOG(40) << place << " " << DebugStringEx(&scope);
   if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
     PADDLE_THROW("Cannot run operator on place %s", place);
@@ -160,7 +160,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   } else {
     RunImpl(scope, place);
   }
-  VLOG(3) << place << " " << DebugStringEx(&scope);
+  VLOG(30) << place << " " << DebugStringEx(&scope);
 }
 
 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -708,14 +708,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
 
   auto expected_kernel_key =
       this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
-  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
+  VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
 
   auto kernel_iter = kernels.find(expected_kernel_key);
 #ifdef PADDLE_WITH_MKLDNN
   // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
   if (kernel_iter == kernels.end() &&
       expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
-    VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
+    VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
     expected_kernel_key.library_type_ = LibraryType::kPlain;
     expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
     kernel_iter = kernels.find(expected_kernel_key);
@@ -767,7 +767,8 @@ void OperatorWithKernel::TransferInplaceVarsBack(
     const Scope& scope, const std::vector<std::string>& inplace_vars,
     const Scope& transfer_scope) const {
   for (auto& var_name : inplace_vars) {
-    VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
+    VLOG(30) << "share inplace var " + var_name +
+                    " back to its original scope";
     auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name));
     auto* var = transfer_scope.FindVar(var_name);
     PADDLE_ENFORCE(var != nullptr, "The var[%s] should not be nullptr",
@@ -807,8 +808,8 @@ Scope* OperatorWithKernel::TryTransferData(
         transfered_inplace_vars->emplace_back(var_name);
       }
 
-      VLOG(3) << "Transform Variable " << var_name << " from "
-              << kernel_type_for_var << " to " << expected_kernel_key;
+      VLOG(30) << "Transform Variable " << var_name << " from "
+               << kernel_type_for_var << " to " << expected_kernel_key;
 
       if (new_scope == nullptr) {
         new_scope = &scope.NewScope();
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index dfb107688..39b47415f 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices(
 
     auto &main_tensor = main_var->Get<LoDTensor>();
     if (!main_tensor.IsInitialized()) {
-      VLOG(3) << "one in var not inited, return!";
+      VLOG(30) << "one in var not inited, return!";
       continue;
     }
     auto &dims = main_tensor.dims();
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index a4abd1b12..0c407f8c1 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) {
 
   v = new Variable();
   vars_[name].reset(v);
-  VLOG(3) << "Create variable " << name;
+  VLOG(30) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
 }
diff --git a/paddle/fluid/framework/selected_rows.cc b/paddle/fluid/framework/selected_rows.cc
index 8c290bb09..3319c772e 100644
--- a/paddle/fluid/framework/selected_rows.cc
+++ b/paddle/fluid/framework/selected_rows.cc
@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
   PADDLE_ENFORCE(value->IsInitialized(),
                  "The value tensor should be initialized.");
   if (ids.numel() == 0) {
-    VLOG(3) << "keys is empty, please check data!";
+    VLOG(30) << "keys is empty, please check data!";
   } else {
     int64_t value_width = value_->numel() / value_->dims()[0];
     PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc
index ca1e01c89..8d8f07a1f 100644
--- a/paddle/fluid/framework/tensor_util.cc
+++ b/paddle/fluid/framework/tensor_util.cc
@@ -22,8 +22,8 @@ namespace framework {
 
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 const platform::DeviceContext& ctx, Tensor* dst) {
-  VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
-          << dst_place;
+  VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
+           << dst_place;
   src.check_memory_size();
 
   dst->Resize(src.dims());
@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data async from " << src_place << " to "
+               << dst_place;
       return;
     }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
       if (src_ptr == dst_ptr) {
-        VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-                << dst_place;
+        VLOG(30) << "Skip copy the same data async from " << src_place << " to "
+                 << dst_place;
         return;
       }
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 
 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                     Tensor* dst) {
-  VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
-          << " to " << dst_place;
+  VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
+           << " to " << dst_place;
   src.check_memory_size();
   dst->Resize(src.dims());
   dst->set_layout(src.layout());
@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
       return;
     }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
              platform::is_gpu_place(dst_place)) {
     if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
       return;
     }
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
diff --git a/paddle/fluid/framework/tensor_util.cu b/paddle/fluid/framework/tensor_util.cu
deleted file mode 120000
index edd88c4e5..000000000
--- a/paddle/fluid/framework/tensor_util.cu
+++ /dev/null
@@ -1 +0,0 @@
-tensor_util.cc
\ No newline at end of file
diff --git a/paddle/fluid/framework/tensor_util.cu b/paddle/fluid/framework/tensor_util.cu
new file mode 100644
index 000000000..ac6f07773
--- /dev/null
+++ b/paddle/fluid/framework/tensor_util.cu
@@ -0,0 +1,490 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+#include <algorithm>
+#include <limits>
+#include <vector>
+#include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/framework/tensor_util.h"
+
+namespace paddle {
+namespace framework {
+
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                const platform::DeviceContext& ctx, Tensor* dst) {
+  VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
+           << dst_place;
+  src.check_memory_size();
+
+  dst->Resize(src.dims());
+  dst->set_layout(src.layout());
+  auto src_place = src.place();
+  auto src_ptr = src.data<void>();
+
+  auto dst_ptr = dst->mutable_data(dst_place, src.type());
+
+  auto size = src.numel() * SizeOfType(src.type());
+
+  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
+    if (src_ptr == dst_ptr) {
+      VLOG(30) << "Skip copy the same data async from " << src_place << " to "
+               << dst_place;
+      return;
+    }
+    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
+                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
+  }
+#ifdef PADDLE_WITH_CUDA
+  else if (platform::is_gpu_place(src_place) &&  // NOLINT
+           platform::is_cpu_place(dst_place)) {
+    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
+    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
+    auto ctx_place = ctx.GetPlace();
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
+    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
+    auto stream =
+        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
+    memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
+  } else if (platform::is_cpu_place(src_place) &&
+             platform::is_gpu_place(dst_place)) {
+    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
+    auto ctx_place = ctx.GetPlace();
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
+    PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
+    auto stream =
+        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
+    memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
+  } else if (platform::is_gpu_place(src_place) &&
+             platform::is_gpu_place(dst_place)) {
+    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
+    auto ctx_place = ctx.GetPlace();
+    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
+    auto stream =
+        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
+    if (platform::is_same_place(src_place, dst_place)) {
+      if (src_ptr == dst_ptr) {
+        VLOG(30) << "Skip copy the same data async from " << src_place << " to "
+                 << dst_place;
+        return;
+      }
+      memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
+                   stream);
+    } else {
+      if (platform::is_same_place(ctx_place, src_place)) {
+        memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
+                     stream);
+        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
+      } else if (platform::is_same_place(ctx_place, dst_place)) {
+        platform::DeviceContextPool::Instance().Get(src.place())->Wait();
+        memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
+                     stream);
+      } else {
+        PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
+      }
+    }
+  }
+#endif
+}
+
+void TensorCopy(const Tensor& src, const platform::Place& dst_place,
+                Tensor* dst) {
+  platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+  const platform::DeviceContext* dev_ctx;
+  if (platform::is_gpu_place(dst_place)) {
+    dev_ctx = pool.Get(dst_place);
+  } else {
+    dev_ctx = pool.Get(src.place());
+  }
+  TensorCopy(src, dst_place, *dev_ctx, dst);
+}
+
+void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
+                    Tensor* dst) {
+  VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
+           << " to " << dst_place;
+  src.check_memory_size();
+  dst->Resize(src.dims());
+  dst->set_layout(src.layout());
+  auto src_place = src.place();
+  auto src_ptr = src.data<void>();
+  auto dst_ptr = dst->mutable_data(dst_place, src.type());
+  auto size = src.numel() * SizeOfType(src.type());
+  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
+    if (src_ptr == dst_ptr) {
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
+      return;
+    }
+    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
+                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
+  }
+#ifdef PADDLE_WITH_CUDA
+  else if (platform::is_gpu_place(src_place) &&  // NOLINT
+           platform::is_cpu_place(dst_place)) {
+    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
+    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
+    memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
+  } else if (platform::is_cpu_place(src_place) &&
+             platform::is_gpu_place(dst_place)) {
+    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
+    memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
+  } else if (platform::is_gpu_place(src_place) &&
+             platform::is_gpu_place(dst_place)) {
+    if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
+      return;
+    }
+    auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
+    memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
+  } else if (platform::is_cuda_pinned_place(src_place) &&
+             platform::is_gpu_place(dst_place)) {
+    auto src_pinned_place = boost::get<platform::CUDAPinnedPlace>(src_place);
+    auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
+    memory::Copy(dst_gpu_place, dst_ptr, src_pinned_place, src_ptr, size,
+                 nullptr);
+  }
+#endif
+}
+
+template <typename Predicate, typename DevCtx>
+struct AnyDTypeVisitor {
+  Predicate predicate_;
+  const Tensor& tensor_;
+  const DevCtx& ctx_;
+  Tensor* out_;
+
+  AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
+                  Tensor* out)
+      : predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
+
+  template <typename T>
+  void apply() const {
+    auto t = EigenVector<T>::Flatten(tensor_);
+    auto o = EigenScalar<bool>::From(*out_);
+    // Store into out_ whether predicate_(t) is true for any element.
+    o.device(*ctx_.eigen_device()) = predicate_(t).any();
+  }
+};
+
+template <typename Predicate, typename DevCtx>
+inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
+                    const DevCtx& ctx, framework::Tensor* out) {
+  VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
+                                               predicate, tensor, ctx, out));
+}
+
+template <typename Predicate>
+class AnyVisitor : public boost::static_visitor<bool> {
+ private:
+  const framework::Tensor& tensor_;
+  Predicate predicate_;
+
+ public:
+  AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
+      : tensor_(tensor), predicate_(std::move(predicate)) {}
+
+  template <typename Place>
+  bool operator()(const Place& place) const {
+    framework::Tensor out;
+    out.Resize({1});
+    out.mutable_data<bool>(place);
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    AnyImpl(predicate_, tensor_, *ctx, &out);
+    return this->GetResult(out, place);
+  }
+
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CUDAPlace& gpu) const {
+    platform::CPUPlace cpu;
+    framework::Tensor tmp;
+    tmp.Resize({1});
+    tmp.mutable_data<bool>(cpu);
+    auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
+    gpuctx->Wait();
+    TensorCopy(out, cpu, *gpuctx, &tmp);
+    gpuctx->Wait();
+    return GetResult(tmp, cpu);
+  }
+
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CPUPlace& cpu) const {
+    return *out.data<bool>();
+  }
+
+  bool GetResult(const framework::Tensor& out,
+                 const platform::CUDAPinnedPlace& cpu) const {
+    return *out.data<bool>();
+  }
+};
+
+template <typename Predicate>
+class AnyOutVisitor : public boost::static_visitor<> {
+ private:
+  const framework::Tensor& tensor_;
+  mutable framework::Tensor* out_;
+  Predicate predicate_;
+
+ public:
+  AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out)
+      : tensor_(tensor), out_(out), predicate_(std::move(predicate)) {}
+
+  template <typename Place>
+  void operator()(const Place& place) const {
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
+    out_->Resize({1});
+    out_->mutable_data<bool>(place);
+    AnyImpl(predicate_, tensor_, *ctx, out_);
+  }
+};
+
+template <typename Predicate>
+inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
+  AnyVisitor<Predicate> visitor(tensor, predicate);
+  auto place = tensor.place();
+  return platform::VisitPlace(place, visitor);
+}
+
+template <typename Predicate>
+inline void Any(const framework::Tensor& tensor, Predicate predicate,
+                framework::Tensor* out) {
+  AnyOutVisitor<Predicate> visitor(tensor, predicate, out);
+  auto place = tensor.place();
+  platform::VisitPlace(place, visitor);
+}
+
+struct ContainsNANPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isnan()) {
+    // Map eigen_vec to a vector of bool: true where the element is NaN.
+    return eigen_vec.isnan();
+  }
+};
+
+bool TensorContainsNAN(const framework::Tensor& tensor) {
+  ContainsNANPredicate predicate;
+  return Any(tensor, predicate);
+}
+
+void TensorContainsNAN(const framework::Tensor& tensor,
+                       framework::Tensor* out) {
+  ContainsNANPredicate predicate;
+  Any(tensor, predicate, out);
+}
+
+struct ContainsInfPredicate {
+  template <typename T>
+  auto operator()(const T& eigen_vec) const
+      -> decltype(std::declval<T>().isinf()) {
+    // Cast eigen_vector to vector of bool. true if is inf.
+    return eigen_vec.isinf();
+  }
+};
+
+bool TensorContainsInf(const framework::Tensor& tensor) {
+  ContainsInfPredicate predicate;
+  return Any(tensor, predicate);
+}
+
+void TensorContainsInf(const framework::Tensor& tensor,
+                       framework::Tensor* out) {
+  ContainsInfPredicate predicate;
+  Any(tensor, predicate, out);
+}
+
+// NOTE(dzhwinter):
+// Isfinite needs an AllVisitor to loop through all the elements.
+// We chose two CUDA calls instead of one AllVisitor. The AllVisitor
+// should be implemented if performance suffers.
+bool TensorIsfinite(const framework::Tensor& tensor) {
+  ContainsInfPredicate pred_inf;
+  ContainsNANPredicate pred_nan;
+  return !Any(tensor, pred_inf) && !Any(tensor, pred_nan);
+}
+
+#ifdef PADDLE_WITH_CUDA
+template <typename T>
+static inline void __global__ BothFalse(const T* cmp, T* out) {
+  out[0] = (!cmp[0]) && (!out[0]);
+}
+#endif
+
+struct BothFalseVisitor : public boost::static_visitor<> {
+  const framework::Tensor& in_;
+  mutable framework::Tensor* out_;
+  BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out)
+      : in_(in), out_(out) {}
+
+  template <typename Place>
+  void operator()(const Place& place) const {
+    VisitorImpl(place);
+  }
+
+  void VisitorImpl(const platform::CUDAPlace& gpu) const {
+#ifdef PADDLE_WITH_CUDA
+    auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
+    BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
+                                                out_->mutable_data<bool>(gpu));
+#endif
+  }
+
+  void VisitorImpl(const platform::CPUPlace& cpu) const {
+    bool lhs = !in_.data<bool>()[0];
+    bool rhs = !out_->mutable_data<bool>(cpu)[0];
+    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+  }
+
+  void VisitorImpl(
+      const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
+    bool lhs = !in_.data<bool>()[0];
+    bool rhs = !out_->mutable_data<bool>(cpu)[0];
+    out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
+  }
+};
+
+void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
+  framework::Tensor tmp;
+  TensorContainsInf(tensor, &tmp);
+  TensorContainsNAN(tensor, out);
+  BothFalseVisitor visitor(tmp, out);
+  auto place = tensor.place();
+  platform::VisitPlace(place, visitor);
+}
+
+void TensorToStream(std::ostream& os, const Tensor& tensor,
+                    const platform::DeviceContext& dev_ctx) {
+  {  // the 1st field, uint32_t version
+    constexpr uint32_t version = 0;
+    os.write(reinterpret_cast<const char*>(&version), sizeof(version));
+  }
+  {  // the 2nd field, tensor description
+     // int32_t  size
+     // void*    protobuf message
+    proto::VarType::TensorDesc desc;
+    desc.set_data_type(framework::ToDataType(tensor.type()));
+    auto dims = framework::vectorize(tensor.dims());
+    auto* pb_dims = desc.mutable_dims();
+    pb_dims->Resize(static_cast<int>(dims.size()), 0);
+    std::copy(dims.begin(), dims.end(), pb_dims->begin());
+    int32_t size = desc.ByteSize();
+    os.write(reinterpret_cast<const char*>(&size), sizeof(size));
+    auto out = desc.SerializeAsString();
+    os.write(out.data(), size);
+  }
+  {  // the 3rd field, tensor data
+    uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
+
+    auto* data_ptr = tensor.data<void>();
+    PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
+                   "Index overflow when writing tensor");
+    if (platform::is_gpu_place(tensor.place())) {
+#ifdef PADDLE_WITH_CUDA
+      constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
+      std::unique_ptr<char[]> buf(new char[kBufSize]);
+      auto& gpu_dev_ctx =
+          static_cast<const platform::CUDADeviceContext&>(dev_ctx);
+      platform::CPUPlace cpu;
+      uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
+      while (size != 0) {
+        size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
+        memory::Copy(cpu, buf.get(),
+                     boost::get<platform::CUDAPlace>(tensor.place()),
+                     reinterpret_cast<const void*>(data), size_to_write,
+                     gpu_dev_ctx.stream());
+        gpu_dev_ctx.Wait();
+        os.write(buf.get(), size_to_write);
+        data += size_to_write;
+        size -= size_to_write;
+      }
+#else
+      PADDLE_THROW("Unexpected branch");
+#endif
+    } else {
+      os.write(static_cast<const char*>(data_ptr),
+               static_cast<std::streamsize>(size));
+    }
+  }
+}
+
+struct DeserializedDataFunctor {
+  DeserializedDataFunctor(void** buf, Tensor* tensor,
+                          const platform::Place& place)
+      : buf_(buf), tensor_(tensor), place_(place) {}
+
+  template <typename T>
+  void apply() {
+    *buf_ = tensor_->mutable_data<T>(place_);
+  }
+
+  void** buf_;
+  Tensor* tensor_;
+  platform::Place place_;
+};
+
+void TensorFromStream(std::istream& is, Tensor* tensor,
+                      const platform::DeviceContext& dev_ctx) {
+  uint32_t version;
+  is.read(reinterpret_cast<char*>(&version), sizeof(version));
+  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
+  proto::VarType::TensorDesc desc;
+  {  // int32_t size
+     // proto buffer
+    int32_t size;
+    is.read(reinterpret_cast<char*>(&size), sizeof(size));
+    std::unique_ptr<char[]> buf(new char[size]);
+    is.read(reinterpret_cast<char*>(buf.get()), size);
+    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
+                   "Cannot parse tensor desc");
+  }
+  {  // read tensor
+    std::vector<int64_t> dims;
+    dims.reserve(static_cast<size_t>(desc.dims().size()));
+    std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
+    tensor->Resize(framework::make_ddim(dims));
+    void* buf;
+    auto ctx = platform::CPUDeviceContext();
+    size_t size =
+        tensor->numel() *
+        framework::SizeOfType(framework::ToTypeIndex(desc.data_type()));
+    if (platform::is_gpu_place(dev_ctx.GetPlace())) {
+#ifdef PADDLE_WITH_CUDA
+      Tensor cpu_tensor;
+      cpu_tensor.Resize(framework::make_ddim(dims));
+      framework::VisitDataType(
+          desc.data_type(),
+          DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
+      is.read(static_cast<char*>(buf), size);
+      auto dst_place = dev_ctx.GetPlace();
+      framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
+#else
+      PADDLE_THROW("Unexpected branch");
+#endif
+    } else {
+      framework::VisitDataType(
+          desc.data_type(),
+          DeserializedDataFunctor(&buf, tensor, ctx.GetPlace()));
+      is.read(static_cast<char*>(buf), size);
+    }
+  }
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/threadpool.cc b/paddle/fluid/framework/threadpool.cc
index fcec95536..2dab4e793 100644
--- a/paddle/fluid/framework/threadpool.cc
+++ b/paddle/fluid/framework/threadpool.cc
@@ -39,7 +39,7 @@ void ThreadPool::Init() {
     int num_threads = std::thread::hardware_concurrency();
     if (FLAGS_dist_threadpool_size > 0) {
       num_threads = FLAGS_dist_threadpool_size;
-      VLOG(1) << "set dist_threadpool_size to " << num_threads;
+      VLOG(10) << "set dist_threadpool_size to " << num_threads;
     }
     PADDLE_ENFORCE_GT(num_threads, 0);
     threadpool_.reset(new ThreadPool(num_threads));
diff --git a/paddle/fluid/framework/var_desc.cc b/paddle/fluid/framework/var_desc.cc
index 7e3f002b5..29ef459b4 100644
--- a/paddle/fluid/framework/var_desc.cc
+++ b/paddle/fluid/framework/var_desc.cc
@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const {
 void VarDesc::SetShapes(
     const std::vector<std::vector<int64_t>> &multiple_dims) {
   if (multiple_dims.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_dims.size());
   }
   std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs();
@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
 void VarDesc::SetDataTypes(
     const std::vector<proto::VarType::Type> &multiple_data_type) {
   if (multiple_data_type.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given data types("
-            << multiple_data_type.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given data types("
+             << multiple_data_type.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_data_type.size());
   }
   std::vector<proto::VarType::TensorDesc *> tensor_descs =
@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
 
 void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
   if (multiple_lod_level.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given lod_levels("
-            << multiple_lod_level.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given lod_levels("
+             << multiple_lod_level.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_lod_level.size());
   }
   switch (desc_.type().type()) {
diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index ef4142f33..ea26de432 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -60,7 +60,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
 
  private:
   void AddPass(const std::string& name, AnalysisPass* pass) {
-    VLOG(3) << "Adding pass " << name;
+    VLOG(30) << "Adding pass " << name;
     Register(name, pass);
     AddGraphvizDebugerPass(pass);
   }
@@ -103,7 +103,7 @@ void Analyzer::Run(Argument* argument) {
   std::vector<std::string> passes;
 #ifdef PADDLE_WITH_MKLDNN
   if (use_mkldnn_) {
-    VLOG(3) << "Adding MKL-DNN placement pass";
+    VLOG(30) << "Adding MKL-DNN placement pass";
     passes.push_back("mkldnn_placement_pass");
   }
 #endif
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index e8fb0775b..9495e2435 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -68,8 +68,8 @@ struct Argument {
                    key);
     attrs_[key] = data;
     attr_deleters_[key] = [data, key]() {
-      VLOG(3) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
-      VLOG(3) << "argument delete attr: " << key;
+      VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+      VLOG(30) << "argument delete attr: " << key;
       delete data;
     };
   }
diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc
index 8c7d58678..bdcb30f15 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph.cc
@@ -132,7 +132,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     Node *x{nullptr};
     if (ir_node->IsOp()) {
       PADDLE_ENFORCE(ir_node->Op());
-      VLOG(4) << "get op " << ir_node << " " << ir_node->Name();
+      VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
       x = nodes.Create(Node::Type::kFunction);
       x->attr("ir_node").Pointer() = ir_node;
       PADDLE_ENFORCE(ir_node->Op()->Proto());
@@ -141,7 +141,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     } else if (ir_node->IsVar()) {
       // Not create a Node for IR ControlDepVar, considering Inference currently
       // just used in single thread scenerio.
-      VLOG(4) << "get var " << ir_node->Name();
+      VLOG(40) << "get var " << ir_node->Name();
       x = nodes.Create(Node::Type::kValue);
       x->attr("ir_node").Pointer() = ir_node;
       x->SetName(ir_node->Name());
@@ -151,9 +151,9 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     }
     ir_node_map.emplace(ir_node, x);
   }
-  VLOG(4) << "finish creating Nodes";
+  VLOG(40) << "finish creating Nodes";
 
-  VLOG(4) << "to create edge";
+  VLOG(40) << "to create edge";
   // Create links
   for (auto *ir_node : graph.Nodes()) {
     auto it = ir_node_map.find(ir_node);
@@ -175,7 +175,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
                  "Can't deduce any inputs from the graph, Is the graph empty?");
 
   ir_graph = &graph;
-  VLOG(3) << "finished build from IR";
+  VLOG(30) << "finished build from IR";
 }
 
 void DataFlowGraph::Clean() {
diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
index cb549f4b5..dbe138514 100644
--- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
+++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc
@@ -239,9 +239,10 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
   framework::BlockDesc block_desc(nullptr, &proto);
   block_desc.Proto()->set_parent_idx(-1);
   block_desc.Proto()->set_idx(0);
-  VLOG(4) << "origin variable size: "
-          << argument_->origin_program_desc->blocks(0).vars().size();
-  VLOG(4) << "transformed variable size: " << block_desc.Proto()->vars().size();
+  VLOG(40) << "origin variable size: "
+           << argument_->origin_program_desc->blocks(0).vars().size();
+  VLOG(40) << "transformed variable size: "
+           << block_desc.Proto()->vars().size();
   // copy ops.
 
   for (auto *node : block_node->subgraph) {
diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
index 648b8f7d6..8888529a5 100644
--- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
+++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc
@@ -29,7 +29,7 @@ void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
 
   auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png";
   std::string message;
-  VLOG(3) << "draw to " << png_path;
+  VLOG(30) << "draw to " << png_path;
   ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message);
 }
 
diff --git a/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc b/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc
index fc60ca3bd..9f52af670 100644
--- a/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc
+++ b/paddle/fluid/inference/analysis/fluid_to_ir_pass.cc
@@ -29,7 +29,7 @@ void FluidToIrPass::EnableParamModify(const std::string &model_dir,
   PADDLE_ENFORCE(argument_);
   argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope);
   // Load parameters.
-  VLOG(3) << "Loading parameters from " << model_dir;
+  VLOG(30) << "Loading parameters from " << model_dir;
   LoadParams(&argument_->Get<framework::Scope>(framework::ir::kParamScopeAttr),
              model_dir, prog_file, param_file);
 }
diff --git a/paddle/fluid/inference/analysis/model_store_pass.cc b/paddle/fluid/inference/analysis/model_store_pass.cc
index c313db088..4f40a7a1a 100644
--- a/paddle/fluid/inference/analysis/model_store_pass.cc
+++ b/paddle/fluid/inference/analysis/model_store_pass.cc
@@ -35,21 +35,21 @@ void ModelStorePass::Run(DataFlowGraph *x) {
   std::stringstream ss;
   // NOTE these commands only works on linux.
   ss << "mkdir -p " << *argument_->model_output_store_path;
-  VLOG(3) << "run command: " << ss.str();
+  VLOG(30) << "run command: " << ss.str();
   PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
   ss.str("");
 
   ss << "cp " << *argument_->fluid_model_dir << "/*"
      << " " << *argument_->model_output_store_path;
-  VLOG(3) << "run command: " << ss.str();
+  VLOG(30) << "run command: " << ss.str();
   PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
 
   // Store program
   PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
                           "program desc is not transformed, should call "
                           "DataFlowGraphToFluidPass first.");
-  VLOG(3) << "store analyzed program to "
-          << *argument_->model_output_store_path;
+  VLOG(30) << "store analyzed program to "
+           << *argument_->model_output_store_path;
   const std::string program_output_path =
       *argument_->model_output_store_path + "/__model__";
   std::ofstream file(program_output_path, std::ios::binary);
diff --git a/paddle/fluid/inference/analysis/pass_manager.cc b/paddle/fluid/inference/analysis/pass_manager.cc
index a6ac0ee49..ce390ee83 100644
--- a/paddle/fluid/inference/analysis/pass_manager.cc
+++ b/paddle/fluid/inference/analysis/pass_manager.cc
@@ -23,7 +23,7 @@ namespace analysis {
 bool PassManager::Initialize(Argument* argument) {
   argument_ = argument;
   for (auto& pass : data_) {
-    VLOG(3) << "Initializing pass [" << pass->repr() << "]";
+    VLOG(30) << "Initializing pass [" << pass->repr() << "]";
     if (!pass->Initialize(argument)) {
       LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
       return false;
@@ -34,7 +34,7 @@ bool PassManager::Initialize(Argument* argument) {
 
 void DfgPassManager::RunAll() {
   PADDLE_ENFORCE(argument_);
-  VLOG(3) << "Total " << data_.size() << " Analysys passes";
+  VLOG(30) << "Total " << data_.size() << " Analysys passes";
   for (auto& pass : data_) {
     string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
                           pass->repr());
diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc
index 526bbbadf..3688ea15d 100644
--- a/paddle/fluid/inference/analysis/subgraph_splitter.cc
+++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc
@@ -232,7 +232,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
     BriefNode *brief_node = itr.second;
 
     if (!brief_node->node->attr(kMarkerAttrName).Bool()) {
-      VLOG(4) << brief_node->node->id() << " node not a trt candicate.";
+      VLOG(40) << brief_node->node->id() << " node not a trt candicate.";
       continue;
     }
 
diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
index cc1746ecb..3aa65f223 100644
--- a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
@@ -25,9 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
 
 void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
   SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
-  VLOG(4) << "debug info "
-          << graph->HumanReadableInfo(false /*show_values*/,
-                                      true /*show_functions*/);
+  VLOG(40) << "debug info "
+           << graph->HumanReadableInfo(false /*show_values*/,
+                                       true /*show_functions*/);
 }
 
 }  // namespace analysis
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 54c37fe64..dd295854a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -38,7 +38,7 @@ using contrib::AnalysisConfig;
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
-  VLOG(3) << "Predictor::init()";
+  VLOG(30) << "Predictor::init()";
 #if !defined(_WIN32)
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
@@ -89,7 +89,7 @@ bool AnalysisPredictor::Init(
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
-  VLOG(3) << "Predictor::predict";
+  VLOG(30) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
   // set feed variable
@@ -109,7 +109,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  VLOG(30) << "predict cost: " << timer.toc() << "ms";
 
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
@@ -119,7 +119,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
 
 bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                 framework::Scope *scope) {
-  VLOG(3) << "Predictor::set_feed";
+  VLOG(30) << "Predictor::set_feed";
   if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
                << inputs.size();
@@ -184,7 +184,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
 
 bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                  framework::Scope *scope) {
-  VLOG(3) << "Predictor::get_fetch";
+  VLOG(30) << "Predictor::get_fetch";
   outputs->resize(fetchs_.size());
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
@@ -246,7 +246,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
 
   CHECK(argument_.transformed_program_desc);
-  VLOG(5) << "to prepare executor";
+  VLOG(50) << "to prepare executor";
   inference_program_.reset(
       new framework::ProgramDesc(*argument_.transformed_program_desc));
   if (argument_.Has(framework::ir::kParamScopeAttr)) {
@@ -260,7 +260,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
-  VLOG(3) << "create AnalysisConfig";
+  VLOG(30) << "create AnalysisConfig";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -274,7 +274,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
                          std::to_string(config.fraction_of_gpu_memory);
       flags.push_back(flag);
-      VLOG(3) << "set flag: " << flag;
+      VLOG(30) << "set flag: " << flag;
       framework::InitGflags(flags);
     }
   }
diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc
index 2c4894fd8..2ea122bfd 100644
--- a/paddle/fluid/inference/api/api_anakin_engine.cc
+++ b/paddle/fluid/inference/api/api_anakin_engine.cc
@@ -50,7 +50,7 @@ template <typename Target>
 bool PaddleInferenceAnakinPredictor<Target>::Init(
     const contrib::AnakinConfig &config) {
   if (!(graph_.load(config.model_file))) {
-    VLOG(3) << "fail to load graph from " << config.model_file;
+    VLOG(30) << "fail to load graph from " << config.model_file;
     return false;
   }
   auto inputs = graph_.get_ins();
@@ -76,15 +76,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
     std::vector<PaddleTensor> *output_data, int batch_size) {
   for (const auto &input : inputs) {
     if (input.dtype != PaddleDType::FLOAT32) {
-      VLOG(3) << "Only support float type inputs. " << input.name
-              << "'s type is not float";
+      VLOG(30) << "Only support float type inputs. " << input.name
+               << "'s type is not float";
       return false;
     }
     auto d_tensor_in_p = executor_p_->get_in(input.name);
     auto net_shape = d_tensor_in_p->shape();
     if (net_shape.size() != input.shape.size()) {
-      VLOG(3) << " input  " << input.name
-              << "'s shape size should be equal to that of net";
+      VLOG(30) << " input  " << input.name
+               << "'s shape size should be equal to that of net";
       return false;
     }
     int sum = 1;
@@ -105,15 +105,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
 
     if (input.lod.size() > 0) {
       if (input.lod.size() > 1) {
-        VLOG(3) << " input lod first dim should <=1, but you set "
-                << input.lod.size();
+        VLOG(30) << " input lod first dim should <=1, but you set "
+                 << input.lod.size();
         return false;
       }
       std::vector<int> offset(input.lod[0].begin(), input.lod[0].end());
       d_tensor_in_p->set_seq_offset(offset);
-      VLOG(3) << "offset.size(): " << offset.size();
+      VLOG(30) << "offset.size(): " << offset.size();
       for (int i = 0; i < offset.size(); i++) {
-        VLOG(3) << offset[i];
+        VLOG(30) << offset[i];
       }
     }
 
@@ -124,7 +124,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
       if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
                      d_tensor_in_p->valid_size() * sizeof(float),
                      cudaMemcpyHostToDevice) != 0) {
-        VLOG(3) << "copy data from CPU to GPU error";
+        VLOG(30) << "copy data from CPU to GPU error";
         return false;
       }
     }
@@ -141,7 +141,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
 #endif
 
   if (output_data->empty()) {
-    VLOG(3) << "At least one output should be set with tensors' names.";
+    VLOG(30) << "At least one output should be set with tensors' names.";
     return false;
   }
   for (auto &output : *output_data) {
@@ -157,7 +157,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
       if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
                      tensor->valid_size() * sizeof(float),
                      cudaMemcpyDeviceToHost) != 0) {
-        VLOG(3) << "copy data from GPU to CPU error";
+        VLOG(30) << "copy data from GPU to CPU error";
         return false;
       }
     }
@@ -181,14 +181,14 @@ anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
 template <typename Target>
 std::unique_ptr<PaddlePredictor>
 PaddleInferenceAnakinPredictor<Target>::Clone() {
-  VLOG(3) << "Anakin Predictor::clone";
+  VLOG(30) << "Anakin Predictor::clone";
   std::unique_ptr<PaddlePredictor> cls(
       new PaddleInferenceAnakinPredictor<Target>());
   // construct executer from other graph
   auto anakin_predictor_p =
       dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
   if (!anakin_predictor_p) {
-    VLOG(3) << "fail to call Init";
+    VLOG(30) << "fail to call Init";
     return nullptr;
   }
   anakin_predictor_p->get_executer().init(graph_);
@@ -206,10 +206,10 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
     const contrib::AnakinConfig &config) {
-  VLOG(3) << "Anakin Predictor create.";
+  VLOG(30) << "Anakin Predictor create.";
   if (config.target_type == contrib::AnakinConfig::NVGPU) {
 #ifdef PADDLE_WITH_CUDA
-    VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
+    VLOG(30) << "Anakin Predictor create on [ NVIDIA GPU ].";
     std::unique_ptr<PaddlePredictor> x(
         new PaddleInferenceAnakinPredictor<anakin::NV>(config));
     return x;
@@ -218,12 +218,12 @@ CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
     return nullptr;
 #endif
   } else if (config.target_type == contrib::AnakinConfig::X86) {
-    VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
+    VLOG(30) << "Anakin Predictor create on [ Intel X86 ].";
     std::unique_ptr<PaddlePredictor> x(
         new PaddleInferenceAnakinPredictor<anakin::X86>(config));
     return x;
   } else {
-    VLOG(3) << "Anakin Predictor create on unknown platform.";
+    VLOG(30) << "Anakin Predictor create on unknown platform.";
     return nullptr;
   }
 }
diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index d06ab8f8c..ba2228864 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -63,7 +63,7 @@ void NativePaddlePredictor::PrepareFeedFetch() {
 
 bool NativePaddlePredictor::Init(
     std::shared_ptr<framework::Scope> parent_scope) {
-  VLOG(3) << "Predictor::init()";
+  VLOG(30) << "Predictor::init()";
 #if !defined(_WIN32)
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
@@ -135,7 +135,7 @@ NativePaddlePredictor::~NativePaddlePredictor() {
 bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                 std::vector<PaddleTensor> *output_data,
                                 int batch_size) {
-  VLOG(3) << "Predictor::predict";
+  VLOG(30) << "Predictor::predict";
   Timer timer;
   timer.tic();
   // set feed variable
@@ -147,17 +147,17 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
   }
   // Run the inference program
   // if share variables, we need not create variables
-  VLOG(4) << "Run prepared context";
+  VLOG(40) << "Run prepared context";
   executor_->RunPreparedContext(ctx_.get(), scope,
                                 false, /* don't create local scope each time*/
                                 false /* don't create variable each time */);
-  VLOG(4) << "Finish prepared context";
+  VLOG(40) << "Finish prepared context";
   // get fetch variable
   if (!GetFetch(output_data, scope)) {
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  VLOG(30) << "predict cost: " << timer.toc() << "ms";
 
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
@@ -166,7 +166,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
 }
 
 std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
-  VLOG(3) << "Predictor::clone";
+  VLOG(30) << "Predictor::clone";
   std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
 
   if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
@@ -184,7 +184,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
 
 bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                     framework::Scope *scope) {
-  VLOG(3) << "Predictor::set_feed";
+  VLOG(30) << "Predictor::set_feed";
   if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
                << inputs.size();
@@ -244,7 +244,7 @@ void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
 
 bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                      framework::Scope *scope) {
-  VLOG(3) << "Predictor::get_fetch";
+  VLOG(30) << "Predictor::get_fetch";
   outputs->resize(fetchs_.size());
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
@@ -269,7 +269,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
-  VLOG(3) << "create NativePaddlePredictor";
+  VLOG(30) << "create NativePaddlePredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -283,7 +283,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
                          num2str<float>(config.fraction_of_gpu_memory);
       flags.push_back(flag);
-      VLOG(3) << "set flag: " << flag;
+      VLOG(30) << "set flag: " << flag;
       framework::InitGflags(flags);
     }
   }
diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
index 7ac468ee4..94b393349 100644
--- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
+++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc
@@ -34,7 +34,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
 
   bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
     FLAGS_IA_enable_tensorrt_subgraph_engine = true;
-    VLOG(3) << "Predictor::init()";
+    VLOG(30) << "Predictor::init()";
     if (config_.use_gpu) {
       place_ = paddle::platform::CUDAPlace(config_.device);
     } else {
@@ -70,7 +70,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
     OptimizeInferenceProgram();
     ctx_ = executor_->Prepare(*inference_program_, 0);
 
-    VLOG(5) << "to create variables";
+    VLOG(50) << "to create variables";
     executor_->CreateVariables(*inference_program_,
                                sub_scope_ ? sub_scope_ : scope_.get(), 0);
     // Get the feed_target_names and fetch_target_names
@@ -114,9 +114,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
         new ProgramDesc(*inference_program_->Proto()));
     Singleton<Analyzer>::Global().Run(&argument);
     CHECK(argument.transformed_program_desc);
-    VLOG(5) << "transformed program:\n"
-            << argument.transformed_program_desc->SerializeAsString();
-    VLOG(5) << "to prepare executor";
+    VLOG(50) << "transformed program:\n"
+             << argument.transformed_program_desc->SerializeAsString();
+    VLOG(50) << "to prepare executor";
     inference_program_.reset(
         new framework::ProgramDesc(*argument.transformed_program_desc));
   }
@@ -129,7 +129,7 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
     const MixedRTConfig& config) {
-  VLOG(3) << "create TensorRTSubgraphPredictor";
+  VLOG(30) << "create TensorRTSubgraphPredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -143,7 +143,7 @@ CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
       std::string flag = "--fraction_of_gpu_memory_to_use=" +
                          std::to_string(config.fraction_of_gpu_memory);
       flags.push_back(flag);
-      VLOG(3) << "set flag: " << flag;
+      VLOG(30) << "set flag: " << flag;
       framework::InitGflags(flags);
     }
   }
diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
index 4a8404f21..6460514f3 100644
--- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc
@@ -45,7 +45,7 @@ void Main() {
   config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config);
 
-  VLOG(3) << "begin to process data";
+  VLOG(30) << "begin to process data";
   // Just a single batch of data.
   std::string line;
   std::ifstream file(FLAGS_data);
@@ -60,13 +60,13 @@ void Main() {
       PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;
 
-  VLOG(3) << "run executor";
+  VLOG(30) << "run executor";
   std::vector<PaddleTensor> output;
   predictor->Run({input}, &output, 1);
 
-  VLOG(3) << "output.size " << output.size();
+  VLOG(30) << "output.size " << output.size();
   auto& tensor = output.front();
-  VLOG(3) << "output: " << SummaryTensor(tensor);
+  VLOG(30) << "output: " << SummaryTensor(tensor);
 
   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
diff --git a/paddle/fluid/inference/api/demo_ci/utils.h b/paddle/fluid/inference/api/demo_ci/utils.h
index d70c6aea7..664b9d01c 100644
--- a/paddle/fluid/inference/api/demo_ci/utils.h
+++ b/paddle/fluid/inference/api/demo_ci/utils.h
@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
 }
 
 Record ProcessALine(const std::string& line) {
-  VLOG(3) << "process a line";
+  VLOG(30) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
   for (auto& s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(3) << "data size " << record.data.size();
-  VLOG(3) << "data shape size " << record.shape.size();
+  VLOG(30) << "data size " << record.data.size();
+  VLOG(30) << "data shape size " << record.shape.size();
   return record;
 }
 
@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
   file.close();
 
   size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
-  VLOG(3) << "predictor output numel " << numel;
-  VLOG(3) << "reference output numel " << refer.data.size();
+  VLOG(30) << "predictor output numel " << numel;
+  VLOG(30) << "reference output numel " << refer.data.size();
   CHECK_EQ(numel, refer.data.size());
   switch (output.dtype) {
     case PaddleDType::INT64: {
diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
index 8d546e3e9..d747f8558 100644
--- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc
+++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc
@@ -49,11 +49,11 @@ void Main(bool use_gpu) {
     config.fraction_of_gpu_memory = 0.1;  // set by yourself
   }
 
-  VLOG(3) << "init predictor";
+  VLOG(30) << "init predictor";
   predictor = CreatePaddlePredictor<NativeConfig>(config);
   analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
 
-  VLOG(3) << "begin to process data";
+  VLOG(30) << "begin to process data";
   // Just a single batch of data.
   std::string line;
   std::ifstream file(FLAGS_data);
@@ -68,13 +68,13 @@ void Main(bool use_gpu) {
       PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;
 
-  VLOG(3) << "run executor";
+  VLOG(30) << "run executor";
   std::vector<PaddleTensor> output, analysis_output;
   predictor->Run({input}, &output, 1);
 
-  VLOG(3) << "output.size " << output.size();
+  VLOG(30) << "output.size " << output.size();
   auto& tensor = output.front();
-  VLOG(3) << "output: " << SummaryTensor(tensor);
+  VLOG(30) << "output: " << SummaryTensor(tensor);
 
   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
diff --git a/paddle/fluid/inference/api/details/reset_tensor_array.cc b/paddle/fluid/inference/api/details/reset_tensor_array.cc
index 4ae6c6dc9..244b0b567 100644
--- a/paddle/fluid/inference/api/details/reset_tensor_array.cc
+++ b/paddle/fluid/inference/api/details/reset_tensor_array.cc
@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       // parameter.
       if (var_name == "feed" || var_name == "fetch") continue;
       if (var->Type() == typeid(framework::LoDTensorArray)) {
-        VLOG(4) << "collect " << var_name;
+        VLOG(40) << "collect " << var_name;
         arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
       }
     }
@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       CollectTensorArrays(kid);
     }
 
-    VLOG(3) << "Collect " << arrays_.size() << " arrays";
+    VLOG(30) << "Collect " << arrays_.size() << " arrays";
     flag_ = false;
   }
 }
diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc
index e246a06fd..1acc4e713 100644
--- a/paddle/fluid/inference/io.cc
+++ b/paddle/fluid/inference/io.cc
@@ -77,7 +77,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
 
   for (auto* var : global_block.AllVars()) {
     if (IsPersistable(var)) {
-      VLOG(3) << "persistable variable's name: " << var->Name();
+      VLOG(30) << "persistable variable's name: " << var->Name();
 
       framework::VarDesc* new_var = load_block->Var(var->Name());
       new_var->SetShape(var->GetShape());
@@ -120,7 +120,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
                                              const std::string& dirname) {
   std::string model_filename = dirname + "/__model__";
   std::string program_desc_str;
-  VLOG(3) << "loading model from " << model_filename;
+  VLOG(30) << "loading model from " << model_filename;
   ReadBinaryFile(model_filename, &program_desc_str);
 
   std::unique_ptr<framework::ProgramDesc> main_program(
diff --git a/paddle/fluid/inference/tensorrt/convert/concat_op.cc b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
index a11dfa1e8..60c16e35e 100644
--- a/paddle/fluid/inference/tensorrt/convert/concat_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/concat_op.cc
@@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
+    VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
diff --git a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
index 9533ecbcf..df86a68da 100644
--- a/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/dropout_op.cc
@@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid dropout op to tensorrt dropout layer";
+    VLOG(40) << "convert a fluid dropout op to tensorrt dropout layer";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
index 7c21ecd95..bc1d9ee28 100644
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias";
+    VLOG(40) << "convert a fluid fc op to tensorrt fc layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
     PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
diff --git a/paddle/fluid/inference/tensorrt/convert/mul_op.cc b/paddle/fluid/inference/tensorrt/convert/mul_op.cc
index 514eb659a..babd56d62 100644
--- a/paddle/fluid/inference/tensorrt/convert/mul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/mul_op.cc
@@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
+    VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
 
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
diff --git a/paddle/fluid/inference/tensorrt/convert/pad_op.cc b/paddle/fluid/inference/tensorrt/convert/pad_op.cc
index 218030a59..c3699428d 100644
--- a/paddle/fluid/inference/tensorrt/convert/pad_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pad_op.cc
@@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid transpose op to tensorrt tranpose layer";
+    VLOG(40) << "convert a fluid transpose op to tensorrt tranpose layer";
 
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
diff --git a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
index 677f85152..d943d699f 100644
--- a/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/pool2d_op.cc
@@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4)
+    VLOG(40)
         << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
diff --git a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc
index 0064f90fd..174cdbe53 100644
--- a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc
@@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4)
+    VLOG(40)
         << "convert a fluid softmax op to tensorrt softmax layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
index c4022225f..48369e2e0 100644
--- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
@@ -217,9 +217,9 @@ void single_test() {
     LOG(INFO) << "sequence_length = " << seq_offset[seq_offset.size() - 1];
 
     float* data_o = static_cast<float*>(outputs[0].data.data());
-    VLOG(3) << "outputs[0].data.length() = " << outputs[0].data.length();
+    VLOG(30) << "outputs[0].data.length() = " << outputs[0].data.length();
     for (size_t j = 0; j < outputs[0].data.length(); ++j) {
-      VLOG(3) << "output[" << j << "]: " << data_o[j];
+      VLOG(30) << "output[" << j << "]: " << data_o[j];
     }
   }
 }
diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
index 893329649..b2cd49af9 100644
--- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
@@ -27,7 +27,7 @@ struct Record {
 };
 
 Record ProcessALine(const std::string &line) {
-  VLOG(3) << "process a line";
+  VLOG(30) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
   for (auto &s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(3) << "data size " << record.data.size();
-  VLOG(3) << "data shape size " << record.shape.size();
+  VLOG(30) << "data size " << record.data.size();
+  VLOG(30) << "data shape size " << record.shape.size();
   return record;
 }
 
diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc
index 26ef27c3c..dd7ffaa26 100644
--- a/paddle/fluid/memory/detail/buddy_allocator.cc
+++ b/paddle/fluid/memory/detail/buddy_allocator.cc
@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
       system_allocator_(std::move(system_allocator)) {}
 
 BuddyAllocator::~BuddyAllocator() {
-  VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
-              "have actually been freed";
+  VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
+               "have actually been freed";
   while (!pool_.empty()) {
     auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
-    VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
+    VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
 
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
   // acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);
 
-  VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
-           << size;
+  VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
+            << size;
 
   // if the allocation is huge, send directly to the system allocator
   if (size > max_chunk_size_) {
-    VLOG(10) << "Allocate from system allocator.";
+    VLOG(100) << "Allocate from system allocator.";
     return SystemAlloc(size);
   }
 
@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
       return nullptr;
     }
   } else {
-    VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
-             << " at address "
-             << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
+    VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
+              << " at address "
+              << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
   }
 
   total_used_ += size;
@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
   // Acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);
 
-  VLOG(10) << "Free from address " << block;
+  VLOG(100) << "Free from address " << block;
 
   if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
-    VLOG(10) << "Free directly from system allocator";
+    VLOG(100) << "Free directly from system allocator";
     system_allocator_->Free(block, block->total_size(cache_),
                             block->index(cache_));
 
@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) {
 
   // Trying to merge the right buddy
   if (block->has_right_buddy(cache_)) {
-    VLOG(10) << "Merging this block " << block << " with its right buddy "
-             << block->right_buddy(cache_);
+    VLOG(100) << "Merging this block " << block << " with its right buddy "
+              << block->right_buddy(cache_);
 
     auto right_buddy = block->right_buddy(cache_);
 
@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) {
 
   // Trying to merge the left buddy
   if (block->has_left_buddy(cache_)) {
-    VLOG(10) << "Merging this block " << block << " with its left buddy "
-             << block->left_buddy(cache_);
+    VLOG(100) << "Merging this block " << block << " with its left buddy "
+              << block->left_buddy(cache_);
 
     auto left_buddy = block->left_buddy(cache_);
 
@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) {
   }
 
   // Dumping this block into pool
-  VLOG(10) << "Inserting free block (" << block << ", "
-           << block->total_size(cache_) << ")";
+  VLOG(100) << "Inserting free block (" << block << ", "
+            << block->total_size(cache_) << ")";
   pool_.insert(
       IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
 
@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
   size_t index = 0;
   void* p = system_allocator_->Alloc(&index, size);
 
-  VLOG(10) << "Allocated " << p << " from system allocator.";
+  VLOG(100) << "Allocated " << p << " from system allocator.";
 
   if (p == nullptr) return nullptr;
 
@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
 
   if (p == nullptr) return pool_.end();
 
-  VLOG(10) << "Creating and inserting new block " << p
-           << " from system allocator";
+  VLOG(100) << "Creating and inserting new block " << p
+            << " from system allocator";
 
   static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
                                      max_chunk_size_, nullptr, nullptr);
@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
   auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
   pool_.erase(it);
 
-  VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
-           << ") into";
+  VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
+            << ") into";
   block->split(&cache_, size);
 
-  VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
-           << ")";
+  VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
+            << ")";
   block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
 
   // the rest of memory if exist
   if (block->has_right_buddy(cache_)) {
     if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
-      VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
-               << block->right_buddy(cache_)->total_size(cache_) << ")";
+      VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
+                << block->right_buddy(cache_)->total_size(cache_) << ")";
 
       pool_.insert(
           IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
       return;
     }
 
-    VLOG(10) << "Return block " << block << " to fallback allocator.";
+    VLOG(100) << "Return block " << block << " to fallback allocator.";
 
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
 
     MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
 
-    VLOG(10) << "Return block " << block << " to base allocator.";
+    VLOG(100) << "Return block " << block << " to base allocator.";
 
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
diff --git a/paddle/fluid/memory/detail/meta_cache.cc b/paddle/fluid/memory/detail/meta_cache.cc
index b86e4f38c..152e4e7f9 100644
--- a/paddle/fluid/memory/detail/meta_cache.cc
+++ b/paddle/fluid/memory/detail/meta_cache.cc
@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
     return existing_desc->second;
   } else {
     auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
-    VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
+    VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
     PADDLE_ASSERT(desc->check_guards());
     return *reinterpret_cast<const MemoryBlock::Desc*>(block);
   }
diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc
index 0f13a4ea9..ec87793b4 100644
--- a/paddle/fluid/memory/malloc.cc
+++ b/paddle/fluid/memory/malloc.cc
@@ -71,18 +71,18 @@ struct NaiveAllocator {
 
 template <>
 void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
-  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
+  VLOG(100) << "Allocate " << size << " bytes on " << platform::Place(place);
   void* p = GetCPUBuddyAllocator()->Alloc(size);
   if (FLAGS_init_allocated_mem) {
     memset(p, 0xEF, size);
   }
-  VLOG(10) << "  pointer=" << p;
+  VLOG(100) << "  pointer=" << p;
   return p;
 }
 
 template <>
 void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
-  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
+  VLOG(100) << "Free pointer=" << p << " on " << platform::Place(place);
   GetCPUBuddyAllocator()->Free(p);
 }
 
@@ -110,12 +110,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
           std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
           platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
-      VLOG(10) << "\n\nNOTE: each GPU device use "
-               << FLAGS_fraction_of_gpu_memory_to_use * 100
-               << "% of GPU memory.\n"
-               << "You can set GFlags environment variable '"
-               << "FLAGS_fraction_of_gpu_memory_to_use"
-               << "' to change the fraction of GPU usage.\n\n";
+      VLOG(100) << "\n\nNOTE: each GPU device use "
+                << FLAGS_fraction_of_gpu_memory_to_use * 100
+                << "% of GPU memory.\n"
+                << "You can set GFlags environment variable '"
+                << "FLAGS_fraction_of_gpu_memory_to_use"
+                << "' to change the fraction of GPU usage.\n\n";
     }
   });
 
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 0747469e0..4ffc7f364 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -95,7 +95,7 @@ class ActivationGradKernel
       auto x = framework::EigenVector<T>::Flatten(*X);
       functor(*place, x, out, dout, dx);
     } else {
-      VLOG(10) << " Inplace activation ";
+      VLOG(100) << " Inplace activation ";
       auto x = framework::EigenVector<T>::Flatten(*dX);
       functor(*place, x, out, dout, dx);
     }
diff --git a/paddle/fluid/operators/adam_op.h b/paddle/fluid/operators/adam_op.h
index 3455d1ee5..48e0448d0 100644
--- a/paddle/fluid/operators/adam_op.h
+++ b/paddle/fluid/operators/adam_op.h
@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
       auto& grad =
           Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
       if (grad.rows().size() == 0) {
-        VLOG(3) << "grad row size is 0!!";
+        VLOG(30) << "grad row size is 0!!";
         return;
       }
 
diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h
index 4309f0a54..eddf34494 100644
--- a/paddle/fluid/operators/array_operator.h
+++ b/paddle/fluid/operators/array_operator.h
@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
     } else {
       offset = static_cast<size_t>(*i_tensor.data<int64_t>());
     }
-    VLOG(10) << " Offset = " << offset;
+    VLOG(100) << " Offset = " << offset;
     return offset;
   }
 };
diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc
index 6257e04b0..3c40135ec 100644
--- a/paddle/fluid/operators/array_to_lod_tensor_op.cc
+++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc
@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
 
         size_t start_offset = lod_and_offset.second.first;
         size_t end_offset = lod_and_offset.second.second;
-        VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
-                 << ", " << end_offset << "]";
+        VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
+                  << ", " << end_offset << "]";
         // Copy data
         PADDLE_ENFORCE_GE(end_offset, start_offset);
         size_t len = end_offset - start_offset;
diff --git a/paddle/fluid/operators/batch_norm_op.cu.cc b/paddle/fluid/operators/batch_norm_op.cu.cc
index aaed335c9..0609027c6 100644
--- a/paddle/fluid/operators/batch_norm_op.cu.cc
+++ b/paddle/fluid/operators/batch_norm_op.cu.cc
@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
     mode_ = CUDNN_BATCHNORM_SPATIAL;
 #endif
 
-    VLOG(3) << "Setting descriptors.";
+    VLOG(30) << "Setting descriptors.";
     std::vector<int> dims;
     std::vector<int> strides;
     if (data_layout == DataLayout::kNCHW) {
diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc
index 62771d09f..791f8a4d3 100644
--- a/paddle/fluid/operators/beam_search_op.cc
+++ b/paddle/fluid/operators/beam_search_op.cc
@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
 
   auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
   auto selected_items = ToMap(items, high_level.back());
-  VLOG(3) << "selected_items:";
+  VLOG(30) << "selected_items:";
   for (size_t i = 0; i < selected_items.size(); ++i) {
-    VLOG(3) << "offset:" << i;
+    VLOG(30) << "offset:" << i;
     for (auto &item : selected_items[i]) {
-      VLOG(3) << ItemToString(item);
+      VLOG(30) << ItemToString(item);
     }
   }
 
@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
     }
     result.emplace_back(items);
   }
-  VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
+  VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
   for (auto &items : result) {
-    VLOG(3) << "item set:";
+    VLOG(30) << "item set:";
     for (auto &item : items) {
-      VLOG(3) << ItemToString(item);
+      VLOG(30) << ItemToString(item);
     }
   }
 
diff --git a/paddle/fluid/operators/checkpoint_notify_op.cc b/paddle/fluid/operators/checkpoint_notify_op.cc
index 7c072cb07..defa287bd 100644
--- a/paddle/fluid/operators/checkpoint_notify_op.cc
+++ b/paddle/fluid/operators/checkpoint_notify_op.cc
@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
       auto lookup_table_save_dir =
           string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
       rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
-      VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name
-              << " and dir:" << dir << " to " << epmap[i];
+      VLOG(30) << "checkpoint notify sending lookup table: "
+               << lookup_table_name << " and dir:" << dir << " to " << epmap[i];
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
   }
diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc
index 57817da71..093b0a9a1 100644
--- a/paddle/fluid/operators/concat_op.cc
+++ b/paddle/fluid/operators/concat_op.cc
@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
 
     PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
     if (n == 1) {
-      VLOG(3) << "Warning: concat op have only one input, may waste memory";
+      VLOG(30) << "Warning: concat op have only one input, may waste memory";
     }
 
     auto out_dims = ins[0];
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc
index 76eda51ad..3ec436133 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -143,11 +143,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
           cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
       // Currently tensor core is only enabled using this algo
       algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
-      VLOG(5) << "use cudnn_tensor_op_math";
+      VLOG(50) << "use cudnn_tensor_op_math";
     } else {
       CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
           cudnn_conv_desc, CUDNN_DEFAULT_MATH));
-      VLOG(5) << "NOT use cudnn_tensor_op_math";
+      VLOG(50) << "NOT use cudnn_tensor_op_math";
     }
 #endif
 
diff --git a/paddle/fluid/operators/distributed/brpc_server.cc b/paddle/fluid/operators/distributed/brpc_server.cc
index 862167f02..47a06dd0f 100644
--- a/paddle/fluid/operators/distributed/brpc_server.cc
+++ b/paddle/fluid/operators/distributed/brpc_server.cc
@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
 void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
 
 void AsyncBRPCServer::WaitServerReady() {
-  VLOG(3) << "AsyncGRPCServer is wait server ready";
+  VLOG(30) << "AsyncGRPCServer is wait server ready";
   std::unique_lock<std::mutex> lock(this->mutex_ready_);
   condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
-  VLOG(3) << "AsyncGRPCServer WaitSeverReady";
+  VLOG(30) << "AsyncGRPCServer WaitSeverReady";
 }
 
 };  // namespace distributed
diff --git a/paddle/fluid/operators/distributed/grpc_client.cc b/paddle/fluid/operators/distributed/grpc_client.cc
index be5c20ad2..c28f86146 100644
--- a/paddle/fluid/operators/distributed/grpc_client.cc
+++ b/paddle/fluid/operators/distributed/grpc_client.cc
@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() {
   std::unique_lock<std::mutex> lk(completed_mutex_);
   if (!completed_) {
     for (auto& it : channels_) {
-      VLOG(3) << "send complete message to " << it.first;
+      VLOG(30) << "send complete message to " << it.first;
       this->AsyncSendComplete(it.first);
     }
     PADDLE_ENFORCE(this->Wait(), "internal grpc error");
@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
     ::grpc::ByteBuffer req;
     SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
 
-    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
 
     // stub context
     s->response_call_back_ = nullptr;
@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
     ::grpc::ByteBuffer buf;
     RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
 
-    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
 
     // stub context
     s->response_call_back_ = ProcGetResponse;
@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
     ::grpc::ByteBuffer req;
     SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
 
-    VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
+    VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
 
     // stub context
     s->response_call_back_ = ProcGetResponse;
@@ -328,14 +328,14 @@ void GRPCClient::Proceed() {
   void* tag = nullptr;
   bool ok = false;
 
-  VLOG(3) << "GRPCClient Proceed begin";
+  VLOG(30) << "GRPCClient Proceed begin";
   while (!stopped_ && cq_.Next(&tag, &ok)) {
     BaseProcessor* c = static_cast<BaseProcessor*>(tag);
     GPR_ASSERT(ok);
     PADDLE_ENFORCE(c);
 
     if (c->status_.ok()) {
-      VLOG(3) << c->GetVarHandlePtr()->String() << " process";
+      VLOG(30) << c->GetVarHandlePtr()->String() << " process";
       c->Process();
     } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
       // FIXME(gongwb): parse error_details?
@@ -370,7 +370,7 @@ void GRPCClient::Proceed() {
       sync_cond_.notify_all();
     }
   }
-  VLOG(3) << "GRPCClient Proceed end";
+  VLOG(30) << "GRPCClient Proceed end";
 }
 
 std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
diff --git a/paddle/fluid/operators/distributed/grpc_server.cc b/paddle/fluid/operators/distributed/grpc_server.cc
index eb9e36029..ffd2b1707 100644
--- a/paddle/fluid/operators/distributed/grpc_server.cc
+++ b/paddle/fluid/operators/distributed/grpc_server.cc
@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase {
 
   void Process() override {
     std::string varname = GetReqName();
-    VLOG(4) << "RequestSend var_name:" << varname;
+    VLOG(40) << "RequestSend var_name:" << varname;
 
     auto scope = request_->GetMutableLocalScope();
     auto invar = request_->GetVar();
@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase {
     // proc request.
     std::string varname = request_.varname();
     int trainer_id = request_.trainer_id();
-    VLOG(4) << "RequestGet " << varname;
+    VLOG(40) << "RequestGet " << varname;
 
     auto scope = request_handler_->scope();
     auto invar = scope->FindVar(varname);
@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase {
     std::string in_var_name = request_->Varname();
     std::string out_var_name = request_->OutVarname();
     int trainer_id = request_->GetTrainerId();
-    VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
-            << " out_var_name: " << out_var_name;
+    VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
+             << " out_var_name: " << out_var_name;
 
     auto scope = request_->GetMutableLocalScope();
     auto invar = scope->FindVar(in_var_name);
@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase {
     std::string checkpoint_dir = request_->OutVarname();
     int trainer_id = request_->GetTrainerId();
 
-    VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
-            << ", dir: " << checkpoint_dir;
+    VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
+             << ", dir: " << checkpoint_dir;
 
     request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
                              trainer_id, checkpoint_dir);
@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase {
 };
 
 void AsyncGRPCServer::WaitServerReady() {
-  VLOG(4) << "AsyncGRPCServer is wait server ready";
+  VLOG(40) << "AsyncGRPCServer is wait server ready";
   std::unique_lock<std::mutex> lock(this->mutex_ready_);
   condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
-  VLOG(4) << "AsyncGRPCServer WaitSeverReady";
+  VLOG(40) << "AsyncGRPCServer WaitSeverReady";
 }
 
 void AsyncGRPCServer::StartServer() {
@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() {
     reqs.reserve(kRequestBufSize);
 
     for (int i = 0; i < kRequestBufSize; i++) {
-      VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i;
+      VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
+               << " I: " << i;
       TryToRegisterNewOne(rpc_name, i);
     }
 
     for (int i = 0; i < threadnum; i++) {
       rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
           &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
-      VLOG(4) << t.first << " creates threads!";
+      VLOG(40) << t.first << " creates threads!";
     }
   }
 
@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() {
     auto& threads = t.second;
     for (size_t i = 0; i < threads.size(); ++i) {
       threads[i]->join();
-      VLOG(4) << t.first << " threads ends!";
+      VLOG(40) << t.first << " threads ends!";
     }
   }
 }
@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() {
 void AsyncGRPCServer::ShutdownQueue() {
   for (auto& t : rpc_cq_) {
     t.second->Shutdown();
-    VLOG(4) << t.first << " queue shutdown!";
+    VLOG(40) << t.first << " queue shutdown!";
   }
 }
 
@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() {
   is_shut_down_ = true;
   ShutdownQueue();
 
-  VLOG(4) << "server_ shutdown!";
+  VLOG(40) << "server_ shutdown!";
   server_->Shutdown();
 }
 
@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
                                           int req_id) {
   std::unique_lock<std::mutex> lock(cq_mutex_);
   if (is_shut_down_) {
-    VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
+    VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
     return;
   }
 
-  VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
-          << " REQ ID: " << req_id;
+  VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
+           << " REQ ID: " << req_id;
 
   auto& reqs = rpc_reqs_[rpc_name];
   auto& handler = rpc_call_map_[rpc_name];
@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
 
   reqs[req_id] = b;
 
-  VLOG(4) << "Create RequestSend status:" << b->Status();
+  VLOG(40) << "Create RequestSend status:" << b->Status();
 }
 
 void AsyncGRPCServer::HandleRequest(
@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest(
   bool ok = false;
 
   while (true) {
-    VLOG(4) << "HandleRequest " << rpc_name << " wait next";
+    VLOG(40) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
 
     int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
-    VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
-            << " get next";
+    VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
+             << " get next";
 
     auto& reqs = rpc_reqs_[rpc_name];
     RequestBase* base = nullptr;
@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest(
       base = reqs[req_id];
     }
 
-    VLOG(3) << base->Status2String(rpc_name);
+    VLOG(30) << base->Status2String(rpc_name);
 
     // reference:
     // https://github.com/tensorflow/tensorflow/issues/5596
diff --git a/paddle/fluid/operators/distributed/request_handler.h b/paddle/fluid/operators/distributed/request_handler.h
index 3c1db1470..3bcc59a47 100644
--- a/paddle/fluid/operators/distributed/request_handler.h
+++ b/paddle/fluid/operators/distributed/request_handler.h
@@ -75,7 +75,7 @@ class VarHandle {
       wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
       ret = status_;
     }
-    VLOG(7) << "VarHandle wait:" << ret;
+    VLOG(70) << "VarHandle wait:" << ret;
     return ret != kErrorState;
   }
 
@@ -84,7 +84,7 @@ class VarHandle {
       std::unique_lock<std::mutex> lk(sync_mutex_);
       status_ = ok ? kFinishState : kErrorState;
     }
-    VLOG(7) << "VarHandle finish:" << ok;
+    VLOG(70) << "VarHandle finish:" << ok;
     wait_cond_.notify_all();
   }
 
diff --git a/paddle/fluid/operators/distributed/request_handler_impl.cc b/paddle/fluid/operators/distributed/request_handler_impl.cc
index 025528fe7..dae56cc84 100644
--- a/paddle/fluid/operators/distributed/request_handler_impl.cc
+++ b/paddle/fluid/operators/distributed/request_handler_impl.cc
@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
                                 framework::Variable** outvar,
                                 const int trainer_id,
                                 const std::string& out_var_name) {
-  VLOG(4) << "RequestSendHandler:" << varname;
+  VLOG(40) << "RequestSendHandler:" << varname;
 
   // Sync
   if (varname == BATCH_BARRIER_MESSAGE) {
-    VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE";
+    VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
     rpc_server_->IncreaseBatchBarrier(kRequestSend);
   } else if (varname == COMPLETE_MESSAGE) {
-    VLOG(3) << "sync: recv complete message";
+    VLOG(30) << "sync: recv complete message";
     rpc_server_->Complete();
   } else {
     // Async
     if (!sync_mode_) {
-      VLOG(3) << "async process var: " << varname;
+      VLOG(30) << "async process var: " << varname;
       try {
         executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
                                       scope);
@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
       return true;
     } else {  // sync
       rpc_server_->WaitCond(kRequestSend);
-      VLOG(3) << "sync: processing received var: " << varname;
+      VLOG(30) << "sync: processing received var: " << varname;
 
       if (invar == nullptr) {
         LOG(FATAL) << "sync: Can not find server side var: " << varname;
@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
                                framework::Variable** outvar,
                                const int trainer_id,
                                const std::string& out_var_name) {
-  VLOG(4) << "RequestGetHandler:" << varname;
+  VLOG(40) << "RequestGetHandler:" << varname;
   if (sync_mode_) {
     if (varname == FETCH_BARRIER_MESSAGE) {
-      VLOG(3) << "sync: recv fetch barrier message";
+      VLOG(30) << "sync: recv fetch barrier message";
       rpc_server_->IncreaseBatchBarrier(kRequestGet);
     } else {
       rpc_server_->WaitCond(kRequestGet);
@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname,
         // NOTE: the format is determined by distributed_transpiler.py
         std::string param_bak_name =
             string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
-        VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
+        VLOG(30) << "getting " << param_bak_name << " trainer_id "
+                 << trainer_id;
         auto var = scope_->FindVar(varname);
         auto t_orig = var->Get<framework::LoDTensor>();
         auto param_bak = scope_->Var(param_bak_name);
         auto t = param_bak->GetMutable<framework::LoDTensor>();
         t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
-        VLOG(3) << "copying " << varname << " to " << param_bak_name;
+        VLOG(30) << "copying " << varname << " to " << param_bak_name;
         framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
       }
       *outvar = scope_->FindVar(varname);
@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
                                     framework::Variable** outvar,
                                     const int trainer_id,
                                     const std::string& out_var_name) {
-  VLOG(4) << "RequestPrefetchHandler " << varname;
+  VLOG(40) << "RequestPrefetchHandler " << varname;
 
   auto var_desc = program_->Block(0).FindVar(out_var_name);
   InitializeVariable(*outvar, var_desc->GetType());
@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
   auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
   lt_var->clear();
   lt_var->append(out_var_name);
-  VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: "
-          << out_var_name;
+  VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
+           << out_var_name;
   executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
   return true;
 }
diff --git a/paddle/fluid/operators/distributed/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc
index 3e30ed4ac..405509110 100644
--- a/paddle/fluid/operators/distributed/rpc_server.cc
+++ b/paddle/fluid/operators/distributed/rpc_server.cc
@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
   port_file.open(file_path);
   port_file << selected_port_;
   port_file.close();
-  VLOG(4) << "selected port written to " << file_path;
+  VLOG(40) << "selected port written to " << file_path;
 }
 
 void RPCServer::WaitBarrier(const std::string& rpc_name) {
@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
             exit_flag_.load());
   });
 
-  VLOG(3) << "batch_barrier_: " << rpc_name << " "
-          << barrier_counter_[rpc_name];
+  VLOG(30) << "batch_barrier_: " << rpc_name << " "
+           << barrier_counter_[rpc_name];
 }
 
 void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
-  VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
+  VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
   int b = 0;
   std::unique_lock<std::mutex> lock(mutex_);
   b = ++barrier_counter_[rpc_name];
@@ -71,7 +71,7 @@ void RPCServer::Complete() {
     client_num_--;
     need_reset_all_vars_ = true;
 
-    VLOG(4) << "decrease client_num to: " << client_num_;
+    VLOG(40) << "decrease client_num to: " << client_num_;
     if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
       barrier_counter_[kRequestGet]--;
     }
@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
 }
 
 void RPCServer::ResetBarrierCounter() {
-  VLOG(3) << "RPCServer ResetBarrierCounter ";
+  VLOG(30) << "RPCServer ResetBarrierCounter ";
   std::unique_lock<std::mutex> lock(mutex_);
   for (auto& t : barrier_counter_) {
     t.second = 0;
@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
 
   static int cond = -1;
   rpc_cond_map_[rpc_name] = ++cond;
-  VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
-          << ", cond:" << rpc_cond_map_[rpc_name];
+  VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
+           << ", cond:" << rpc_cond_map_[rpc_name];
 }
 
 void RPCServer::SetCond(const std::string& rpc_name) {
-  VLOG(3) << "RPCServer SetCond " << rpc_name;
+  VLOG(30) << "RPCServer SetCond " << rpc_name;
   {
     std::unique_lock<std::mutex> lock(mutex_);
     cur_cond_ = rpc_cond_map_[rpc_name];
@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
 }
 
 void RPCServer::WaitCond(const std::string& rpc_name) {
-  VLOG(4) << "RPCServer WaitCond " << rpc_name;
+  VLOG(40) << "RPCServer WaitCond " << rpc_name;
   int cond = 0;
   {
     std::unique_lock<std::mutex> lock(mutex_);
diff --git a/paddle/fluid/operators/distributed/variable_response.cc b/paddle/fluid/operators/distributed/variable_response.cc
index b2f73b67d..d1572ce01 100644
--- a/paddle/fluid/operators/distributed/variable_response.cc
+++ b/paddle/fluid/operators/distributed/variable_response.cc
@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
         size_to_write = length - total_written;
       }
       // This log is useful to see how long a internal block size is of rpc.
-      VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
+      VLOG(70) << "copy " << size_to_write << " data to CUDAPlace";
       memory::Copy(boost::get<platform::CUDAPlace>(place),
                    reinterpret_cast<void*>(p), cpu, data, size_to_write,
                    gpu_dev_ctx.stream());
@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
     // TODO(gongwb): can we avoid copy?
     platform::CPUPlace cpu;
     // This log is useful to see how long a internal block size is of rpc.
-    VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
+    VLOG(70) << "copy " << size_to_write << " data to CPUPlace";
     memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
 
     p += size_to_write;
@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField(
 #endif
   }
 
-  VLOG(7) << "ProcSerializedField:" << meta_.varname()
-          << ", type:" << meta_.type() << std::endl;
+  VLOG(70) << "ProcSerializedField:" << meta_.varname()
+           << ", type:" << meta_.type() << std::endl;
   framework::DDim dims = GetDims(meta_.dims());
   if (meta_.type() == sendrecv::LOD_TENSOR) {
     PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!");
diff --git a/paddle/fluid/operators/feed_op.cc b/paddle/fluid/operators/feed_op.cc
index dc7ef6649..5da0a536d 100644
--- a/paddle/fluid/operators/feed_op.cc
+++ b/paddle/fluid/operators/feed_op.cc
@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
 
     auto col = Attr<int>("col");
 
-    VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var "
-            << out_name;
+    VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
+             << " column to var " << out_name;
 
     auto &feed_list = feed_var->Get<framework::FeedFetchList>();
     auto &feed_item = feed_list.at(static_cast<size_t>(col));
diff --git a/paddle/fluid/operators/fetch_barrier_op.cc b/paddle/fluid/operators/fetch_barrier_op.cc
index 8754856e1..88a5e59ce 100644
--- a/paddle/fluid/operators/fetch_barrier_op.cc
+++ b/paddle/fluid/operators/fetch_barrier_op.cc
@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
 
     for (auto& ep : eps) {
-      VLOG(3) << "fetch barrier, ep: " << ep;
+      VLOG(30) << "fetch barrier, ep: " << ep;
       rpc_client->AsyncSendFetchBarrier(ep);
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
diff --git a/paddle/fluid/operators/fetch_op.cc b/paddle/fluid/operators/fetch_op.cc
index c197b45e8..c9e759ebf 100644
--- a/paddle/fluid/operators/fetch_op.cc
+++ b/paddle/fluid/operators/fetch_op.cc
@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
     TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
     dst_item.set_lod(src_item.lod());
 
-    VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;
+    VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
   }
 };
 
diff --git a/paddle/fluid/operators/gen_nccl_id_op.cc b/paddle/fluid/operators/gen_nccl_id_op.cc
index ef574ccdf..56ea165ff 100644
--- a/paddle/fluid/operators/gen_nccl_id_op.cc
+++ b/paddle/fluid/operators/gen_nccl_id_op.cc
@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
         distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
 
     for (auto& ep : endpoint_list) {
-      VLOG(3) << "sending nccl id to " << ep;
+      VLOG(30) << "sending nccl id to " << ep;
       client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
     }
     client->Wait();
@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
       client->AsyncSendBatchBarrier(ep);
     }
     client->Wait();
-    VLOG(3) << "sending completed...";
+    VLOG(30) << "sending completed...";
   }
 
   void GetIdByServer(framework::Scope* scope,
@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
         std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
 
     rpc_service->SetCond(distributed::kRequestSend);
-    VLOG(3) << "start getting nccl id from trainer 0...";
+    VLOG(30) << "start getting nccl id from trainer 0...";
     rpc_service->WaitBarrier(distributed::kRequestSend);
-    VLOG(3) << "got nccl id and stop server...";
+    VLOG(30) << "got nccl id and stop server...";
     rpc_service->ShutDown();
-    VLOG(3) << "rpc server stopped";
+    VLOG(30) << "rpc server stopped";
     server_thread.join();
   }
 };
diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc
index 1d8b1411c..e3d09e2d1 100644
--- a/paddle/fluid/operators/listen_and_serv_op.cc
+++ b/paddle/fluid/operators/listen_and_serv_op.cc
@@ -36,7 +36,7 @@ namespace operators {
 
 void RunServer(std::shared_ptr<distributed::RPCServer> service) {
   service->StartServer();
-  VLOG(4) << "RunServer thread end";
+  VLOG(40) << "RunServer thread end";
 }
 static void split(const std::string &str, char sep,
                   std::vector<std::string> *pieces) {
@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks(
     fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
       int run_block = idx;  // thread local
       try {
-        VLOG(3) << "running server block: " << run_block
-                << "pointer: " << prepared[run_block].get();
+        VLOG(30) << "running server block: " << run_block
+                 << "pointer: " << prepared[run_block].get();
         executor->RunPreparedContext(prepared[run_block].get(), scope);
       } catch (const std::exception &e) {
         LOG(FATAL) << "run sub program:" << idx << " error " << e.what();
@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
     framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
     const std::vector<int> &prefetch_block_id_list,
     const int checkpoint_point_block_id) const {
-  VLOG(2) << "RunSyncLoop";
+  VLOG(20) << "RunSyncLoop";
   size_t num_blocks = program->Size();
   auto optimize_blocks =
       Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
     }
     ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
                           recv_scope);
-    VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
+    VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
 
     ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
 
@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
   for (auto &varname : sparse_vars_) {
     auto var = recv_scope->FindVar(varname);
     if (var == nullptr) {
-      VLOG(2) << "can not find var " << varname << " in received scope";
+      VLOG(20) << "can not find var " << varname << " in received scope";
       continue;
     }
     if (var->IsType<framework::SelectedRows>()) {
-      VLOG(3) << "reset sparse var: " << varname;
+      VLOG(30) << "reset sparse var: " << varname;
       var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
     } else {
       PADDLE_THROW("The type of sparse var should be SelectedRows");
@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
     for (auto &varname : dense_vars_) {
       auto var = recv_scope->FindVar(varname);
       if (var == nullptr) {
-        VLOG(2) << "can not find var " << varname << " in received scope";
+        VLOG(20) << "can not find var " << varname << " in received scope";
         continue;
       }
       if (var->IsType<framework::LoDTensor>()) {
@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
 void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
                                    framework::ProgramDesc *program,
                                    framework::Scope *recv_scope) const {
-  VLOG(2) << "RunAsyncLoop";
+  VLOG(20) << "RunAsyncLoop";
   auto grad_to_block_id_str =
       Attr<std::vector<std::string>>("grad_to_block_id");
   DoubleFindMap<std::string, int32_t> grad_to_block_id;
@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
                               const std::string &grad_and_id) {
     std::vector<std::string> pieces;
     split(grad_and_id, ':', &pieces);
-    VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
+    VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
     PADDLE_ENFORCE_EQ(pieces.size(), 2);
     PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
 
@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
 
   while (true) {
     if (rpc_service_->IsExit()) {
-      VLOG(4) << "get exit!rpc_processor break!";
+      VLOG(40) << "get exit!rpc_processor break!";
       break;
     }
 
@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   std::string endpoint = Attr<std::string>("endpoint");
   int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
 
-  VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
-          << ", end_point:" << endpoint
-          << ", checkpoint_block_id: " << checkpoint_block_id;
+  VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
+           << ", end_point:" << endpoint
+           << ", checkpoint_block_id: " << checkpoint_block_id;
 
   rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));
 
@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
        prefetch_var_name_to_block_id_str) {
     std::vector<std::string> pieces;
     split(prefetch_var_name_and_id, ':', &pieces);
-    VLOG(3) << "after split, prefetch_var = " << pieces[0]
-            << ", id=" << pieces[1];
+    VLOG(30) << "after split, prefetch_var = " << pieces[0]
+             << ", id=" << pieces[1];
     PADDLE_ENFORCE_EQ(pieces.size(), 2);
 
     int block_id = std::stoi(pieces[1]);
@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
 
   // start the server listening after all member initialized.
   server_thread_.reset(new std::thread(RunServer, rpc_service_));
-  VLOG(3) << "wait server thread to become ready...";
+  VLOG(30) << "wait server thread to become ready...";
   rpc_service_->WaitServerReady();
 
   // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
diff --git a/paddle/fluid/operators/lod_rank_table_op.cc b/paddle/fluid/operators/lod_rank_table_op.cc
index 166952fe2..59ef9cb62 100644
--- a/paddle/fluid/operators/lod_rank_table_op.cc
+++ b/paddle/fluid/operators/lod_rank_table_op.cc
@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
     auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
     auto *out =
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
-    VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
+    VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
     out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
-    VLOG(10) << Input("X") << "'s lod information is " << *out;
+    VLOG(100) << Input("X") << "'s lod information is " << *out;
   }
 };
 
diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc
index 3226a727b..1878dfe8a 100644
--- a/paddle/fluid/operators/lookup_table_op.cc
+++ b/paddle/fluid/operators/lookup_table_op.cc
@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
     auto attr = op_desc.GetAttr("is_sparse");
     bool is_sparse = boost::get<bool>(attr);
     if (is_sparse) {
-      VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
-              << " is set to SelectedRows";
+      VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
+               << " is set to SelectedRows";
       block->Var(out_var_name)
           ->SetType(framework::proto::VarType::SELECTED_ROWS);
     } else {
-      VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
-              << " is set to LoDTensor";
+      VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
+               << " is set to LoDTensor";
       block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
     }
     block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
diff --git a/paddle/fluid/operators/math/cpu_vec_test.cc b/paddle/fluid/operators/math/cpu_vec_test.cc
index cd40f1b2f..18a586f8d 100644
--- a/paddle/fluid/operators/math/cpu_vec_test.cc
+++ b/paddle/fluid/operators/math/cpu_vec_test.cc
@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
   }
   auto et = GetCurrentUS();
 
-  VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
-          << " us, tgt takes: " << (mt - st) / repeat;
+  VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
+           << " us, tgt takes: " << (mt - st) / repeat;
   for (int i = 0; i < n; ++i) {
     EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
   }
diff --git a/paddle/fluid/operators/math/jit_kernel_test.cc b/paddle/fluid/operators/math/jit_kernel_test.cc
index 9a1942469..dd88c55d5 100644
--- a/paddle/fluid/operators/math/jit_kernel_test.cc
+++ b/paddle/fluid/operators/math/jit_kernel_test.cc
@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) {
         vrelu_intri8(d, x_data, zref_data);
       }
       auto si1 = GetCurrentUS();
-      VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
+      VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
     }
 #endif
     auto ttgts = GetCurrentUS();
@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) {
       ker->Compute(x_data, ztgt_data);
     }
     auto ttgte = GetCurrentUS();
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-            << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+             << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
-            << " us, "
+             << " us, "
 #endif
-            << "tgt takes: " << (ttgte - ttgts) / repeat;
+             << "tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
-            << " us, tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
+             << " us, tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -320,9 +324,10 @@ TEST(JitKernel, vtanh) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
-            << " us, tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
+             << " us, tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -440,9 +445,10 @@ TEST(JitKernel, lstm) {
       ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data);
     }
     auto ttgte = GetCurrentUS();
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
-            << " us, tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
+             << " us, tgt takes: " << (ttgte - ttgts) / repeat;
   }
 }
 
@@ -524,8 +530,8 @@ TEST(JitKernel, vscal) {
         vscal_inp_intri8(d, a, y_data);
       }
       auto si3 = GetCurrentUS();
-      VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
-              << " us, inplace: " << (si3 - si2) / repeat;
+      VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
+               << " us, inplace: " << (si3 - si2) / repeat;
     }
 #endif
 
@@ -539,15 +545,17 @@ TEST(JitKernel, vscal) {
       ker->Compute(a, y_data);
     }
     auto ttgte1 = GetCurrentUS();
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, inplace takes: " << (trefe1 - trefs1) / repeat
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, inplace takes: " << (trefe1 - trefs1) / repeat
 #ifdef PADDLE_WITH_MKLML
-            << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
+             << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat
+             << " us, "
 #else
-            << " us, "
+             << " us, "
 #endif
-            << "tgt takes: " << (ttgte - ttgts) / repeat
-            << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
+             << "tgt takes: " << (ttgte - ttgts) / repeat
+             << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -610,7 +618,7 @@ TEST(JitKernel, vmul) {
         vmul_intri8(d, x_data, y_data, zref_data);
       }
       auto si1 = GetCurrentUS();
-      VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
+      VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
     }
 #endif
 
@@ -620,13 +628,14 @@ TEST(JitKernel, vmul) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-            << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+             << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
-            << " us, "
+             << " us, "
 #endif
-            << "tgt takes: " << (ttgte - ttgts) / repeat;
+             << "tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -689,7 +698,7 @@ TEST(JitKernel, vadd) {
         vadd_intri8(d, x_data, y_data, zref_data);
       }
       auto si1 = GetCurrentUS();
-      VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
+      VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
     }
 #endif
 
@@ -699,13 +708,14 @@ TEST(JitKernel, vadd) {
     }
     auto ttgte = GetCurrentUS();
 
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
 #ifdef PADDLE_WITH_MKLML
-            << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
+             << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
 #else
-            << " us, "
+             << " us, "
 #endif
-            << "tgt takes: " << (ttgte - ttgts) / repeat;
+             << "tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
@@ -760,9 +770,10 @@ TEST(JitKernel, vaddrelu) {
       ker->Compute(x_data, y_data, ztgt_data, d);
     }
     auto ttgte = GetCurrentUS();
-    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
-            << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
-            << "tgt takes: " << (ttgte - ttgts) / repeat;
+    VLOG(30) << "Vec size " << d
+             << ": refer takes: " << (trefe - trefs) / repeat
+             << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
+             << "tgt takes: " << (ttgte - ttgts) / repeat;
     for (int i = 0; i < d; ++i) {
       EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
     }
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc
index 759467403..9577a4cb9 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cc
+++ b/paddle/fluid/operators/math/selected_rows_functor.cc
@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
                   const std::vector<const framework::SelectedRows*>& inputs,
                   framework::SelectedRows* output) {
     if (inputs.size() == 0) {
-      VLOG(3) << "no input! return";
+      VLOG(30) << "no input! return";
       return;
     }
     const framework::SelectedRows* has_value_input = nullptr;
@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
       }
     }
     if (has_value_input == nullptr) {
-      VLOG(3) << "no input has value! just return" << std::endl;
+      VLOG(30) << "no input has value! just return" << std::endl;
       return;
     }
     auto input_width = has_value_input->value().dims()[1];
diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu
index c4fccdbf8..74b9659cf 100644
--- a/paddle/fluid/operators/math/selected_rows_functor.cu
+++ b/paddle/fluid/operators/math/selected_rows_functor.cu
@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
                   const std::vector<const framework::SelectedRows*>& inputs,
                   framework::SelectedRows* output) {
     if (inputs.size() == 0) {
-      VLOG(3) << "no input! return";
+      VLOG(30) << "no input! return";
       return;
     }
     const framework::SelectedRows* has_value_input = nullptr;
@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
       }
     }
     if (has_value_input == nullptr) {
-      VLOG(3) << "no input has value! just return" << std::endl;
+      VLOG(30) << "no input has value! just return" << std::endl;
       return;
     }
     auto input_width = has_value_input->value().dims()[1];
diff --git a/paddle/fluid/operators/momentum_op.h b/paddle/fluid/operators/momentum_op.h
index 71f079e4d..e5b756b4f 100644
--- a/paddle/fluid/operators/momentum_op.h
+++ b/paddle/fluid/operators/momentum_op.h
@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
 
       // sparse update maybe empty.
       if (grad->rows().size() == 0) {
-        VLOG(3) << "Grad SelectedRows contains no data!";
+        VLOG(30) << "Grad SelectedRows contains no data!";
         return;
       }
       auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc
index 363abfb0e..a2140ddc7 100644
--- a/paddle/fluid/operators/mul_op.cc
+++ b/paddle/fluid/operators/mul_op.cc
@@ -38,9 +38,9 @@ class MulOp : public framework::OperatorWithKernel {
     int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims");
     int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims");
 
-    VLOG(3) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
-            << " x_num_col_dims=" << x_num_col_dims
-            << " y_num_col_dims=" << y_num_col_dims;
+    VLOG(30) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
+             << " x_num_col_dims=" << x_num_col_dims
+             << " y_num_col_dims=" << y_num_col_dims;
 
     PADDLE_ENFORCE_GT(
         x_dims.size(), x_num_col_dims,
diff --git a/paddle/fluid/operators/nccl_op.cu.cc b/paddle/fluid/operators/nccl_op.cu.cc
index 8de974bc2..9db0031a6 100644
--- a/paddle/fluid/operators/nccl_op.cu.cc
+++ b/paddle/fluid/operators/nccl_op.cu.cc
@@ -63,16 +63,16 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
     // device id
     int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId();
     int idx = comm->GetCommId(gpu_id);
-    VLOG(3) << "gpu : "
-            << " invoke allreduce. send " << x->numel() << " recv "
-            << out->numel();
+    VLOG(30) << "gpu : "
+             << " invoke allreduce. send " << x->numel() << " recv "
+             << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
         x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
         NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(3) << "gpu : "
-            << " finished allreduce. send " << x->numel() << " recv "
-            << out->numel();
+    VLOG(30) << "gpu : "
+             << " finished allreduce. send " << x->numel() << " recv "
+             << out->numel();
   }
 };
 
@@ -109,14 +109,14 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
     } else {
       out->Resize(framework::make_ddim({0}));
     }
-    VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
-            << " recv " << out->numel();
+    VLOG(30) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
+             << " recv " << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclReduce(
         x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type,
         reduction_op_, root, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
-            << " recv " << out->numel();
+    VLOG(30) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
+             << " recv " << out->numel();
   }
 };
 
@@ -133,21 +133,22 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
     int idx = comm->GetCommId(gpu_id);
     if (idx == root) {
       auto* x = ctx.Input<LoDTensor>("X");
-      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
+      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(),
           NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
           ctx.cuda_device_context().stream()));
-      VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
+      VLOG(30) << "gpu : " << gpu_id << " finished Bcast.";
     } else {
       auto* out = ctx.Output<LoDTensor>("Out");
-      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
-              << framework::product(out->dims());
+      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
+               << framework::product(out->dims());
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           out->mutable_data<T>(ctx.GetPlace()), out->numel(),
           NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
           ctx.cuda_device_context().stream()));
-      VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel();
+      VLOG(30) << "gpu : " << gpu_id << " finished Bcast. recv "
+               << out->numel();
     }
   }
 };
diff --git a/paddle/fluid/operators/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl_op_test.cu.cc
index d5fb7a12e..f48ccdd97 100644
--- a/paddle/fluid/operators/nccl_op_test.cu.cc
+++ b/paddle/fluid/operators/nccl_op_test.cu.cc
@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test {
     (*p_scopes).resize(gpu_list_.size());
 
     auto op = f::OpRegistry::CreateOp(*op1);
-    VLOG(1) << "invoke NCCLInitOp.";
+    VLOG(10) << "invoke NCCLInitOp.";
     op->Run(g_scope_, cpu_place);
-    VLOG(1) << "NCCLInitOp finished.";
+    VLOG(10) << "NCCLInitOp finished.";
   }
 
   int GetGPUData(int gpu_id) { return gpu_id + 42; }
@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test {
 
       std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id));
       paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor);
-      VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel();
+      VLOG(10) << "Send Tensor filled with elements " << send_tensor->numel();
     }
 
     lk.unlock();
@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test {
 
     auto op = f::OpRegistry::CreateOp(*op1);
 
-    VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type();
-    VLOG(1) << " send_tensor : " << send_tensor->numel()
-            << " recv_tensor : " << recv_tensor->numel();
+    VLOG(10) << "Device : " << gpu_id << " invoke " << op_desc.Type();
+    VLOG(10) << " send_tensor : " << send_tensor->numel()
+             << " recv_tensor : " << recv_tensor->numel();
     op->Run(*scope, place);
-    VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type();
+    VLOG(10) << "Device : " << gpu_id << " finished " << op_desc.Type();
   }
 
  public:
diff --git a/paddle/fluid/operators/parallel_do_op.cc b/paddle/fluid/operators/parallel_do_op.cc
index ab25628d4..c795d4bdd 100644
--- a/paddle/fluid/operators/parallel_do_op.cc
+++ b/paddle/fluid/operators/parallel_do_op.cc
@@ -48,7 +48,7 @@ static void SplitTensorAndMoveTensorToScopes(
     auto lod_tensors = tensor.SplitLoDTensor(places);
 
     for (auto &lod : lod_tensors) {
-      VLOG(3) << lod.dims();
+      VLOG(30) << lod.dims();
     }
     if (num_sub_scopes == 0) {
       num_sub_scopes = lod_tensors.size();
@@ -263,7 +263,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
       if (s == framework::kEmptyVarName) {
         continue;
       }
-      VLOG(3) << "Moving " << s;
+      VLOG(30) << "Moving " << s;
       CopyOrShare(*sub_scopes[0]->FindVar(s), place, scope.FindVar(s));
     }
     WaitOnPlaces(places);
@@ -277,7 +277,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
       if (s == framework::kEmptyVarName) {
         continue;
       }
-      VLOG(3) << "Accumulating " << s;
+      VLOG(30) << "Accumulating " << s;
       if (s == framework::kEmptyVarName) continue;
       std::string tmp_name;
       auto *tmp = sub_scopes[0]->Var(&tmp_name);
@@ -289,7 +289,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
         auto sum_op = framework::OpRegistry::CreateOp(
             "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
             framework::AttributeMap{{"use_mkldnn", {false}}});
-        VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
+        VLOG(100) << sum_op->DebugStringEx(sub_scopes[0]);
         sum_op->Run(*sub_scopes[0], places[0]);
         WaitOnPlace(places[0]);
       }
@@ -316,7 +316,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto *grad = new framework::OpDesc();
     grad->SetType("parallel_do_grad");
     for (auto &input_param : this->InputNames()) {
-      VLOG(3) << input_param;
+      VLOG(30) << input_param;
       grad->SetInput(input_param, this->Input(input_param));
       if (input_param != kPlaces) {
         grad->SetOutput(framework::GradVarName(input_param),
diff --git a/paddle/fluid/operators/prefetch_op.cc b/paddle/fluid/operators/prefetch_op.cc
index 490dfa41b..55853d254 100644
--- a/paddle/fluid/operators/prefetch_op.cc
+++ b/paddle/fluid/operators/prefetch_op.cc
@@ -48,12 +48,12 @@ class PrefetchOp : public framework::OperatorBase {
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
-        VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
-                << outs[i] << " back";
+        VLOG(30) << "sending " << ins[i] << " to " << epmap[i] << " to get "
+                 << outs[i] << " back";
         rets.push_back(rpc_client->AsyncPrefetchVar(epmap[i], ctx, scope,
                                                     ins[i], outs[i]));
       } else {
-        VLOG(3) << "don't send no-initialied variable: " << ins[i];
+        VLOG(30) << "don't send no-initialied variable: " << ins[i];
       }
     }
     for (size_t i = 0; i < rets.size(); i++) {
diff --git a/paddle/fluid/operators/random_crop_op.h b/paddle/fluid/operators/random_crop_op.h
index d68ba9d66..5f1a48b6d 100644
--- a/paddle/fluid/operators/random_crop_op.h
+++ b/paddle/fluid/operators/random_crop_op.h
@@ -155,8 +155,8 @@ class RandomCropKernel : public framework::OpKernel<T> {
         seed = *cpu_seed.data<int64_t>();
       }
     } else {
-      VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
-                 "'startup_seed' instead.";
+      VLOG(50) << "WARNING: The input 'Seed' is not initialized, use attribute "
+                  "'startup_seed' instead.";
       seed = ctx.Attr<int>("startup_seed");
     }
     auto shape = ctx.Attr<std::vector<int>>("shape");
diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h
index 51b980acb..618248f87 100644
--- a/paddle/fluid/operators/reader/blocking_queue.h
+++ b/paddle/fluid/operators/reader/blocking_queue.h
@@ -42,7 +42,7 @@ class BlockingQueue {
     std::unique_lock<std::mutex> lock(mutex_);
     send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
     if (closed_) {
-      VLOG(5)
+      VLOG(50)
           << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
       return false;
     }
@@ -56,7 +56,7 @@ class BlockingQueue {
     std::unique_lock<std::mutex> lock(mutex_);
     send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
     if (closed_) {
-      VLOG(5)
+      VLOG(50)
           << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
       return false;
     }
diff --git a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
index 3f72890a7..3fe4e9e7a 100644
--- a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc
@@ -26,7 +26,7 @@ class ShuffleReader : public framework::DecoratedReader {
   ShuffleReader(const std::shared_ptr<ReaderBase>& reader, size_t buffer_size,
                 size_t seed = 0)
       : DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) {
-    VLOG(10) << "Create shuffle reader of " << reader_;
+    VLOG(100) << "Create shuffle reader of " << reader_;
     if (seed_ == 0) {
       std::random_device device;
       seed_ = device();
@@ -37,7 +37,7 @@ class ShuffleReader : public framework::DecoratedReader {
   void ReadNextImpl(std::vector<framework::LoDTensor>* out) override {
     out->clear();
     if (iteration_pos_ >= buffer_.size()) {
-      VLOG(10) << "Resetting shuffle buffer";
+      VLOG(100) << "Resetting shuffle buffer";
       ReloadBuffer();
       if (buffer_.empty()) {
         return;
@@ -73,7 +73,7 @@ class ShuffleReader : public framework::DecoratedReader {
     std::mt19937 g(seed_);
     std::shuffle(buffer_.begin(), buffer_.end(), g);
     seed_ = g();  // update seed_;
-    VLOG(10) << "random buffer size = " << buffer_.size();
+    VLOG(100) << "random buffer size = " << buffer_.size();
   }
 
   size_t buffer_size_;
diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc
index 162bfcbb0..283dce932 100644
--- a/paddle/fluid/operators/recurrent_op.cc
+++ b/paddle/fluid/operators/recurrent_op.cc
@@ -160,7 +160,7 @@ class RecurrentBase : public framework::OperatorBase {
                                      Callback callback) {
     PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
     for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
+      VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
       AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
     }
   }
@@ -176,7 +176,7 @@ class RecurrentBase : public framework::OperatorBase {
                                      Callback callback) {
     PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
     for (size_t i = 0; i < dst_vars.size(); ++i) {
-      VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
+      VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
       AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
     }
   }
@@ -230,7 +230,7 @@ class RecurrentOp : public RecurrentBase {
   void RunImpl(const framework::Scope &scope,
                const platform::Place &place) const override {
     auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope));
-    VLOG(3) << "Static RNN input sequence length = " << seq_len;
+    VLOG(30) << "Static RNN input sequence length = " << seq_len;
     StepScopes scopes = CreateStepScopes(scope, seq_len);
     auto reverse = Attr<bool>(kReverse);
 
@@ -241,7 +241,7 @@ class RecurrentOp : public RecurrentBase {
 
     for (size_t i = 0; i < seq_len; ++i) {
       size_t seq_offset = reverse ? seq_len - i - 1 : i;
-      VLOG(3) << "Recurrent operate at the time step " << seq_offset;
+      VLOG(30) << "Recurrent operate at the time step " << seq_offset;
 
       auto &cur_scope = scopes.CurScope();
 
@@ -334,7 +334,7 @@ class RecurrentGradOp : public RecurrentBase {
 
     for (size_t step_id = 0; step_id < seq_len; ++step_id) {
       size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
-      VLOG(3) << "Recurrent backward operate at the time step " << seq_offset;
+      VLOG(30) << "Recurrent backward operate at the time step " << seq_offset;
       auto &cur_scope = scopes.CurScope();
       // Link outside::output_grads --> inside::output_grads
       //   inside::output_grad = outside::output_grad[seq_offset:seq_offset+1]
@@ -348,11 +348,11 @@ class RecurrentGradOp : public RecurrentBase {
           });
       auto og_set = List2Set(Inputs(kOutputGrads));
 
-      if (VLOG_IS_ON(10)) {
+      if (VLOG_IS_ON(100)) {
         std::ostringstream sout;
         std::copy(og_set.begin(), og_set.end(),
                   std::ostream_iterator<std::string>(sout, ","));
-        VLOG(10) << " RNN output gradients = [" << sout.str() << "]";
+        VLOG(100) << " RNN output gradients = [" << sout.str() << "]";
       }
 
       // Link states
@@ -374,7 +374,7 @@ class RecurrentGradOp : public RecurrentBase {
           auto &ex_tensor =
               ex_scope.FindVar(ex_grad)->Get<framework::LoDTensor>();
 
-          VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad;
+          VLOG(100) << " RNN link " << cur_grad << " from " << ex_grad;
           auto *cur_grad_var = cur_scope.Var(cur_grad);
           auto cur_grad_tensor =
               cur_grad_var->GetMutable<framework::LoDTensor>();
@@ -382,12 +382,12 @@ class RecurrentGradOp : public RecurrentBase {
         }
       }
 
-      VLOG(5) << "Recurrent memory linking finished ";
+      VLOG(50) << "Recurrent memory linking finished ";
       // Run step block with cur_scope
       executor.Run(*program, &cur_scope, block->ID(),
                    false /*create_local_scope*/);
 
-      VLOG(5) << "executor.Run finished ";
+      VLOG(50) << "executor.Run finished ";
 
       auto local_var_names = LocalVarNames(cur_scope);
 
@@ -436,7 +436,7 @@ class RecurrentGradOp : public RecurrentBase {
           cur_scope.Rename(new_inside_name, inside_grad_name);
         }
       }
-      VLOG(5) << "Accumulate Parameter finished ";
+      VLOG(50) << "Accumulate Parameter finished ";
 
       // Copy input gradient from inside to outside
       //   outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad
@@ -455,7 +455,7 @@ class RecurrentGradOp : public RecurrentBase {
             auto dst = outside->Slice(seq_offset, seq_offset + 1);
             framework::TensorCopy(inside, place, dev_ctx, &dst);
           });
-      VLOG(5) << "Link outside gradient finished ";
+      VLOG(50) << "Link outside gradient finished ";
 
       if (step_id + 1 == seq_len) {  // at_end
         // copy initialize states gradient from inside to outside
@@ -468,7 +468,7 @@ class RecurrentGradOp : public RecurrentBase {
               outside->mutable_data(place, inside.type());
               framework::TensorCopy(inside, place, dev_ctx, outside);
             });
-        VLOG(5) << "Link initialize state gradient finished ";
+        VLOG(50) << "Link initialize state gradient finished ";
       }
       scopes.Next();
     }
diff --git a/paddle/fluid/operators/recv_op.cc b/paddle/fluid/operators/recv_op.cc
index 0399ff410..fbbd86502 100644
--- a/paddle/fluid/operators/recv_op.cc
+++ b/paddle/fluid/operators/recv_op.cc
@@ -47,7 +47,7 @@ class RecvOp : public framework::OperatorBase {
 
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < outs.size(); i++) {
-      VLOG(3) << "getting " << outs[i] << " from " << epmap[i];
+      VLOG(30) << "getting " << outs[i] << " from " << epmap[i];
       rets.push_back(rpc_client->AsyncGetVar(epmap[i], ctx, scope, outs[i]));
     }
     if (sync_mode) {
diff --git a/paddle/fluid/operators/rnn_memory_helper_op.cc b/paddle/fluid/operators/rnn_memory_helper_op.cc
index 0fb7776fd..b840e6909 100644
--- a/paddle/fluid/operators/rnn_memory_helper_op.cc
+++ b/paddle/fluid/operators/rnn_memory_helper_op.cc
@@ -93,7 +93,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
                    in_grad_var_name);
 
     if (out_grad_var == nullptr) {
-      VLOG(5) << "Using fill constant 0 as starting gradient";
+      VLOG(50) << "Using fill constant 0 as starting gradient";
       auto in_var_name = Input("X");
       auto *in_var = scope.FindVar(in_var_name);
       auto &in_var_tensor = in_var->Get<framework::LoDTensor>();
diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc
index e79cffcf4..0dcf3f0e3 100644
--- a/paddle/fluid/operators/save_op.cc
+++ b/paddle/fluid/operators/save_op.cc
@@ -110,7 +110,7 @@ class SaveOp : public framework::OperatorBase {
         lt_var != nullptr,
         "Can not find variable kLookupTablePath for SaveSelectedRows");
     std::string filename = lt_var->data();
-    VLOG(4) << "SaveSelectedRows get File name: " << filename;
+    VLOG(40) << "SaveSelectedRows get File name: " << filename;
 
     MkDirRecursively(DirName(filename).c_str());
 
diff --git a/paddle/fluid/operators/send_barrier_op.cc b/paddle/fluid/operators/send_barrier_op.cc
index 8ca2877d8..02ca107ca 100644
--- a/paddle/fluid/operators/send_barrier_op.cc
+++ b/paddle/fluid/operators/send_barrier_op.cc
@@ -42,12 +42,12 @@ class SendBarrierOp : public framework::OperatorBase {
         distributed::RPCClient::GetInstance<RPCCLIENT_T>(
             Attr<int>("trainer_id"));
 
-    VLOG(3) << "SendBarrierOp sync";
+    VLOG(30) << "SendBarrierOp sync";
 
     // need to wait before sending send_barrier message
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
     for (auto& ep : eps) {
-      VLOG(3) << "send barrier, ep: " << ep;
+      VLOG(30) << "send barrier, ep: " << ep;
       rpc_client->AsyncSendBatchBarrier(ep);
     }
     PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc
index be1dc4bf1..0ad43d56d 100644
--- a/paddle/fluid/operators/send_op.cc
+++ b/paddle/fluid/operators/send_op.cc
@@ -50,10 +50,10 @@ class SendOp : public framework::OperatorBase {
     std::vector<distributed::VarHandlePtr> rets;
     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
-        VLOG(3) << "sending " << ins[i] << " to " << epmap[i];
+        VLOG(30) << "sending " << ins[i] << " to " << epmap[i];
         rets.push_back(rpc_client->AsyncSendVar(epmap[i], ctx, scope, ins[i]));
       } else {
-        VLOG(3) << "don't send no-initialied variable: " << ins[i];
+        VLOG(30) << "don't send no-initialied variable: " << ins[i];
       }
     }
     if (sync_send) {
diff --git a/paddle/fluid/operators/send_recv_op_test.cc b/paddle/fluid/operators/send_recv_op_test.cc
index aee6180ad..d79b16e3c 100644
--- a/paddle/fluid/operators/send_recv_op_test.cc
+++ b/paddle/fluid/operators/send_recv_op_test.cc
@@ -120,7 +120,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
 void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   f::Scope scope;
   p::CPUPlace place;
-  VLOG(4) << "before init tensor";
+  VLOG(40) << "before init tensor";
   if (is_sparse) {
     InitSelectedRowsInScope(place, &scope);
   } else {
@@ -146,7 +146,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
   attrs.insert({"PrefetchBlock", prefetch_block});
   attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
   attrs.insert({"sync_mode", true});
-  VLOG(4) << "before init op";
+  VLOG(40) << "before init op";
   listen_and_serv_op =
       f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
   *initialized = true;
diff --git a/paddle/fluid/operators/sequence_mask_op.h b/paddle/fluid/operators/sequence_mask_op.h
index 18acb735c..7ff68f9c7 100644
--- a/paddle/fluid/operators/sequence_mask_op.h
+++ b/paddle/fluid/operators/sequence_mask_op.h
@@ -127,7 +127,7 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
     auto x_numel = x->numel();
     if (maxlen < 0) {
 #ifdef __NVCC__
-      VLOG(10)
+      VLOG(100)
           << "SequenceMaskOp on GPU may be slow when maxlen is not provided.";
       maxlen = static_cast<int>(
           thrust::reduce(thrust::device_pointer_cast(x_data),
diff --git a/paddle/fluid/operators/sgd_op.h b/paddle/fluid/operators/sgd_op.h
index d8b0165b2..2e206c963 100644
--- a/paddle/fluid/operators/sgd_op.h
+++ b/paddle/fluid/operators/sgd_op.h
@@ -98,10 +98,10 @@ class SGDOpKernel : public framework::OpKernel<T> {
 
       auto param_row_width = param.value().dims()[1];
       auto grad_row_width = grad.value().dims()[1];
-      VLOG(4) << " param rows: " << param.rows().size()
-              << " param memory rows: " << param.value().dims()[0]
-              << " grad rows: " << grad.rows().size()
-              << " grad memory rows: " << grad.value().dims()[0];
+      VLOG(40) << " param rows: " << param.rows().size()
+               << " param memory rows: " << param.value().dims()[0]
+               << " grad rows: " << grad.rows().size()
+               << " grad memory rows: " << grad.value().dims()[0];
       PADDLE_ENFORCE_EQ(param_row_width, grad_row_width,
                         "param_row should have the same size with grad_row");
 
diff --git a/paddle/fluid/operators/split_byref_op.h b/paddle/fluid/operators/split_byref_op.h
index fedd7218d..3b7ae6fc9 100644
--- a/paddle/fluid/operators/split_byref_op.h
+++ b/paddle/fluid/operators/split_byref_op.h
@@ -32,7 +32,7 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
     for (size_t i = 0; i < outs.size(); ++i) {
       // NOTE: no need to call mutable_data here to allocate memory.
       auto* out = outs[i];
-      VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0];
+      VLOG(30) << "spliting by ref: " << row_offset << " " << out->dims()[0];
       *out = in->Slice(row_offset, row_offset + out->dims()[0]);
       row_offset += out->dims()[0];
     }
diff --git a/paddle/fluid/operators/split_ids_op.h b/paddle/fluid/operators/split_ids_op.h
index 69ac6c5a6..a71c4791d 100644
--- a/paddle/fluid/operators/split_ids_op.h
+++ b/paddle/fluid/operators/split_ids_op.h
@@ -44,7 +44,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
       for (size_t i = 0; i < ids_tensors.size(); ++i) {
         batch_size += ids_tensors[i]->dims()[0];
       }
-      VLOG(4) << "Get Total BatchSize is: " << batch_size;
+      VLOG(40) << "Get Total BatchSize is: " << batch_size;
 
       std::vector<T> all_ids(batch_size);
       int offset = 0;
diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc
index f9a16ef35..2ae5c17bf 100644
--- a/paddle/fluid/operators/sum_mkldnn_op.cc
+++ b/paddle/fluid/operators/sum_mkldnn_op.cc
@@ -186,7 +186,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
       }
 
       if (in_dim.empty()) {
-        VLOG(3) << "WARNING: all the inputs are empty";
+        VLOG(30) << "WARNING: all the inputs are empty";
         in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
       } else {
         in_dim[0] = static_cast<int64_t>(first_dim);
diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc
index d19ac9839..5d49bca85 100644
--- a/paddle/fluid/operators/sum_op.cc
+++ b/paddle/fluid/operators/sum_op.cc
@@ -45,7 +45,7 @@ class SumOp : public framework::OperatorWithKernel {
     size_t N = x_dims.size();
     PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0.");
     if (N == 1) {
-      VLOG(3) << "Warning: sum have only one input, may waste memory";
+      VLOG(30) << "Warning: sum have only one input, may waste memory";
     }
 
     framework::DDim in_dim({0});
@@ -157,8 +157,8 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
     auto& inputs = op_desc.Input("X");
     auto var_type = framework::proto::VarType::SELECTED_ROWS;
     for (auto& name : op_desc.Input("X")) {
-      VLOG(10) << name << " "
-               << block->FindRecursiveOrCreateVar(name).GetType();
+      VLOG(100) << name << " "
+                << block->FindRecursiveOrCreateVar(name).GetType();
     }
 
     bool any_input_is_lod_tensor = std::any_of(
diff --git a/paddle/fluid/operators/tensor_array_read_write_op.cc b/paddle/fluid/operators/tensor_array_read_write_op.cc
index a2d44284e..484160aeb 100644
--- a/paddle/fluid/operators/tensor_array_read_write_op.cc
+++ b/paddle/fluid/operators/tensor_array_read_write_op.cc
@@ -34,8 +34,8 @@ class WriteToArrayOp : public ArrayOp {
     auto *out =
         scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>();
     if (offset >= out->size()) {
-      VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
-               << " to " << offset + 1;
+      VLOG(100) << "Resize " << Output("Out") << " from " << out->size()
+                << " to " << offset + 1;
       out->resize(offset + 1);
     }
     auto *out_tensor = &out->at(offset);
@@ -47,9 +47,9 @@ class WriteToArrayOp : public ArrayOp {
 
       TensorCopy(x_tensor, place, dev_ctx, out_tensor);
     } else {
-      VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
-                  "nothing has been written to output array["
-               << offset << "].";
+      VLOG(100) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
+                   "nothing has been written to output array["
+                << offset << "].";
     }
   }
 };
@@ -104,7 +104,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
                   framework::BlockDesc *block) const override {
     auto x_name = op_desc.Input("X")[0];
     auto out_name = op_desc.Output("Out")[0];
-    VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
+    VLOG(100) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
     auto &out = block->FindRecursiveOrCreateVar(out_name);
     out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
     auto *x = block->FindVarRecursive(x_name);
@@ -139,7 +139,7 @@ class ReadFromArrayOp : public ArrayOp {
       framework::TensorCopy(x_array[offset], place, dev_ctx, out_tensor);
       out_tensor->set_lod(x_array[offset].lod());
     } else {
-      VLOG(10) << "offset " << offset << " >= " << x_array.size();
+      VLOG(100) << "offset " << offset << " >= " << x_array.size();
     }
   }
 };
diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h
index 673f86da7..2f3d75e32 100644
--- a/paddle/fluid/operators/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt_engine_op.h
@@ -34,7 +34,7 @@ namespace operators {
 using FluidDT = framework::proto::VarType_Type;
 using TRT_DT = nvinfer1::DataType;
 
-namespace {
+namespace details {
 
 TRT_DT FluidDataType2TRT(FluidDT type) {
   switch (type) {
@@ -60,7 +60,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t>& shape) {
   return nvinfer1::DimsCHW(shape[1], 1, 1);
 }
 
-}  // namespace
+}  // namespace details
 
 using inference::Singleton;
 using inference::tensorrt::TRT_EngineManager;
@@ -127,9 +127,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
 
     // Convert output tensor from engine to fluid
     int output_index = 0;
-    VLOG(4) << "TensorRT Engine Op Outputs:";
+    VLOG(40) << "TensorRT Engine Op Outputs:";
     for (const auto& y : context.Outputs("Ys")) {
-      VLOG(4) << y;
+      VLOG(40) << y;
       // convert output and copy to fluid.
       nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]);
       auto dims = trt_t->getDimensions();
@@ -167,7 +167,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
 
  protected:
   void Prepare(const framework::ExecutionContext& context) const {
-    VLOG(4) << "Prepare engine";
+    VLOG(40) << "Prepare engine";
     // Get the ProgramDesc and pass to convert.
     framework::proto::BlockDesc block_desc;
     block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
@@ -192,12 +192,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
     engine->InitNetwork();
 
     framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
-    VLOG(4) << "parsed var size " << block.AllVars().size();
+    VLOG(40) << "parsed var size " << block.AllVars().size();
     // Add inputs
-    VLOG(4) << "declare inputs";
+    VLOG(40) << "declare inputs";
     for (auto& input : context.Inputs("Xs")) {
       if (parameters.count(input)) continue;
-      VLOG(4) << "declare input " << input;
+      VLOG(40) << "declare input " << input;
       auto* var = block.FindVar(input);
       // TensorRT engine need to create parameters. The parameter's description
       // should be set in
diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc
index 3c8a01b6e..aa6af055d 100644
--- a/paddle/fluid/operators/while_op.cc
+++ b/paddle/fluid/operators/while_op.cc
@@ -129,15 +129,15 @@ class WhileGradOp : public framework::OperatorBase {
 
     for (auto cur_scope_iter = step_scopes->rbegin();
          cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) {
-      VLOG(3) << "Start backward at time_step "
-              << cur_scope_iter - step_scopes->rbegin();
+      VLOG(30) << "Start backward at time_step "
+               << cur_scope_iter - step_scopes->rbegin();
       framework::Scope &cur_scope = **cur_scope_iter;
       // Link OG from outside to inside
       for (size_t i = 0; i < outside_og_names.size(); ++i) {
         auto outside_og_name = outside_og_names[i];
         auto inside_og_name = inside_og_names[i];
-        VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
-                << inside_og_name;
+        VLOG(80) << "Linking outside " << outside_og_name << " --> inside "
+                 << inside_og_name;
         if (scope.FindVar(outside_og_name) == nullptr) {
           continue;
         }
@@ -159,11 +159,11 @@ class WhileGradOp : public framework::OperatorBase {
           auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(8) << outside_og_name << " size = " << outside_array.size();
+          VLOG(80) << outside_og_name << " size = " << outside_array.size();
           inside_array.resize(outside_array.size());
 
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(8) << j << " " << outside_array[j].numel();
+            VLOG(80) << j << " " << outside_array[j].numel();
             if (outside_array[j].numel() != 0) {
               inside_array[j].set_lod(outside_array[j].lod());
               inside_array[j].ShareDataWith(outside_array[j]);
@@ -289,7 +289,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
     auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
     for (auto &each_ig : igs) {
       if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
-        VLOG(8) << "Ignore " << each_ig;
+        VLOG(80) << "Ignore " << each_ig;
         each_ig = framework::kEmptyVarName;
       }
     }
@@ -353,8 +353,8 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
       auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
       auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
       if (g_var != nullptr) {  // Gradient could be @EMPTY@
-        VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
-                << " type: " << p_var.GetType();
+        VLOG(50) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
+                 << " type: " << p_var.GetType();
         g_var->SetType(p_var.GetType());
         g_var->SetDataType(p_var.GetDataType());
       }
diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc
index dc1d75114..ea4564058 100644
--- a/paddle/fluid/platform/device_tracer.cc
+++ b/paddle/fluid/platform/device_tracer.cc
@@ -203,7 +203,7 @@ class DeviceTracerImpl : public DeviceTracer {
   void AddCPURecords(const std::string &anno, uint64_t start_ns,
                      uint64_t end_ns, int64_t device_id, int64_t thread_id) {
     if (anno.empty()) {
-      VLOG(1) << "Empty timeline annotation.";
+      VLOG(10) << "Empty timeline annotation.";
       return;
     }
     std::lock_guard<std::mutex> l(trace_mu_);
@@ -216,7 +216,7 @@ class DeviceTracerImpl : public DeviceTracer {
                      uint32_t correlation_id, uint64_t bytes) {
     // 0 means timestamp information could not be collected for the kernel.
     if (start_ns == 0 || end_ns == 0) {
-      VLOG(3) << name << " cannot be traced";
+      VLOG(30) << name << " cannot be traced";
       return;
     }
     std::lock_guard<std::mutex> l(trace_mu_);
@@ -228,7 +228,7 @@ class DeviceTracerImpl : public DeviceTracer {
                         int64_t stream_id, uint32_t correlation_id) {
     // 0 means timestamp information could not be collected for the kernel.
     if (start == 0 || end == 0) {
-      VLOG(3) << correlation_id << " cannot be traced";
+      VLOG(30) << correlation_id << " cannot be traced";
       return;
     }
     std::lock_guard<std::mutex> l(trace_mu_);
@@ -347,7 +347,7 @@ class DeviceTracerImpl : public DeviceTracer {
         tracer->AddAnnotation(cbInfo->correlationId, anno);
       }
     } else {
-      VLOG(1) << "Unhandled API Callback for " << domain << " " << cbid;
+      VLOG(10) << "Unhandled API Callback for " << domain << " " << cbid;
     }
   }
   CUpti_SubscriberHandle subscriber_;
diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc
index cc5cda610..d53907b74 100644
--- a/paddle/fluid/platform/dynload/dynamic_loader.cc
+++ b/paddle/fluid/platform/dynload/dynamic_loader.cc
@@ -72,8 +72,8 @@ static inline std::string join(const std::string& part1,
 
 static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
                                                 int dynload_flags) {
-  VLOG(3) << "Try to find library: " << dso_path
-          << " from default system path.";
+  VLOG(30) << "Try to find library: " << dso_path
+           << " from default system path.";
   // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
   // and /usr/local/lib path
   void* dso_handle = dlopen(dso_path.c_str(), dynload_flags);
diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index 8fff9844d..c78f159ad 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -124,8 +124,8 @@ size_t GpuMaxChunkSize() {
   size_t available = 0;
 
   GpuMemoryUsage(&available, &total);
-  VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
-           << total / 1024 / 1024 << "M";
+  VLOG(100) << "GPU Usage " << available / 1024 / 1024 << "M/"
+            << total / 1024 / 1024 << "M";
   size_t reserving = static_cast<size_t>(0.05 * total);
   // If available less than minimum chunk size, no usable memory exists.
   available =
diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc
index 2211e5504..4cbfe0a69 100644
--- a/paddle/fluid/platform/init.cc
+++ b/paddle/fluid/platform/init.cc
@@ -45,7 +45,7 @@ void InitGflags(std::vector<std::string> argv) {
       line += ' ';
     }
     google::ParseCommandLineFlags(&argc, &arr, true);
-    VLOG(1) << "Init commandline: " << line;
+    VLOG(10) << "Init commandline: " << line;
   });
 }
 
diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h
index 115abb98d..40af1f952 100644
--- a/paddle/fluid/platform/nccl_helper.h
+++ b/paddle/fluid/platform/nccl_helper.h
@@ -112,7 +112,7 @@ struct NCCLContextMap {
         NCCLGroupGuard gurad;
         for (auto &gpu_id : order_) {
           int rank = trainer_id * order_.size() + gpu_id;
-          VLOG(3) << "init nccl rank: " << rank << " nranks: " << nranks;
+          VLOG(30) << "init nccl rank: " << rank << " nranks: " << nranks;
           PADDLE_ENFORCE(cudaSetDevice(gpu_id));
           PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
               comms.get() + gpu_id, nranks, *nccl_id, rank));
diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
index d3b0d4a22..586e92c2b 100644
--- a/paddle/fluid/pybind/protobuf.cc
+++ b/paddle/fluid/pybind/protobuf.cc
@@ -61,9 +61,9 @@ struct variant_caster<V<Ts...>> {
       if (std::is_same<T, std::vector<float>>::value) {
         auto caster_ints = make_caster<std::vector<int64_t>>();
         if (caster_ints.load(src, convert)) {
-          VLOG(4) << "This value are floats and int64_ts satisfy "
-                     "simultaneously, will set it's type to "
-                     "std::vector<int64_t>";
+          VLOG(40) << "This value are floats and int64_ts satisfy "
+                      "simultaneously, will set it's type to "
+                      "std::vector<int64_t>";
           value = cast_op<std::vector<int64_t>>(caster_ints);
           return true;
         }
diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc
index a0757b53f..ac1ac8e7c 100644
--- a/paddle/fluid/train/demo/demo_trainer.cc
+++ b/paddle/fluid/train/demo/demo_trainer.cc
@@ -40,7 +40,7 @@ void ReadBinaryFile(const std::string& filename, std::string* contents) {
 
 std::unique_ptr<paddle::framework::ProgramDesc> Load(
     paddle::framework::Executor* executor, const std::string& model_filename) {
-  VLOG(3) << "loading model from " << model_filename;
+  VLOG(30) << "loading model from " << model_filename;
   std::string program_desc_str;
   ReadBinaryFile(model_filename, &program_desc_str);
 
diff --git a/paddle/testing/TestUtil.cpp b/paddle/testing/TestUtil.cpp
index fa8efc20f..fa1888966 100644
--- a/paddle/testing/TestUtil.cpp
+++ b/paddle/testing/TestUtil.cpp
@@ -118,7 +118,7 @@ void generateSequenceStartPositions(size_t batchSize,
     }
     buf[i] = pos;
     pos += len;
-    VLOG(1) << " len=" << len;
+    VLOG(10) << " len=" << len;
   }
   buf[numSeqs] = batchSize;
 }
-- 
GitLab