diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 0b8cfab573f2771d6845df2d2768c8f6e6634043..0b4c6db6f98d8d73b362d3c98f52a3914a031c68 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -56,7 +56,7 @@ cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
 cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
 cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context)
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog
-    shape_inference data_transform lod_tensor)
+    shape_inference data_transform lod_tensor profiler)
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
 cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
 
@@ -80,7 +80,7 @@ cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
 cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
 
 cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
-framework_proto backward glog lod_rank_table profiler feed_fetch_method)
+framework_proto backward glog lod_rank_table feed_fetch_method)
 
 cc_library(prune SRCS prune.cc DEPS framework_proto)
 cc_test(prune_test SRCS prune_test.cc DEPS op_info prune recurrent_op device_context)
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index d3155d33d0b461c9a3889ed8ae2ad9ee400a60fe..961e3e22f278d6e0346defd90190c53fd31ede08 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -25,7 +25,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/platform/place.h"
-#include "paddle/fluid/platform/profiler.h"
 
 DECLARE_bool(benchmark);
 DEFINE_bool(check_nan_inf, false,
@@ -126,11 +125,6 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
   for (auto& op_desc : block.AllOps()) {
     auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
 
-    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
-    // TODO(panyx0718): Need a program id to distinguish programs.
-    platform::RecordEvent record_event(op->Type(), pool.Get(place_),
-                                       op_desc->Block()->ID());
-
     VLOG(3) << place_ << " " << op->DebugStringEx(local_scope);
     op->Run(*local_scope, place_);
 
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 7debdd8525741ea4ae93fe7bff7b5817373fd7ce..ac6289c5abe8f40ae9ee32aa3d58cdef3ff0e836 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/shape_inference.h"
 #include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/platform/profiler.h"
 
 DECLARE_bool(benchmark);
 
@@ -497,7 +498,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   this->InferShape(&infer_shape_ctx);
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto dev_ctx = pool.Get(place);
-
+  // profile
+  platform::RecordEvent record_event(Type(), dev_ctx);
   // check if op[type] has kernel registered.
   auto& all_op_kernels = AllOpKernels();
   auto kernels_iter = all_op_kernels.find(type_);
diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc
index 201fc872946b70e3d7fbc318c8b04781056279b9..0076762d2f8c3840497ad354234680c4e41607cb 100644
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -132,21 +132,19 @@ void PopEvent(const std::string& name, const DeviceContext* dev_ctx) {
   GetEventList().Record(EventKind::kPopRange, name, g_thread_id, dev_ctx);
 }
 
-RecordEvent::RecordEvent(const std::string& name, const DeviceContext* dev_ctx,
-                         int32_t block_id) {
+RecordEvent::RecordEvent(const std::string& name,
+                         const DeviceContext* dev_ctx) {
   if (g_state == ProfilerState::kDisabled) return;
   dev_ctx_ = dev_ctx;
   name_ = name;
   PushEvent(name_, dev_ctx_);
-
-  full_name_ = string::Sprintf("%s_b%d", name, block_id);
   // Maybe need the same push/pop behavior.
-  SetCurAnnotation(full_name_.c_str());
+  SetCurAnnotation(name_.c_str());
 }
 
 RecordEvent::~RecordEvent() {
-  ClearCurAnnotation();
   if (g_state == ProfilerState::kDisabled) return;
+  ClearCurAnnotation();
   PopEvent(name_, dev_ctx_);
 }
 
diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h
index 830b86c88ee11b217114c95348c2d25d0dcdf961..775edb85c0c3b4b6d2cf2e86c527af0722c72fea 100644
--- a/paddle/fluid/platform/profiler.h
+++ b/paddle/fluid/platform/profiler.h
@@ -104,8 +104,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx);
 void PopEvent(const std::string& name, const DeviceContext* dev_ctx);
 
 struct RecordEvent {
-  RecordEvent(const std::string& name, const DeviceContext* dev_ctx,
-              int32_t block_id);
+  RecordEvent(const std::string& name, const DeviceContext* dev_ctx);
 
   ~RecordEvent();
 
diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc
index 8bc480857a4c3ae2825f08a8d9ed9c152adb80d4..dae4d2206e0a1ec6ef99122460a15c064efe58fd 100644
--- a/paddle/fluid/platform/profiler_test.cc
+++ b/paddle/fluid/platform/profiler_test.cc
@@ -95,7 +95,7 @@ TEST(RecordEvent, RecordEvent) {
    */
   for (int i = 1; i < 5; ++i) {
     std::string name = "evs_op_" + std::to_string(i);
-    RecordEvent record_event(name, dev_ctx, 0);
+    RecordEvent record_event(name, dev_ctx);
     int counter = 1;
     while (counter != i * 1000) counter++;
   }