diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index bcd586e8c099c81fb58f3fcc3c221858ab1ce511..df773c044bf118e2b381c4281c8e5039360aeef9 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -167,7 +167,13 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
     }
 
     {
-      platform::RecordEvent record_event(Type());
+      // TODO(wangchaochaohu) : refine code to use only one RecordEvent)
+      // in order to record different op type cost time
+      // and different op name cost time,we set two event.
+      platform::RecordEvent op_type_record_event(Type());
+      auto op_name = platform::OpName(outputs_, Type());
+      platform::RecordEvent op_name_record_event(
+          op_name, platform::RecordRole::kUniqueOp);
       RunImpl(scope, place);
     }
 
@@ -950,7 +956,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   std::vector<std::string> transfered_inplace_vars;
   Scope* transfer_scope = nullptr;
   {
-    platform::RecordEvent record_event("prepare_data_inner_op");
+    platform::RecordEvent record_event("prepare_data",
+                                       platform::RecordRole::kInnerOp);
     transfer_scope = PrepareData(scope, *kernel_type_, &transfered_inplace_vars,
                                  runtime_ctx);
   }
@@ -963,7 +970,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
 
   if (!all_kernels_must_compute_runtime_shape_) {
-    platform::RecordEvent record_event("infer_shape_inner_op");
+    platform::RecordEvent record_event("infer_shape",
+                                       platform::RecordRole::kInnerOp);
     RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
@@ -975,7 +983,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
   // not Scope. Imperative mode only pass inputs and get outputs.
   {
-    platform::RecordEvent record_event("compute_inner_op");
+    platform::RecordEvent record_event("compute",
+                                       platform::RecordRole::kInnerOp);
     (*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
                                      kernel_configs));
   }
diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc
index 2e89dbf8fdd153a2e16594ed27a3e9691193ee1e..07659e596e0fc426ff28c0b2bf5d49f4c8606028 100644
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -42,6 +42,7 @@ static int64_t profiler_lister_id = 0;
 static bool should_send_profile_state = false;
 std::mutex profiler_mu;
 
+static TracerOption g_tracer_option = TracerOption::kDefault;
 // The profiler state, the initial value is ProfilerState::kDisabled
 static ProfilerState g_state = ProfilerState::kDisabled;
 // The thread local event list only can be accessed by the specific thread
@@ -137,11 +138,15 @@ void PopEvent(const std::string &name) {
   GetEventList().Record(EventType::kPopRange, name, g_thread_id);
 }
 
-RecordEvent::RecordEvent(const std::string &name)
-    : is_enabled_(false), start_ns_(PosixInNsec()) {
+RecordEvent::RecordEvent(const std::string &name, const RecordRole role)
+    : is_enabled_(false), start_ns_(PosixInNsec()), role_(role) {
   if (g_state == ProfilerState::kDisabled || name.empty()) return;
+  if ((g_tracer_option == TracerOption::kOpDetail &&
+       role_ != RecordRole::kInnerOp) ||
+      (g_tracer_option == TracerOption::kDefault &&
+       role != RecordRole::kOrdinary))
+    return;
   // lock is not needed, the code below is thread-safe
-
   is_enabled_ = true;
   Event *e = PushEvent(name);
   // Maybe need the same push/pop behavior.
@@ -363,30 +368,21 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
               << "Max." << std::setw(data_width) << "Ave."
               << std::setw(data_width) << "Ratio." << std::endl;
   }
-  if (print_depth >= 100) return;
-  int print_depth_next = print_depth;
+
+  if (events_table.size() <= 0) return;
+
   for (size_t i = 0; i < events_table.size(); ++i) {
     for (size_t j = 0; j < events_table[i].size(); ++j) {
       auto event_item = events_table[i][j];
       std::vector<std::vector<EventItem>> child_table;
       std::vector<EventItem> table;
-      bool do_next = false;
-      std::string op_end_str = "inner_op";
       for (auto it = child_map.begin(); it != child_map.end(); it++) {
         if (it->first == event_item.name) {
           table.push_back(it->second);
-          if (!do_next)
-            do_next = !(it->second.name.rfind(op_end_str) ==
-                        (it->second.name.length() - op_end_str.length()));
         }
       }
       child_table.push_back(table);
 
-      if (do_next)
-        print_depth_next = print_depth + 1;
-      else
-        print_depth_next = print_depth + 100;
-
       auto name_len = event_item.name.length();
       std::string print_name = event_item.name.substr(remove_len, name_len);
       std::string delimiter;
@@ -394,10 +390,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
         delimiter = "  " + delimiter;
       }
       print_name = delimiter + print_name;
-      size_t pos = 0;
-      while ((pos = print_name.find(op_end_str)) != std::string::npos) {
-        print_name.erase(pos, op_end_str.length());
-      }
+
       std::cout << std::setw(name_width) << print_name << std::setw(data_width)
                 << event_item.calls << std::setw(data_width)
                 << event_item.total_time;
@@ -416,14 +409,14 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
                 << std::setw(data_width) << event_item.ave_time
                 << std::setw(data_width) << event_item.ratio << std::endl;
       PrintProfiler(child_table, child_map, sorted_domain, name_width,
-                    data_width, merge_thread, print_depth_next, 0);
+                    data_width, merge_thread, print_depth + 1, 0);
     }
   }
 }
 
 std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
     EventSortingKey sorted_by, std::string *domain) {
-  auto sorted_domain = *domain;
+  std::string sorted_domain;
   std::function<bool(const EventItem &, const EventItem &)> sorted_func;
   switch (sorted_by) {
     case EventSortingKey::kCalls:
@@ -471,6 +464,7 @@ std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
     default:
       sorted_domain = "event first end time";
   }
+  *domain = sorted_domain;
   return sorted_func;
 }
 
@@ -709,6 +703,45 @@ void ParseMemEvents(const std::vector<std::vector<MemEvent>> &events) {
   PrintMemProfiler(annotation_report, 55, 18);
 }
 
+void DealWithShowName() {
+  std::unordered_map<std::string, int> prefix_name;
+  std::vector<std::string> op_out_name;
+  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
+       ++it) {
+    for (auto &block : (*it)->event_blocks) {
+      for (auto &r : block) {
+        auto event_name = r.name();
+        size_t start = event_name.find('%', 0);
+        size_t end = event_name.find('%', start + 1);
+        std::string prefix_str = event_name.substr(0, start);
+        while (start != std::string::npos && end != std::string::npos) {
+          auto search_str = event_name.substr(start, end - start + 1);
+          auto it = find(op_out_name.begin(), op_out_name.end(), search_str);
+          std::string replace_str;
+          bool prefix_find = true;
+          if (prefix_name.find(prefix_str) == prefix_name.end()) {
+            prefix_find = false;
+            prefix_name[prefix_str] = 0;
+          }
+
+          if (it == op_out_name.end()) {
+            if (prefix_find)
+              prefix_name[prefix_str] = prefix_name[prefix_str] + 1;
+            op_out_name.push_back(search_str);
+          }
+          replace_str = std::to_string(prefix_name[prefix_str]);
+          event_name.replace(start, end - start + 1, replace_str);
+          start = start + 1;
+          start = event_name.find('%', start);
+          end = event_name.find('%', start + 1);
+          prefix_str = event_name.substr(0, start);
+        }
+        r.set_name(event_name);
+      }
+    }
+  }
+}
+
 void DisableProfiler(EventSortingKey sorted_key,
                      const std::string &profile_path) {
   SynchronizeAllDevice();
@@ -718,37 +751,17 @@ void DisableProfiler(EventSortingKey sorted_key,
   if (g_state == ProfilerState::kDisabled) return;
   // Mark the profiling stop.
   Mark("_stop_profiler_");
+  DealWithShowName();
 
   DeviceTracer *tracer = GetDeviceTracer();
   if (tracer->IsEnabled()) {
     tracer->Disable();
-    tracer->GenProfile(profile_path);
     tracer->GenEventKernelCudaElapsedTime();
+    tracer->GenProfile(profile_path);
   }
 
   std::vector<std::vector<Event>> all_events = GetAllEvents();
 
-  std::vector<std::string> op_name;
-  for (size_t i = 0; i < (all_events).size(); i++) {
-    for (size_t j = 0; j < (all_events)[i].size(); j++) {
-      std::string event_name = (all_events)[i][j].name();
-      size_t start = event_name.find('%', 0);
-      size_t end = event_name.rfind('%', event_name.length() - 1);
-      if (start == std::string::npos || end == std::string::npos) continue;
-      std::string search_str = event_name.substr(start, end - start + 1);
-      auto it = find(op_name.begin(), op_name.end(), search_str);
-      std::string replace_str;
-      if (it != op_name.end()) {
-        replace_str = std::to_string(std::distance(op_name.begin(), it));
-      } else {
-        replace_str = std::to_string(op_name.size());
-        op_name.push_back(search_str);
-      }
-      event_name.replace(start, end - start + 1, replace_str);
-      (all_events)[i][j].set_name(event_name);
-    }
-  }
-
   ParseEvents(all_events, true, sorted_key);
   ParseEvents(all_events, false, sorted_key);
   if (VLOG_IS_ON(5)) {
@@ -773,5 +786,30 @@ void SetProfileListener() {
 }
 int64_t ListenerId() { return profiler_lister_id; }
 
+std::string OpName(const framework::VariableNameMap &name_map,
+                   const std::string &type_name) {
+  if (platform::GetTracerOption() != platform::TracerOption::kAllOpDetail)
+    return "";
+
+  std::string ret = type_name + "%";
+  for (auto it = name_map.begin(); it != name_map.end(); it++) {
+    auto name_outputs = it->second;
+    if (!name_outputs.empty() &&
+        type_name.length() < name_outputs[0].length()) {
+      ret = ret + name_outputs[0];
+      break;
+    }
+  }
+  ret = ret + "%";
+
+  return ret;
+}
+
+void SetTracerOption(TracerOption option) {
+  std::lock_guard<std::mutex> l(profiler_mu);
+  g_tracer_option = option;
+}
+
+platform::TracerOption GetTracerOption() { return g_tracer_option; }
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h
index 4550b99a064a3950554ade662b768d1d147d729e..eb135475b5d1d34b9f814e81e25c8040c0905813 100644
--- a/paddle/fluid/platform/profiler.h
+++ b/paddle/fluid/platform/profiler.h
@@ -23,6 +23,7 @@ limitations under the License. */
 #include <unordered_set>
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/event.h"
 #include "paddle/fluid/platform/place.h"
@@ -39,6 +40,19 @@ enum class ProfilerState {
   kAll,       // Profile both CPU and GPU. (Currently experimental).
 };
 
+enum class RecordRole {
+  kOrdinary,  // only record op time with op type key
+  kInnerOp,   // record op detail time with op type key
+  kUniqueOp,  // record op detail time with op unique name key
+};
+
+// it is the flag to control to print the profiling result
+enum class TracerOption {
+  kDefault,      // print the different op type profiling result
+  kOpDetail,     // print the detail profiling result of different op type
+  kAllOpDetail,  // print the detail profiling result of different op name
+};
+
 void Mark(const std::string& name);
 
 void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
@@ -79,7 +93,8 @@ Event* PushEvent(const std::string& name);
 void PopEvent(const std::string& name);
 
 struct RecordEvent {
-  explicit RecordEvent(const std::string& name);
+  RecordEvent(const std::string& name,
+              const RecordRole r_type = RecordRole::kOrdinary);
 
   ~RecordEvent();
 
@@ -90,6 +105,7 @@ struct RecordEvent {
   // Need to distinguish name by op type, block_id, program_id and perhaps
   // different kernel invocations within an op.
   std::string full_name_;
+  RecordRole role_{RecordRole::kOrdinary};
 };
 
 class RecordRPCEvent {
@@ -181,6 +197,10 @@ bool ShouldSendProfileState();
 void SetProfileListener();
 int64_t ListenerId();
 
+std::string OpName(const framework::VariableNameMap& name_map,
+                   const std::string& type_name);
+void SetTracerOption(TracerOption option);
+platform::TracerOption GetTracerOption();
 #ifdef PADDLE_WITH_CUDA
 void DummyKernelAndEvent();
 #endif
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index fc35d887865c8490ab8a53bd60ce4e5ebd75c0a8..c080d9219fd6bed9edb7c3325adbffb27166e4bf 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -1555,6 +1555,12 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
 #endif
 
+  py::enum_<platform::TracerOption>(m, "TracerOption", py::arithmetic())
+      .value("kDefault", platform::TracerOption::kDefault)
+      .value("kOpDetail", platform::TracerOption::kOpDetail)
+      .value("kAllOpDetail", platform::TracerOption::kAllOpDetail)
+      .export_values();
+
   py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
       .value("kDisabled", platform::ProfilerState::kDisabled)
       .value("kCPU", platform::ProfilerState::kCPU)
@@ -1571,6 +1577,7 @@ All parameter, weight, gradient are variables in Paddle.
       .value("kAve", platform::EventSortingKey::kAve)
       .export_values();
 
+  m.def("set_tracer_option", platform::SetTracerOption);
   m.def("enable_profiler", platform::EnableProfiler);
   m.def("disable_profiler", platform::DisableProfiler);
   m.def("is_profiler_enabled", platform::IsProfileEnabled);
diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py
index 82b29e25fde11ed488440fb1486b954144ea7cc7..c0b0c86ecd2b30be82d46cf3af7bcd13045cff6e 100644
--- a/python/paddle/fluid/profiler.py
+++ b/python/paddle/fluid/profiler.py
@@ -126,7 +126,7 @@ def reset_profiler():
     core.reset_profiler()
 
 
-def start_profiler(state):
+def start_profiler(state, tracer_option='Default'):
     """
     Enable the profiler. Uers can use `fluid.profiler.start_profiler` and
     `fluid.profiler.stop_profiler` to profile, which is equal to the usage 
@@ -137,9 +137,15 @@ def start_profiler(state):
             or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
             both CPU and GPU; 'All' means profiling both CPU and GPU, and 
             generates timeline as well.
+        tracer_option (str) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
+            can control the profile level and print the different level profile result. Default option print 
+            the different Op type profiling result and the OpDetail option print the detail profiling 
+            result of different op types such as compute and data transform, AllOpDetail option 
+            print the detail profiling result of different op name same as OpDetail.
 
     Raises:
         ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
+        ValueError: If `tracer_option` is not in ['Default', 'OpDetail', 'AllOpDetail']
 
     Examples:
 
@@ -154,6 +160,13 @@ def start_profiler(state):
                     profiler.reset_profiler()
                 # except each iteration
             profiler.stop_profiler('total', '/tmp/profile')
+            
+            profiler.start_profiler('GPU', "OpDetail")
+            for iter in range(10):
+                if iter == 2:
+                    profiler.reset_profiler()
+                # except each iteration
+            profiler.stop_profiler('total', '/tmp/profile')
     """
     if core.is_profiler_enabled():
         return
@@ -165,6 +178,18 @@ def start_profiler(state):
         prof_state = core.ProfilerState.kCPU
     else:
         prof_state = core.ProfilerState.kAll
+
+    if tracer_option not in ['Default', 'OpDetail', 'AllOpDetail']:
+        raise ValueError(
+            "tracer option must be 'Default', 'OpDetail', 'AllOpDetail'.")
+    if tracer_option == "Default":
+        prof_tracer_option = core.TracerOption.kDefault
+    elif tracer_option == "OpDetail":
+        prof_tracer_option = core.TracerOption.kOpDetail
+    else:
+        prof_tracer_option = core.TracerOption.kAllOpDetail
+
+    core.set_tracer_option(prof_tracer_option)
     core.enable_profiler(prof_state)
 
 
@@ -184,8 +209,8 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
             The `max` means sorting by the maximum execution time.
             The `min` means sorting by the minimum execution time.
             The `ave` means sorting by the average execution time.
-        profile_path (str, optional) : If state == 'All', it will generate timeline,
             and write it into `profile_path`. The default profile_path is `/tmp/profile`. 
+        profile_path (str, optional) : If state == 'All', it will generate timeline,
 
     Raises:
         ValueError: If `sorted_key` is not in
@@ -225,7 +250,10 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
 
 
 @signature_safe_contextmanager
-def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
+def profiler(state,
+             sorted_key=None,
+             profile_path='/tmp/profile',
+             tracer_option='Default'):
     """
     The profiler interface. Different from `fluid.profiler.cuda_profiler`, 
     this profiler can be used to profile both CPU and GPU program.
@@ -246,6 +274,11 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             The `ave` means sorting by the average execution time.
         profile_path (str, optional) : If state == 'All', it will generate timeline,
             and write it into `profile_path`. The default profile_path is `/tmp/profile`. 
+        tracer_option (str) : tracer_option can be one of ['Default', 'OpDetail', 'AllOpDetail'], it
+            can control the profile level and print the different level profile result. Default option print 
+            the different Op type profiling result and the OpDetail option print the detail profiling 
+            result of different op types such as compute and data transform, AllOpDetail option 
+            print the detail profiling result of different op name same as OpDetail.
 
     Raises:
         ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
@@ -268,7 +301,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             exe = fluid.Executor(place)
             exe.run(fluid.default_startup_program())
 
-            with profiler.profiler('CPU', 'total', '/tmp/profile') as prof:
+            with profiler.profiler('CPU', 'total', '/tmp/profile', 'Default') as prof:
                 for i in range(epoc):
                     input = np.random.random(dshape).astype('float32')
                     exe.run(fluid.default_main_program(), feed={'data': input})
@@ -311,6 +344,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             thread0::conv2d             8           7.93456     0.291385    5.63342     0.99182     0.795243
             thread0::elementwise_add    8           1.96555     0.191884    0.518004    0.245693    0.196998
     """
-    start_profiler(state)
+    start_profiler(state, tracer_option)
     yield
     stop_profiler(sorted_key, profile_path)
diff --git a/python/paddle/fluid/tests/unittests/test_profiler.py b/python/paddle/fluid/tests/unittests/test_profiler.py
index 367b60831c5b1d0397b7729acf078513bb074299..2290f38e2f1ebcb064ecc9d4b0d86b7988949af3 100644
--- a/python/paddle/fluid/tests/unittests/test_profiler.py
+++ b/python/paddle/fluid/tests/unittests/test_profiler.py
@@ -22,6 +22,7 @@ import paddle.fluid as fluid
 import paddle.fluid.profiler as profiler
 import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+from paddle.fluid import compiler, Program, program_guard
 import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
 
 
@@ -30,7 +31,11 @@ class TestProfiler(unittest.TestCase):
     def setUpClass(cls):
         os.environ['CPU_NUM'] = str(4)
 
-    def net_profiler(self, state, use_parallel_executor=False):
+    def net_profiler(self,
+                     state,
+                     option,
+                     iter_range=None,
+                     use_parallel_executor=False):
         profile_path = os.path.join(tempfile.gettempdir(), "profile")
         open(profile_path, "w").write("")
         startup_program = fluid.Program()
@@ -75,7 +80,7 @@ class TestProfiler(unittest.TestCase):
                 main_program=main_program)
 
         pass_acc_calculator = fluid.average.WeightedAverage()
-        with profiler.profiler(state, 'total', profile_path) as prof:
+        with profiler.profiler(state, 'total', profile_path, option) as prof:
             for iter in range(10):
                 if iter == 2:
                     profiler.reset_profiler()
@@ -94,36 +99,37 @@ class TestProfiler(unittest.TestCase):
                 pass_acc_calculator.add(value=acc, weight=b_size)
                 pass_acc = pass_acc_calculator.eval()
         data = open(profile_path, 'rb').read()
-        self.assertGreater(len(data), 0)
-        profile_pb = profiler_pb2.Profile()
-        profile_pb.ParseFromString(data)
-        self.assertGreater(len(profile_pb.events), 0)
-        for event in profile_pb.events:
-            if event.type == profiler_pb2.Event.GPUKernel:
-                if not event.detail_info and not event.name.startswith("MEM"):
-                    raise Exception(
-                        "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
-                        % event.name)
-            elif event.type == profiler_pb2.Event.CPU and (
-                    event.name.startswith("Driver API") or
-                    event.name.startswith("Runtime API")):
-                print("Warning: unregister", event.name)
+        if (len(data) > 0):
+            profile_pb = profiler_pb2.Profile()
+            profile_pb.ParseFromString(data)
+            self.assertGreater(len(profile_pb.events), 0)
+            for event in profile_pb.events:
+                if event.type == profiler_pb2.Event.GPUKernel:
+                    if not event.detail_info and not event.name.startswith(
+                            "MEM"):
+                        raise Exception(
+                            "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
+                            % event.name)
+                elif event.type == profiler_pb2.Event.CPU and (
+                        event.name.startswith("Driver API") or
+                        event.name.startswith("Runtime API")):
+                    print("Warning: unregister", event.name)
 
     def test_cpu_profiler(self):
-        self.net_profiler('CPU')
-        self.net_profiler('CPU', use_parallel_executor=True)
+        self.net_profiler('CPU', "Default")
+        self.net_profiler('CPU', "Default", use_parallel_executor=True)
 
     @unittest.skipIf(not core.is_compiled_with_cuda(),
                      "profiler is enabled only with GPU")
     def test_cuda_profiler(self):
-        self.net_profiler('GPU')
-        self.net_profiler('GPU', use_parallel_executor=True)
+        self.net_profiler('GPU', "OpDetail")
+        self.net_profiler('GPU', "OpDetail", use_parallel_executor=True)
 
     @unittest.skipIf(not core.is_compiled_with_cuda(),
                      "profiler is enabled only with GPU")
     def test_all_profiler(self):
-        self.net_profiler('All')
-        self.net_profiler('All', use_parallel_executor=True)
+        self.net_profiler('All', "AllOpDetail")
+        self.net_profiler('All', "AllOpDetail", use_parallel_executor=True)
 
 
 if __name__ == '__main__':