diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py
index bbf2a4377c767f5b0e684bcb7917213e993b85db..430578db510226cc27b0f6ffdacad11fa1268a3f 100644
--- a/python/paddle/fluid/dataloader/dataloader_iter.py
+++ b/python/paddle/fluid/dataloader/dataloader_iter.py
@@ -31,6 +31,7 @@ import queue
 
 import paddle
 import paddle.profiler as profiler
+from paddle.profiler.utils import in_profiler_mode
 from .. import core, layers
 from ..framework import _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
 from ..multiprocess_utils import _set_SIGCHLD_handler, MP_STATUS_CHECK_INTERVAL, CleanupFuncRegistrar
@@ -252,10 +253,11 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
         self._exit_thread_expectedly()
 
     def __next__(self):
-        trace_event = profiler.RecordEvent(
-            name="_DataLoaderIterSingleProcess",
-            event_type=profiler.TracerEventType.Dataloader)
-        trace_event.begin()
+        if in_profiler_mode():
+            trace_event = profiler.RecordEvent(
+                name="_DataLoaderIterSingleProcess",
+                event_type=profiler.TracerEventType.Dataloader)
+            trace_event.begin()
         try:
             benchmark().check_if_need_record(self)
             benchmark().before_reader()
@@ -294,7 +296,8 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
             self._try_shutdown_all()
             six.reraise(*sys.exc_info())
         finally:
-            trace_event.end()
+            if in_profiler_mode():
+                trace_event.end()
 
     def _shutdown_thread(self):
         if self._thread:
@@ -708,10 +711,11 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
         self._try_shutdown_all(1)
 
     def __next__(self):
-        trace_event = profiler.RecordEvent(
-            name="_DataLoaderIterMultiProcess",
-            event_type=profiler.TracerEventType.Dataloader)
-        trace_event.begin()
+        if in_profiler_mode():
+            trace_event = profiler.RecordEvent(
+                name="_DataLoaderIterMultiProcess",
+                event_type=profiler.TracerEventType.Dataloader)
+            trace_event.begin()
         try:
             benchmark().check_if_need_record(self)
             benchmark().before_reader()
@@ -765,7 +769,8 @@ class _DataLoaderIterMultiProcess(_DataLoaderIterBase):
                 self._try_shutdown_all()
             six.reraise(*sys.exc_info())
         finally:
-            trace_event.end()
+            if in_profiler_mode():
+                trace_event.end()
 
     # python2 compatibility
     def next(self):
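
Review note: both iterator classes now construct the Dataloader RecordEvent
only when a profiler is active, so the per-batch hot path no longer pays for
event creation when profiling is off. Below is a minimal standalone sketch of
the guard pattern; every name in it is an illustrative stand-in, not a Paddle
API. The finally block re-checks the flag, which is safe only because the
profiler state cannot flip inside a single __next__ call; otherwise
trace_event could be unbound at end():

_profiler_on = False  # stand-in for paddle.profiler.utils._is_profiler_used

def in_profiler_mode():
    return _profiler_on

class RecordEvent:
    # dummy event with the same begin()/end() surface as the real one
    def __init__(self, name):
        self.name = name
    def begin(self):
        print("begin", self.name)
    def end(self):
        print("end", self.name)

def next_batch(fetch):
    if in_profiler_mode():
        trace_event = RecordEvent("_DataLoaderIterSingleProcess")
        trace_event.begin()
    try:
        return fetch()
    finally:
        # same flag checked again; no event was created when it is False
        if in_profiler_mode():
            trace_event.end()

print(next_batch(lambda: "batch0"))  # profiling off: no events emitted
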
diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py
index 41c1a0aa5808e8007cd5d234cacc3f109c3e327d..088fed03c35957ea4a2040354537c8ec0ad86717 100644
--- a/python/paddle/fluid/dygraph/layers.py
+++ b/python/paddle/fluid/dygraph/layers.py
@@ -26,6 +26,7 @@ import inspect
 
 import paddle
 import paddle.profiler as profiler
+from paddle.profiler.utils import in_profiler_mode
 
 from . import parallel_helper
 from .. import unique_name
@@ -906,8 +907,11 @@ class Layer(object):
 
             self._built = True
 
-        with profiler.RecordEvent(self.full_name(),
-                                  profiler.TracerEventType.Forward):
+        if in_profiler_mode():
+            with profiler.RecordEvent(self.full_name(),
+                                      profiler.TracerEventType.Forward):
+                outputs = self.forward(*inputs, **kwargs)
+        else:
             outputs = self.forward(*inputs, **kwargs)
 
         for forward_post_hook in self._forward_post_hooks.values():
@@ -919,7 +923,7 @@ class Layer(object):
 
     def __call__(self, *inputs, **kwargs):
         if (not in_declarative_mode()) and (not self._forward_pre_hooks) \
-            and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode():
+            and (not self._forward_post_hooks) and (not self._built) and in_dygraph_mode() and (not in_profiler_mode()):
             self._build_once(*inputs, **kwargs)
             return self.forward(*inputs, **kwargs)
         else:
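
Review note: profiling now also disables the fast path in Layer.__call__, so
a live profiler forces dispatch through _dygraph_call_func and the Forward
RecordEvent wraps every forward() call. Below is an illustrative reduction of
that dispatch, a sketch rather than the real class; the in_dygraph_mode and
in_declarative_mode conditions are dropped for brevity:

_profiler_on = False  # stand-in for paddle.profiler.utils._is_profiler_used

class TinyLayer:
    def __init__(self):
        self._forward_pre_hooks = {}
        self._forward_post_hooks = {}
        self._built = True

    def forward(self, x):
        return x * 2

    def __call__(self, *inputs, **kwargs):
        if (not self._forward_pre_hooks and not self._forward_post_hooks
                and self._built and not _profiler_on):
            # fast path: no hooks, no profiler -> call forward() directly
            return self.forward(*inputs, **kwargs)
        # slow path: in Paddle this runs the hooks and, when profiling,
        # wraps forward() in RecordEvent(full_name, TracerEventType.Forward)
        return self.forward(*inputs, **kwargs)

print(TinyLayer()(3))  # 6, via the fast path

The duplicated outputs = self.forward(...) in the patched _dygraph_call_func
above is the same trade: the with-statement, and the RecordEvent it
constructs, only exists when a profiler is running.
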
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index db6af87635ccb12da3bd30118cce72d6dc066cd8..a93facbc34a5ba197eb1b0291cd8212d2dcafc9f 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -30,6 +30,7 @@ from .parallel import scale_loss
 from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
 import paddle.utils.deprecated as deprecated
 import paddle.profiler as profiler
+from paddle.profiler.utils import in_profiler_mode
 from paddle import _C_ops
 
 _grad_scalar = None
@@ -247,9 +248,10 @@ def monkey_patch_varbase():
 
         """
         if framework._non_static_mode():
-            record_event = profiler.RecordEvent(
-                "Gradient Backward", profiler.TracerEventType.Backward)
-            record_event.begin()
+            if in_profiler_mode():
+                record_event = profiler.RecordEvent(
+                    "Gradient Backward", profiler.TracerEventType.Backward)
+                record_event.begin()
             if grad_tensor is not None:
                 if framework._in_eager_mode_:
                     assert isinstance(
@@ -289,7 +291,8 @@ def monkey_patch_varbase():
                     core.dygraph_run_backward([self], [grad_tensor],
                                               retain_graph,
                                               framework._dygraph_tracer())
-            record_event.end()
+            if in_profiler_mode():
+                record_event.end()
         else:
             raise ValueError(
                 "Variable.backward() is only available in DyGraph mode")
diff --git a/python/paddle/fluid/tests/unittests/test_newprofiler.py b/python/paddle/fluid/tests/unittests/test_newprofiler.py
index ae804f82b90f70f08833cb3306f7b02531c051a9..53ade0dfb79c120e2e8d02f3c26fbf6b278c9f9f 100755
--- a/python/paddle/fluid/tests/unittests/test_newprofiler.py
+++ b/python/paddle/fluid/tests/unittests/test_newprofiler.py
@@ -134,6 +134,42 @@ class TestProfiler(unittest.TestCase):
         prof.export(path='./test_profiler_pb.pb', format='pb')
         prof.summary()
         result = profiler.utils.load_profiler_result('./test_profiler_pb.pb')
+        prof = None
+        dataset = RandomDataset(10 * 4)
+        simple_net = SimpleNet()
+        opt = paddle.optimizer.SGD(learning_rate=1e-3,
+                                   parameters=simple_net.parameters())
+        loader = DataLoader(
+            dataset, batch_size=4, shuffle=True, drop_last=True, num_workers=2)
+        prof = profiler.Profiler(on_trace_ready=lambda prof: None)
+        prof.start()
+        for i, (image, label) in enumerate(loader()):
+            out = simple_net(image)
+            loss = F.cross_entropy(out, label)
+            avg_loss = paddle.mean(loss)
+            avg_loss.backward()
+            opt.minimize(avg_loss)
+            simple_net.clear_gradients()
+            prof.step()
+        prof.stop()
+        prof.summary()
+        prof = None
+        dataset = RandomDataset(10 * 4)
+        simple_net = SimpleNet()
+        loader = DataLoader(dataset, batch_size=4, shuffle=True, drop_last=True)
+        opt = paddle.optimizer.Adam(
+            learning_rate=1e-3, parameters=simple_net.parameters())
+        prof = profiler.Profiler(on_trace_ready=lambda prof: None)
+        prof.start()
+        for i, (image, label) in enumerate(loader()):
+            out = simple_net(image)
+            loss = F.cross_entropy(out, label)
+            avg_loss = paddle.mean(loss)
+            avg_loss.backward()
+            opt.step()
+            simple_net.clear_gradients()
+            prof.step()
+        prof.stop()
 
 
 class TestNvprof(unittest.TestCase):
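
Review note: the two new test blocks drive both DataLoader paths
(num_workers=2 exercises _DataLoaderIterMultiProcess, the default exercises
the single-process iterator) and both optimizer entry points (minimize and
the wrapped step), all under an active Profiler, so every guarded branch
above runs with in_profiler_mode() returning True. A hedged snippet for
running just this case, assuming the working directory contains the test
module named in the diff header:

import unittest

suite = unittest.defaultTestLoader.loadTestsFromName(
    "test_newprofiler.TestProfiler")
unittest.TextTestRunner(verbosity=2).run(suite)
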
diff --git a/python/paddle/profiler/utils.py b/python/paddle/profiler/utils.py
index 6ae3fe4e60b927803e8b4df5affaac6f6d5f3801..fba1aeabf28bd29307be389c1e56e09b32700997 100644
--- a/python/paddle/profiler/utils.py
+++ b/python/paddle/profiler/utils.py
@@ -21,6 +21,7 @@ from paddle.fluid import core
 from paddle.fluid.core import (_RecordEvent, TracerEventType)
 
 _is_profiler_used = False
+_has_optimizer_wrapped = False
 
 _AllowedEventTypeList = [
     TracerEventType.Dataloader, TracerEventType.ProfileStep,
@@ -154,20 +155,31 @@ def load_profiler_result(filename: str):
     return core.load_profiler_result(filename)
 
 
+def in_profiler_mode():
+    return _is_profiler_used
+
+
 def wrap_optimizers():
     def optimizer_warpper(func):
         @functools.wraps(func)
         def warpper(*args, **kwargs):
-            with RecordEvent(
-                    'Optimization Step',
-                    event_type=TracerEventType.Optimization):
+            if in_profiler_mode():
+                with RecordEvent(
+                        'Optimization Step',
+                        event_type=TracerEventType.Optimization):
+                    return func(*args, **kwargs)
+            else:
                 return func(*args, **kwargs)
 
         return warpper
 
+    global _has_optimizer_wrapped
+    if _has_optimizer_wrapped:
+        return
     import paddle.optimizer as optimizer
     for classname in optimizer.__all__:
         if classname != 'Optimizer':
             classobject = getattr(optimizer, classname)
             if getattr(classobject, 'step', None) != None:
                 classobject.step = optimizer_warpper(classobject.step)
+    _has_optimizer_wrapped = True
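
Review note: wrap_optimizers() is now idempotent; without the
_has_optimizer_wrapped flag, every call would stack another wrapper around
each optimizer's step(). A standalone sketch of that guard, with illustrative
names rather than Paddle's:

import functools

_has_wrapped = False

class Optimizer:
    def step(self):
        return "stepped"

def _wrap(func):
    @functools.wraps(func)
    def wrapper(self):
        # in the patch, RecordEvent('Optimization Step', ...) is entered
        # here only when in_profiler_mode() is True
        return func(self)
    return wrapper

def wrap_optimizers():
    global _has_wrapped
    if _has_wrapped:
        return  # already patched once; avoid double wrapping
    Optimizer.step = _wrap(Optimizer.step)
    _has_wrapped = True

wrap_optimizers()
wrap_optimizers()  # second call is a no-op
assert Optimizer().step() == "stepped"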