From e468e93c0f885209efdeddaa037bd8d409a866b5 Mon Sep 17 00:00:00 2001 From: Jiabin Yang <360788950@qq.com> Date: Mon, 19 Sep 2022 13:13:53 +0800 Subject: [PATCH] [Eager] Optimize log (#45783) (#46133) * make eager log readable * fix compile error * recover test * invoke ci again --- .../eager/accumulation/accumulation_node.cc | 18 +- .../eager/accumulation/accumulation_node.h | 6 +- .../manual/eager_manual/dygraph_forward_api.h | 4 +- .../eager_manual/forwards/add_n_fwd_func.cc | 6 +- .../forwards/conv2d_fwd_function.cc | 48 ++--- .../manual/eager_manual/nodes/add_n_node.cc | 3 +- .../forwards/fused_attention_fwd_func.cc | 1 - .../generator/codegen_utils.py | 19 +- .../generator/eager_gen.py | 125 +++++++++--- paddle/fluid/eager/backward.cc | 38 ++-- paddle/fluid/eager/eager_amp_auto_cast.h | 4 +- paddle/fluid/eager/eager_layout_transformer.h | 2 +- paddle/fluid/eager/grad_node_info.cc | 20 +- paddle/fluid/eager/grad_node_info.h | 10 +- paddle/fluid/eager/grad_tensor_holder.cc | 4 +- paddle/fluid/eager/tensor_wrapper.h | 10 +- .../performance_tests/benchmark_utils.cc | 2 +- .../eager/to_static/run_program_op_func.h | 2 +- paddle/fluid/eager/utils.cc | 4 +- paddle/fluid/eager/utils.h | 179 ++++++++++++++++++ paddle/fluid/pybind/eager_custom_python_api.h | 6 +- .../pybind/eager_legacy_custom_python_api.h | 2 +- paddle/fluid/pybind/eager_method.cc | 18 +- 23 files changed, 403 insertions(+), 128 deletions(-) diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 0017dba7974..12bbfbbb25d 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -16,6 +16,7 @@ #include "glog/logging.h" #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/eager/utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" @@ -89,7 
+90,7 @@ GradNodeAccumulation::operator()( kSlotSmallVectorSize>& grads, // NOLINT bool create_graph, bool is_new_grad) { - VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation"; + VLOG(3) << "Running AD API Grad: GradNodeAccumulation"; PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( "GradNodeAccumulation should take exactly 1 grad tensor" @@ -122,7 +123,23 @@ GradNodeAccumulation::operator()( if (ReduceHooksRegistered()) { ApplyReduceHooks(); } + VLOG(3) << "Finish AD API Grad: GradNodeAccumulation"; + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], Output: [%s] } "; + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_OUT_GRAD_TEMPLATE = "(grads[0][0], [%s]), "; + std::string input_out_grad_str = paddle::string::Sprintf( + TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grads[0][0])); + input_str += input_out_grad_str; + const char* TENSOR_X_GRAD_TEMPLATE = "(grad_out, [%s]), "; + std::string output_x_grad_str = paddle::string::Sprintf( + TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_out)); + output_str += output_x_grad_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } return {{grad_out}}; } diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index 8dbc2872ca2..f8e2c4327e1 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -24,7 +24,7 @@ class GradNodeAccumulation : public GradNodeBase { public: // Constructor: configure fwd input tensors to grad node explicit GradNodeAccumulation(AutogradMeta* meta) : GradNodeBase(1, 1) { - VLOG(6) << "Construct GradNodeAccumulation"; + VLOG(5) << "Construct GradNodeAccumulation"; if (meta) { weak_grad_ = meta->WeakGrad(); } @@ -33,7 +33,7 @@ class GradNodeAccumulation : public GradNodeBase { } ~GradNodeAccumulation() override { - VLOG(6) << "Destruct GradNodeAccumulation"; + VLOG(5)
<< "Destruct GradNodeAccumulation"; } // Functor: perform backward computations @@ -44,7 +44,7 @@ class GradNodeAccumulation : public GradNodeBase { bool create_graph = false, bool is_new_grad = false) override; - void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } + void ClearTensorWrappers() override { VLOG(5) << "Do nothing here now"; } std::string name() { return "GradNodeAccumulation"; } diff --git a/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h index 49d401b9230..bc970f4e2d8 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h +++ b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h @@ -16,10 +16,10 @@ #include "paddle/phi/api/include/tensor.h" -paddle::experimental::Tensor add_n_dygraph_function( +paddle::experimental::Tensor add_n_ad_func( const std::vector& x); -paddle::experimental::Tensor conv2d_dygraph_function( +paddle::experimental::Tensor conv2d_ad_func( const paddle::experimental::Tensor& input, const paddle::experimental::Tensor& filter, std::vector strides, diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc index 3081eaf3584..fc423402113 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc @@ -23,7 +23,7 @@ #pragma GCC diagnostic ignored "-Wunused-variable" DECLARE_bool(check_nan_inf); -paddle::experimental::Tensor add_n_dygraph_function( +paddle::experimental::Tensor add_n_ad_func( const std::vector& x) { // Dygraph Record Event paddle::platform::RecordEvent dygraph_entrance_record_event( @@ -46,7 +46,7 @@ paddle::experimental::Tensor add_n_dygraph_function( paddle::imperative::AutoCastGuard guard( egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0); - return 
add_n_dygraph_function(NEW_x); + return add_n_ad_func(NEW_x); } } @@ -56,7 +56,7 @@ paddle::experimental::Tensor add_n_dygraph_function( std::vector* x_autograd_meta = &x_autograd_meta_vec; // Forward API Call VLOG(3) << "Final State Running: " - << "add_n_dygraph_function"; + << "add_n_ad_func"; auto api_result = paddle::experimental::add_n(x); // Check NaN and Inf if needed if (FLAGS_check_nan_inf) { diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc index 3e2e6729783..5e221d3f07f 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc @@ -24,7 +24,7 @@ #pragma GCC diagnostic ignored "-Wunused-variable" DECLARE_bool(check_nan_inf); -paddle::experimental::Tensor conv2d_dygraph_function( +paddle::experimental::Tensor conv2d_ad_func( const paddle::experimental::Tensor& input, const paddle::experimental::Tensor& filter, std::vector strides, @@ -60,17 +60,17 @@ paddle::experimental::Tensor conv2d_dygraph_function( paddle::imperative::AutoCastGuard guard( egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0); - return conv2d_dygraph_function(NEW_input, - NEW_filter, - strides, - paddings, - paddding_algorithm, - groups, - dilations, - data_format, - use_addto, - workspace_size_MB, - exhaustive_search); + return conv2d_ad_func(NEW_input, + NEW_filter, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search); } } @@ -89,17 +89,17 @@ paddle::experimental::Tensor conv2d_dygraph_function( bool is_enable_tune = paddle::imperative::LayoutAutoTune::Instance().UseLayoutAutoTune(); paddle::imperative::LayoutAutoTune::Instance().DisableLayoutAutoTune(); - auto out = conv2d_dygraph_function(NEW_input, - filter, - strides, - paddings, - 
paddding_algorithm, - groups, - dilations, - data_format, - use_addto, - workspace_size_MB, - exhaustive_search); + auto out = conv2d_ad_func(NEW_input, + filter, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search); transformer->SetOutTensorLayout(&out); if (is_enable_tune) { paddle::imperative::LayoutAutoTune::Instance().EnableLayoutAutoTune(); @@ -115,7 +115,7 @@ paddle::experimental::Tensor conv2d_dygraph_function( egr::EagerUtils::nullable_autograd_meta(filter); // Forward API Call VLOG(3) << "Final State Running: " - << "conv2d_dygraph_function"; + << "conv2d_ad_func"; auto api_result = paddle::experimental::conv2d(input, filter, strides, diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc index b0dc4f59ffd..6f7a34094b1 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc @@ -64,8 +64,7 @@ AddNGradNodeFinal::operator()( // dygraph function for (size_t i = 0; i < returns[0].size(); i++) { - returns[0][i] = - ::scale_dygraph_function(out_grad, phi::Scalar(1.0), 0.0, true); + returns[0][i] = ::scale_ad_func(out_grad, phi::Scalar(1.0), 0.0, true); } // Check NaN and Inf id needed diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc index ea1bc2271c1..d733dbf8b7c 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc @@ -531,7 +531,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_Y, grad_node); grad_node->SetGradInMeta(Y, 19); egr::EagerUtils::CheckAndRetainGrad(Y); - auto QKVOut_accumulation_node = 
std::make_shared(p_autograd_QKVOut); egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKVOut, 0); diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index 9022e800905..fcc66893a71 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -161,11 +161,24 @@ def GetGradNodeName(string): string = str2Hump(string) if string.rfind("Grad") == (len(string) - 4): string = string[:-4] - return f"{string}GradNodeFinal" + return f"{string}GradNode" def GetDygraphForwardFunctionName(string): - return f"{string}_dygraph_function" + return f"{string}_ad_func" + + +def GetDygraphLogName(string): + + def str2Hump(text): + arr = filter(None, text.split('_')) + res = '' + for i in arr: + res = res + i[0].upper() + i[1:] + return res + + string = str2Hump(string) + return string def GetIntermediateAPIFunctionName(string): @@ -198,7 +211,7 @@ def GetInplacedFunctionName(function_name): def GetForwardFunctionName(string): - return f"{string}_dygraph_function" + return f"{string}_ad_func" def GetIndent(num): diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 100dfd57405..d6375b5aff3 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -23,7 +23,7 @@ from codegen_utils import ReadFwdFile, ReadBwdFile from codegen_utils import FindGradName, FindForwardName, GetSavedName, GetGradNodeName from codegen_utils import IsPlainTensorType, IsVectorTensorType from codegen_utils import GetConstReference, RemoveConstAndReference -from codegen_utils import GetDygraphForwardFunctionName, GetIntermediateAPIFunctionName +from codegen_utils import GetDygraphForwardFunctionName, GetIntermediateAPIFunctionName, GetDygraphLogName from 
codegen_utils import GetAutoGradMetaName, GetAutoGradMetaVectorName from codegen_utils import RemoveSpecialSymbolsInName, RecoverBaseNameOfInplaceFunction from codegen_utils import GetInplacedFunctionName @@ -150,6 +150,7 @@ class {} : public egr::GradNodeBase {{ GRAD_FUNCTION_TEMPLATE = \ """ paddle::small_vector, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{ + VLOG(3) << \"Running AD API GRAD: \" << \"{}\"; // Fill Zero For GradIn Tensors {} // Apply Gradient Hooks @@ -166,7 +167,7 @@ paddle::small_vector, egr::kSlotSmallV // Inplace Strategy {} // Call grad_api function - VLOG(3) << \"Final State Running: {}\"; + VLOG(5) << \"Running C++ API: \" << \"{}\"; {} // Check NaN and Inf id needed {} @@ -174,6 +175,9 @@ paddle::small_vector, egr::kSlotSmallV {} // Create Grad Node {} + VLOG(4) << \"Finish AD API GRAD: {}"; + // LOG IF DEBUG + {} // Return {} }} @@ -182,6 +186,7 @@ paddle::small_vector, egr::kSlotSmallV FORWARD_FUNCTION_TEMPLATE = \ """ {} {}({}) {{ + VLOG(3) << \"Running AD API: \" << \"{}\"; // Dygraph Record Event {} // AMP Logic @@ -191,7 +196,7 @@ FORWARD_FUNCTION_TEMPLATE = \ // Get Input AutoGradMeta {} // Forward API Call - VLOG(3) << \"Final State Running: \" << \"{}\"; + VLOG(5) << \"Running C++ API: \" << \"{}\"; {} // Check NaN and Inf if needed {} @@ -206,15 +211,29 @@ FORWARD_FUNCTION_TEMPLATE = \ {}{} // Node Creation {} + + VLOG(4) << \"Finish AD API: {}"; + // LOG IF DEBUG + {} // Returns return {}; }} """ +LOG_PRINT_TEMPLATE = \ +""" + if(VLOG_IS_ON(4)){{ + const char* INPUT_PRINT_TEMPLATE = \"{{ Input: [%s], Output: [%s] }} \"; + {} + VLOG(4) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str, output_str); + }} +""" + FORWARD_ONLY_FUNCTION_TEMPLATE = \ """ {} {}({}) {{ + VLOG(3) << \"Running AD API: \" << \"{}\"; // Dygraph Record Event {} // AMP Logic @@ -222,11 +241,13 @@ FORWARD_ONLY_FUNCTION_TEMPLATE = \ // Layout autotune {} // 
Forward API Call - VLOG(3) << \"Final State Running: \" << \"{}\"; + VLOG(5) << \"Running C++ API: \" << \"{}\"; {} // Get Outputs {} - + VLOG(4) << \"Finish AD API: {}"; + // LOG IF DEBUG + {} // Returns return {}; }} @@ -867,7 +888,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): set_grad_out_meta_list.append(set_grad_out_meta) set_grad_out_meta_str = "\n".join(set_grad_out_meta_list) - # SetOutRank & SetHistory & SetGradInMeta & CheckAndRetainGrad + # SetOutRank & SetHistory & SetGradInMeta set_out_rank_list = [] set_history_list = [] set_grad_in_meta_list = [] @@ -885,7 +906,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): set_grad_in_meta = f"{indent}grad_node->SetGradInMeta({name}, {pos});" set_retain_grad = f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});" - set_out_rank_list.append(set_out_rank) set_history_list.append(set_history) set_grad_in_meta_list.append(set_grad_in_meta) @@ -1294,7 +1314,8 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): node_creation_str = self.node_creation_str dygraph_event_str = f"{indent}paddle::platform::RecordEvent dygraph_entrance_record_event(\"{forward_api_name} dygraph\", paddle::platform::TracerEventType::Operator, 1);\n" - forward_function_name = GetDygraphForwardFunctionName(forward_api_name) + forward_ad_function_name = GetDygraphForwardFunctionName( + forward_api_name) # Forward amp logic kernel_trans2_op_name_str = f"auto op_name = phi::TransToFluidOpName(\"{forward_api_name}\");" @@ -1307,9 +1328,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_autocast_list) + " " + " ".join( amp_autocast_optional_list) amp_inputs_call_args_str = ", ".join(amp_inputs_call_list) - amp_call_str = f"return {forward_function_name}({amp_inputs_call_args_str});" + amp_call_str = f"return {forward_ad_function_name}({amp_inputs_call_args_str});" if is_inplaced or (forward_api_name == "cast"): - amp_logic_str = "" + amp_logic_str = "\n VLOG(5) << \" No 
AMP for {} because it is a inplace or cast api. \"; ".format( + forward_ad_function_name) else: amp_logic_str = AMP_LOGIC_TEMPLATE.format( kernel_trans2_op_name_str, amp_tensors_vector_list_str, @@ -1335,8 +1357,8 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): layout_autotune_attr) == 0: layout_logic_str = "" else: - # after_call_str = f"return {forward_function_name}({layout_inputs_call_args_str});\n" - after_call_str = f"auto api_result = {forward_function_name}({layout_inputs_call_args_str});\n" + # after_call_str = f"return {forward_ad_function_name}({layout_inputs_call_args_str});\n" + after_call_str = f"auto api_result = {forward_ad_function_name}({layout_inputs_call_args_str});\n" layout_logic_str = LAYOUT_LOGIC_TEMPLATE.format( amp_tensors_vector_list_str, " ".join(layout_tensors_vector_optional_list), @@ -1345,26 +1367,45 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): " ".join(layout_autotune_optional_list), after_call_str, layout_autotune_outs_list, returns_str) + # For inputs outputs prepare for logging + var_str = f"\n{indent} std::string input_str = \"\";" + var_str += f"\n{indent} std::string output_str = \"\";" + for name, (ttype, pos) in forward_inputs_position_map.items(): + var_str += f"\n{indent} const char* TENSOR_{name.upper()}_TEMPLATE = \"({name}, [%s]), \";" + var_str += f"\n{indent} std::string input_{name}_str = paddle::string::Sprintf(TENSOR_{name.upper()}_TEMPLATE, egr::EagerUtils::TensorStr({name}));" + var_str += f"\n{indent} input_str += input_{name}_str; " + for name, (ttype, pos) in forward_outputs_position_map.items(): + var_str += f"\n{indent} const char* TENSOR_{name.upper()}_TEMPLATE = \"({name}, [%s]), \";" + var_str += f"\n{indent} std::string output_{name}_str = paddle::string::Sprintf(TENSOR_{name.upper()}_TEMPLATE, egr::EagerUtils::TensorStr({name}));" + var_str += f"\n{indent} output_str += output_{name}_str; " + + log_str = LOG_PRINT_TEMPLATE.format(var_str) + # Generate 
forward_definition_str and forward_declaration_str if self.is_forward_only: if len(amp_tensors_vector_list) == 0: - amp_logic_str = "" + amp_logic_str = "\n VLOG(7) << \" No AMP for {} because it has no input. \"; ".format( + forward_ad_function_name) self.forward_definition_str += FORWARD_ONLY_FUNCTION_TEMPLATE.format( - returns_type_str, forward_function_name, - inputs_args_definition_str, dygraph_event_str, amp_logic_str, - layout_logic_str, forward_function_name, forward_call_str, - get_outputs_str, returns_str) + returns_type_str, + forward_ad_function_name, inputs_args_definition_str, + GetDygraphLogName(forward_api_name), dygraph_event_str, + amp_logic_str, layout_logic_str, forward_api_name, + forward_call_str, get_outputs_str, forward_ad_function_name, + log_str, returns_str) else: self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format( - returns_type_str, forward_function_name, - inputs_args_definition_str, dygraph_event_str, amp_logic_str, - layout_logic_str, inputs_autograd_meta_str, - forward_function_name, forward_call_str, check_nan_inf_str, + returns_type_str, + forward_ad_function_name, inputs_args_definition_str, + GetDygraphLogName(forward_api_name), dygraph_event_str, + amp_logic_str, layout_logic_str, inputs_autograd_meta_str, + forward_api_name, forward_call_str, check_nan_inf_str, get_outputs_str, outputs_autograd_meta_str, compute_require_grad_args_str, check_inplace_str, - bump_inplace_version_str, node_creation_str, returns_str) + bump_inplace_version_str, node_creation_str, + forward_ad_function_name, log_str, returns_str) - self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n" + self.forward_declaration_str += f"{returns_type_str} {forward_ad_function_name}({inputs_args_declaration_str});\n" def GenerateInplacedForwardDygraphFunctions(self): # Inplaced Version Dygraph Function Generation @@ -1770,7 +1811,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): 
forward_api_name = self.grad_api_contents['invoke'].split( '(')[0].strip() autograd_api = self.grad_api_contents['invoke'].replace( - forward_api_name, forward_api_name + '_dygraph_function', 1) + forward_api_name, + GetDygraphForwardFunctionName(forward_api_name), 1) grad_function_call_str = f""" if (trace_backward) {{ {indent}{autograd_api_out} api_output = {autograd_api}; @@ -1839,13 +1881,40 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): returns_str += f"{indent}return returns;\n" grad_node_name = GetGradNodeName(self.backward_api_name) + # For inputs outputs prepare for logging + var_str = f"\n{indent} std::string input_str = \"\";" + var_str += f"\n{indent} std::string output_str = \"\";" + for name, (ttype, fwd_position, + grad_api_position) in backward_grad_inputs_map.items(): + new_name = self.TransformToNextGradName(name) + var_str += f"\n{indent} const char* TENSOR_{new_name.upper()}_TEMPLATE = \"({new_name}, [%s]), \";" + var_str += f"\n{indent} std::string input_{new_name}_str = paddle::string::Sprintf(TENSOR_{new_name.upper()}_TEMPLATE, egr::EagerUtils::TensorStr({new_name}));" + var_str += f"\n{indent} input_str += input_{new_name}_str; " + + for name, (backward_input_type, is_fwd_input, + grad_api_position), in backward_forward_inputs_map.items(): + new_name = self.TransformToNextGradName(name) + var_str += f"\n{indent} const char* TENSOR_{new_name.upper()}_TEMPLATE = \"({new_name}, [%s]), \";" + var_str += f"\n{indent} std::string input_{new_name}_str = paddle::string::Sprintf(TENSOR_{new_name.upper()}_TEMPLATE, egr::EagerUtils::TensorStr({new_name}));" + var_str += f"\n{indent} input_str += input_{new_name}_str; " + + for name, (ttype, fwd_position, + grad_api_position) in backward_grad_outputs_map.items(): + new_name = self.TransformToNextGradName(name) + var_str += f"\n{indent} const char* TENSOR_{new_name.upper()}_TEMPLATE = \"({new_name}, [%s]), \";" + var_str += f"\n{indent} std::string output_{new_name}_str = 
paddle::string::Sprintf(TENSOR_{new_name.upper()}_TEMPLATE, egr::EagerUtils::TensorStr({new_name}));" + var_str += f"\n{indent} output_str += output_{new_name}_str; " + + log_str = LOG_PRINT_TEMPLATE.format(var_str) self.node_definition_str = GRAD_FUNCTION_TEMPLATE.format( - grad_node_name, fill_zero_str, get_grad_in_args_str, - grad_function_prepare_str, compute_require_next_grad_str, - inplace_check_str, inplace_for_grad_outs_str, grad_node_name, + grad_node_name, GetDygraphLogName(self.backward_api_name), + fill_zero_str, get_grad_in_args_str, grad_function_prepare_str, + compute_require_next_grad_str, inplace_check_str, + inplace_for_grad_outs_str, self.backward_api_name, grad_function_call_str, check_nan_inf_str, - outputs_autograd_meta_str, next_grad_node_creation_str, returns_str) + outputs_autograd_meta_str, next_grad_node_creation_str, + GetDygraphLogName(self.backward_api_name), log_str, returns_str) def run(self): super().run() diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index b70ec78c759..04541d082c4 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -133,7 +133,7 @@ std::vector RunBackward( AutogradMeta* auto_grad_meta = EagerUtils::nullable_autograd_meta(tensor); if (auto_grad_meta == nullptr) { - VLOG(3) << "Skip auto grad since there is no grad op for var or loss is " + VLOG(5) << "Skip auto grad since there is no grad op for var or loss is " "stop_gradient=True: " << tensor.name(); continue; @@ -141,14 +141,14 @@ std::vector RunBackward( // Get grad input info from target tensors auto input_info = auto_grad_meta->OutRankInfo(); - VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first + VLOG(5) << "Out Rank of Tensor is slot: " << input_info.first << ", rank: " << input_info.second; // Get target GradNodeBase from target tensors auto shared_grad_node = auto_grad_meta->GetMutableGradNode(); if (shared_grad_node == nullptr || shared_grad_node.get() == nullptr || 
auto_grad_meta->StopGradient()) { - VLOG(3) << "Skip auto grad since there is no grad op for var or loss is " + VLOG(5) << "Skip auto grad since there is no grad op for var or loss is " "stop_gradient=True: " << tensor.name(); continue; @@ -169,7 +169,7 @@ std::vector RunBackward( // Prepare GradTensorHolder if (!node_input_buffers_dict.count(grad_node)) { - VLOG(6) << "Create Value for grad input tensor " << i + VLOG(5) << "Create Value for grad input tensor " << i << " of grad node: " << grad_node->name(); node_input_buffers_dict[grad_node] = std::make_unique(grad_node->InputMeta()); @@ -184,13 +184,13 @@ std::vector RunBackward( "grad_tensors should either have " "size = 0 or same size as tensors.")); // Feed given tensor if it's provided - VLOG(6) << "Fill grad input tensor " << i << "with give grad tensor"; + VLOG(3) << "Fill grad input tensor " << i << "with give grad tensor"; // Deep copy node_input_buffers_dict[grad_node]->CopyValueFromTensor( input_info.first, input_info.second, grad_tensors[i]); } else { - VLOG(6) << "Fill grad input tensor " << i << " with 1.0"; + VLOG(3) << "Fill grad input tensor " << i << " with 1.0"; // Initialize tensor with 1.0 // Forward Tensor "tensor" is passed to indicate tensortype, datatype and // dims @@ -210,12 +210,12 @@ std::vector RunBackward( inputs, no_grad_vars, orig_queue, &queue, node_input_buffers_dict); } - VLOG(6) << "Update In degree Map for backward"; + VLOG(5) << "Update In degree Map for backward"; // 3. Compute in_degree for each node std::unordered_map node_in_degree_map = getInDegreeMap(queue); - VLOG(3) << "Startup_ops's size is " << queue.size(); + VLOG(5) << "Startup_ops's size is " << queue.size(); /* --- Topological Visit --- */ // 1. Pop queue @@ -224,11 +224,10 @@ std::vector RunBackward( // |- node(grads) // |- Prepare for next node // 3. 
Update queue - VLOG(3) << "Run Backward"; while (!queue.empty()) { GradNodeBase* node = queue.front(); - VLOG(3) << "Running GradNode:" << node->name() << " addr:" << node; - + VLOG(3) << "Preparing GradNode:" << node->name() << " addr:" << node; + VLOG(4) << EagerUtils::GradNodeStr(*node); paddle::platform::RecordEvent node_record_event( std::string((*node).name()), paddle::platform::TracerEventType::Operator, @@ -255,7 +254,7 @@ std::vector RunBackward( // Check input EnforceGradNodeHasInput(node); - VLOG(6) << "Run Backward Kernel with GradTensorHolder."; + VLOG(7) << "Run Backward Kernel with GradTensorHolder."; // Run Pre Backward Node and get outputs paddle::small_vector, kSlotSmallVectorSize> @@ -269,7 +268,7 @@ std::vector RunBackward( // retain_grad or not if (!retain_graph) { - VLOG(6) + VLOG(3) << "retain_graph is false, need to clear the TensorWrapper of nodes."; node->ClearTensorWrappers(); } @@ -322,11 +321,11 @@ std::vector RunBackward( if ((!grad_output_tensor.defined() || !grad_output_tensor.initialized())) { - VLOG(6) << "We get grad_output_tensor with slot: " << i + VLOG(7) << "We get grad_output_tensor with slot: " << i << ", rank: " << j << " as uninitialized or undefined tensor"; } - VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i + VLOG(7) << "Get Edge and grad_output_tensor with slot: " << i << ", rank: " << j << " 's name is: " << grad_output_tensor.name(); @@ -335,12 +334,12 @@ std::vector RunBackward( const auto& input_meta = next_node->InputMeta(); auto grad_tensor_holder = std::make_unique(input_meta); - VLOG(6) << "Construct GradTensorHolder for grad node: " + VLOG(7) << "Construct GradTensorHolder for grad node: " << next_node->name(); node_input_buffers_dict[next_node] = std::move(grad_tensor_holder); } - VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first + VLOG(3) << "Sum grad inputs for edge slot: " << edge_rank.first << ", rank: " << edge_rank.second; 
node_input_buffers_dict[next_node]->add(edge_rank.first, @@ -350,7 +349,7 @@ std::vector RunBackward( // Update queue node_in_degree_map[next_node]--; - VLOG(6) << next_node->name() + VLOG(7) << next_node->name() << " ref_cnt is: " << node_in_degree_map[next_node]; PADDLE_ENFORCE( @@ -382,7 +381,7 @@ std::vector RunBackward( } } - VLOG(6) << "Run Backward Final hook size: " + VLOG(7) << "Run Backward Final hook size: " << egr::Controller::Instance().FinalBackwardHooks().size(); for (auto& hook : egr::Controller::Instance().FinalBackwardHooks()) { (*hook)(); @@ -390,6 +389,7 @@ egr::Controller::Instance().ClearFinalBackwardHooks(); + VLOG(3) << "Finish Backward"; if (!is_general_grad) return {}; return GeneralGrad::Instance().GetResults(inputs, allow_unused, create_graph); } void Backward( diff --git a/paddle/fluid/eager/eager_amp_auto_cast.h b/paddle/fluid/eager/eager_amp_auto_cast.h index 4ebc2860c59..118c8be8611 100644 --- a/paddle/fluid/eager/eager_amp_auto_cast.h +++ b/paddle/fluid/eager/eager_amp_auto_cast.h @@ -45,7 +45,7 @@ inline paddle::experimental::Tensor Cast( const bool trace_backward = true) { if (input.is_sparse_coo_tensor() || input.is_sparse_csr_tensor()) { if (trace_backward) { - return sparse::cast_dygraph_function( + return sparse::cast_ad_func( input, paddle::experimental::DataType::UNDEFINED, dst_dtype); } else { return paddle::experimental::sparse::cast( } } else { if (trace_backward) { - return cast_dygraph_function(input, dst_dtype); + return cast_ad_func(input, dst_dtype); } else { return paddle::experimental::cast(input, dst_dtype); } diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h index 3f2717be6be..d0cb9c48124 100644 --- a/paddle/fluid/eager/eager_layout_transformer.h +++ b/paddle/fluid/eager/eager_layout_transformer.h @@ -35,7 +35,7 @@ inline paddle::experimental::Tensor EagerTraceTransposeOp(
} else { axis = {0, 1, 2, 3}; } - auto out_tensor = transpose_dygraph_function(in, axis); + auto out_tensor = transpose_ad_func(in, axis); VLOG(4) << "AutoTune Transpose from " << paddle::framework::DataLayoutToString(in.layout()) << " to " << paddle::framework::DataLayoutToString(layout); diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index 0e102d0d1bc..afa8a6f2052 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -41,7 +41,7 @@ static void CheckTensor(const paddle::experimental::Tensor& pre, "The tensor in before and after hook are not consistent")); } if (pre.initialized() && post.initialized()) { - VLOG(4) << paddle::framework::DataType2String(pre.dtype()) << " " + VLOG(7) << paddle::framework::DataType2String(pre.dtype()) << " " << paddle::framework::DataType2String(post.dtype()); PADDLE_ENFORCE_EQ( pre.dtype(), @@ -62,7 +62,7 @@ static void CheckTensor(const paddle::experimental::Tensor& pre, } GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) { - VLOG(6) << "Construct GradNodeBase"; + VLOG(7) << "Construct GradNodeBase"; bwd_in_meta_.resize(bwd_in_slot_num); bwd_out_meta_.resize(bwd_out_slot_num); } @@ -84,7 +84,7 @@ GradNodeBase::MutableOutputMeta() { void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, size_t slot_rank) { - VLOG(6) << "Set GradSlotMeta for Grad Inputs"; + VLOG(7) << "Set GradSlotMeta for Grad Inputs"; auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out); PADDLE_ENFORCE_LE( slot_rank, @@ -104,7 +104,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, } if (!fwd_out.initialized()) { - VLOG(6) + VLOG(7) << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor"; return; } @@ -123,7 +123,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, static_cast(fwd_out.impl().get()); dense_tensor = 
csr_tensor->mutable_non_zero_elements(); } else { - VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " + VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " "non-DenseTensor argument."; } PADDLE_ENFORCE_NE( @@ -145,7 +145,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, void GradNodeBase::SetGradInMeta( const std::vector& fwd_out, size_t slot_rank) { - VLOG(6) << "Set GradSlotMeta for Grad Inputs"; + VLOG(7) << "Set GradSlotMeta for Grad Inputs"; size_t slot_size = fwd_out.size(); PADDLE_ENFORCE_LE( slot_rank, @@ -177,7 +177,7 @@ void GradNodeBase::SetGradInMeta( } if (!fwd_out_tensor.initialized()) { - VLOG(6) + VLOG(7) << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor"; return; } @@ -202,7 +202,7 @@ void GradNodeBase::SetGradInMeta( need_complex_to_real_ = true; } } else { - VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " + VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " "with non-DenseTensor argument."; } } @@ -260,7 +260,7 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, meta.SetPlace(fwd_in.place()); } } else { - VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " + VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " "non-DenseTensor argument."; } } @@ -319,7 +319,7 @@ void GradNodeBase::SetGradOutMeta( meta.SetPlace(fwd_in_tensor.place()); } } else { - VLOG(6) + VLOG(7) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " "non-DenseTensor argument."; } diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index ebe1f6cccf9..65044640146 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -74,7 +74,7 @@ class Edge { } void SetGradNode(const std::shared_ptr& node) { - VLOG(6) << "Reseting Edge's Grad Node"; + VLOG(7) << "Reseting Edge's Grad Node"; 
grad_node_ = node; } @@ -167,10 +167,10 @@ class GradSlotMeta { class GradNodeBase { public: - GradNodeBase() { VLOG(6) << "Construct GradNodeBase"; } + GradNodeBase() { VLOG(7) << "Construct GradNodeBase"; } GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num); // TODO(jiabin): Should we have other constructor here? - virtual ~GradNodeBase() { VLOG(6) << "Destruct GradNodeBase"; } + virtual ~GradNodeBase() { VLOG(7) << "Destruct GradNodeBase"; } /** * operator() designed to contian the real backward execution logic, it should @@ -255,14 +255,14 @@ class GradNodeBase { std::map>> GetGradientHookFuntions() { - VLOG(6) << "GetGradientHookFuntions "; + VLOG(7) << "GetGradientHookFuntions "; return gradient_hooks_; } void SetGradientHookFuntions( std::map>> hooks) { - VLOG(6) << "SetGradientHookFuntions "; + VLOG(7) << "SetGradientHookFuntions "; gradient_hooks_ = hooks; } diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index afd9e4ef865..14a8c26f9dc 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -143,7 +143,7 @@ void GradTensorHolder::add(size_t slot_id, if (t.is_dense_tensor()) { if (buffer_tensor.is_dense_tensor()) { if (create_graph || t.is_custom_device()) { - buffer_tensor = add_dygraph_function(t, buffer_tensor); + buffer_tensor = add_ad_func(t, buffer_tensor); } else { paddle::imperative::TensorAdd( t, &buffer_tensor); @@ -170,7 +170,7 @@ void GradTensorHolder::add(size_t slot_id, std::make_shared( buffer_sparse->non_zero_elements())); if (create_graph || t.is_custom_device()) { - buffer_values = add_dygraph_function(t_values, buffer_values); + buffer_values = add_ad_func(t_values, buffer_values); } else { paddle::imperative::TensorAdd( t_values, &buffer_values); diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index a6fd57ac6a4..e7994e38829 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ 
b/paddle/fluid/eager/tensor_wrapper.h @@ -100,10 +100,10 @@ class TensorWrapper { std::shared_ptr new_grad_node = weak_grad_node_.lock(); if (new_grad_node) { - VLOG(3) << "Recovered TensorWrapper with GradNode " + VLOG(7) << "Recovered TensorWrapper with GradNode " << new_grad_node->name() << " addr: " << new_grad_node.get(); } else { - VLOG(3) << "Recovered TensorWrapper with Empty GradNode"; + VLOG(7) << "Recovered TensorWrapper with Empty GradNode"; } auto* intermediate_autograd_meta = EagerUtils::nullable_autograd_meta(intermidiate_tensor_); @@ -129,7 +129,7 @@ class TensorWrapper { private: void check_inplace_version() { if (no_need_buffer_) { - VLOG(6) << "There's no need to check inplace_version because " + VLOG(7) << "There's no need to check inplace_version because " "no_need_buffer_ is true."; return; } @@ -154,10 +154,10 @@ class TensorWrapper { intermidiate_tensor_.name(), tensor_version, wrapper_version_snapshot)); - VLOG(6) << " The wrapper_version_snapshot of Tensor '" + VLOG(7) << " The wrapper_version_snapshot of Tensor '" << intermidiate_tensor_.name() << "' is [ " << wrapper_version_snapshot << " ]"; - VLOG(6) << " The tensor_version of Tensor '" + VLOG(7) << " The tensor_version of Tensor '" << intermidiate_tensor_.name() << "' is [ " << tensor_version << " ]"; } diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc index 144ceab1e49..515def46b64 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.cc @@ -77,7 +77,7 @@ void benchmark_eager_matmul(const paddle::experimental::Tensor& X, size_t max_num_runs = accuracy_check ? 
2 : max_num_benchmark_runs; for (size_t i = 0; i < max_num_runs; i++) { - input_tensor0 = matmul_dygraph_function(input_tensor0, Y, false, false); + input_tensor0 = matmul_ad_func(input_tensor0, Y, false, false); } std::vector target_tensors = {input_tensor0}; diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h index 33da489fd47..23ba88c8898 100644 --- a/paddle/fluid/eager/to_static/run_program_op_func.h +++ b/paddle/fluid/eager/to_static/run_program_op_func.h @@ -54,7 +54,7 @@ static void clear_no_grad_edges_with_partial_block( } } -inline void run_program_dygraph_function( +inline void run_program_ad_func( const std::vector& x, const std::vector& params, std::vector& out, // NOLINT diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 33e2c84099e..777929bbc75 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -296,7 +296,7 @@ void EagerUtils::HandleViewBetweenInputAndOutput( view_output_dense_tensor->ShareInplaceVersionCounterWith( *input_dense_tensor); - VLOG(3) << "Perform View between Output Tensor(" + VLOG(4) << "Perform View between Output Tensor(" << view_output_tensor->name() << ") and Input Tensor(" << input_tensor.name() << "), share allocation and inplace version."; @@ -409,7 +409,7 @@ std::vector EagerUtils::RecoverTensorWrapper( } return ret; } - +// TODO(jiabin): remove all this when we fix all test using tmp grad void EagerUtils::CheckAndRetainGrad( const paddle::experimental::Tensor& tensor) { VLOG(6) << "Check RetainGradForTensor: " << tensor.name(); diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index a42b1187718..24e91386662 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -230,6 +230,7 @@ class EagerUtils { const std::vector& tensors); static void CheckAndRetainGrad( const std::vector& tensors); + static std::shared_ptr GetGradAccumulationNode( const paddle::experimental::Tensor& 
tensor); @@ -246,6 +247,184 @@ class EagerUtils { static void FillZeroForEmptyGradInput( std::vector* in_grads, const std::vector& grad_in_metas); + /** + * Print Input Output (level 0 means least info, level 2 means most info) + * **/ + static const std::string TensorStr(const paddle::experimental::Tensor& t) { + std::string tensor_name_str = ""; + if (t.name() == "") { + tensor_name_str = "None"; + } else { + tensor_name_str = t.name(); + } + const char* TENSOR_INFO_TEMPLATE = + "{ Type: [ \"%s\" ], Dtype:[ \"%s\" ], Place:[ \"%s\" ] }"; + std::string tensor_info_str = ""; + if (t.defined()) { + if (t.initialized()) { + tensor_info_str += paddle::string::Sprintf(TENSOR_INFO_TEMPLATE, + t.impl()->type_info().name(), + t.dtype(), + t.place().DebugString()); + } else { + tensor_info_str += paddle::string::Sprintf(TENSOR_INFO_TEMPLATE, + t.impl()->type_info().name(), + "Unknown", + "Unknown"); + } + } else { + tensor_info_str += "Unknown"; + } + if (VLOG_IS_ON(6)) { + const char* TENSOR_PRINT_TEMPLATE = + "{ Name:[ \"%s\" ], Initialized: [ \"%d\" ], Ptr: [ \"%d\" ] " + "TensorInfo: [ \"%s\" ], ADInfo:[ \"%s\" ] }"; + auto* ad_meta = nullable_autograd_meta(t); + if (!ad_meta && !(ad_meta->WeakGrad().lock().get())) { + std::string ad_info_str = ""; + const char* AD_INFO_TEMPLATE = + "{ Grad: [ \"%s\" ], GradNode: [ %s ], StopGradient: [ %d ] }"; + ad_info_str += paddle::string::Sprintf(AD_INFO_TEMPLATE, + TensorStr(ad_meta->Grad()), + GradNodeStr(t), + ad_meta->StopGradient()); + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + ad_info_str); + } else { + return paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str, + "None"); + } + } else if (VLOG_IS_ON(5)) { + const char* TENSOR_PRINT_TEMPLATE = + "{ Name:[ \"%s\" ], Initialized: [ \"%d\" ], Ptr: [ \"%d\" ] " + "TensorInfo: [ \"%s\" ] }"; + return 
paddle::string::Sprintf(TENSOR_PRINT_TEMPLATE, + tensor_name_str, + t.initialized(), + t.impl(), + tensor_info_str); + } else if (VLOG_IS_ON(4)) { + const char* TENSOR_PRINT_TEMPLATE = + "{ Name:[ \"%s\" ], Initialized: [ \"%d\" ], Ptr: [ \"%d\" ] }"; + return paddle::string::Sprintf( + TENSOR_PRINT_TEMPLATE, tensor_name_str, t.initialized(), t.impl()); + } else { + return "[ Not specified tensor log level ]"; + } + } + + static const std::string GradNodeStr(const egr::GradNodeBase& node) { + if (VLOG_IS_ON(6)) { + const char* GRAD_NODE_TEMPLATE = + " { BackwardOutMeta: [ %s ], BackwardInMeta: [ %s ] }"; + const char* GRAD_SLOT_META_TEMPLATE = " {SlotSize: [%d]: %s} "; + const char* SLOT_INFO_TEMPLATE = + " {SlotID: [\"%s\"], StopGradients: [ %s ], Edges[ %s ] }"; + auto out_metas = node.OutputMeta(); + auto in_metas = node.InputMeta(); + std::string out_slot_str = ""; + std::string in_slot_str = ""; + const char* EDGE_INFO_TEMPLATE = " { [%d, %d]: [%s, %s] }, "; + std::string slot_str = ""; + for (size_t i = 0; i < out_metas.size(); i++) { + std::string edges_str = ""; + std::string sg_str = ""; + for (const GradSlotMeta& meta : out_metas[i]) { + const egr::Edge& edge = meta.GetEdge(); + if (edge.IsInitialized()) { + edges_str += paddle::string::Sprintf(EDGE_INFO_TEMPLATE, + edge.GetEdgeRankInfo().first, + edge.GetEdgeRankInfo().second, + edge.GetGradNode(), + edge.GetGradNode()->name()); + } else { + edges_str += paddle::string::Sprintf("{ NULL Edge }"); + } + sg_str += meta.IsStopGradient() ? "1, " : "0, "; + } + out_slot_str += + paddle::string::Sprintf(SLOT_INFO_TEMPLATE, i, sg_str, edges_str); + } + std::string out_meta_str = paddle::string::Sprintf( + GRAD_SLOT_META_TEMPLATE, out_metas.size(), out_slot_str); + + for (size_t i = 0; i < in_metas.size(); i++) { + std::string edges_str = ""; + std::string sg_str = ""; + for (const GradSlotMeta& meta : in_metas[i]) { + edges_str += paddle::string::Sprintf("{ NULL Edge }"); + sg_str += meta.IsStopGradient() ? 
"1, " : "0, "; + } + in_slot_str += + paddle::string::Sprintf(SLOT_INFO_TEMPLATE, i, sg_str, edges_str); + } + std::string in_meta_str = + paddle::string::Sprintf(GRAD_SLOT_META_TEMPLATE, in_slot_str); + return paddle::string::Sprintf( + GRAD_NODE_TEMPLATE, out_meta_str, in_meta_str); + } else if (VLOG_IS_ON(5)) { + const char* GRAD_NODE_TEMPLATE = + " { BackwardOutMeta: [ %s ], BackwardInMeta: [ %s ] }"; + const char* GRAD_SLOT_META_TEMPLATE = "SlotSize: [\"%d\"]"; + std::string out_meta_str = paddle::string::Sprintf( + GRAD_SLOT_META_TEMPLATE, node.OutputMeta().size()); + std::string in_meta_str = paddle::string::Sprintf( + GRAD_SLOT_META_TEMPLATE, node.InputMeta().size()); + return paddle::string::Sprintf( + GRAD_NODE_TEMPLATE, out_meta_str, in_meta_str); + } else { + return "[ Not specified grad node log level. ] "; + } + } + + static const std::string GradNodeStr(const paddle::experimental::Tensor& t) { + auto* ad_meta = nullable_autograd_meta(t); + if (ad_meta && !(ad_meta->GetMutableGradNode().get())) { + return GradNodeStr((*ad_meta->GetMutableGradNode().get())); + } else { + return "None"; + } + } + + static const std::string TensorStr( + const std::vector& tensors) { + std::string tensors_str = ""; + for (const auto& tensor : tensors) { + tensors_str += TensorStr(tensor) + ", "; + } + return "[ " + tensors_str + " ]"; + } + + static const std::string TensorStr( + const paddle::optional& t) { + if (!t.is_initialized()) { + return "{ UnDefinedTensor }"; + } else { + return TensorStr((*t.get_ptr())); + } + } + + static const std::string TensorStr( + const paddle::optional>& + tensors) { + std::string tensors_str = ""; + if (!tensors.is_initialized()) { + return "[ UnDefinedTensor List ]"; + } else { + for (const auto& tensor : (*tensors.get_ptr())) { + tensors_str += TensorStr(tensor) + ", "; + } + return "[ " + tensors_str + " ]"; + } + } }; } // namespace egr diff --git a/paddle/fluid/pybind/eager_custom_python_api.h 
b/paddle/fluid/pybind/eager_custom_python_api.h index 1bb8fdd9360..85afc274623 100644 --- a/paddle/fluid/pybind/eager_custom_python_api.h +++ b/paddle/fluid/pybind/eager_custom_python_api.h @@ -30,13 +30,13 @@ static PyObject *eager_api_linear(PyObject *self, auto bias = GetTensorFromArgs("linear", "Bias", args, 2, true); tstate = PyEval_SaveThread(); if (bias.initialized()) { - auto mm_out = matmul_dygraph_function(x, weight, false, false); - auto out = add_dygraph_function(mm_out, bias); + auto mm_out = matmul_ad_func(x, weight, false, false); + auto out = add_ad_func(mm_out, bias); PyEval_RestoreThread(tstate); tstate = nullptr; return ToPyObject(out); } else { - auto mm_out = matmul_dygraph_function(x, weight, false, false); + auto mm_out = matmul_ad_func(x, weight, false, false); PyEval_RestoreThread(tstate); tstate = nullptr; return ToPyObject(mm_out); diff --git a/paddle/fluid/pybind/eager_legacy_custom_python_api.h b/paddle/fluid/pybind/eager_legacy_custom_python_api.h index 7ed58a1e956..c599346bdb7 100644 --- a/paddle/fluid/pybind/eager_legacy_custom_python_api.h +++ b/paddle/fluid/pybind/eager_legacy_custom_python_api.h @@ -38,7 +38,7 @@ static PyObject *eager_api_run_program(PyObject *self, "run_program", args, 6, PyTuple_GET_SIZE(args), attrs); tstate = PyEval_SaveThread(); - run_program_dygraph_function(X, Params, Out, OutScope, DOut, attrs); + run_program_ad_func(X, Params, Out, OutScope, DOut, attrs); PyEval_RestoreThread(tstate); tstate = nullptr; Py_RETURN_NONE; diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 94756a41f85..151d1ad2114 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -808,14 +808,14 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, decrease_axis.end()); if (op_type == "slice") { - out = slice_dygraph_function(self->tensor, - slice_axes_tmp, - slice_starts, - slice_ends, - infer_flags_tmp, - decrease_axis_tmp); + out = 
slice_ad_func(self->tensor, + slice_axes_tmp, + slice_starts, + slice_ends, + infer_flags_tmp, + decrease_axis_tmp); } else if (op_type == "strided_slice") { - out = strided_slice_dygraph_function( + out = strided_slice_ad_func( self->tensor, slice_axes, slice_starts, slice_ends, slice_strides); } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -854,7 +854,7 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, } paddle::experimental::Tensor new_out; - new_out = unsqueeze_dygraph_function(out, none_axes); + new_out = unsqueeze_ad_func(out, none_axes); return ToPyObject(new_out); } } @@ -870,7 +870,7 @@ static PyObject* tensor__getitem_index_not_tensor(TensorObject* self, paddle::framework::TensorFromVector( list_select_idxs, *dev_ctx, idx_tensor.get()); framework::AttributeMap attrs = {{"dim", 0}}; - out = index_select_dygraph_function(self->tensor, select_index, 0); + out = index_select_ad_func(self->tensor, select_index, 0); } return ToPyObject(out); -- GitLab