From 3da97b4567fc45f66e982dc89416b2522e9c5b0a Mon Sep 17 00:00:00 2001 From: Chen Weihang Date: Fri, 6 May 2022 16:49:48 +0800 Subject: [PATCH] [Eager] Polish generated code details (#42512) * polish code details * remove needless prefix * revert needless change * polish grad func generated format --- .../final_state_generator/codegen_utils.py | 2 +- .../final_state_generator/eager_gen.py | 206 ++++++++---------- .../final_state_generator/python_c_gen.py | 30 +-- paddle/fluid/eager/autograd_meta.h | 41 ++-- paddle/fluid/pybind/eager.cc | 10 +- paddle/fluid/pybind/eager.h | 4 +- 6 files changed, 119 insertions(+), 174 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py index 8c98d9fa27..448fa54625 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py @@ -178,7 +178,7 @@ def GetForwardFunctionName(string): def GetIndent(num): - tab = " " + tab = " " return "".join([tab for i in range(num)]) diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 00b9aa7a23..9f48cce878 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -55,58 +55,49 @@ def ParseArguments(): ## Code Gen Templates ## ######################## SET_PLAIN_TENSOR_WRAPPER_TEMPLATE = \ -""" - void SetTensorWrapper{}(const paddle::experimental::Tensor& {}, bool full_reserved) {{ - {} = egr::TensorWrapper({}, full_reserved, {}); - }} +""" void SetTensorWrapper{}(const paddle::experimental::Tensor& {}, bool full_reserved) {{ + {} = egr::TensorWrapper({}, full_reserved, {}); + }} """ PLAIN_TENSOR_MEMBER_TEMPLATE = \ -""" - egr::TensorWrapper {}; +""" egr::TensorWrapper {}; """ CLEAR_TENSOR_WRAPPER_TEMPLATE = \ -""" - {}.clear(); +""" {}.clear(); """ SET_VECTOR_TENSOR_WRAPPER_TEMPLATE = \ -""" - void SetTensorWrapper{}(const std::vector& {}, bool full_reserved) {{ - for(const auto& eager_tensor : {}) {{ - {}.emplace_back( egr::TensorWrapper(eager_tensor, full_reserved, {}) ); - }}; - }} +""" void SetTensorWrapper{}(const std::vector& {}, bool full_reserved) {{ + for(const auto& eager_tensor : {}) {{ + {}.emplace_back(egr::TensorWrapper(eager_tensor, full_reserved, {})); + }}; + }} """ VECTOR_TENSOR_MEMBER_TEMPLATE = \ -""" - std::vector {}; +""" std::vector {}; """ CLEAR_VECTOR_TENSOR_WRAPPERS_TEMPLATE = \ -""" - for (auto& tw : {}) {{ - tw.clear(); - }} +""" for (auto& tw : {}) {{ + tw.clear(); + }} """ SET_ATTR_METHOD_TEMPLATE = \ -""" - void SetAttribute{}({} {}) {{ - {} = {}; - }} +""" void SetAttribute{}({} {}) {{ + {} = {}; + }} """ ATTRIBUTE_MEMBER_WITH_DEFAULT_TEMPLATE = \ -""" - {} {} = {}; +""" {} {} = {}; """ ATTRIBUTE_MEMBER_TEMPLATE = \ -""" - {} {}; +""" {} {}; """ NODE_DECLARATION_TEMPLATE = \ @@ -114,140 +105,114 @@ NODE_DECLARATION_TEMPLATE = \ class {} : public egr::GradNodeBase {{ public: {}() : egr::GradNodeBase() {{}} - {}(size_t bwd_in_slot_num, size_t bwd_out_slot_num) : + {}(size_t bwd_in_slot_num, size_t bwd_out_slot_num) : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}} ~{}() override = default; virtual paddle::small_vector, egr::kSlotSmallVectorSize> operator()( paddle::small_vector, egr::kSlotSmallVectorSize>& grads, bool 
create_graph = false, bool is_new_grad = false) override; std::string name() override {{ return \"{}\"; }} - + void ClearTensorWrappers() override {{ - {} - SetIsTensorWrappersCleared(true); +{} + SetIsTensorWrappersCleared(true); }} std::shared_ptr Copy() const override {{ - auto copied_node = std::shared_ptr<{}>(new {}(*this)); - - return copied_node; + auto copied_node = std::shared_ptr<{}>(new {}(*this)); + return copied_node; }} - + // SetTensorWrapperX, SetTensorWrapperY, ... - {} +{} // SetAttributes - {} - +{} private: // TensorWrappers - {} - +{} // Attributes - {} -}}; +{}}}; """ GRAD_FUNCTION_TEMPLATE = \ """ paddle::small_vector, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{ - // Fill Zero For GradIn Tensors + // Fill Zero For GradIn Tensors {} + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); - // Apply Gradient Hooks - auto hooked_grads = ApplyGradientHooks(grads); - - // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers {} - // Call grad_api function - VLOG(3) << \"Final State Running: \" << \"{}\"; + // Call grad_api function + VLOG(3) << \"Final State Running: \" << \"{}\"; {} - // Get Output + // Get Output {} - - // Get GradIn autograd_meta + // Get GradIn autograd_meta {} - - // Get GradOut autograd_meta + // Get GradOut autograd_meta {} - - // Compute Require Grad + // Compute Require Grad {} - - // Create Grad Node + // Create Grad Node {} - - // Return + // Return {} - }} """ FORWARD_FUNCTION_TEMPLATE = \ """ {} {}({}) {{ - // Dygraph Record Event + // Dygraph Record Event {} - // AMP Logic + // AMP Logic {} - - // Get Input AutoGradMeta + // Get Input AutoGradMeta {} - // Set Device Id - auto place = egr::Controller::Instance().GetExpectedPlace(); - if (paddle::platform::is_gpu_place(place)) {{ -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - phi::backends::gpu::SetDeviceId(place.device); -#else - PADDLE_THROW(paddle::platform::errors::PreconditionNotMet( - "PaddlePaddle should compile with GPU if use CUDAPlace.")); -#endif - }} - // Forward API Call - VLOG(3) << \"Final State Running: \" << \"{}\"; + // Forward API Call + VLOG(3) << \"Final State Running: \" << \"{}\"; {} - // Get Outputs + // Get Outputs {} - // Get Output AutoGradMeta -{} - bool trace_backward = egr::Controller::Instance().HasGrad(); - bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({}); - - // Check Inplace & Bump Inplace Version -{} -{} - // Node Creation + // Get Output AutoGradMeta {} + bool trace_backward = egr::Controller::Instance().HasGrad(); + bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({}); - // Returns - return {}; + // Check Inplace if needed +{}{} + // Node Creation +{} + // Returns + return {}; }} - """ FORWARD_BODY_TEMPLATE = \ -""" - if(require_any_grad) {{ +""" if(require_any_grad) {{ {} - egr::EagerUtils::PassStopGradient({}); - - // Node Construction + egr::EagerUtils::PassStopGradient({}); + + // Node Construction {} - // SetAttributes + // SetAttributes if needed {} - // Set TensorWrappers for Forward Inputs + // Set TensorWrappers for Forward Inputs if needed {} - // SetGradOutMeta & SetEdges + // SetGradOutMeta & SetEdges {} - // SetOutRank & SetHistory & SetGradInMeta & RetainGrad + // SetOutRank & SetHistory & SetGradInMeta & RetainGrad {} {} {} {} - // Set TensorWrappers for Forward Outputs + // Set TensorWrappers for Forward Outputs if needed 
{} - }} + }} """ NAMESPACE_WRAPPER_TEMPLATE = \ @@ -340,30 +305,29 @@ extern std::unordered_map> core_ops_final_ CHECK_INPLACE_TEMPLATE = \ """ - egr::EagerUtils::CheckInplace({}, {}, require_any_grad);\n + egr::EagerUtils::CheckInplace({}, {}, require_any_grad); """ BUMP_INPLACE_VERSION_TEMPLATE = \ """ - // Bump Inplace Version - {}.bump_inplace_version(); - VLOG(3) << \"Tensor(\" << {}.name() << \") uses Inplace Strategy.\";\n + // Bump Inplace Version + {}.bump_inplace_version(); + VLOG(3) << \"Tensor(\" << {}.name() << \") uses Inplace Strategy.\"; """ AMP_LOGIC_TEMPLATE = \ -""" - if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{ - VLOG(5) << "Check and Prepare For AMP"; - {} - paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {}; - {} - {} - {} - {{ - paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0); - {} - }} +""" if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{ + VLOG(5) << "Check and Prepare For AMP"; + {} + paddle::small_vector, egr::kSlotSmallVectorSize> amp_tensors_vector = {}; + {} + {} + {} + {{ + paddle::imperative::AutoCastGuard guard(egr::Controller::Instance().GetCurrentTracer(), paddle::imperative::AmpLevel::O0); + {} }} + }} """ CREATE_PLAIN_OPTIONAL_TENSOR_TEMPLATE = \ @@ -1045,7 +1009,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): self.GenerateNodeCreationCodes() node_creation_str = self.node_creation_str - dygraph_event_str = f"{indent}paddle::platform::RecordEvent dygraph_entrance_record_event(\"{forward_api_name} dygraph\", paddle::platform::TracerEventType::Operator, 1);" + dygraph_event_str = f"{indent}paddle::platform::RecordEvent dygraph_entrance_record_event(\"{forward_api_name} dygraph\", paddle::platform::TracerEventType::Operator, 1);\n" forward_function_name = GetDygraphForwardFunctionName(forward_api_name) # Forward amp logic @@ -1055,8 +1019,8 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): amp_tensors_vector_optional_list_str = "".join( amp_tensors_vector_optional_list) amp_get_dst_dtype_str = f"auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);\n" - amp_autocast_list_str = " ".join( - amp_autocast_list) + " " + " ".join( + amp_autocast_list_str = " ".join( + amp_autocast_list) + " " + " ".join( amp_autocast_optional_list) amp_inputs_call_args_str = ", ".join(amp_inputs_call_list) amp_call_str = f"return {forward_function_name}({amp_inputs_call_args_str});" diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py index 7ca5fc833e..3608fe7e40 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py @@ -66,12 +66,13 @@ PARSE_PYTHON_C_TENSORS_TEMPLATE = \ PARSE_PYTHON_C_ARGS_TEMPLATE = \ -""" PyObject* {}_obj = PyTuple_GET_ITEM(args, {});\n - {} {} = {}({}_obj, \"{}\", {});\n""" +""" PyObject* {}_obj = PyTuple_GET_ITEM(args, {}); + {} {} = {}({}_obj, \"{}\", {}); +""" RECORD_EVENT_TEMPLATE = \ -" paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::Operator, 1);" +"paddle::platform::RecordEvent {}(\"{} {}\", paddle::platform::TracerEventType::Operator, 1);" RETURN_INPLACE_PYOBJECT_TEMPLATE = \ @@ -84,33 +85,27 @@ RETURN_INPLACE_PYOBJECT_TEMPLATE = \ 
PYTHON_C_FUNCTION_TEMPLATE = \ """ -static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObject *kwargs) -{{ +static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObject *kwargs) {{ {} PyThreadState *tstate = nullptr; - try - {{ + try {{ VLOG(6) << "Running Eager Final State API: {}"; // Get EagerTensors from args {} - - // Parse Attributes + // Parse Attributes if needed {} - tstate = PyEval_SaveThread(); // Set Device ID {} - auto out = {}({}); - + PyEval_RestoreThread(tstate); tstate = nullptr; {} - }} - catch(...) {{ + }} catch(...) {{ if (tstate) {{ PyEval_RestoreThread(tstate); }} @@ -118,13 +113,10 @@ static PyObject * eager_final_state_api_{}(PyObject *self, PyObject *args, PyObj return nullptr; }} }} - """ FUNCTION_SET_DEVICE_TEMPLATE = \ -""" - {} - if (paddle::platform::is_gpu_place(place)) {{ +"""{} if (paddle::platform::is_gpu_place(place)) {{ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) phi::backends::gpu::SetDeviceId(place.device); VLOG(1) <<"CurrentDeviceId: " << phi::backends::gpu::GetCurrentDeviceId() << " from " << (int)place.device; @@ -309,7 +301,7 @@ class PythonCSingleFunctionGenerator(FunctionGeneratorBase): "false") parse_attributes_str = "" - expected_place_str = "auto place = egr::Controller::Instance().GetExpectedPlace();\n" + expected_place_str = " auto place = egr::Controller::Instance().GetExpectedPlace();\n" # Generate Python-C Attributes Parsing Logic for name, atype, _, pos in orig_forward_attrs_list: diff --git a/paddle/fluid/eager/autograd_meta.h b/paddle/fluid/eager/autograd_meta.h index 2241ccca81..eb5a9b5850 100644 --- a/paddle/fluid/eager/autograd_meta.h +++ b/paddle/fluid/eager/autograd_meta.h @@ -22,13 +22,10 @@ using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; /** * * AutogradMeta is what record the backward info for tensor. When we run - * computation - * graph eagerly, we can not build a static paddle program like static mode do, - * so we - * need a new method to record forward info to trace backward when we finish all - * forward - * computation. This require our AutogradMeta class record following main - * members + * computation graph eagerly, we can not build a static paddle program like + * static mode do, so we need a new method to record forward info to trace + * backward when we finish all forward computation. This require our + * AutogradMeta class record following main members * * 1. grad_op: * Grad_op indicate the grad operation of the forward op @@ -38,28 +35,24 @@ using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta; * backward computation * * NOTE: grad should only be available when current tensor is a leaf tensor, and - * for non-leaf - * tensor grad is only available while user set `retain_grad` option as `true`. + * for non-leaf tensor grad is only available while user set `retain_grad` + * option as `true`. * * TODO(jiabin) : support hooks * 3. hooks: * Hooks are some computation logic which only attached with backward operation, - * it registered - * by user and run before accumulator. + * it registered by user and run before accumulator. * - * 4.overrided_stop_gradient_ + * 4. overrided_stop_gradient_ * This member is used to finish some auto-prune related work, which indicate - * user set stop_gradient - * should overrided the result indicated by framework. All non-parameter - * tensor's stop_gradient - * properties should be true. We will pass stop_gradient when we find one who - * need it. 
+ * user set stop_gradient should overrided the result indicated by framework.
+ * All non-parameter tensor's stop_gradient properties should be true. We will
+ * pass stop_gradient when we find one who need it.
  *
  * NOTE: AutogradMeta is inherited from AbstractAutogradMeta which is defined
- * in tensor's deps,
- * we did this to avoid additional dependency on Autograd. In eager execution,
- * we will cast
- * AbstractAutogradMeta as AutogradMeta to use it.
+ * in tensor's deps, we did this to avoid additional dependency on Autograd.
+ * In eager execution, we will cast AbstractAutogradMeta as AutogradMeta to use
+ * it.
  *
  * **/
@@ -119,7 +112,7 @@ class AutogradMeta : public AbstractAutogradMeta {
     return std::make_pair(out_slot_id_, out_rank_);
   }
 
-  bool IsInitialized() { return grad_node_.get(); }
+  bool IsInitialized() const { return grad_node_.get(); }
 
   // TODO(jiabin): This may cause error, since -1 still can indication true;
   bool StopGradient() const { return stop_gradient_ != 0; }
@@ -140,7 +133,7 @@ class AutogradMeta : public AbstractAutogradMeta {
 
   void SetPersistable(bool persistable) { persistable_ = persistable; }
 
-  bool RetainGrads() { return retain_grads_; }
+  bool RetainGrads() const { return retain_grads_; }
 
   void SetRetainGrads(bool value) { retain_grads_ = value; }
 
@@ -156,7 +149,7 @@ class AutogradMeta : public AbstractAutogradMeta {
 
   /**
    * Why we need slot id here?
-   * Because in paddle most of our operators inputs and outputs
+   * Because in paddle most of operators, inputs and outputs
    * are assemble in form of {"slot name", vector}.
    * So its better for us to set a slot id to fit this format. **/
   size_t out_slot_id_;
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 6601c8e8e3..b94a3b0edc 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -111,11 +111,10 @@ void EmptyStringTensorInitializer(TensorObject* self, const std::string& name,
   // Note(zhoushunjie): Only support CPUPlace when create StringTensor
   auto actual_place = platform::CPUPlace();
   // Allocate memory
-  const auto string_allocator =
-      std::make_unique<paddle::experimental::DefaultAllocator>(actual_place);
-  const auto alloc = string_allocator.get();
+  paddle::experimental::DefaultAllocator string_allocator(actual_place);
   std::shared_ptr<phi::StringTensor> string_tensor =
-      std::make_shared<phi::StringTensor>(alloc, phi::StringTensorMeta{ddims});
+      std::make_shared<phi::StringTensor>(&string_allocator,
+                                          phi::StringTensorMeta{ddims});
   if (phi::product(ddims) > 0) {
     string_tensor->mutable_data(actual_place);
   }
@@ -184,8 +183,7 @@ void InitTensorWithTensor(TensorObject* self,
                           const std::string& name) {
   self->tensor.set_name(name);
   if (place == src.place()) {
-    auto impl = std::static_pointer_cast<phi::DenseTensor>(src.impl());
-    self->tensor.set_impl(impl);
+    self->tensor.set_impl(src.impl());
     VLOG(4) << "Same place, do ShareDataWith";
   } else {
     self->tensor.set_impl(src.copy_to(place, true).impl());
diff --git a/paddle/fluid/pybind/eager.h b/paddle/fluid/pybind/eager.h
index 84df71ddee..a3eac7ab47 100644
--- a/paddle/fluid/pybind/eager.h
+++ b/paddle/fluid/pybind/eager.h
@@ -27,9 +27,7 @@ typedef struct {
 } TensorObject;
 
 typedef struct {
-  PyObject_HEAD
-
-  PyObject* container;
+  PyObject_HEAD PyObject* container;
   PyObject* non_differentiable;
   PyObject* dirty_tensors;
   bool materialize_grads;
-- 
GitLab