From 3ffcd693a4c79530b5dc1c6168c2c3cbf7cf056f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com>
Date: Fri, 30 Dec 2022 10:29:59 +0800
Subject: [PATCH] Yj/rm legacy part 0 (#49424)

* rm legacy

* clear in_legacy

* fix tracer
---
 python/paddle/common_ops_import.py            |  1 -
 python/paddle/fluid/dygraph/tracer.py         |  2 +-
 python/paddle/fluid/framework.py              | 99 ++++++-------------
 .../fluid/layers/layer_function_generator.py  |  1 -
 .../paddle/fluid/tests/unittests/op_test.py   |  6 +-
 python/paddle/framework/__init__.py           |  1 -
 python/paddle/nn/functional/norm.py           | 85 +++++++---------
 7 files changed, 71 insertions(+), 124 deletions(-)

diff --git a/python/paddle/common_ops_import.py b/python/paddle/common_ops_import.py
index 1c2bb424dc1..91a3f49cdbb 100644
--- a/python/paddle/common_ops_import.py
+++ b/python/paddle/common_ops_import.py
@@ -24,7 +24,6 @@ from paddle.fluid.framework import (  # noqa: F401
     OpProtoHolder,
     Variable,
     _dygraph_tracer,
-    _in_legacy_dygraph,
     _non_static_mode,
     _varbase_creator,
     convert_np_dtype_to_dtype_,
diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py
index e13fdac0e73..74826c9a6bc 100644
--- a/python/paddle/fluid/dygraph/tracer.py
+++ b/python/paddle/fluid/dygraph/tracer.py
@@ -306,7 +306,7 @@ class Tracer(core.Tracer):
         stop_gradient=False,
         inplace_map=None,
     ):
-        if not framework._in_legacy_dygraph():
+        if framework.in_dygraph_mode():
             # inputs : {"sum": [tensor], ...}
             # outputs : {"sum": [tensor], ...}
             if type in name_mapping.keys():
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index d56dbde378a..a2ae6927db4 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -98,11 +98,10 @@ _dy2st_enable_standalone_executor_ = os.environ.get(
 # 2. dygraph_mode():
 # This flags inidicates we are now running in dygraph mode which called eager mode before.
 # 3. _in_legacy_dygraph():
-# This flags inidicates we are now running in legacy dygraph mode
+# This flag has been deprecated
 #
 # They have a relation ship as below:
-# Both dygraph_mode and _in_legacy_dygraph are _non_static_mode, but if you are running in
-# dygraph mode means you are not in _in_legacy_dygraph.
+# Since _in_legacy_dygraph is deprecated, dygraph_mode is equivalent to _non_static_mode
 #
 # Why we have to make different of _in_legacy_dygraph and dygraph_mode?
 # In some performance issue, we find that python if statement cause server performance problem
@@ -237,10 +236,6 @@ def in_dygraph_mode():
     return (_dygraph_tracer_ is not None) and _in_eager_mode_


-def _in_legacy_dygraph():
-    return (not _in_eager_mode_) and (_dygraph_tracer_ is not None)
-
-
 def _non_static_mode():
     return _dygraph_tracer_ is not None

@@ -1334,8 +1329,6 @@ class VariableMetaClass(type):
         if in_dygraph_mode():
             return issubclass(t, core.eager.Tensor)
         else:
-            if _in_legacy_dygraph():
-                return issubclass(t, core.VarBase)
             return issubclass(t, Variable)


@@ -1346,8 +1339,6 @@ class ParameterMetaClass(VariableMetaClass):
         if in_dygraph_mode():
             return issubclass(t, EagerParamBase)
         else:
-            if _in_legacy_dygraph():
-                return issubclass(t, ParamBase)
             return issubclass(t, Parameter)


@@ -3893,31 +3884,18 @@ class Block:
                     error_clip=error_clip,
                 )
             else:
-                if _in_legacy_dygraph():
-                    var = ParamBase(
-                        d.shape(),
-                        d.dtype(),
-                        type=orig_var_type,
-                        name=new_name,
-                        stop_gradient=stop_gradient,
-                        trainable=trainable,
-                        optimize_attr=optimize_attr,
-                        regularizer=regularizer,
-                        error_clip=error_clip,
-                    )
-                else:
-                    var = Parameter(
-                        self,
-                        d.shape(),
-                        d.dtype(),
-                        type=orig_var_type,
-                        name=new_name,
-                        stop_gradient=stop_gradient,
-                        trainable=trainable,
-                        optimize_attr=optimize_attr,
-                        regularizer=regularizer,
-                        error_clip=error_clip,
-                    )
+                var = Parameter(
+                    self,
+                    d.shape(),
+                    d.dtype(),
+                    type=orig_var_type,
+                    name=new_name,
+                    stop_gradient=stop_gradient,
+                    trainable=trainable,
+                    optimize_attr=optimize_attr,
+                    regularizer=regularizer,
+                    error_clip=error_clip,
+                )
         elif var_type == "Variable":
             var = Variable(
                 self,
@@ -3946,10 +3924,7 @@ class Block:
         if in_dygraph_mode():
             param = EagerParamBase(*args, **kwargs)
         else:
-            if _in_legacy_dygraph():
-                param = ParamBase(*args, **kwargs)
-            else:
-                param = Parameter(global_block, *args, **kwargs)
+            param = Parameter(global_block, *args, **kwargs)

         if 'initializer' in kwargs:

@@ -4262,35 +4237,21 @@ class Block:
                     name=v.name,
                 )
             else:
-                if _in_legacy_dygraph():
-                    new_p = ParamBase(
-                        shape=v.shape,
-                        dtype=v.dtype,
-                        type=v.type,
-                        lod_level=v.lod_level,
-                        stop_gradient=p.stop_gradient,
-                        trainable=p.trainable,
-                        optimize_attr=p.optimize_attr,
-                        regularizer=p.regularizer,
-                        error_clip=p.error_clip,
-                        name=v.name,
-                    )
-                else:
-                    new_p = Parameter(
-                        block=self,
-                        shape=v.shape,
-                        dtype=v.dtype,
-                        type=v.type,
-                        lod_level=v.lod_level
-                        if v.type == core.VarDesc.VarType.LOD_TENSOR
-                        else None,
-                        stop_gradient=p.stop_gradient,
-                        trainable=p.trainable,
-                        optimize_attr=p.optimize_attr,
-                        regularizer=p.regularizer,
-                        error_clip=p.error_clip,
-                        name=v.name,
-                    )
+                new_p = Parameter(
+                    block=self,
+                    shape=v.shape,
+                    dtype=v.dtype,
+                    type=v.type,
+                    lod_level=v.lod_level
+                    if v.type == core.VarDesc.VarType.LOD_TENSOR
+                    else None,
+                    stop_gradient=p.stop_gradient,
+                    trainable=p.trainable,
+                    optimize_attr=p.optimize_attr,
+                    regularizer=p.regularizer,
+                    error_clip=p.error_clip,
+                    name=v.name,
+                )
         self.vars[new_p.name] = new_p

     def _clone_variable(self, var, force_persistable=True):
diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py
index bb5d06157e1..6e4b1f836f0 100644
--- a/python/paddle/fluid/layers/layer_function_generator.py
+++ b/python/paddle/fluid/layers/layer_function_generator.py
@@ -272,7 +272,6 @@ def generate_activation_fn(op_type):
             op = getattr(_C_ops, op_type)
             return op(x)
         # TODO(dev): Because some ops' yaml has not been migrated.
-        # Replace it with _in_legacy_dygraph while all yaml work is done.
         if in_dygraph_mode() and hasattr(_legacy_C_ops, op_type):
             op = getattr(_legacy_C_ops, op_type)
             return op(x)
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
index 9728edf5d1c..cf1e7863009 100644
--- a/python/paddle/fluid/tests/unittests/op_test.py
+++ b/python/paddle/fluid/tests/unittests/op_test.py
@@ -38,8 +38,8 @@ from paddle.fluid.framework import (
     _dygraph_tracer,
     _enable_legacy_dygraph,
     _in_eager_without_dygraph_check,
-    _in_legacy_dygraph,
     _test_eager_guard,
+    in_dygraph_mode,
 )
 from paddle.fluid.op import Operator
 from paddle.jit.dy2static.utils import parse_arg_and_kwargs
@@ -716,7 +716,7 @@ class OpTest(unittest.TestCase):

                 if if_return_inputs_grad_dict:
                     v.stop_gradient = False
-                    if not _in_legacy_dygraph():
+                    if hasattr(v, "retain_grads"):
                         v.retain_grads()

                 if has_lod:
@@ -2515,7 +2515,7 @@ class OpTest(unittest.TestCase):
                 for no_grad_val in no_grad_set:
                     del inputs[no_grad_val]

-            if not _in_legacy_dygraph():
+            if in_dygraph_mode():
                 core.eager.run_backward(
                     fluid.layers.utils.flatten(outputs), grad_outputs, False
                 )
diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py
index 99d9cffed1f..986b8e93ae6 100755
--- a/python/paddle/framework/__init__.py
+++ b/python/paddle/framework/__init__.py
@@ -64,7 +64,6 @@ from ..fluid.framework import _dygraph_tracer  # noqa: F401

 from ..fluid.layer_helper import LayerHelper  # noqa: F401
 from ..fluid.framework import in_dygraph_mode  # noqa: F401
-from ..fluid.framework import _in_legacy_dygraph  # noqa: F401
 from ..fluid.framework import _global_flags  # noqa: F401
 from ..fluid.framework import _apply_pass  # noqa: F401
 from ..fluid.framework import switch_main_program
diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py
index 4d5bac573c5..42f2ff17078 100644
--- a/python/paddle/nn/functional/norm.py
+++ b/python/paddle/nn/functional/norm.py
@@ -17,8 +17,8 @@ import numbers
 # TODO: define normalization api
 import paddle
 import paddle.fluid as fluid
-from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode
-from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from paddle import _C_ops, in_dynamic_mode
+from paddle.fluid.framework import in_dygraph_mode

 from ...fluid import dygraph_utils
 from ...fluid.data_feeder import check_type, check_variable_and_dtype
@@ -336,54 +336,43 @@ def layer_norm(
         out, _, _ = _C_ops.layer_norm(x, weight, bias, epsilon, begin_norm_axis)
         return out

-    if _in_legacy_dygraph():
-        out, _, _ = _legacy_C_ops.layer_norm(
-            x,
-            weight,
-            bias,
-            'epsilon',
-            epsilon,
-            'begin_norm_axis',
-            begin_norm_axis,
+    else:
+        check_variable_and_dtype(
+            x, 'input', ['float16', 'float32', 'float64'], 'LayerNorm'
+        )
+
+        inputs = dict()
+        inputs['X'] = [x]
+        if weight:
+            inputs['Scale'] = [weight]
+        if bias:
+            inputs['Bias'] = [bias]
+        attrs = {"epsilon": epsilon, "begin_norm_axis": begin_norm_axis}
+
+        # create output
+        helper = LayerHelper('layer_norm', **locals())
+
+        dtype = x.dtype
+        mean_out = helper.create_variable_for_type_inference(
+            dtype=dtype, stop_gradient=True
+        )
+        variance_out = helper.create_variable_for_type_inference(
+            dtype=dtype, stop_gradient=True
+        )
+        layer_norm_out = helper.create_variable_for_type_inference(dtype)
+
+        helper.append_op(
+            type="layer_norm",
+            inputs=inputs,
+            outputs={
+                "Y": layer_norm_out,
+                "Mean": mean_out,
+                "Variance": variance_out,
+            },
+            attrs={"epsilon": epsilon, "begin_norm_axis": begin_norm_axis},
         )
-        return out

-    check_variable_and_dtype(
-        x, 'input', ['float16', 'float32', 'float64'], 'LayerNorm'
-    )
-
-    inputs = dict()
-    inputs['X'] = [x]
-    if weight:
-        inputs['Scale'] = [weight]
-    if bias:
-        inputs['Bias'] = [bias]
-    attrs = {"epsilon": epsilon, "begin_norm_axis": begin_norm_axis}
-
-    # create output
-    helper = LayerHelper('layer_norm', **locals())
-
-    dtype = x.dtype
-    mean_out = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    variance_out = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    layer_norm_out = helper.create_variable_for_type_inference(dtype)
-
-    helper.append_op(
-        type="layer_norm",
-        inputs=inputs,
-        outputs={
-            "Y": layer_norm_out,
-            "Mean": mean_out,
-            "Variance": variance_out,
-        },
-        attrs={"epsilon": epsilon, "begin_norm_axis": begin_norm_axis},
-    )
-
-    return helper.append_activation(layer_norm_out)
+        return helper.append_activation(layer_norm_out)


 def instance_norm(
--
GitLab
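
Note (not part of the patch): a minimal sketch of the call-site pattern this change leaves behind, where in_dygraph_mode() is the only imperative-mode check and the hasattr() guard mirrors the op_test.py edit. The helper name mark_for_grad and the example tensor are illustrative only, assuming Paddle 2.x with eager mode enabled by default.

import paddle
from paddle.fluid.framework import in_dygraph_mode


def mark_for_grad(t):
    # A single dygraph check replaces the old in_dygraph_mode()/_in_legacy_dygraph()
    # pair; the hasattr() guard skips objects that do not expose retain_grads().
    t.stop_gradient = False
    if in_dygraph_mode() and hasattr(t, "retain_grads"):
        t.retain_grads()
    return t


# Usage sketch: gradients are kept on the marked leaf tensor after backward().
x = mark_for_grad(paddle.ones([2, 3]))
(x * 2.0).sum().backward()
print(x.grad)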