From 140d786d5040d0523a716d3dd502cfa63afbf436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 27 Dec 2022 10:00:33 +0800 Subject: [PATCH] rm in_legacy_dygraph python/paddle/nn/functional/ part1 (#49258) * rm in_legacy_dygraph nn part1 * rm non_static_mode * modify rrelu --- python/paddle/nn/functional/activation.py | 694 ++++++++++------------ python/paddle/nn/functional/common.py | 230 +++---- python/paddle/nn/functional/conv.py | 108 +--- python/paddle/nn/functional/distance.py | 56 +- python/paddle/nn/layer/norm.py | 205 +++---- 5 files changed, 508 insertions(+), 785 deletions(-) diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py index 53871b26b0..ada12acf6e 100644 --- a/python/paddle/nn/functional/activation.py +++ b/python/paddle/nn/functional/activation.py @@ -18,11 +18,7 @@ from paddle.framework import core from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only from ...fluid.data_feeder import check_dtype, check_variable_and_dtype -from ...fluid.framework import ( - _in_legacy_dygraph, - convert_np_dtype_to_dtype_, - in_dygraph_mode, -) +from ...fluid.framework import convert_np_dtype_to_dtype_, in_dygraph_mode from ...fluid.layer_helper import LayerHelper from ...tensor.manipulation import chunk from ...tensor.math import tanh # noqa: F401 @@ -62,22 +58,21 @@ def celu(x, alpha=1.0, name=None): """ if alpha == 0: raise ZeroDivisionError("alpha cannot be 0 for celu") - - if _in_legacy_dygraph(): - return _legacy_C_ops.celu(x, 'alpha', alpha) if in_dygraph_mode(): return _C_ops.celu(x, alpha) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'celu') - helper = LayerHelper("celu", **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='celu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': alpha}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'celu' + ) + helper = LayerHelper("celu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='celu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}, + ) + return out def elu(x, alpha=1.0, name=None): @@ -117,19 +112,19 @@ def elu(x, alpha=1.0, name=None): if in_dygraph_mode(): return _C_ops.elu(x, alpha) - if _in_legacy_dygraph(): - return _legacy_C_ops.elu(x, 'alpha', alpha) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu') - helper = LayerHelper("elu", **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='elu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': alpha}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'elu' + ) + helper = LayerHelper("elu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}, + ) + return out @inplace_apis_in_dygraph_only @@ -187,20 +182,19 @@ def gelu(x, approximate=False, name=None): if in_dygraph_mode(): return _C_ops.gelu(x, approximate) - - if _in_legacy_dygraph(): - return _legacy_C_ops.gelu(x, 'approximate', approximate) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'gelu') - helper = LayerHelper("gelu", **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - 
type='gelu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'approximate': approximate}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'gelu' + ) + helper = LayerHelper("gelu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='gelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'approximate': approximate}, + ) + return out def hardshrink(x, threshold=0.5, name=None): @@ -238,22 +232,19 @@ def hardshrink(x, threshold=0.5, name=None): """ if in_dygraph_mode(): return _C_ops.hardshrink(x, threshold) - - if _in_legacy_dygraph(): - return _legacy_C_ops.hard_shrink(x, 'threshold', threshold) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'hardshrink' - ) - helper = LayerHelper('hardshrink', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='hard_shrink', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'hardshrink' + ) + helper = LayerHelper('hardshrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='hard_shrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}, + ) + return out def hardtanh(x, min=-1.0, max=1.0, name=None): @@ -292,23 +283,20 @@ def hardtanh(x, min=-1.0, max=1.0, name=None): if in_dygraph_mode(): return _C_ops.hardtanh(x, min, max) + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'hardtanh' + ) - if _in_legacy_dygraph(): - return _legacy_C_ops.brelu(x, 't_min', min, 't_max', max) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'hardtanh' - ) - - helper = LayerHelper('hardtanh', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='brelu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'t_min': min, 't_max': max}, - ) - return out + helper = LayerHelper('hardtanh', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='brelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'t_min': min, 't_max': max}, + ) + return out def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None): @@ -349,23 +337,20 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None): if in_dygraph_mode(): return _C_ops.hardsigmoid(x, slope, offset) + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'hardsigmoid' + ) - if _in_legacy_dygraph(): - return _legacy_C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'hardsigmoid' - ) - - helper = LayerHelper('hardsigmoid', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='hard_sigmoid', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'slope': slope, 'offset': offset}, - ) - return out + helper = LayerHelper('hardsigmoid', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='hard_sigmoid', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'slope': slope, 'offset': offset}, + ) + return out def hardswish(x, name=None): @@ -401,20 +386,19 @@ def hardswish(x, name=None): x = paddle.to_tensor([-4., 5., 1.]) out = F.hardswish(x) # [0., 5., 0.666667] """ - - if _in_legacy_dygraph(): - return 
_legacy_C_ops.hard_swish(x) if in_dygraph_mode(): return _C_ops.hardswish(x) + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'hardswish' + ) - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'hardswish' - ) - - helper = LayerHelper('hardswish', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='hard_swish', inputs={'X': x}, outputs={'Out': out}) - return out + helper = LayerHelper('hardswish', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='hard_swish', inputs={'X': x}, outputs={'Out': out} + ) + return out def leaky_relu(x, negative_slope=0.01, name=None): @@ -453,22 +437,19 @@ def leaky_relu(x, negative_slope=0.01, name=None): """ if in_dygraph_mode(): return _C_ops.leaky_relu(x, negative_slope) - - if _in_legacy_dygraph(): - return _legacy_C_ops.leaky_relu(x, 'alpha', negative_slope) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'leaky_relu' - ) - helper = LayerHelper('leaky_relu', **locals()) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='leaky_relu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'alpha': negative_slope}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'leaky_relu' + ) + helper = LayerHelper('leaky_relu', **locals()) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='leaky_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': negative_slope}, + ) + return out def prelu(x, weight, data_format="NCHW", name=None): @@ -559,20 +540,16 @@ def prelu(x, weight, data_format="NCHW", name=None): if in_dygraph_mode(): return _C_ops.prelu(x, weight, data_format, mode) - if _in_legacy_dygraph(): - return _legacy_C_ops.prelu( - x, weight, 'mode', mode, 'data_format', data_format + else: + helper = LayerHelper('prelu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type="prelu", + inputs={"X": x, "Alpha": weight}, + outputs={"Out": out}, + attrs={"mode": mode, "data_format": data_format}, ) - - helper = LayerHelper('prelu', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type="prelu", - inputs={"X": x, "Alpha": weight}, - outputs={"Out": out}, - attrs={"mode": mode, "data_format": data_format}, - ) - return out + return out def rrelu(x, lower=1.0 / 8.0, upper=1.0 / 3.0, training=True, name=None): @@ -681,23 +658,23 @@ def rrelu(x, lower=1.0 / 8.0, upper=1.0 / 3.0, training=True, name=None): is_test = not training - if _in_legacy_dygraph(): + if in_dygraph_mode(): out, noise = _legacy_C_ops.rrelu( x, 'lower', lower, 'upper', upper, 'is_test', is_test ) return out - - helper = LayerHelper('rrelu', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - noise = helper.create_variable_for_type_inference(dtype=x.dtype) - attrs = {'lower': lower, 'upper': upper, 'is_test': is_test} - helper.append_op( - type='rrelu', - inputs={"X": x}, - outputs={"Out": out, "Noise": noise}, - attrs=attrs, - ) - return out + else: + helper = LayerHelper('rrelu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + noise = helper.create_variable_for_type_inference(dtype=x.dtype) + attrs = {'lower': lower, 'upper': upper, 'is_test': is_test} + helper.append_op( + type='rrelu', + inputs={"X": x}, + outputs={"Out": out, "Noise": noise}, + attrs=attrs, + 
) + return out def relu(x, name=None): @@ -729,13 +706,14 @@ def relu(x, name=None): if in_dygraph_mode(): return _C_ops.relu(x) - if _in_legacy_dygraph(): - return _legacy_C_ops.relu(x) - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu') - helper = LayerHelper('relu', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='relu', inputs={'X': x}, outputs={'Out': out}) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'relu' + ) + helper = LayerHelper('relu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='relu', inputs={'X': x}, outputs={'Out': out}) + return out @inplace_apis_in_dygraph_only @@ -744,10 +722,7 @@ def relu_(x, name=None): Inplace version of ``relu`` API, the output Tensor will be inplaced with input ``x``. Please refer to :ref:`api_nn_cn_relu`. """ - if in_dygraph_mode(): - return _C_ops.relu_(x) - if _in_legacy_dygraph(): - return _legacy_C_ops.relu_(x) + return _C_ops.relu_(x) def log_sigmoid(x, name=None): @@ -777,17 +752,16 @@ def log_sigmoid(x, name=None): if in_dygraph_mode(): return _C_ops.logsigmoid(x) - - if _in_legacy_dygraph(): - return _legacy_C_ops.logsigmoid(x) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'log_sigmoid' - ) - helper = LayerHelper("log_sigmoid", **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='logsigmoid', inputs={'X': x}, outputs={'Out': out}) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'log_sigmoid' + ) + helper = LayerHelper("log_sigmoid", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='logsigmoid', inputs={'X': x}, outputs={'Out': out} + ) + return out def maxout(x, groups, axis=1, name=None): @@ -844,28 +818,27 @@ def maxout(x, groups, axis=1, name=None): # [0.95313174 0.6228939 0.7129065 0.7087491 ] # [0.7142536 0.88725346 0.61093384 0.38833922]]]] """ - if _in_legacy_dygraph(): - return _legacy_C_ops.maxout(x, 'groups', groups, 'axis', axis) if in_dygraph_mode(): return _C_ops.maxout(x, groups, axis) - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout') - if axis not in [1, -1, 3]: - raise ValueError( - "Attr(axis) should be 1 when data format is NCHW, -1 or 3 when data format is NHWC. Received " - "Attr(axis): %s." % str(axis) - ) - if axis == -1: - axis = 3 + else: + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout') + if axis not in [1, -1, 3]: + raise ValueError( + "Attr(axis) should be 1 when data format is NCHW, -1 or 3 when data format is NHWC. Received " + "Attr(axis): %s." 
% str(axis) + ) + if axis == -1: + axis = 3 - helper = LayerHelper('maxout', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='maxout', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'groups': groups, 'axis': axis}, - ) - return out + helper = LayerHelper('maxout', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='maxout', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'groups': groups, 'axis': axis}, + ) + return out def relu6(x, name=None): @@ -963,19 +936,19 @@ def selu( if in_dygraph_mode(): return _C_ops.selu(x, scale, alpha) - if _in_legacy_dygraph(): - return _legacy_C_ops.selu(x, 'scale', scale, 'alpha', alpha) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'selu') - helper = LayerHelper('selu', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='selu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'scale': scale, 'alpha': alpha}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'selu' + ) + helper = LayerHelper('selu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='selu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'scale': scale, 'alpha': alpha}, + ) + return out def silu(x, name=None): @@ -1007,14 +980,14 @@ def silu(x, name=None): if in_dygraph_mode(): return _C_ops.silu(x) - if _in_legacy_dygraph(): - return _legacy_C_ops.silu(x) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'silu') - helper = LayerHelper("silu", **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='silu', inputs={'X': x}, outputs={'Out': out}) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'silu' + ) + helper = LayerHelper("silu", **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='silu', inputs={'X': x}, outputs={'Out': out}) + return out def softmax(x, axis=-1, dtype=None, name=None): @@ -1132,55 +1105,46 @@ def softmax(x, axis=-1, dtype=None, name=None): if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) - use_cudnn = True - if in_dygraph_mode(): outs_cast = x if dtype is None else _C_ops.cast(x, dtype) return _C_ops.softmax(outs_cast, axis) + else: + use_cudnn = True + if dtype is None: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'softmax' + ) + else: + check_dtype( + dtype, + 'dtype', + ['float32', 'float64'], + 'softmax', + 'If dtype is not None, it only support float32 or float64.', + ) - if _in_legacy_dygraph(): - outs_cast = ( - x - if dtype is None - else _legacy_C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - ) - return _legacy_C_ops.softmax( - outs_cast, 'axis', axis, 'use_cudnn', use_cudnn - ) + helper = LayerHelper("softmax", **locals()) + outs_cast = x + if dtype is not None: + outs_cast = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='cast', + inputs={'X': x}, + outputs={'Out': outs_cast}, + attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, + ) - if dtype is None: - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'softmax' - ) - else: - check_dtype( - dtype, - 'dtype', - ['float32', 'float64'], - 'softmax', - 'If dtype is not None, it only support float32 or float64.', + outs_softmax = 
helper.create_variable_for_type_inference( + outs_cast.dtype ) - - helper = LayerHelper("softmax", **locals()) - outs_cast = x - if dtype is not None: - outs_cast = helper.create_variable_for_type_inference(dtype) helper.append_op( - type='cast', - inputs={'X': x}, - outputs={'Out': outs_cast}, - attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, + type='softmax', + inputs={'X': outs_cast}, + outputs={'Out': outs_softmax}, + attrs={'axis': axis, 'use_cudnn': use_cudnn}, ) - outs_softmax = helper.create_variable_for_type_inference(outs_cast.dtype) - helper.append_op( - type='softmax', - inputs={'X': outs_cast}, - outputs={'Out': outs_softmax}, - attrs={'axis': axis, 'use_cudnn': use_cudnn}, - ) - - return outs_softmax + return outs_softmax @inplace_apis_in_dygraph_only @@ -1191,25 +1155,12 @@ def softmax_(x, axis=-1, dtype=None, name=None): """ if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) - use_cudnn = True - - if in_dygraph_mode(): - outs_cast = ( - x - if dtype is None - else _legacy_C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - ) - return _C_ops.softmax_(outs_cast, axis) - - if _in_legacy_dygraph(): - outs_cast = ( - x - if dtype is None - else _legacy_C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - ) - return _legacy_C_ops.softmax_( - outs_cast, 'axis', axis, 'use_cudnn', use_cudnn - ) + outs_cast = ( + x + if dtype is None + else _legacy_C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) + ) + return _C_ops.softmax_(outs_cast, axis) def softplus(x, beta=1, threshold=20, name=None): @@ -1243,22 +1194,19 @@ def softplus(x, beta=1, threshold=20, name=None): if in_dygraph_mode(): return _C_ops.softplus(x, beta, threshold) - - if _in_legacy_dygraph(): - return _legacy_C_ops.softplus(x, 'beta', beta, 'threshold', threshold) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'softplus' - ) - helper = LayerHelper('softplus', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='softplus', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'beta': beta, 'threshold': threshold}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'softplus' + ) + helper = LayerHelper('softplus', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='softplus', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'beta': beta, 'threshold': threshold}, + ) + return out def softshrink(x, threshold=0.5, name=None): @@ -1305,21 +1253,19 @@ def softshrink(x, threshold=0.5, name=None): if in_dygraph_mode(): return _C_ops.softshrink(x, threshold) - if _in_legacy_dygraph(): - return _legacy_C_ops.softshrink(x, 'lambda', threshold) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'softshrink' - ) - helper = LayerHelper('softshrink', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='softshrink', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'lambda': threshold}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'softshrink' + ) + helper = LayerHelper('softshrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='softshrink', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'lambda': threshold}, + ) + return out def softsign(x, name=None): @@ -1392,16 +1338,19 @@ def swish(x, name=None): """ if 
in_dygraph_mode(): return _C_ops.swish(x) - if _in_legacy_dygraph(): - return _legacy_C_ops.swish(x, 'beta', 1.0) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'swish') - helper = LayerHelper('swish', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='swish', inputs={'X': x}, outputs={'Out': out}, attrs={'beta': 1.0} - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'swish' + ) + helper = LayerHelper('swish', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='swish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'beta': 1.0}, + ) + return out def mish(x, name=None): @@ -1435,14 +1384,14 @@ def mish(x, name=None): """ if in_dygraph_mode(): return _C_ops.mish(x, 20) - if _in_legacy_dygraph(): - return _legacy_C_ops.mish(x) - - check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'mish') - helper = LayerHelper('mish', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='mish', inputs={'X': x}, outputs={'Out': out}) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'mish' + ) + helper = LayerHelper('mish', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op(type='mish', inputs={'X': x}, outputs={'Out': out}) + return out def tanhshrink(x, name=None): @@ -1474,17 +1423,16 @@ def tanhshrink(x, name=None): """ if in_dygraph_mode(): return _C_ops.tanh_shrink(x) - - if _in_legacy_dygraph(): - return _legacy_C_ops.tanh_shrink(x) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'tanhshrink' - ) - helper = LayerHelper('tanh_shrink', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op(type='tanh_shrink', inputs={'X': x}, outputs={'Out': out}) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'tanhshrink' + ) + helper = LayerHelper('tanh_shrink', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='tanh_shrink', inputs={'X': x}, outputs={'Out': out} + ) + return out def thresholded_relu(x, threshold=1.0, name=None): @@ -1525,22 +1473,19 @@ def thresholded_relu(x, threshold=1.0, name=None): if in_dygraph_mode(): return _C_ops.thresholded_relu(x, threshold) - - if _in_legacy_dygraph(): - return _legacy_C_ops.thresholded_relu(x, 'threshold', threshold) - - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'thresholded_relu' - ) - helper = LayerHelper('thresholded_relu', **locals()) - out = helper.create_variable_for_type_inference(x.dtype) - helper.append_op( - type='thresholded_relu', - inputs={'X': x}, - outputs={'Out': out}, - attrs={'threshold': threshold}, - ) - return out + else: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'thresholded_relu' + ) + helper = LayerHelper('thresholded_relu', **locals()) + out = helper.create_variable_for_type_inference(x.dtype) + helper.append_op( + type='thresholded_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}, + ) + return out def log_softmax(x, axis=-1, dtype=None, name=None): @@ -1605,45 +1550,40 @@ def log_softmax(x, axis=-1, dtype=None, name=None): if dtype is not None: x = _C_ops.cast(x, dtype) return _C_ops.log_softmax(x, axis) + else: + if dtype is None: + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 
'float64'], 'log_softmax' + ) + else: + check_dtype( + dtype, + 'dtype', + ['float32', 'float64'], + 'log_softmax', + 'If dtype is not None, it only support float32 or float64.', + ) - if _in_legacy_dygraph(): + helper = LayerHelper("log_softmax", **locals()) + out_cast = x if dtype is not None: - x = _legacy_C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - return _legacy_C_ops.log_softmax(x, 'axis', axis) - - if dtype is None: - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'log_softmax' - ) - else: - check_dtype( - dtype, - 'dtype', - ['float32', 'float64'], - 'log_softmax', - 'If dtype is not None, it only support float32 or float64.', - ) + out_cast = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='cast', + inputs={'X': x}, + outputs={'Out': out_cast}, + attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, + ) - helper = LayerHelper("log_softmax", **locals()) - out_cast = x - if dtype is not None: - out_cast = helper.create_variable_for_type_inference(dtype) + out = helper.create_variable_for_type_inference(out_cast.dtype) helper.append_op( - type='cast', - inputs={'X': x}, - outputs={'Out': out_cast}, - attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, + type='log_softmax', + inputs={'X': out_cast}, + outputs={'Out': out}, + attrs={'axis': axis}, ) - out = helper.create_variable_for_type_inference(out_cast.dtype) - helper.append_op( - type='log_softmax', - inputs={'X': out_cast}, - outputs={'Out': out}, - attrs={'axis': axis}, - ) - - return out + return out def glu(x, axis=-1, name=None): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index c5bcd4ee8e..beaec4c91c 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -25,11 +25,7 @@ from ...fluid.data_feeder import ( check_type, check_variable_and_dtype, ) -from ...fluid.framework import ( - _in_legacy_dygraph, - _non_static_mode, - in_dygraph_mode, -) +from ...fluid.framework import in_dygraph_mode from ...tensor import clip, concat, sqrt, sum from ...tensor.creation import zeros @@ -927,24 +923,22 @@ def bilinear(x1, x2, weight, bias=None, name=None): if in_dygraph_mode(): return _C_ops.bilinear_tensor_product(x1, x2, weight, bias) - elif _non_static_mode(): - return _legacy_C_ops.bilinear_tensor_product(x1, x2, weight, bias) - - check_variable_and_dtype(x1, 'x1', ['float32', 'float64'], 'bilinear') - check_variable_and_dtype(x2, 'x2', ['float32', 'float64'], 'bilinear') + else: + check_variable_and_dtype(x1, 'x1', ['float32', 'float64'], 'bilinear') + check_variable_and_dtype(x2, 'x2', ['float32', 'float64'], 'bilinear') - inputs = {"X": x1, "Y": x2, "Weight": weight} - if bias is not None: - inputs["Bias"] = bias + inputs = {"X": x1, "Y": x2, "Weight": weight} + if bias is not None: + inputs["Bias"] = bias - helper = LayerHelper("bilinear", **locals()) - out = helper.create_variable_for_type_inference(dtype=x1.dtype) + helper = LayerHelper("bilinear", **locals()) + out = helper.create_variable_for_type_inference(dtype=x1.dtype) - helper.append_op( - type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out} - ) + helper.append_op( + type="bilinear_tensor_product", inputs=inputs, outputs={"Out": out} + ) - return out + return out def dropout( @@ -1118,77 +1112,62 @@ def dropout( 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode ) # semantic transfer - if _non_static_mode(): + if in_dygraph_mode(): if default_main_program().random_seed != 0: seed = 
default_main_program().random_seed - if in_dygraph_mode(): - out, mask = _C_ops.dropout( - x, - None, - p, - not training, - mode, - seed if seed is not None else 0, - seed is not None, - ) - - return out - out, mask = _legacy_C_ops.dropout( + out, mask = _C_ops.dropout( x, - 'dropout_prob', + None, p, - 'is_test', not training, - 'fix_seed', - seed is not None, - 'seed', - seed if seed is not None else 0, - 'dropout_implementation', mode, + seed if seed is not None else 0, + seed is not None, ) - return out - helper = LayerHelper('dropout', **locals()) - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'dropout' - ) + return out + else: + helper = LayerHelper('dropout', **locals()) + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'dropout' + ) - out = helper.create_variable_for_type_inference(dtype=x.dtype) - mask = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.UINT8, stop_gradient=True - ) + out = helper.create_variable_for_type_inference(dtype=x.dtype) + mask = helper.create_variable_for_type_inference( + dtype=core.VarDesc.VarType.UINT8, stop_gradient=True + ) - def get_attrs(prog, dropout_prob, is_test, seed): - if (seed is None or seed == 0) and prog.random_seed != 0: - seed = prog.random_seed + def get_attrs(prog, dropout_prob, is_test, seed): + if (seed is None or seed == 0) and prog.random_seed != 0: + seed = prog.random_seed - if isinstance( - dropout_prob, Variable - ) and not dropout_prob.shape != [1]: - raise TypeError( - "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}".format( - p.shape + if isinstance( + dropout_prob, Variable + ) and not dropout_prob.shape != [1]: + raise TypeError( + "Required p.shape == [1] if type(p) is Variable, but received p.shape = {}".format( + p.shape + ) ) - ) - attrs = { - 'dropout_prob': dropout_prob, - 'is_test': is_test, - 'fix_seed': seed is not None, - 'seed': seed if seed is not None else 0, - 'dropout_implementation': mode, - } - return attrs + attrs = { + 'dropout_prob': dropout_prob, + 'is_test': is_test, + 'fix_seed': seed is not None, + 'seed': seed if seed is not None else 0, + 'dropout_implementation': mode, + } + return attrs - attrs = get_attrs(helper.main_program, p, not training, seed) + attrs = get_attrs(helper.main_program, p, not training, seed) - helper.append_op( - type='dropout', - inputs={'X': [x]}, - outputs={'Out': [out], 'Mask': [mask]}, - attrs=attrs, - ) - return out + helper.append_op( + type='dropout', + inputs={'X': [x]}, + outputs={'Out': [out], 'Mask': [mask]}, + attrs=attrs, + ) + return out else: # sometimes called dropout_nd #TODO: optimize with c++ if not in_dynamic_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'dropout') @@ -1684,38 +1663,21 @@ def pad(x, pad, mode='constant', value=0.0, data_format="NCHW", name=None): pad = pad.numpy().tolist() out = _C_ops.pad3d(x, pad, mode, value, data_format) else: - if _in_legacy_dygraph(): - if isinstance(pad, Variable): - pad = pad.numpy().tolist() - out = _legacy_C_ops.pad3d( - x, - "paddings", - pad, - "mode", - mode, - "value", - value, - "data_format", - data_format, - "name", - name, - ) + attrs = {'mode': mode, 'value': value, 'data_format': data_format} + inputs = {'X': [x]} + if isinstance(pad, Variable): + inputs['Paddings'] = [pad] + attrs['paddings'] = [] else: - attrs = {'mode': mode, 'value': value, 'data_format': data_format} - inputs = {'X': [x]} - if isinstance(pad, Variable): - inputs['Paddings'] = [pad] - attrs['paddings'] = [] - 
else: - attrs['paddings'] = pad + attrs['paddings'] = pad - helper = LayerHelper('pad3d', **locals()) + helper = LayerHelper('pad3d', **locals()) - dtype = helper.input_dtype(input_param_name='input') - out = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='pad3d', inputs=inputs, outputs={"Out": out}, attrs=attrs - ) + dtype = helper.input_dtype(input_param_name='input') + out = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='pad3d', inputs=inputs, outputs={"Out": out}, attrs=attrs + ) if len(unsqueezed_dim) != 0: out = squeeze(out, axis=unsqueezed_dim) @@ -1873,46 +1835,34 @@ def linear(x, weight, bias=None, name=None): # TODO(jiabin): using addmm for fast forward route return _C_ops.linear(x, weight, bias) else: - if _in_legacy_dygraph(): - pre_bias = _legacy_C_ops.matmul_v2( - x, weight, 'trans_x', False, 'trans_y', False - ) - - if bias is None: - return pre_bias - - return _legacy_C_ops.elementwise_add(pre_bias, bias) - else: - helper = LayerHelper('linear', **locals()) - dtype = x.dtype + helper = LayerHelper('linear', **locals()) + dtype = x.dtype - check_variable_and_dtype( - x, 'x', ['float16', 'float32', 'float64'], 'linear' - ) - check_dtype( - dtype, 'dtype', ['float16', 'float32', 'float64'], 'linear' - ) + check_variable_and_dtype( + x, 'x', ['float16', 'float32', 'float64'], 'linear' + ) + check_dtype(dtype, 'dtype', ['float16', 'float32', 'float64'], 'linear') - inputs = {'X': [x], 'Y': [weight]} - attrs = {'trans_x': False, 'trans_y': False} - tmp = helper.create_variable_for_type_inference(dtype) + inputs = {'X': [x], 'Y': [weight]} + attrs = {'trans_x': False, 'trans_y': False} + tmp = helper.create_variable_for_type_inference(dtype) + helper.append_op( + type='matmul_v2', + inputs=inputs, + outputs={'Out': tmp}, + attrs=attrs, + ) + if bias is not None: + res = helper.create_variable_for_type_inference(dtype) helper.append_op( - type='matmul_v2', - inputs=inputs, - outputs={'Out': tmp}, - attrs=attrs, + type='elementwise_add', + inputs={'X': [tmp], 'Y': [bias]}, + outputs={'Out': [res]}, + attrs={'axis': len(x.shape) - 1}, ) - if bias is not None: - res = helper.create_variable_for_type_inference(dtype) - helper.append_op( - type='elementwise_add', - inputs={'X': [tmp], 'Y': [bias]}, - outputs={'Out': [res]}, - attrs={'axis': len(x.shape) - 1}, - ) - else: - res = tmp - return res + else: + res = tmp + return res def label_smooth(label, prior_dist=None, epsilon=0.1, name=None): diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index ff1e16494d..ceaa6e5e4a 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -19,11 +19,7 @@ from paddle.device import ( is_compiled_with_npu, is_compiled_with_rocm, ) -from paddle.fluid.framework import ( - _global_flags, - _in_legacy_dygraph, - in_dygraph_mode, -) +from paddle.fluid.framework import _global_flags, in_dygraph_mode from paddle.tensor.math import _add_with_axis from ...device import get_cudnn_version @@ -489,30 +485,6 @@ def conv1d( ) if bias is not None: out = _add_with_axis(out, bias, axis=channel_dim) - elif _in_legacy_dygraph(): - attrs = ( - 'strides', - stride, - 'paddings', - padding, - 'dilations', - dilation, - 'groups', - groups, - 'use_cudnn', - use_cudnn, - 'use_mkldnn', - False, - 'fuse_relu_before_depthwise_conv', - False, - "padding_algorithm", - padding_algorithm, - "data_format", - conv2d_data_format, - ) - out = getattr(_legacy_C_ops, l_type)(x, weight, *attrs) - if bias is 
not None: - out = _add_with_axis(out, bias, axis=channel_dim) else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { @@ -1044,30 +1016,6 @@ def conv1d_transpose( ) if bias is not None: out = _add_with_axis(out, bias, axis=channel_dim) - elif _in_legacy_dygraph(): - attrs = ( - 'output_padding', - output_padding, - 'output_size', - output_size, - 'strides', - stride, - 'paddings', - padding, - 'padding_algorithm', - padding_algorithm, - 'dilations', - dilation, - 'groups', - groups, - 'use_cudnn', - use_cudnn, - 'data_format', - conv2d_data_format, - ) - out = getattr(_legacy_C_ops, op_type)(x, weight, *attrs) - if bias is not None: - out = _add_with_axis(out, bias, axis=channel_dim) else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { @@ -1350,33 +1298,6 @@ def conv2d_transpose( return _add_with_axis(pre_bias, bias, axis=channel_dim) else: return pre_bias - - if _in_legacy_dygraph(): - attrs = ( - 'output_padding', - output_padding, - 'output_size', - output_size, - 'strides', - stride, - 'paddings', - padding, - 'padding_algorithm', - padding_algorithm, - 'dilations', - dilation, - 'groups', - groups, - 'use_cudnn', - use_cudnn, - 'data_format', - data_format, - ) - pre_bias = getattr(_legacy_C_ops, op_type)(x, weight, *attrs) - if bias is not None: - out = _add_with_axis(pre_bias, bias, axis=channel_dim) - else: - out = pre_bias else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { @@ -1823,33 +1744,6 @@ def conv3d_transpose( return _add_with_axis(pre_bias, bias, axis=channel_dim) else: return pre_bias - - if _in_legacy_dygraph(): - attrs = ( - 'output_padding', - output_padding, - 'output_size', - output_size, - 'paddings', - padding, - "padding_algorithm", - padding_algorithm, - 'strides', - stride, - 'dilations', - dilation, - 'groups', - groups, - 'use_cudnn', - use_cudnn, - "data_format", - data_format_, - ) - pre_bias = getattr(_legacy_C_ops, op_type)(x, weight, *attrs) - if bias is not None: - out = _add_with_axis(pre_bias, bias, axis=channel_dim) - else: - out = pre_bias else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { diff --git a/python/paddle/nn/functional/distance.py b/python/paddle/nn/functional/distance.py index b9783c251b..8a4432a88b 100644 --- a/python/paddle/nn/functional/distance.py +++ b/python/paddle/nn/functional/distance.py @@ -13,8 +13,8 @@ # limitations under the License. 
import paddle -from paddle import _C_ops, _legacy_C_ops -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle import _C_ops +from paddle.fluid.framework import in_dygraph_mode from ...fluid.data_feeder import check_type, check_variable_and_dtype from ...fluid.layer_helper import LayerHelper @@ -81,36 +81,30 @@ def pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False, name=None): sub = _C_ops.add(sub, epsilon) return _C_ops.p_norm(sub, p, -1, 0.0, keepdim, False) - if _in_legacy_dygraph(): - sub = _legacy_C_ops.elementwise_sub(x, y) + else: + check_variable_and_dtype( + x, 'x', ['float32', 'float64'], 'PairwiseDistance' + ) + check_variable_and_dtype( + y, 'y', ['float32', 'float64'], 'PairwiseDistance' + ) + sub = paddle.subtract(x, y) if epsilon != 0.0: - epsilon = paddle.fluid.dygraph.base.to_variable( - [epsilon], dtype=sub.dtype + epsilon_var = sub.block.create_var(dtype=sub.dtype) + epsilon_var = paddle.full( + shape=[1], fill_value=epsilon, dtype=sub.dtype ) - sub = _legacy_C_ops.elementwise_add(sub, epsilon) - return _legacy_C_ops.p_norm( - sub, 'axis', -1, 'porder', p, 'keepdim', keepdim, 'epsilon', 0.0 + sub = paddle.add(sub, epsilon_var) + helper = LayerHelper("PairwiseDistance", name=name) + attrs = { + 'axis': -1, + 'porder': p, + 'keepdim': keepdim, + 'epsilon': 0.0, + } + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type='p_norm', inputs={'X': sub}, outputs={'Out': out}, attrs=attrs ) - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'PairwiseDistance') - check_variable_and_dtype(y, 'y', ['float32', 'float64'], 'PairwiseDistance') - sub = paddle.subtract(x, y) - if epsilon != 0.0: - epsilon_var = sub.block.create_var(dtype=sub.dtype) - epsilon_var = paddle.full( - shape=[1], fill_value=epsilon, dtype=sub.dtype - ) - sub = paddle.add(sub, epsilon_var) - helper = LayerHelper("PairwiseDistance", name=name) - attrs = { - 'axis': -1, - 'porder': p, - 'keepdim': keepdim, - 'epsilon': 0.0, - } - out = helper.create_variable_for_type_inference(dtype=x.dtype) - helper.append_op( - type='p_norm', inputs={'X': sub}, outputs={'Out': out}, attrs=attrs - ) - - return out + return out diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 9c701f6064..aed7e455d6 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -34,17 +34,11 @@ import numpy as np from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode from paddle.device import get_all_custom_device_type -from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode +from paddle.fluid.framework import in_dygraph_mode from ...fluid import dygraph_utils from ...fluid.data_feeder import check_variable_and_dtype -from ...framework import ( - ParamAttr, - _global_flags, - _non_static_mode, - get_default_dtype, - no_grad, -) +from ...framework import ParamAttr, _global_flags, get_default_dtype, no_grad from .. import Layer from .. 
import functional as F from ..functional import batch_norm, instance_norm, layer_norm @@ -492,20 +486,6 @@ class GroupNorm(Layer): dtype=input.dtype, stop_gradient=True ) - if _in_legacy_dygraph(): - pre_act, _, _ = _legacy_C_ops.group_norm( - input, - self.weight, - self.bias, - mean_out, - variance_out, - 'epsilon', - self._epsilon, - 'groups', - self._num_groups, - ) - return pre_act - inputs = {'X': input} if self.bias is not None: inputs['Bias'] = self.bias @@ -1005,121 +985,86 @@ class BatchNorm(Layer): self._trainable_statistics = trainable_statistics def forward(self, input): - # create output - # mean and mean_out share the same memory - mean_out = self._mean - # variance and variance out share the same memory - variance_out = self._variance - - if _non_static_mode(): - if in_dygraph_mode(): - batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( - input, - self._mean, - self._variance, - self.weight, - self.bias, - not self.training, - self._momentum, - self._epsilon, - self._data_layout, - self._use_global_stats, - self._trainable_statistics, - ) - return dygraph_utils._append_activation_in_dygraph( - batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn - ) - - elif _in_legacy_dygraph(): - attrs = ( - "momentum", - self._momentum, - "epsilon", - self._epsilon, - "is_test", - not self.training, - "data_layout", - self._data_layout, - "use_mkldnn", - self._use_mkldnn, - "fuse_with_relu", - self._fuse_with_relu, - "use_global_stats", - self._use_global_stats, - 'trainable_statistics', - self._trainable_statistics, - ) - batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm( - input, - self.weight, - self.bias, - self._mean, - self._variance, - None, - mean_out, - variance_out, - *attrs - ) - + if in_dygraph_mode(): + batch_norm_out, t1, t2, t3, t4, _ = _C_ops.batch_norm( + input, + self._mean, + self._variance, + self.weight, + self.bias, + not self.training, + self._momentum, + self._epsilon, + self._data_layout, + self._use_global_stats, + self._trainable_statistics, + ) return dygraph_utils._append_activation_in_dygraph( batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn ) + else: + # create output + # mean and mean_out share the same memory + mean_out = self._mean + # variance and variance out share the same memory + variance_out = self._variance + check_variable_and_dtype( + input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' + ) - check_variable_and_dtype( - input, 'input', ['float16', 'float32', 'float64'], 'BatchNorm' - ) - - attrs = { - "momentum": self._momentum, - "epsilon": self._epsilon, - "is_test": self._is_test, - "data_layout": self._data_layout, - "use_mkldnn": False, - "fuse_with_relu": self._fuse_with_relu, - "use_global_stats": self._use_global_stats, - "trainable_statistics": self._trainable_statistics, - } - - inputs = { - "X": [input], - "Scale": [self.weight], - "Bias": [self.bias], - "Mean": [self._mean], - "Variance": [self._variance], - } - - saved_mean = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - saved_variance = self._helper.create_variable_for_type_inference( - dtype=self._dtype, stop_gradient=True - ) - reserve_space = self._helper.create_variable_for_type_inference( - dtype=self._helper.input_dtype(input), stop_gradient=True - ) - - batch_norm_out = ( - input - if self._in_place - else self._helper.create_variable_for_type_inference(self._dtype) - ) + attrs = { + "momentum": self._momentum, + "epsilon": self._epsilon, + "is_test": self._is_test, + "data_layout": 
self._data_layout, + "use_mkldnn": False, + "fuse_with_relu": self._fuse_with_relu, + "use_global_stats": self._use_global_stats, + "trainable_statistics": self._trainable_statistics, + } + + inputs = { + "X": [input], + "Scale": [self.weight], + "Bias": [self.bias], + "Mean": [self._mean], + "Variance": [self._variance], + } + + saved_mean = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + saved_variance = self._helper.create_variable_for_type_inference( + dtype=self._dtype, stop_gradient=True + ) + reserve_space = self._helper.create_variable_for_type_inference( + dtype=self._helper.input_dtype(input), stop_gradient=True + ) - outputs = { - "Y": [batch_norm_out], - "MeanOut": [mean_out], - "VarianceOut": [variance_out], - "SavedMean": [saved_mean], - "SavedVariance": [saved_variance], - } - if reserve_space is not None: - outputs["ReserveSpace"] = [reserve_space] + batch_norm_out = ( + input + if self._in_place + else self._helper.create_variable_for_type_inference( + self._dtype + ) + ) - self._helper.append_op( - type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs - ) + outputs = { + "Y": [batch_norm_out], + "MeanOut": [mean_out], + "VarianceOut": [variance_out], + "SavedMean": [saved_mean], + "SavedVariance": [saved_variance], + } + if reserve_space is not None: + outputs["ReserveSpace"] = [reserve_space] + + self._helper.append_op( + type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs + ) - # Currently, we don't support inplace in dygraph mode - return self._helper.append_activation(batch_norm_out, self._act) + # Currently, we don't support inplace in dygraph mode + return self._helper.append_activation(batch_norm_out, self._act) class BatchNorm1D(_BatchNormBase): -- GitLab
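
Reviewer note on the dispatch pattern: every function touched above is rewritten the same way. The _in_legacy_dygraph() / _non_static_mode() branch that called _legacy_C_ops is deleted, the in_dygraph_mode() fast path calling _C_ops stays first, and the static-graph path (check_variable_and_dtype plus LayerHelper.append_op) moves into an else: block, so behaviour is expected to be unchanged on both remaining paths. The snippet below is a minimal sanity check of that expectation for one of the affected APIs, F.celu. It is not part of the patch; it assumes a Paddle 2.x install with numpy and the static-graph API available, and the input values and alpha are illustrative only.

# Hedged sanity check (not part of the patch): exercise both branches that survive
# the refactor for F.celu. Assumes paddle and numpy are installed; the input
# values and alpha=0.2 are chosen only for illustration.
import numpy as np
import paddle
import paddle.nn.functional as F

# Dynamic-graph path: hits the in_dygraph_mode() branch, i.e. _C_ops.celu.
x_np = np.array([[-1.0, 6.0], [1.0, 15.6]], dtype='float32')
dygraph_out = F.celu(paddle.to_tensor(x_np), alpha=0.2).numpy()

# Static-graph path: hits the else: branch, i.e. check_variable_and_dtype +
# LayerHelper.append_op appending a 'celu' op to the program.
paddle.enable_static()
main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[2, 2], dtype='float32')
    y = F.celu(x, alpha=0.2)
exe = paddle.static.Executor(paddle.CPUPlace())
(static_out,) = exe.run(main_prog, feed={'x': x_np}, fetch_list=[y])
paddle.disable_static()

# Both code paths should produce the same values after the refactor.
np.testing.assert_allclose(dygraph_out, static_out, rtol=1e-6)
print("celu dygraph/static outputs match:", dygraph_out.tolist())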