Unverified commit 69e51c77, authored by 姜永久, committed by GitHub

rm legacy nn part2 (#49259)

* rm legacy nn part2

* rm _non_static_mode

* modify

* modify unpool test

* modify unpool test

* modify loss

* keep legacy for layer_norm
Parent e34e634a
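This PR collapses the old three-way kernel dispatch in paddle/nn/functional (new eager `_C_ops`, legacy dygraph `_legacy_C_ops`, static graph) into a two-way dispatch on `in_dygraph_mode()`, as the diff below shows for `gather_tree`, `one_hot`, `embedding`, the loss functions, and the pooling ops. Below is a minimal, self-contained sketch of that control-flow change only; the helper functions are placeholder stand-ins for Paddle internals, not real Paddle APIs, and only the branching structure mirrors the diff.

# Sketch of the dispatch pattern removed vs. kept by this PR.
# The three helpers are placeholders, not real Paddle calls.

def run_eager_kernel(x):
    return "new eager path (_C_ops)"

def run_legacy_dygraph_kernel(x):
    return "legacy dygraph path (_legacy_C_ops)"

def run_static_graph_path(x):
    return "static-graph path (LayerHelper + append_op)"

def op_before(x, dygraph=True, legacy_dygraph=False):
    # Old pattern: three-way dispatch with an _in_legacy_dygraph() branch.
    if dygraph:
        return run_eager_kernel(x)
    if legacy_dygraph:
        return run_legacy_dygraph_kernel(x)
    return run_static_graph_path(x)

def op_after(x, dygraph=True):
    # Pattern kept by this PR: in_dygraph_mode() or static graph, nothing else.
    if dygraph:
        return run_eager_kernel(x)
    return run_static_graph_path(x)

print(op_before(None, dygraph=False, legacy_dygraph=True))  # legacy dygraph path
print(op_after(None, dygraph=False))                        # static-graph path

Per the commit messages above, the one exception is layer_norm, where the legacy path is kept.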
......@@ -414,6 +414,7 @@ class TestUnpoolOpAPI_st(unittest.TestCase):
pool_out_np, indices_np, [2, 2], [2, 2], [0, 0], [5, 5]
).astype("float64")
np.testing.assert_allclose(results[0], expect_res, rtol=1e-05)
paddle.disable_static()
class TestOutputSizeTensor(UnittestBase):
......
......@@ -23,11 +23,7 @@ from ...fluid.data_feeder import (
check_type,
check_variable_and_dtype,
)
from ...fluid.framework import (
_in_legacy_dygraph,
_non_static_mode,
in_dygraph_mode,
)
from ...fluid.framework import in_dygraph_mode
from ...fluid.layer_helper import LayerHelper
from ...framework import convert_np_dtype_to_dtype_, core
from ...static import Variable
......@@ -325,14 +321,9 @@ def gather_tree(ids, parents):
if in_dygraph_mode():
return _C_ops.gather_tree(ids, parents)
else:
if _in_legacy_dygraph():
return _legacy_C_ops.gather_tree(ids, parents)
else:
helper = LayerHelper('gather_tree', **locals())
check_variable_and_dtype(
ids, 'ids', ['int32', 'int64'], 'gather_tree'
)
check_variable_and_dtype(ids, 'ids', ['int32', 'int64'], 'gather_tree')
check_variable_and_dtype(
parents, 'parents', ['int32', 'int64'], 'gather_tree'
)
......@@ -385,19 +376,11 @@ def temporal_shift(x, seg_num, shift_ratio=0.25, name=None, data_format="NCHW"):
)
if in_dygraph_mode():
return _C_ops.temporal_shift(x, seg_num, shift_ratio, data_format)
if _non_static_mode():
return _legacy_C_ops.temporal_shift(
x,
'seg_num',
seg_num,
'shift_ratio',
shift_ratio,
'data_format',
data_format,
)
else:
helper = LayerHelper("temporal_shift", **locals())
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'temporal_shift')
check_variable_and_dtype(
x, 'x', ['float32', 'float64'], 'temporal_shift'
)
check_type(seg_num, 'seg_num', int, 'temporal_shift')
check_type(shift_ratio, 'shift_ratio', float, 'temporal_shift')
......
......@@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import _C_ops, _legacy_C_ops
from paddle import _C_ops
from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from ...fluid.framework import in_dygraph_mode
from ...fluid.layer_helper import LayerHelper
from ...static import Variable
......@@ -88,19 +88,10 @@ def one_hot(x, num_classes, name=None):
if in_dygraph_mode():
return _C_ops.one_hot(x, num_classes)
else:
if _in_legacy_dygraph():
return _legacy_C_ops.one_hot_v2(
x, 'depth', num_classes, 'allow_out_of_range', False
)
else:
check_variable_and_dtype(
x, 'input', ['int32', 'int64'], 'one_hot_v2'
)
check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'one_hot_v2')
helper = LayerHelper("one_hot_v2", **locals())
one_hot_out = helper.create_variable_for_type_inference(
dtype='float32'
)
one_hot_out = helper.create_variable_for_type_inference(dtype='float32')
if not isinstance(num_classes, Variable):
# user attribute
inputs = {'X': x}
......@@ -212,19 +203,6 @@ def embedding(x, weight, padding_idx=None, sparse=False, name=None):
if in_dygraph_mode():
return _C_ops.embedding(x, weight, padding_idx, sparse)
elif _in_legacy_dygraph():
return _legacy_C_ops.lookup_table_v2(
weight,
x,
'is_sparse',
sparse,
'is_distributed',
False,
'remote_prefetch',
False,
'padding_idx',
padding_idx,
)
else:
helper = LayerHelper('embedding', **locals())
dtype = helper.input_dtype(input_param_name='weight')
......
......@@ -21,15 +21,8 @@ from paddle.framework import core
from paddle.utils import deprecated
from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.framework import (
_current_expected_place,
_in_legacy_dygraph,
_non_static_mode,
_varbase_creator,
in_dygraph_mode,
)
from ...fluid.framework import _current_expected_place, in_dygraph_mode
from ...fluid.layer_helper import LayerHelper
from ...fluid.layers.nn import _elementwise_op_in_dygraph
from ...static import Variable
from ...tensor.manipulation import reshape
......@@ -260,7 +253,7 @@ def fluid_softmax_with_cross_entropy(
# Tensor(shape=[1], dtype=float32, place=Place(gpu:0), stop_gradient=True,
# [1.15328646])
"""
if _non_static_mode():
if in_dygraph_mode():
if core.is_compiled_with_npu():
softmax, backprop, loss = _legacy_C_ops.softmax_with_cross_entropy(
logits,
......@@ -275,7 +268,6 @@ def fluid_softmax_with_cross_entropy(
axis,
)
else:
if in_dygraph_mode():
softmax, loss = _C_ops.cross_entropy_with_softmax(
logits,
label,
......@@ -285,24 +277,11 @@ def fluid_softmax_with_cross_entropy(
ignore_index,
axis,
)
if _in_legacy_dygraph():
softmax, loss = _legacy_C_ops.softmax_with_cross_entropy(
logits,
label,
'soft_label',
soft_label,
'ignore_index',
ignore_index,
'numeric_stable_mode',
numeric_stable_mode,
'axis',
axis,
)
if not return_softmax:
return loss
else:
return loss, softmax
else:
attrs = {
'soft_label': soft_label,
'ignore_index': ignore_index,
......@@ -315,7 +294,9 @@ def fluid_softmax_with_cross_entropy(
outputs = {'Softmax': softmax, 'Loss': loss}
if core.is_compiled_with_npu() or core.is_compiled_with_mlu():
backprop = helper.create_variable_for_type_inference(dtype=logits.dtype)
backprop = helper.create_variable_for_type_inference(
dtype=logits.dtype
)
outputs['Backprop'] = backprop
helper.append_op(
type='softmax_with_cross_entropy',
......@@ -441,11 +422,7 @@ def square_error_cost(input, label):
minus_out = _C_ops.subtract(input, label)
square_out = _C_ops.square(minus_out)
return square_out
elif _in_legacy_dygraph():
minus_out = _legacy_C_ops.elementwise_sub(input, label)
square_out = _legacy_C_ops.square(minus_out)
return square_out
else:
check_variable_and_dtype(
input, "input", ['float32', 'float64'], 'square_error_cost'
)
......@@ -460,9 +437,13 @@ def square_error_cost(input, label):
outputs={'Out': [minus_out]},
)
square_out = helper.create_variable_for_type_inference(dtype=input.dtype)
square_out = helper.create_variable_for_type_inference(
dtype=input.dtype
)
helper.append_op(
type='square', inputs={'X': [minus_out]}, outputs={'Out': [square_out]}
type='square',
inputs={'X': [minus_out]},
outputs={'Out': [square_out]},
)
return square_out
......@@ -674,19 +655,6 @@ def binary_cross_entropy(
return _C_ops.mean_all(out)
else:
return out
else:
if _in_legacy_dygraph():
out = _legacy_C_ops.bce_loss(input, label)
if weight is not None:
out = _legacy_C_ops.elementwise_mul(out, weight, 'axis', -1)
if reduction == 'sum':
return _legacy_C_ops.reduce_sum(
out, 'dim', [0], 'keep_dim', False, "reduce_all", True
)
elif reduction == 'mean':
return _legacy_C_ops.mean(out)
else:
return out
else:
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'binary_cross_entropy'
......@@ -833,40 +801,7 @@ def binary_cross_entropy_with_logits(
return _C_ops.mean_all(out)
else:
return out
elif _in_legacy_dygraph():
one = _varbase_creator(dtype=logit.dtype)
_legacy_C_ops.fill_constant(
one,
'value',
float(1.0),
'force_cpu',
False,
'dtype',
one.dtype,
'str_value',
'1.0',
'shape',
[1],
)
out = _legacy_C_ops.sigmoid_cross_entropy_with_logits(logit, label)
if pos_weight is not None:
log_weight = _legacy_C_ops.elementwise_add(
_legacy_C_ops.elementwise_mul(
label, _legacy_C_ops.elementwise_sub(pos_weight, one)
),
one,
)
out = _legacy_C_ops.elementwise_mul(out, log_weight)
if weight is not None:
out = _legacy_C_ops.elementwise_mul(out, weight)
if reduction == "sum":
return _legacy_C_ops.reduce_sum(out, 'reduce_all', True)
elif reduction == "mean":
return _legacy_C_ops.mean(out)
else:
return out
check_variable_and_dtype(
logit,
'logit',
......@@ -1031,22 +966,7 @@ def hsigmoid_loss(
is_sparse,
)
return out
elif _in_legacy_dygraph():
out, _, _ = _legacy_C_ops.hierarchical_sigmoid(
input,
weight,
label,
path_table,
path_code,
bias,
'num_classes',
num_classes,
'is_sparse',
is_sparse,
'remote_prefetch',
is_sparse,
)
return out
else:
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'hsigmoid_loss'
......@@ -1089,7 +1009,10 @@ def hsigmoid_loss(
outputs = {"Out": out, "PreOut": pre_out, "W_Out": weight}
helper.append_op(
type="hierarchical_sigmoid", inputs=inputs, outputs=outputs, attrs=attrs
type="hierarchical_sigmoid",
inputs=inputs,
outputs=outputs,
attrs=attrs,
)
return out
......@@ -1246,19 +1169,7 @@ def margin_ranking_loss(
elif reduction == 'mean':
return _C_ops.mean_all(out)
return out
elif _in_legacy_dygraph():
out = _legacy_C_ops.elementwise_sub(other, input)
out = _legacy_C_ops.elementwise_mul(out, label)
if margin != 0.0:
margin = fluid.dygraph.base.to_variable([margin], dtype=out.dtype)
out = _legacy_C_ops.elementwise_add(out, margin)
out = _legacy_C_ops.relu(out)
if reduction == 'sum':
return _legacy_C_ops.reduce_sum(out, 'reduce_all', True)
elif reduction == 'mean':
return _legacy_C_ops.mean(out)
return out
else:
helper = LayerHelper("margin_ranking_loss", **locals())
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'margin_rank_loss'
......@@ -1276,7 +1187,9 @@ def margin_ranking_loss(
if margin != 0.0:
margin_var = out.block.create_var(dtype=out.dtype)
margin_var = paddle.full(shape=[1], fill_value=margin, dtype=out.dtype)
margin_var = paddle.full(
shape=[1], fill_value=margin, dtype=out.dtype
)
out = paddle.add(out, margin_var)
result_out = helper.create_variable_for_type_inference(input.dtype)
......@@ -1384,19 +1297,7 @@ def l1_loss(input, label, reduction='mean', name=None):
return _C_ops.sum(unreduced, [], None, False)
else:
return unreduced
elif _in_legacy_dygraph():
unreduced = _elementwise_op_in_dygraph(
input, label, axis=-1, act='abs', op_name='elementwise_sub'
)
if reduction == 'mean':
return _legacy_C_ops.mean(unreduced)
elif reduction == 'sum':
return _legacy_C_ops.reduce_sum(
unreduced, 'dim', [0], 'keep_dim', False, 'reduce_all', True
)
else:
return unreduced
check_variable_and_dtype(
input, 'input', ['float32', 'float64', 'int32', 'int64'], 'l1_loss'
)
......@@ -1488,27 +1389,7 @@ def nll_loss(
if input_dims != 2 and input_dims != 4 and reduction == 'none':
out = _C_ops.reshape(out, out_shape)
return out
elif _in_legacy_dygraph():
if input_dims != 2 and input_dims != 4:
input, _ = _legacy_C_ops.reshape2(
input, None, 'shape', [n, c, 1, -1]
)
label, _ = _legacy_C_ops.reshape2(label, None, 'shape', [n, 1, -1])
out_shape = [n] + input_shape[2:]
out, total_weight = _legacy_C_ops.nll_loss(
input,
label,
weight,
'ignore_index',
ignore_index,
'reduction',
reduction,
)
if input_dims != 2 and input_dims != 4 and reduction == 'none':
out, _ = _legacy_C_ops.reshape2(out, None, 'shape', out_shape)
return out
else:
helper = LayerHelper('nll_loss', **locals())
if input_dims != 2 and input_dims != 4:
......@@ -1516,7 +1397,9 @@ def nll_loss(
label = reshape(label, shape=[n, 1, -1])
out_shape = [n] + input_shape[2:]
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'nll_loss')
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'nll_loss'
)
check_variable_and_dtype(label, 'label', ['int64'], 'nll_loss')
inputs = {'X': input, 'Label': label}
attrs = {'reduction': reduction, 'ignore_index': ignore_index}
......@@ -1525,7 +1408,9 @@ def nll_loss(
inputs['Weight'] = weight
out = helper.create_variable_for_type_inference(dtype=input.dtype)
total_weight = helper.create_variable_for_type_inference(dtype=input.dtype)
total_weight = helper.create_variable_for_type_inference(
dtype=input.dtype
)
outputs = {'Out': out, 'Total_weight': total_weight}
helper.append_op(
......@@ -1624,22 +1509,15 @@ def kl_div(input, label, reduction='mean', name=None):
batch_size = input.shape[0]
out = paddle.sum(out) / batch_size
return out
elif _in_legacy_dygraph():
out = _legacy_C_ops.kldiv_loss(input, label, 'reduction', 'none')
if reduction == 'mean':
out = paddle.mean(out)
elif reduction == 'sum':
out = paddle.sum(out)
elif reduction == 'batchmean':
if len(input.shape) > 0:
batch_size = input.shape[0]
out = paddle.sum(out) / batch_size
return out
else:
helper = LayerHelper('kl_div', **locals())
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'kl_div')
check_variable_and_dtype(label, 'label', ['float32', 'float64'], 'kl_div')
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'kl_div'
)
check_variable_and_dtype(
label, 'label', ['float32', 'float64'], 'kl_div'
)
fluid.data_feeder.check_type(reduction, 'reduction', str, 'kl_div')
loss = helper.create_variable_for_type_inference(dtype=input.dtype)
......@@ -1836,22 +1714,7 @@ def ctc_loss(
input, label, input_length, label_length, blank, norm_by_times
)
return loss_out
if _non_static_mode():
if input_length is None or label_length is None:
raise ValueError(
"input_length and label_length must not be None in dygraph mode!"
)
grad, loss_out = _legacy_C_ops.warpctc(
input,
label,
input_length,
label_length,
'blank',
blank,
'norm_by_times',
norm_by_times,
)
return loss_out
else:
helper = LayerHelper('warpctc', **locals())
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], "warpctc"
......@@ -1868,8 +1731,12 @@ def ctc_loss(
this_inputs['LogitsLength'] = [input_length]
this_inputs['LabelLength'] = [label_length]
loss_out = helper.create_variable_for_type_inference(dtype=input.dtype)
grad_out = helper.create_variable_for_type_inference(dtype=input.dtype)
loss_out = helper.create_variable_for_type_inference(
dtype=input.dtype
)
grad_out = helper.create_variable_for_type_inference(
dtype=input.dtype
)
helper.append_op(
type='warpctc',
......@@ -2274,36 +2141,7 @@ def margin_cross_entropy(
return loss
else:
return loss, softmax
elif _in_legacy_dygraph():
softmax, loss = _legacy_C_ops.margin_cross_entropy(
logits,
label,
'ring_id',
ring_id,
'rank',
rank,
'nranks',
nranks,
'margin1',
margin1,
'margin2',
margin2,
'margin3',
margin3,
'scale',
scale,
'return_softmax',
return_softmax,
)
if reduction == 'mean':
loss = paddle.mean(loss)
elif reduction == 'sum':
loss = paddle.sum(loss)
if not return_softmax:
return loss
else:
return loss, softmax
op_type = 'margin_cross_entropy'
helper = LayerHelper(op_type, **locals())
softmax = helper.create_variable_for_type_inference(dtype=logits.dtype)
......@@ -2863,180 +2701,7 @@ def cross_entropy(
out = paddle.squeeze(out, axis=axis)
return out
elif _in_legacy_dygraph():
if not soft_label:
valid_label = (
paddle.cast(label != ignore_index, dtype=label.dtype) * label
)
label_min = paddle.min(valid_label)
label_max = paddle.max(valid_label)
if label_min < 0:
raise ValueError(
"Target {} is out of lower bound.".format(label_min.item())
)
if label_max >= input.shape[axis]:
raise ValueError(
"Target {} is out of upper bound.".format(label_max.item())
)
if core.is_compiled_with_npu() or core.is_compiled_with_mlu():
if not soft_label:
_, _, out = _legacy_C_ops.softmax_with_cross_entropy(
input,
valid_label,
'soft_label',
soft_label,
'ignore_index',
ignore_index,
'numeric_stable_mode',
True,
'axis',
axis,
'use_softmax',
use_softmax,
)
else:
_, _, out = _legacy_C_ops.softmax_with_cross_entropy(
input,
label,
'soft_label',
soft_label,
'ignore_index',
ignore_index,
'numeric_stable_mode',
True,
'axis',
axis,
'use_softmax',
use_softmax,
)
else:
_, out = _legacy_C_ops.softmax_with_cross_entropy(
input,
label,
'soft_label',
soft_label,
'ignore_index',
ignore_index,
'numeric_stable_mode',
True,
'axis',
axis,
'use_softmax',
use_softmax,
)
if weight is not None:
# trans weight from class to sample, shape:N or [N,H,W] for 1d and 2d cases.
if soft_label:
# chajchaj:
# weight's shape is C, where C is class num.
# for 1d case: label's shape is [N,C], weight_gather's shape is N.
# for 2d case: label's shape is [N,H,W,C], weight_gather's shape is [N,H,W].
weight_gather = paddle.matmul(
x=paddle.cast(label, weight.dtype),
y=weight,
transpose_x=False,
transpose_y=True,
)
out_shape = list(out.shape)
weight_gather_reshape = reshape(weight_gather, shape=out_shape)
out = paddle.cast(out, weight_gather_reshape.dtype)
out = _legacy_C_ops.elementwise_mul(out, weight_gather_reshape)
else:
if input.shape[axis] != weight.shape[-1]:
raise ValueError(
"input's class_dimension({}) must equal to "
"weight's class_dimension({}) "
"when weight is provided".format(
input.shape[axis], weight.shape[-1]
)
)
ignore_weight_mask = paddle.cast(
(label != ignore_index), out.dtype
)
if (
ignore_weight_mask.ndim > 1
and ignore_weight_mask.shape[axis] == 1
):
# TODO: Temporarily use squeeze instead of squeeze_
ignore_weight_mask = paddle.squeeze(
ignore_weight_mask, axis
)
if axis != -1 and axis != valid_label.ndim - 1:
temp_perm = (
list(range(axis % valid_label.ndim))
+ list(
range(
(axis % valid_label.ndim + 1), valid_label.ndim
)
)
+ [axis % valid_label.ndim]
)
weight_gather = _legacy_C_ops.gather_nd(
weight, valid_label.transpose(temp_perm)
)
else:
weight_gather = _legacy_C_ops.gather_nd(weight, valid_label)
weight_gather = _legacy_C_ops.elementwise_mul(
weight_gather, ignore_weight_mask
)
input_shape = list(label.shape)
weight_gather_reshape = reshape(
weight_gather, shape=input_shape
)
out = paddle.cast(out, weight_gather_reshape.dtype)
out = _legacy_C_ops.elementwise_mul(out, weight_gather_reshape)
if reduction == "sum":
# because of fluid_softmax_with_cross_entropy op's inner logic,
# in the out tensor of this op, the loss of sample with class_index==ignore_index is 0
# so, reduce_sum all directly is ok
return _legacy_C_ops.reduce_sum(out, 'reduce_all', True)
elif reduction == "mean":
# 1. if weight==none,
# numerator: reduce_sum all loss directly is ok causeof fluid_softmax_with_cross_entropy's inner logic
# denominator: count sample num with class_index!=ignore_index
# 2. else
# numerator: loss's weighted sum
# denominator: cal the sum of weight where the sample's class_index!=ignore_index
is_ignore = label == ignore_index
mask = ~is_ignore
if paddle.count_nonzero(is_ignore) > 0: # ignore label
out_sum = _legacy_C_ops.reduce_sum(out, 'reduce_all', True)
# for each label[i],set 1 or 0, according to ignore_index
# mask[i]=0, if label[i]==ignore_index
# mask[i]=1, otherwise
if weight is None:
mask = paddle.cast(mask, dtype=out_sum.dtype)
count = _legacy_C_ops.reduce_sum(mask, 'reduce_all', True)
ret = out_sum / (count + (count == 0.0))
else:
mask = paddle.cast(mask, weight_gather_reshape.dtype)
weight_ignored = _legacy_C_ops.elementwise_mul(
mask, weight_gather_reshape
)
weight_sum = _legacy_C_ops.reduce_sum(
weight_ignored, 'reduce_all', True
)
ret = out_sum / (weight_sum + (weight_sum == 0.0))
return ret
elif weight is not None:
out_sum = _legacy_C_ops.reduce_sum(out, 'reduce_all', True)
total_weight = _legacy_C_ops.reduce_sum(
weight_gather_reshape, 'reduce_all', True
)
return out_sum / (total_weight + (total_weight == 0.0))
else:
return _legacy_C_ops.mean(out)
else:
if input_dims - 1 == label_dims:
out = paddle.squeeze(out, axis=axis)
return out
check_variable_and_dtype(
input,
'input',
......@@ -3062,7 +2727,9 @@ def cross_entropy(
outputs = {'Softmax': softmax, 'Loss': out}
if core.is_compiled_with_npu() or core.is_compiled_with_mlu():
backprop = helper.create_variable_for_type_inference(dtype=input.dtype)
backprop = helper.create_variable_for_type_inference(
dtype=input.dtype
)
outputs['Backprop'] = backprop
helper.append_op(
type='softmax_with_cross_entropy',
......@@ -3073,7 +2740,10 @@ def cross_entropy(
if weight is not None:
check_variable_and_dtype(
weight, 'weight', ['float32', 'float64'], 'softmax_cross_entropy'
weight,
'weight',
['float32', 'float64'],
'softmax_cross_entropy',
)
weight_name = name if reduction == 'none' else None
if soft_label:
......@@ -3112,12 +2782,16 @@ def cross_entropy(
ignore_weight_mask.ndim > 1
and ignore_weight_mask.shape[axis] == 1
):
ignore_weight_mask = paddle.squeeze(ignore_weight_mask, axis)
ignore_weight_mask = paddle.squeeze(
ignore_weight_mask, axis
)
if axis != -1 and axis != valid_label.ndim - 1:
temp_perm = (
list(range(axis % valid_label.ndim))
+ list(
range((axis % valid_label.ndim + 1), valid_label.ndim)
range(
(axis % valid_label.ndim + 1), valid_label.ndim
)
)
+ [axis % valid_label.ndim]
)
......@@ -3126,10 +2800,14 @@ def cross_entropy(
)
else:
weight_gather = paddle.gather_nd(weight, valid_label)
weight_gather = paddle.multiply(weight_gather, ignore_weight_mask)
weight_gather = paddle.multiply(
weight_gather, ignore_weight_mask
)
input_shape = list(label.shape)
weight_gather_reshape = reshape(weight_gather, shape=input_shape)
weight_gather_reshape = reshape(
weight_gather, shape=input_shape
)
out = paddle.multiply(out, weight_gather_reshape, name=weight_name)
if reduction == "sum":
......@@ -3147,7 +2825,9 @@ def cross_entropy(
ret = out_sum / (count + (count == 0.0))
else:
mask = paddle.cast(mask, weight_gather_reshape.dtype)
weight_ignored = paddle.multiply(mask, weight_gather_reshape)
weight_ignored = paddle.multiply(
mask, weight_gather_reshape
)
weight_sum = paddle.sum(weight_ignored, name=name)
ret = out_sum / (weight_sum + (weight_sum == 0.0))
return ret
......@@ -3306,59 +2986,7 @@ def sigmoid_focal_loss(
return loss
elif _in_legacy_dygraph():
one = _varbase_creator(dtype=logit.dtype)
_legacy_C_ops.fill_constant(
one,
'value',
float(1.0),
'force_cpu',
False,
'dtype',
one.dtype,
'str_value',
'1.0',
'shape',
logit.shape,
)
loss = _legacy_C_ops.sigmoid_cross_entropy_with_logits(logit, label)
pred = _legacy_C_ops.sigmoid(logit)
p_t = _legacy_C_ops.elementwise_add(
_legacy_C_ops.elementwise_mul(pred, label),
_legacy_C_ops.elementwise_mul(
_legacy_C_ops.elementwise_sub(one, pred),
_legacy_C_ops.elementwise_sub(one, label),
),
)
alpha = fluid.dygraph.base.to_variable([alpha], dtype=loss.dtype)
alpha_t = _legacy_C_ops.elementwise_add(
_legacy_C_ops.elementwise_mul(alpha, label),
_legacy_C_ops.elementwise_mul(
_legacy_C_ops.elementwise_sub(one, alpha),
_legacy_C_ops.elementwise_sub(one, label),
),
)
loss = _legacy_C_ops.elementwise_mul(alpha_t, loss)
gamma = fluid.dygraph.base.to_variable([gamma], dtype=loss.dtype)
gamma_t = _legacy_C_ops.elementwise_pow(
_legacy_C_ops.elementwise_sub(one, p_t), gamma
)
loss = _legacy_C_ops.elementwise_mul(gamma_t, loss)
if normalizer is not None:
loss = _legacy_C_ops.elementwise_div(loss, normalizer)
if reduction == "sum":
return _legacy_C_ops.reduce_sum(loss, 'reduce_all', True)
elif reduction == "mean":
return _legacy_C_ops.mean(loss)
return loss
else:
check_variable_and_dtype(
logit, 'logit', ['float32', 'float64'], 'sigmoid_focal_loss'
)
......@@ -3463,7 +3091,7 @@ def multi_label_soft_margin_loss(
"but received {}!={}".format(input.shape, label.shape)
)
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
input,
'input',
......@@ -3483,7 +3111,7 @@ def multi_label_soft_margin_loss(
)
if weight is not None:
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
weight,
'weight',
......@@ -3582,7 +3210,7 @@ def hinge_embedding_loss(input, label, margin=1.0, reduction='mean', name=None):
"but received {}.".format(reduction)
)
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'hinge_embedding_loss'
)
......@@ -3807,7 +3435,7 @@ def triplet_margin_with_distance_loss(
raise ValueError(
"The margin between positive samples and negative samples should be greater than 0."
)
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
input,
'input',
......@@ -3956,7 +3584,7 @@ def triplet_margin_loss(
raise ValueError(
"The margin between positive samples and negative samples should be greater than 0."
)
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'triplet_margin_loss'
)
......@@ -4066,7 +3694,7 @@ def multi_margin_loss(
"but received {}.".format(reduction)
)
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'multi_margin_loss'
)
......@@ -4083,7 +3711,7 @@ def multi_margin_loss(
label = label.reshape((-1, 1))
index_sample = paddle.index_sample(input, label)
if weight is not None:
if not _non_static_mode():
if not in_dygraph_mode():
check_variable_and_dtype(
weight, 'weight', ['float32', 'float64'], 'multi_margin_loss'
)
......@@ -4187,7 +3815,7 @@ def soft_margin_loss(input, label, reduction='mean', name=None):
% reduction
)
if not _non_static_mode():
if not in_dygraph_mode():
fluid.data_feeder.check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'soft_margin_loss'
)
......
......@@ -83,21 +83,7 @@ def normalize(x, p=2, axis=1, epsilon=1e-12, name=None):
out = _C_ops.p_norm(x, float(p), axis, epsilon, True, False)
return x / _C_ops.maximum(out, eps)
if _in_legacy_dygraph():
eps = fluid.dygraph.base.to_variable([epsilon], dtype=x.dtype)
out = _legacy_C_ops.p_norm(
x,
'axis',
axis,
'porder',
float(p),
'keepdim',
True,
'epsilon',
epsilon,
)
return x / _legacy_C_ops.elementwise_max(out, eps)
else:
check_type(p, 'p', (float, int), 'normalize')
check_type(axis, 'axis', (int), 'normalize')
check_variable_and_dtype(
......@@ -229,43 +215,7 @@ def batch_norm(
batch_norm_out, act=None
)
elif _in_legacy_dygraph():
# for dygraph need tuple
attrs = (
"momentum",
momentum,
"epsilon",
epsilon,
"is_test",
not training,
"data_layout",
data_format,
"use_mkldnn",
False,
"fuse_with_relu",
False,
"use_global_stats",
use_global_stats,
"trainable_statistics",
trainable_statistics,
)
batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm(
x,
weight,
bias,
running_mean,
running_var,
None,
mean_out,
variance_out,
*attrs
)
return dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=None
)
else:
check_variable_and_dtype(
x, 'input', ['float16', 'float32', 'float64'], 'BatchNorm'
)
......@@ -483,23 +433,16 @@ def instance_norm(
if in_dygraph_mode():
out = _C_ops.instance_norm(x, weight, bias, eps)
return out
if _in_legacy_dygraph():
out, _, _ = _legacy_C_ops.instance_norm(
x,
weight,
bias,
"epsilon",
eps,
"momentum",
momentum,
"data_format",
data_format,
else:
check_variable_and_dtype(
x, 'input', ['float32', 'float64'], "InstanceNorm"
)
return out
check_variable_and_dtype(x, 'input', ['float32', 'float64'], "InstanceNorm")
attrs = {"epsilon": eps, "momentum": momentum, "data_format": data_format}
attrs = {
"epsilon": eps,
"momentum": momentum,
"data_format": data_format,
}
if weight and bias:
inputs = {"X": [x], "Scale": [weight], "Bias": [bias]}
......
......@@ -13,12 +13,7 @@
# limitations under the License.
from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode
from paddle.fluid.framework import (
Variable,
_in_legacy_dygraph,
_non_static_mode,
in_dygraph_mode,
)
from paddle.fluid.framework import Variable, in_dygraph_mode
from ...fluid.data_feeder import check_type, check_variable_and_dtype
......@@ -266,34 +261,7 @@ def avg_pool1d(
)
return squeeze(output, [2])
if _in_legacy_dygraph():
output = _legacy_C_ops.pool2d(
x,
'pooling_type',
'avg',
'ksize',
kernel_size,
'global_pooling',
False,
'strides',
stride,
'paddings',
padding,
'padding_algorithm',
padding_algorithm,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
exclusive,
'data_format',
data_format,
)
return squeeze(output, [2])
else:
op_type = 'pool2d'
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype(input_param_name='x')
......@@ -397,7 +365,6 @@ def avg_pool2d(
padding, 2, channel_last, ceil_mode=ceil_mode
)
if _non_static_mode():
if in_dygraph_mode():
output = _C_ops.pool2d(
x,
......@@ -412,38 +379,12 @@ def avg_pool2d(
False,
padding_algorithm,
)
else:
output = _legacy_C_ops.pool2d(
x,
'pooling_type',
'avg',
'ksize',
kernel_size,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'strides',
stride,
'paddings',
padding,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
exclusive,
'data_format',
data_format,
)
if divisor_override is None:
return output
else:
_check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1]) / divisor_override
else:
op_type = 'pool2d'
helper = LayerHelper(op_type, **locals())
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d')
......@@ -473,7 +414,9 @@ def avg_pool2d(
return pool_out
else:
_check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
return (
pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
)
def avg_pool3d(
......@@ -565,32 +508,6 @@ def avg_pool3d(
False,
padding_algorithm,
)
elif _in_legacy_dygraph():
pool_out = _legacy_C_ops.pool3d(
x,
'pooling_type',
'avg',
'ksize',
kernel_size,
'strides',
stride,
'paddings',
padding,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
exclusive,
'data_format',
data_format,
)
else:
op_type = "pool3d"
helper = LayerHelper(op_type, **locals())
......@@ -723,64 +640,7 @@ def max_pool1d(
)
return squeeze(pool_out, [2])
if _in_legacy_dygraph():
if return_mask:
pool_out = _legacy_C_ops.max_pool2d_with_index(
x,
'ksize',
kernel_size,
'global_pooling',
False,
'strides',
stride,
'paddings',
padding,
'padding_algorithm',
padding_algorithm,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return (
(squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
if return_mask
else squeeze(pool_out[0], [2])
)
else:
pool_out = _legacy_C_ops.pool2d(
x,
'pooling_type',
'max',
'ksize',
kernel_size,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'strides',
stride,
'paddings',
padding,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return squeeze(pool_out, [2])
op_type = 'max_pool2d_with_index' if return_mask else "pool2d"
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype(input_param_name='x')
......@@ -831,7 +691,7 @@ def _unpool_output_size(x, kernel_size, stride, padding, output_size):
if output_size is None:
return default_size
elif utils._contain_var(output_size):
if not _non_static_mode():
if not in_dygraph_mode():
has_static_var = True
output_size = utils._convert_to_tensor_list(output_size)
else:
......@@ -1366,60 +1226,7 @@ def max_pool2d(
padding_algorithm,
)
if _in_legacy_dygraph():
if return_mask:
output = _legacy_C_ops.max_pool2d_with_index(
x,
'ksize',
kernel_size,
'global_pooling',
False,
'strides',
stride,
'paddings',
padding,
'padding_algorithm',
padding_algorithm,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return output if return_mask else output[0]
else:
output = _legacy_C_ops.pool2d(
x,
'pooling_type',
'max',
'ksize',
kernel_size,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'strides',
stride,
'paddings',
padding,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return output
op_type = 'max_pool2d_with_index' if return_mask else "pool2d"
helper = LayerHelper(op_type, **locals())
check_variable_and_dtype(
......@@ -1580,62 +1387,7 @@ def max_pool3d(
padding_algorithm,
)
if _in_legacy_dygraph():
if return_mask:
output = _legacy_C_ops.max_pool3d_with_index(
x,
'pooling_type',
'max',
'ksize',
kernel_size,
'strides',
stride,
'paddings',
padding,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return output if return_mask else output[0]
else:
output = _legacy_C_ops.pool3d(
x,
'pooling_type',
'max',
'ksize',
kernel_size,
'global_pooling',
False,
'padding_algorithm',
padding_algorithm,
'strides',
stride,
'paddings',
padding,
'use_cudnn',
True,
'ceil_mode',
ceil_mode,
'use_mkldnn',
False,
'exclusive',
True,
'data_format',
data_format,
)
return output
op_type = "max_pool3d_with_index" if return_mask else "pool3d"
helper = LayerHelper(op_type, **locals())
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d')
......@@ -1729,12 +1481,7 @@ def adaptive_avg_pool1d(x, output_size, name=None):
"EXPLICIT",
)
return squeeze(pool_out, [2])
if _in_legacy_dygraph():
pool_out = _legacy_C_ops.pool2d(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True
)
return squeeze(pool_out, [2])
else:
l_type = "pool2d"
helper = LayerHelper(l_type, **locals())
......@@ -1841,7 +1588,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
if output_size[1] is None:
output_size[1] = in_w
if _non_static_mode():
if in_dygraph_mode():
output_size = [
item.numpy().item(0) if isinstance(item, Variable) else item
for item in output_size
......@@ -1866,21 +1613,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
"EXPLICIT",
)
if _in_legacy_dygraph():
return _legacy_C_ops.pool2d(
x,
'pooling_type',
'avg',
'ksize',
output_size,
'global_pooling',
False,
'adaptive',
True,
'data_format',
data_format,
)
else:
l_type = 'pool2d'
helper = LayerHelper(l_type, **locals())
......@@ -2010,21 +1743,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
True,
"EXPLICIT",
)
elif _in_legacy_dygraph():
return _legacy_C_ops.pool3d(
x,
'pooling_type',
'avg',
'ksize',
output_size,
'global_pooling',
False,
'adaptive',
True,
'data_format',
data_format,
)
else:
l_type = 'pool3d'
helper = LayerHelper(l_type, **locals())
......@@ -2112,16 +1831,7 @@ def adaptive_max_pool1d(x, output_size, return_mask=False, name=None):
if return_mask
else squeeze(pool_out[0], [2])
)
if _in_legacy_dygraph():
pool_out = _legacy_C_ops.max_pool2d_with_index(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True
)
return (
(squeeze(pool_out[0], [2]), squeeze(pool_out[1], [2]))
if return_mask
else squeeze(pool_out[0], [2])
)
else:
l_type = 'max_pool2d_with_index'
helper = LayerHelper(l_type, **locals())
......@@ -2211,12 +1921,7 @@ def adaptive_max_pool2d(x, output_size, return_mask=False, name=None):
x, output_size, [1, 1], [0, 0], False, True
)
return pool_out if return_mask else pool_out[0]
if _in_legacy_dygraph():
pool_out = _legacy_C_ops.max_pool2d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True
)
return pool_out if return_mask else pool_out[0]
else:
l_type = 'max_pool2d_with_index'
helper = LayerHelper(l_type, **locals())
......@@ -2304,18 +2009,13 @@ def adaptive_max_pool3d(x, output_size, return_mask=False, name=None):
if output_size[2] is None:
output_size[2] = in_w
if in_dynamic_mode():
if in_dygraph_mode():
# By default, strides is [1,1,1] and paddings is [0, 0, 0]
pool_out = _C_ops.max_pool3d_with_index(
x, output_size, [1, 1, 1], [0, 0, 0], False, True
)
elif _in_legacy_dygraph():
pool_out = _legacy_C_ops.max_pool3d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True
)
return pool_out if return_mask else pool_out[0]
else:
l_type = 'max_pool3d_with_index'
helper = LayerHelper(l_type, **locals())
......
......@@ -13,8 +13,7 @@
# limitations under the License.
from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode
from paddle.fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from paddle.framework import _non_static_mode
from paddle.fluid.framework import in_dygraph_mode
from ...device import get_cudnn_version, is_compiled_with_rocm
from ...fluid.data_feeder import check_variable_and_dtype
......@@ -381,20 +380,20 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
)
if in_dygraph_mode():
return _C_ops.pixel_shuffle(x, upscale_factor, data_format)
if _in_legacy_dygraph():
return _legacy_C_ops.pixel_shuffle(
x, "upscale_factor", upscale_factor, "data_format", data_format
)
else:
helper = LayerHelper("pixel_shuffle", **locals())
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'pixel_shuffle')
check_variable_and_dtype(
x, 'x', ['float32', 'float64'], 'pixel_shuffle'
)
out = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(
type="pixel_shuffle",
inputs={"X": x},
outputs={"Out": out},
attrs={"upscale_factor": upscale_factor, "data_format": data_format},
attrs={
"upscale_factor": upscale_factor,
"data_format": data_format,
},
)
return out
......@@ -442,7 +441,7 @@ def pixel_unshuffle(x, downscale_factor, data_format="NCHW", name=None):
"But recevie Attr(data_format): {} ".format(data_format)
)
if _non_static_mode():
if in_dygraph_mode():
return _legacy_C_ops.pixel_unshuffle(
x, "downscale_factor", downscale_factor, "data_format", data_format
)
......@@ -516,7 +515,7 @@ def channel_shuffle(x, groups, data_format="NCHW", name=None):
"But recevie Attr(data_format): {} ".format(data_format)
)
if _non_static_mode():
if in_dygraph_mode():
return _legacy_C_ops.channel_shuffle(
x, "groups", groups, "data_format", data_format
)
......