Unverified · Commit 27f49254 authored by: Y yuehuayingxueluo, committed by: GitHub

clear fluid apis in loss.py v_1 (#48132)

* clear fluid apis: center_loss, bpr_loss, edit_distance, hsigmoid, sampled_softmax_with_cross_entropy, rank_loss, margin_rank_loss,  sigmoid_cross_entropy_with_logits, huber_loss

* fix python/paddle/fluid/layers/loss.py

* fix test_layers.py

* fix CI bug

* fix nn.py
Parent cbdc86b5
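For callers migrating off the removed fluid APIs, here is a hedged sketch of the paddle 2.x replacements that this diff itself switches the tests to (margin_ranking_loss and HSigmoidLoss); the shapes and values are illustrative only:

import paddle
import paddle.nn.functional as F

paddle.disable_static()
left = paddle.rand([16, 1])
right = paddle.rand([16, 1])
label = paddle.ones([16, 1])

# fluid.layers.margin_rank_loss(label, left, right) becomes:
out = F.margin_ranking_loss(left, right, label, margin=0.1, reduction='none')

# fluid.layers.hsigmoid(input, label, num_classes) becomes paddle.nn.HSigmoidLoss:
feat = paddle.rand([4, 8])
y = paddle.randint(0, 6, [4, 1])          # int64 labels of shape [N, 1]
hs = paddle.nn.HSigmoidLoss(feature_size=8, num_classes=6)
cost = hs(feat, y)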
......@@ -23,7 +23,6 @@ from .initializer import Constant
from .layers import detection
__all__ = [
'EditDistance',
'DetectionMAP',
]
......@@ -126,95 +125,6 @@ class Evaluator:
return state
class EditDistance(Evaluator):
"""
Warning: This API will be deprecated in the future. Please use fluid.metrics.EditDistance
instead.
Accumulate edit distance sum and sequence number from mini-batches and
compute the average edit_distance and instance error of all batches.
Args:
input: the sequences predicted by the network.
label: the target sequences, which must have the same sequence count
as input.
ignored_tokens(list of int): Tokens that should be removed before
calculating edit distance.
Examples:
.. code-block:: python
exe = fluid.Executor(place)
distance_evaluator = fluid.Evaluator.EditDistance(input, label)
for epoch in range(PASS_NUM):
distance_evaluator.reset(exe)
for data in batches:
loss = exe.run(fetch_list=[cost])
distance, instance_error = distance_evaluator.eval(exe)
In the above example:
'distance' is the average of the edit distance in a pass.
'instance_error' is the instance error rate in a pass.
"""
def __init__(self, input, label, ignored_tokens=None, **kwargs):
super().__init__("edit_distance", **kwargs)
main_program = self.helper.main_program
if main_program.current_block().idx != 0:
raise ValueError("You can only invoke Evaluator in root block")
self.total_distance = self._create_state(
dtype='float32', shape=[1], suffix='total_distance'
)
self.seq_num = self._create_state(
dtype='int64', shape=[1], suffix='seq_num'
)
self.instance_error = self._create_state(
dtype='int64', shape=[1], suffix='instance_error'
)
distances, seq_num = layers.edit_distance(
input=input, label=label, ignored_tokens=ignored_tokens
)
zero = layers.fill_constant(shape=[1], value=0.0, dtype='float32')
compare_result = layers.equal(distances, zero)
compare_result_int = layers.cast(x=compare_result, dtype='int64')
seq_right_count = layers.reduce_sum(compare_result_int)
instance_error_count = layers.elementwise_sub(
x=seq_num, y=seq_right_count
)
total_distance = layers.reduce_sum(distances)
layers.sums(
input=[self.total_distance, total_distance], out=self.total_distance
)
layers.sums(input=[self.seq_num, seq_num], out=self.seq_num)
layers.sums(
input=[self.instance_error, instance_error_count],
out=self.instance_error,
)
self.metrics.append(total_distance)
self.metrics.append(instance_error_count)
def eval(self, executor, eval_program=None):
if eval_program is None:
eval_program = Program()
block = eval_program.current_block()
with program_guard(main_program=eval_program):
total_distance = _clone_var_(block, self.total_distance)
seq_num = _clone_var_(block, self.seq_num)
instance_error = _clone_var_(block, self.instance_error)
seq_num = layers.cast(x=seq_num, dtype='float32')
instance_error = layers.cast(x=instance_error, dtype='float32')
avg_distance = layers.elementwise_div(x=total_distance, y=seq_num)
avg_instance_error = layers.elementwise_div(
x=instance_error, y=seq_num
)
result = executor.run(
eval_program, fetch_list=[avg_distance, avg_instance_error]
)
return np.array(result[0]), np.array(result[1])
class DetectionMAP(Evaluator):
"""
Warning: This API will be deprecated in the future. Please use fluid.metrics.DetectionMAP
......
......@@ -35,21 +35,12 @@ import warnings
from paddle import _C_ops, _legacy_C_ops
__all__ = [
'center_loss',
'bpr_loss',
'cross_entropy',
'square_error_cost',
'edit_distance',
'warpctc',
'nce',
'hsigmoid',
'sampled_softmax_with_cross_entropy',
'softmax_with_cross_entropy',
'rank_loss',
'margin_rank_loss',
'sigmoid_cross_entropy_with_logits',
'teacher_student_sigmoid_loss',
'huber_loss',
'kldiv_loss',
'npair_loss',
'mse_loss',
......@@ -58,159 +49,6 @@ __all__ = [
kIgnoreIndex = -100
def center_loss(
input, label, num_classes, alpha, param_attr, update_center=True
):
r"""
:api_attr: Static Graph
**Center loss Cost layer**
This OP accepts input (deep features, the output of the last hidden layer)
and target label, and returns the center loss cost. The average of the
distances of each sample in the mini-batch from the center of the
corresponding category is calculated as the center loss.
For deep features :math:`X` with target labels :math:`Y` and class centers :math:`C`, the loss of sample :math:`i` is:
.. math::
Out_i = \frac{1}{2}\left\|X_i - C_{Y_i}\right\|_2^2
Args:
input (Variable): a 2-D tensor with shape [N x M]. Its dtype should be float32 or float64.
label (Variable): the ground truth which is a 2-D tensor
with shape [N x 1], where N is the batch size. Its dtype should be int32 or int64.
num_classes (int): the number of classification categories.
alpha (float|Variable): learning rate of centers.
param_attr (ParamAttr): Attribute initializer of centers.
update_center (bool): whether to update value of center.
Returns:
Variable: 2-D tensor with shape [N x 1]
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
input = fluid.data(name='x',shape=[20,30],dtype='float32')
label = fluid.data(name='y',shape=[20,1],dtype='int64')
num_classes = 1000
alpha = 0.01
param_attr = fluid.initializer.Xavier(uniform=False)
center_loss = fluid.layers.center_loss(input=input,
label=label,
num_classes=num_classes,
alpha=alpha,
param_attr=param_attr,
update_center=True)
"""
helper = LayerHelper('center_loss', **locals())
dtype = helper.input_dtype()
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'center_loss'
)
check_variable_and_dtype(label, 'label', ['int32', 'int64'], 'center_loss')
centers_shape = [num_classes, input.shape[1]]
centers_param = helper.create_parameter(
attr=param_attr, shape=centers_shape, dtype=dtype
)
centers_param.stop_gradient = True
if isinstance(alpha, Variable):
alpha_param = alpha
check_variable_and_dtype(
alpha, 'alpha', ['float32', 'float64'], 'center_loss'
)
else:
assert isinstance(alpha, float)
alpha_param = helper.create_variable(
name="centerloss_alpha",
shape=[1],
dtype="float32",
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=True,
stop_gradient=True,
initializer=Constant(alpha),
)
centersdiff = helper.create_variable_for_type_inference(dtype=input.dtype)
loss = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
type='center_loss',
inputs={
'X': [input],
'Label': [label],
'Centers': [centers_param],
'CenterUpdateRate': [alpha_param],
},
outputs={
'SampleCenterDiff': [centersdiff],
'Loss': [loss],
'CentersOut': [centers_param],
},
attrs={'cluster_num': num_classes, 'need_update': update_center},
)
return loss
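For intuition, a minimal NumPy sketch of the computation described above: the squared-Euclidean distance of each sample to its class center, plus the standard center-update rule. This is an illustration under those assumptions, not the C++ kernel:

import numpy as np

def center_loss_np(x, label, centers, alpha, update_center=True):
    idx = label.flatten()
    diff = x - centers[idx]                                   # [N, M] sample-to-center diffs
    loss = 0.5 * np.sum(diff * diff, axis=1, keepdims=True)   # [N, 1]
    if update_center:
        for c in np.unique(idx):
            # move each class center toward the mean of its samples
            centers[c] += alpha * diff[idx == c].mean(axis=0)
    return loss

x = np.random.randn(20, 30).astype('float32')
label = np.random.randint(0, 1000, (20, 1))
centers = np.zeros((1000, 30), dtype='float32')
print(center_loss_np(x, label, centers, alpha=0.01).shape)    # (20, 1)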
def bpr_loss(input, label, name=None):
r"""
**Bayesian Personalized Ranking Loss Operator**
This operator belongs to the family of pairwise ranking losses. `label` is the desired item.
The loss at a given point in one session is defined as:
.. math::
Y[i] = -\frac{1}{N[i] - 1} \sum_{j \neq Label[i]}{\log(\sigma(X[i, Label[i]] - X[i, j]))}
For more details, please refer to the paper `Session-based Recommendations with Recurrent
Neural Networks <https://arxiv.org/abs/1511.06939>`_.
Args:
input (Variable|list): a 2-D tensor with shape [N x D], where N is the
batch size and D is the total number of positive and negative classes.
This input holds logits, not probabilities.
label (Variable|list): the ground truth which is a 2-D tensor. `label`
is a tensor<int64> with shape [N x 1].
name (str|None): A name for this layer (optional). If set to None, the
layer will be named automatically. Default: None.
Returns:
A 2-D tensor with shape [N x 1], the bpr loss.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
neg_size = 10
label = fluid.data(
name="label", shape=[3, 1], dtype="int64")
predict = fluid.data(
name="predict", shape=[3, neg_size + 1], dtype="float32")
cost = fluid.layers.bpr_loss(input=predict, label=label)
"""
helper = LayerHelper('bpr_loss', **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype)
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'bpr_loss'
)
helper.append_op(
type='bpr_loss',
inputs={'X': [input], 'Label': [label]},
outputs={'Y': [out]},
)
return out
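A NumPy sketch of the per-session formula above, averaging -log(sigmoid(pos - neg)) over the negatives; this is illustrative only, not the bpr_loss kernel:

import numpy as np

def bpr_loss_np(x, label):
    n = x.shape[0]
    loss = np.empty((n, 1), dtype=x.dtype)
    for i in range(n):
        pos = x[i, label[i, 0]]                      # logit of the desired item
        neg = np.delete(x[i], label[i, 0])           # the remaining D - 1 logits
        sig = 1.0 / (1.0 + np.exp(-(pos - neg)))
        loss[i, 0] = -np.mean(np.log(sig))
    return loss

x = np.random.randn(3, 11).astype('float32')
label = np.random.randint(0, 11, (3, 1))
print(bpr_loss_np(x, label).shape)                   # (3, 1)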
def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
r"""
:alias_main: paddle.nn.functional.cross_entropy
......@@ -347,86 +185,6 @@ def square_error_cost(input, label):
return paddle.nn.functional.square_error_cost(input, label)
def edit_distance(
input,
label,
normalized=True,
ignored_tokens=None,
input_length=None,
label_length=None,
):
"""
This op computes the edit distance, also called Levenshtein distance, between a batch of
hypothesis strings and their references. It measures how dissimilar two strings are by counting
the minimum number of operations required to transform one string into the other.
The operations include insertion, deletion, and substitution.
For example, given hypothesis string A = "kitten" and reference
B = "sitting", A can be transformed into B
after at least two substitutions and one insertion:
"kitten" -> "sitten" -> "sittin" -> "sitting"
So the edit distance between A and B is 3.
The inputs are tensors; `input_length` and `label_length` can be supplied to specify the valid length of each sequence.
The `batch_size` of `label` should be the same as that of `input`.
The output includes the edit distance between every pair of input and label, and the number of sequences.
If Attr(normalized) is true,
the edit distance will be divided by the length of the label.
Parameters:
input(Tensor): The input tensor, its rank should be equal to 2 and its data type should be int64.
label(Tensor): The label tensor, its rank should be equal to 2 and its data type should be int64.
normalized(bool, default True): Indicates whether to normalize the edit distance.
ignored_tokens(list<int>, default None): Tokens that will be removed before
calculating the edit distance.
input_length(Tensor): The length for each sequence in `input`; if provided, it should have shape `(batch_size, )` and its data type should be int64.
label_length(Tensor): The length for each sequence in `label`; if provided, it should have shape `(batch_size, )` and its data type should be int64.
NOTE: To avoid unexpected results, the value of every element in input_length and label_length should be equal to the value of the second dimension of input and label. For example, given the input [[1,2,3,4],[5,6,7,8],[9,10,11,12]] of shape [3,4], the input_length should be [4,4,4].
NOTE: This API is different from fluid.metrics.EditDistance.
Returns:
Tuple:
distance(Tensor): edit distance result, its data type is float32, and its shape is (batch_size, 1).
sequence_num(Tensor): sequence number, its data type is float32, and its shape is (1,).
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
input = paddle.to_tensor([[1,2,3],[4,5,6],[4,4,4],[1,1,1]], dtype='int64')
label = paddle.to_tensor([[1,3,4,1],[4,5,8,1],[7,7,7,1],[1,1,1,1]], dtype='int64')
input_len = paddle.to_tensor([3,3,3,3], dtype='int64')
label_len = paddle.to_tensor([4,4,4,4], dtype='int64')
distance, sequence_num = F.loss.edit_distance(input=input, label=label, input_length=input_len, label_length=label_len, normalized=False)
# print(distance)
# [[3.]
# [2.]
# [4.]
# [1.]]
# if normalized is set to True
# [[0.75]
# [0.5 ]
# [1.  ]
# [0.25]]
#
# print(sequence_num)
# [4]
"""
return paddle.nn.functional.loss.edit_distance(
input, label, normalized, ignored_tokens, input_length, label_length
)
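For intuition about the distance itself, a pure-Python dynamic-programming sketch of the Levenshtein computation described above (the op runs on batched tensors; this is only the scalar recurrence):

def levenshtein(a, b):
    m, n = len(a), len(b)
    dp = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(m + 1):
        dp[i][0] = i                     # i deletions
    for j in range(n + 1):
        dp[0][j] = j                     # j insertions
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,         # deletion
                           dp[i][j - 1] + 1,         # insertion
                           dp[i - 1][j - 1] + cost)  # substitution
    return dp[m][n]

print(levenshtein("kitten", "sitting"))  # 3, matching the example above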
def warpctc(
input,
label,
......@@ -837,363 +595,6 @@ def nce(
return cost / (num_neg_samples + 1)
def hsigmoid(
input,
label,
num_classes,
param_attr=None,
bias_attr=None,
name=None,
path_table=None,
path_code=None,
is_custom=False,
is_sparse=False,
):
"""
:api_attr: Static Graph
The hierarchical sigmoid organizes the classes into a complete binary tree to reduce the computational complexity
and speed up model training, especially the training of language models.
Each leaf node of the complete binary tree represents a class (word) and each non-leaf node acts as a binary classifier.
For each class (word), there is a unique path from the root to the class itself; hsigmoid calculates the cost for each non-leaf node on
the path and sums them to get the total cost.
Compared to softmax, this OP can reduce the computational complexity from :math:`O(N)` to :math:`O(logN)`, where :math:`N`
represents the number of classes or the size of the word dict.
The OP supports a default tree and a custom tree. For the default tree, you can refer to `Hierarchical Probabilistic Neural
Network Language Model <http://www.iro.umontreal.ca/~lisa/pointeurs/hierarchical-nnlm-aistats05.pdf>`. For a custom
tree, you need to set :attr:`is_custom` to True and do the following steps (taking a language model as an example; a sketch of deriving these inputs follows this function):
1. Use a custom word dict to build a binary tree; each leaf node should be a word in the word dict.
2. Create a dict mapping word_id -> path from the word to the root node; we call it path_table.
3. Create a dict mapping word_id -> code of the path from the word to the root node; we call it path_code.
A code is the label of each binary classifier: 1 indicates true, 0 indicates false.
4. Now each word has its path and the code along the path; you can pass a batch of paths and codes related
to the same batch of inputs.
Parameters:
input (Variable): A tensor with the shape [N, D], where N is the size of mini-batch,
and D is the feature size. Its data type supports float32 and float64.
label (Variable): A tensor contains the labels of training data. Its shape is [N, 1]
and data type is int64.
num_classes (int): The number of classes or the size of the word dict, must be greater than 2.
If the default tree is used (:attr:`is_custom` is set to False), :attr:`num_classes`
should not be None. If the custom tree is used (:attr:`is_custom` is set to True),
:attr:`num_classes` should be the number of non-leaf nodes, which is the number of
classes handled by the binary classifiers.
param_attr (ParamAttr, optional): The parameter attribute for the learnable parameters/weights
of hsigmoid. If it is set to None or one attribute of ParamAttr, hsigmoid will create a
ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is
initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of hsigmoid. If it
is set to False, no bias will be added. If it is set to None or one attribute of ParamAttr,
hsigmoid will create a ParamAttr as bias_attr. If the Initializer of the bias_attr is not
set, the bias is initialized zero. Default: None.
name (str, optional): Normally there is no need for user to set this property. For more information,
please refer to :ref:`api_guide_Name`. Default: None.
path_table (Variable, optional): A tensor that stores each batch of samples' path from leaf to root
node, its shape is [N, L] and data type is int64, where L is the length of the path. For each sample i,
path_table[i] is an np.array-like structure and each element in this array is an index into the parent
nodes' weight matrix. Default: None.
path_code (Variable, optional): A tensor that stores each batch of samples' code of the path from leaf
to root node, its shape is [N, L] and data type is int64, the same as :attr:`path_table`.
Each path code consists of the codes of the nodes on the path from the leaf to the root. Default: None.
is_custom (bool, optional): Whether to use a custom binary tree. If True, :attr:`path_table`,
:attr:`path_code` and :attr:`num_classes` should be set; otherwise only :attr:`num_classes` should
be set. Default: False.
is_sparse (bool, optional): Whether to use sparse updating instead of dense updating. If True, the
gradients of W and input will be sparse. Default: False.
Returns:
Variable: A tensor with the cost of hierarchical sigmoid, its shape is [N, 1] and data type is the same as :attr:`input`.
Examples:
.. code-block:: python
import paddle.fluid as fluid
x = fluid.layers.fill_constant(shape=[4, 3], value=0.9, dtype='float32')
# x = [[0.9, 0.9, 0.9], [0.9, 0.9, 0.9], [0.9, 0.9, 0.9], [0.9, 0.9, 0.9]]
y = fluid.layers.fill_constant(
shape=[4, 1], value=1, dtype='int64')
# y = [[1], [1], [1], [1]]
out = fluid.layers.hsigmoid(input=x, label=y, num_classes=2, param_attr=fluid.initializer.Constant(
value=0.05), bias_attr=fluid.initializer.Constant(value=.0))
# out = [[0.62792355], [0.62792355], [0.62792355], [0.62792355]]
"""
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'hsigmoid')
check_variable_and_dtype(label, 'label', ['int64'], 'hsigmoid')
helper = LayerHelper('hierarchical_sigmoid', **locals())
dtype = helper.input_dtype()
out = helper.create_variable_for_type_inference(dtype)
pre_out = helper.create_variable_for_type_inference(dtype)
dim = input.shape[1]
if ((num_classes is None) or (num_classes < 2)) and (not is_custom):
raise ValueError(
"num_classes must not be less than 2 with default tree"
)
if (not is_custom) and (is_sparse):
print("Sparse mode should not be used without custom tree")
is_sparse = False
if (not is_custom) and (
(path_table is not None) or (path_code is not None)
):
raise ValueError(
"only num_classes should be passed without custom tree"
)
if (is_custom) and (path_code is None):
raise ValueError("path_code should not be None with custom tree")
elif (is_custom) and (path_table is None):
raise ValueError("path_table should not be None with custom tree")
elif (is_custom) and (num_classes is None):
raise ValueError("num_classes should not be None with custom tree")
else:
pass
weights = None
remote_prefetch = is_sparse
print(
"In sparse mode, prefetch may slow training down if your model has only small parameters"
)
if not is_custom:
weights = helper.create_parameter(
attr=helper.param_attr,
shape=[num_classes - 1, dim],
is_bias=False,
dtype=input.dtype,
)
else:
weights = helper.create_parameter(
attr=helper.param_attr,
shape=[num_classes, dim],
is_bias=False,
dtype=input.dtype,
)
inputs = {
"X": input,
"W": weights,
"PathTable": path_table,
"PathCode": path_code,
"Label": label,
}
if helper.bias_attr:
if not is_custom:
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=[num_classes - 1, 1],
is_bias=True,
dtype=input.dtype,
)
inputs['Bias'] = bias
else:
bias = helper.create_parameter(
attr=helper.bias_attr,
shape=[num_classes, 1],
is_bias=True,
dtype=input.dtype,
)
inputs['Bias'] = bias
helper.append_op(
type="hierarchical_sigmoid",
inputs=inputs,
outputs={"Out": out, "PreOut": pre_out, "W_Out": weights},
attrs={
"num_classes": num_classes,
"is_sparse": is_sparse,
"remote_prefetch": remote_prefetch,
},
)
return out
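As promised in the custom-tree steps above, a hedged sketch of deriving path_table and path_code for a tiny hand-built tree. The tree layout, the paths/codes dicts, and the node numbering are hypothetical illustrations, not a Paddle API:

import numpy as np

# A 4-word vocabulary under 3 non-leaf nodes (0 is the root):
#            0
#          /   \
#         1     2
#        / \   / \
#      w0  w1 w2  w3
paths = {0: [0, 1], 1: [0, 1], 2: [0, 2], 3: [0, 2]}   # non-leaf ids, root -> parent
codes = {0: [0, 0], 1: [0, 1], 2: [1, 0], 3: [1, 1]}   # left/right label at each hop

word_ids = np.array([2, 0, 3, 1])                      # a batch of labels
path_table = np.stack([paths[w] for w in word_ids]).astype('int64')  # [N, L]
path_code = np.stack([codes[w] for w in word_ids]).astype('int64')   # [N, L]
print(path_table.shape, path_code.shape)               # (4, 2) (4, 2)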
def sampled_softmax_with_cross_entropy(
logits,
label,
num_samples,
num_true=1,
remove_accidental_hits=True,
use_customized_samples=False,
customized_samples=None,
customized_probabilities=None,
seed=0,
):
"""
**Sampled Softmax With Cross Entropy Operator.**
Cross entropy loss with sampled softmax is widely used as the output layer
when the number of output classes is large. This operator draws a number of samples
for all examples, and computes the softmax-normalized values for each
row of the sampled tensor, after which the cross-entropy loss is computed.
Because this operator performs a softmax on logits internally, it expects
unscaled logits. This operator should not be used with the output of the
softmax operator, since that would produce incorrect results.
For examples with T true labels (T >= 1), we assume that each true label has
a probability of 1/T. For each sample, S samples are generated using a
log uniform distribution. True labels are concatenated with these samples to
form T + S samples for each example. So, assuming the shape of logits is
[N x K], the shape of samples is [N x (T+S)]. For each sampled label, a
probability is calculated, which corresponds to the Q(y|x) in
[Jean et al., 2014](http://arxiv.org/abs/1412.2007).
Logits are sampled according to the sampled labels. Then, if
remove_accidental_hits is True and a sample[i, j] accidentally hits a true
label, the corresponding sampled_logits[i, j] is reduced by 1e20 to
make its softmax result close to zero. The sampled logits then have
logQ(y|x) subtracted, and these sampled logits and the re-indexed labels are used to compute
softmax with cross entropy.
Args:
logits (Variable): The unscaled log probabilities, which is a 2-D tensor
with shape [N x K]. N is the batch_size, and K is the class number.
label (Variable): The ground truth which is a 2-D tensor. Label is a
Tensor<int64> with shape [N x T], where T is the number of true
labels per example.
num_samples (int): The number of samples for each example; num_samples should be
less than the number of classes.
num_true(int): The number of target classes per training example.
remove_accidental_hits (bool): A flag indicating whether to remove
accidental hits when sampling. If True and a sample[i, j]
accidentally hits a true label, the corresponding
sampled_logits[i, j] is reduced by 1e20 to make its softmax result
close to zero. Default is True.
use_customized_samples (bool): Whether to use custom samples and probabilities to sample
logits.
customized_samples (Variable): User defined samples, which is a 2-D tensor
with shape [N, T + S]. S is the num_samples, and T is the number of true
labels per example.
customized_probabilities (Variable): User defined probabilities of samples,
a 2-D tensor which has the same shape with customized_samples.
seed (int): The random seed for generating random number, which is used
in the process of sampling. Default is 0.
Returns:
Variable: Return the cross entropy loss which is a 2-D tensor with shape
[N x 1].
Examples:
.. code-block:: python
import paddle.fluid as fluid
input = fluid.layers.data(name='data', shape=[256], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
fc = fluid.layers.fc(input=input, size=100)
out = fluid.layers.sampled_softmax_with_cross_entropy(
logits=fc, label=label, num_samples=25)
"""
if _non_static_mode():
sample_logits_attrs = (
'use_customized_samples',
use_customized_samples,
'uniq',
True,
'remove_accidental_hits',
remove_accidental_hits,
'num_samples',
num_samples,
'seed',
seed,
)
(
_,
_,
_,
_,
sampled_logits_out,
sampled_label_out,
) = _legacy_C_ops.sample_logits(logits, label, *sample_logits_attrs)
depth = num_samples + 1
sampled_softlabel_out = _legacy_C_ops.one_hot(
sampled_label_out, 'depth', depth
)
softmax_with_cross_entropy_attrs = (
'soft_label',
True,
'numeric_stable_mode',
False,
)
_, loss = _legacy_C_ops.softmax_with_cross_entropy(
sampled_logits_out,
sampled_softlabel_out,
*softmax_with_cross_entropy_attrs
)
return loss / num_true
helper = LayerHelper('sample_logits', **locals())
samples = (
customized_samples
if use_customized_samples
else helper.create_variable_for_type_inference(dtype='int64')
)
probabilities = (
customized_probabilities
if use_customized_samples
else helper.create_variable_for_type_inference(dtype=logits.dtype)
)
sampled_logits = helper.create_variable_for_type_inference(
dtype=logits.dtype
)
sampled_label = helper.create_variable_for_type_inference(dtype='int64')
sampled_softlabel = helper.create_variable_for_type_inference(
dtype=logits.dtype
)
logits_dim = helper.create_variable_for_type_inference(dtype=logits.dtype)
labels_dim = helper.create_variable_for_type_inference(dtype=label.dtype)
helper.append_op(
type='sample_logits',
inputs={
'Logits': logits,
'Labels': label,
'CustomizedSamples': customized_samples,
'CustomizedProbabilities': customized_probabilities,
},
outputs={
'Samples': samples,
'Probabilities': probabilities,
'SampledLabels': sampled_label,
'SampledLogits': sampled_logits,
'LogitsDim': logits_dim,
'LabelsDim': labels_dim,
},
attrs={
'use_customized_samples': use_customized_samples,
'uniq': True,
'remove_accidental_hits': remove_accidental_hits,
'num_samples': num_samples,
'seed': seed,
},
)
loss = helper.create_variable_for_type_inference(dtype=logits.dtype)
softmax = helper.create_variable_for_type_inference(dtype=logits.dtype)
helper.append_op(
type='one_hot',
inputs={'X': sampled_label},
attrs={'depth': num_samples + 1},
outputs={'Out': sampled_softlabel},
)
helper.append_op(
type='softmax_with_cross_entropy',
inputs={'Logits': sampled_logits, 'Label': sampled_softlabel},
outputs={'Softmax': softmax, 'Loss': loss},
attrs={
'soft_label': True,
'ignore_index': False,
'numeric_stable_mode': False,
},
)
return loss / num_true
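A NumPy sketch of the two logit corrections described in the docstring: subtracting logQ(y|x) and masking accidental hits. The [true labels | samples] column layout and the Q values here are assumptions for illustration, not the op's kernel:

import numpy as np

def adjust_sampled_logits(sampled_logits, sampled_ids, num_true, log_q):
    # subtract log Q(y|x) for every sampled position
    out = sampled_logits - log_q
    n, cols = sampled_ids.shape
    for i in range(n):
        true_set = set(sampled_ids[i, :num_true])    # first T columns hold true labels
        for j in range(num_true, cols):
            if sampled_ids[i, j] in true_set:
                out[i, j] -= 1e20                    # accidental hit -> ~zero softmax
    return out

logits = np.random.randn(2, 6).astype('float32')
ids = np.array([[3, 9, 3, 7, 1, 4], [0, 2, 5, 0, 8, 6]])
q = np.full((2, 6), 0.1, dtype='float32')            # assumed sampling probabilities
print(adjust_sampled_logits(logits, ids, num_true=2, log_q=np.log(q)).shape)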
def softmax_with_cross_entropy(
logits,
label,
......@@ -1364,118 +765,6 @@ def identity_loss(x, reduction="none"):
return out
def rank_loss(label, left, right, name=None):
r"""
This operator implements the rank loss layer in the RankNet model. RankNet is a pairwise ranking model
with a training sample consisting of a pair of documents (A and B). The label (P)
indicates whether A is ranked higher than B or not. For more details, please refer to:
`RankNet <http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf>`_
Rank loss layer takes three inputs: left ( :math:`o_i` ), right ( :math:`o_j` ) and
label ( :math:`P_{i,j}` ). The inputs respectively represent RankNet's output scores
for documents A and B and the value of label P. Rank loss layer takes batch inputs
with size batch_size (batch_size >= 1), P = {0, 1} or {0, 0.5, 1},
where 0.5 means that there is no information about the rank of the input pair.
The following equation computes the rank loss C_{i,j} from the inputs:
.. math::
C_{i,j} = -\tilde{P}_{i,j} \, o_{i,j} + \log(1 + e^{o_{i,j}})
.. math::
o_{i,j} = o_i - o_j
.. math::
\tilde{P}_{i,j} \in \left\{0, 0.5, 1\right\} \text{ or } \left\{0, 1\right\}
Parameters:
label (Variable): 2-D ``Tensor`` with the shape of :math:`[batch,1]`, the data type is float32, batch indicates the size of the data. Indicates whether A is ranked higher than B or not.
left (Variable): 2-D ``Tensor`` with the shape of :math:`[batch,1]`, the data type is float32. RankNet's output score for doc A.
right (Variable): 2-D ``Tensor`` with the shape of :math:`[batch,1]`, the data type is float32. RankNet's output score for doc B.
name(str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
Returns:
Variable: ``Tensor`` indicating the output value of the sort loss layer, the data type is float32, and the return value's shape is :math:`[batch,1]` .
Raises:
ValueError: If any of label, left, or right is not a ``Variable``.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
label = fluid.data(name="label", shape=[-1, 1], dtype="float32")
left = fluid.data(name="left", shape=[-1, 1], dtype="float32")
right = fluid.data(name="right", shape=[-1, 1], dtype="float32")
out = fluid.layers.rank_loss(label, left, right)
"""
helper = LayerHelper('rank_loss', **locals())
check_variable_and_dtype(label, 'label', ['float32'], "rank_loss")
check_variable_and_dtype(left, 'left', ['float32'], "rank_loss")
check_variable_and_dtype(right, 'right', ['float32'], "rank_loss")
out = helper.create_variable_for_type_inference("float32")
helper.append_op(
type='rank_loss',
inputs={"Label": label, "Left": left, "Right": right},
outputs={'Out': out},
)
return out
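A NumPy sketch of the C_{i,j} equation above (illustrative only; np.log1p is used for the log(1 + e^o) term):

import numpy as np

def rank_loss_np(label, left, right):
    o = left - right                             # o_{i,j} = o_i - o_j
    return -label * o + np.log1p(np.exp(o))      # C_{i,j}

label = np.array([[1.0], [0.0], [0.5]], dtype='float32')
left = np.random.randn(3, 1).astype('float32')
right = np.random.randn(3, 1).astype('float32')
print(rank_loss_np(label, left, right).shape)    # (3, 1)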
def margin_rank_loss(label, left, right, margin=0.1, name=None):
r"""
Margin ranking loss layer for ranking problems,
which compares the left and right scores passed in.
The ranking loss is defined by the following equation:
.. math::
rank\_loss = max(0, -label * (left - right) + margin)
Args:
label (Variable): Indicates whether the left is ranked higher than the right or not.
Data type is float32.
left (Variable): Ranking score for left. Data type float32.
right (Variable): Ranking score for right. Data type float32.
margin (float): Indicates the given margin.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually there is no need to set it; it is None by default.
Returns:
Variable: The ranking loss.
Raises:
ValueError: If any of label, left, or right is not a Variable.
Examples:
.. code-block:: python
import paddle.fluid as fluid
label = fluid.data(name="label", shape=[-1, 1], dtype="float32")
left = fluid.data(name="left", shape=[-1, 1], dtype="float32")
right = fluid.data(name="right", shape=[-1, 1], dtype="float32")
out = fluid.layers.margin_rank_loss(label, left, right)
"""
helper = LayerHelper('margin_rank_loss', **locals())
check_variable_and_dtype(label, 'label', ['float32'], 'margin_rank_loss')
check_variable_and_dtype(left, 'left', ['float32'], 'margin_rank_loss')
check_variable_and_dtype(right, 'right', ['float32'], 'margin_rank_loss')
out = helper.create_variable_for_type_inference(left.dtype)
act = helper.create_variable_for_type_inference(left.dtype)
helper.append_op(
type='margin_rank_loss',
inputs={"Label": label, "X1": left, "X2": right},
outputs={'Out': out, 'Activated': act},
attrs={'margin': margin},
)
return out
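The hinge equation above in NumPy, for a quick check of the layer's semantics; the ±1 label convention is assumed, as in paddle.nn.functional.margin_ranking_loss:

import numpy as np

label = np.array([[1.0], [-1.0]], dtype='float32')      # +1: left should rank higher
left = np.array([[0.7], [0.2]], dtype='float32')
right = np.array([[0.3], [0.9]], dtype='float32')
loss = np.maximum(0.0, -label * (left - right) + 0.1)   # margin = 0.1
print(loss)  # [[0.], [0.]] -- both pairs already satisfy the margin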
@templatedoc()
def sigmoid_cross_entropy_with_logits(
x, label, ignore_index=kIgnoreIndex, name=None, normalize=False
......@@ -1539,144 +828,6 @@ def sigmoid_cross_entropy_with_logits(
return out
def teacher_student_sigmoid_loss(
input, label, soft_max_up_bound=15.0, soft_max_lower_bound=-15.0
):
"""
**Teacher Student Log Loss Layer**
This layer accepts input predictions and target labels and returns the
teacher-student loss. z indicates whether a click happened and z' is the teacher's value; the packed label lies in {-2, -1} or [0, 2]:
when z' does not exist and clk = 0: label = -2; when z' does not exist and clk = 1: label = -1;
when z' exists and clk = 0: label = 0 + z'; when z' exists and clk = 1: label = 1 + z'.
.. math::
loss = \max(x, 0) - x z + \log(1 + e^{-|x|}) + \max(x, 0) - x z' + \log(1 + e^{-|x|})
Args:
input (Variable|list): a 2-D tensor with shape [N x 1], where N is the
batch size. This input is a probability computed
by the previous operator.
label (Variable|list): the ground truth which is a 2-D tensor with
shape [N x 1], where N is the batch size.
soft_max_up_bound (float): if input > soft_max_up_bound, the input will be clipped to this bound.
soft_max_lower_bound (float): if input < soft_max_lower_bound, the input will be clipped to this bound.
Returns:
Variable: A 2-D tensor with shape [N x 1], the teacher_student_sigmoid_loss.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
batch_size = 64
label = fluid.data(
name="label", shape=[batch_size, 1], dtype="int64")
similarity = fluid.data(
name="similarity", shape=[batch_size, 1], dtype="float32")
cost = fluid.layers.teacher_student_sigmoid_loss(input=similarity, label=label)
"""
check_variable_and_dtype(
input,
"input",
['float32', 'float64', 'int32', 'int64'],
'teacher_student_sigmoid_loss',
)
check_variable_and_dtype(
label,
"label",
['float32', 'float64', 'int32', 'int64'],
'teacher_student_sigmoid_loss',
)
helper = LayerHelper('teacher_student_sigmoid_loss', **locals())
out = helper.create_variable(dtype=input.dtype)
helper.append_op(
type='teacher_student_sigmoid_loss',
inputs={'X': [input], 'Label': [label]},
outputs={'Y': [out]},
attrs={
"soft_max_lower_bound": float(soft_max_lower_bound),
"soft_max_up_bound": float(soft_max_up_bound),
},
)
return out
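A hedged NumPy sketch of the formula above: the loss is the sum of two numerically stable sigmoid cross-entropy terms, one against the click z and one against the teacher value z'. The decoding of the packed label into z and z' follows the description above and is an assumption here:

import numpy as np

def sigmoid_xent(x, z):
    # max(x, 0) - x*z + log(1 + exp(-|x|)), the stable form used in the formula
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

x = np.random.randn(4, 1).astype('float32')                  # raw predictions
z = np.array([[0.], [1.], [0.], [1.]], dtype='float32')      # click or not
z_teacher = np.array([[0.3], [0.8], [0.1], [0.9]], dtype='float32')  # assumed teacher values
loss = sigmoid_xent(x, z) + sigmoid_xent(x, z_teacher)
print(loss.shape)  # (4, 1)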
def huber_loss(input, label, delta):
r"""
This operator computes the Huber loss between input and label.
Huber loss is commonly used in regression tasks. Compared to square_error_cost, Huber loss is more robust and less sensitive to outliers.
When the absolute difference between input and label is greater than delta, the linear error is calculated:
.. math::
huber\_loss = delta * |label - input| - 0.5 * delta^2
When the absolute difference between input and label is no greater than delta, the square error is calculated:
.. math::
huber\_loss = 0.5 * (label - input)^2
Args:
input (Variable): Predicted data, 2D-Tensor with the shape of [batch_size, 1]. The data type should be float32.
label (Variable): Ground truth label, 2D-Tensor with the shape of [batch_size, 1]. The data type should be float32.
delta (float): The threshold for Huber loss, which is used to control the balance between the linear error and square error. The data type should be float32.
Returns:
Variable: The huber loss, a tensor with the same shape and data type as input.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
DATATYPE='float32'
input_data = np.array([[1.],[2.],[3.],[4.]]).astype(DATATYPE)
label_data = np.array([[3.],[3.],[4.],[4.]]).astype(DATATYPE)
x = fluid.data(name='input', shape=[None, 1], dtype=DATATYPE)
y = fluid.data(name='label', shape=[None, 1], dtype=DATATYPE)
loss = fluid.layers.huber_loss(input=x, label=y, delta=1.0)
place = fluid.CPUPlace()
#place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
HuberLoss, = exe.run(feed={'input':input_data ,'label':label_data}, fetch_list=[loss.name])
print(HuberLoss) #[[1.5], [0.5], [0.5], [0. ]], dtype=float32
"""
if in_dygraph_mode():
out, residual = _C_ops.huber_loss(input, label, delta)
return out
helper = LayerHelper('huber_loss', **locals())
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'huber_loss'
)
check_variable_and_dtype(
label, 'label', ['float32', 'float64'], 'huber_loss'
)
residual = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()
)
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
helper.append_op(
type='huber_loss',
inputs={'X': input, 'Y': label},
outputs={'Out': out, 'Residual': residual},
attrs={'delta': delta},
)
return out
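A NumPy reference for the piecewise definition above; it reproduces the expected output in the docstring example:

import numpy as np

def huber_loss_np(input, label, delta):
    r = np.abs(label - input)
    return np.where(r <= delta,
                    0.5 * r * r,                       # square branch, |r| <= delta
                    delta * r - 0.5 * delta * delta)   # linear branch, |r| > delta

x = np.array([[1.], [2.], [3.], [4.]], dtype='float32')
y = np.array([[3.], [3.], [4.], [4.]], dtype='float32')
print(huber_loss_np(x, y, 1.0))   # [[1.5] [0.5] [0.5] [0.]]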
@deprecated(since="2.0.0", update_to="paddle.nn.functional.kl_div")
@templatedoc()
def kldiv_loss(x, target, reduction='mean', name=None):
......
......@@ -702,7 +702,7 @@ class EditDistance(MetricBase):
"""
if self.seq_num == 0:
raise ValueError(
"There is no data in EditDistance Metric. Please check layers.edit_distance output has been added to EditDistance."
"There is no data in EditDistance Metric. Please check paddle.nn.functional.loss.edit_distance output has been added to EditDistance."
)
avg_distance = self.total_distance / self.seq_num
avg_instance_error = self.instance_error / float(self.seq_num)
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.static
from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest
class TestBase(IPUOpTest):
def setUp(self):
self.set_atol()
self.set_training()
self.set_data_feed()
self.set_feed_attr()
self.set_op_attrs()
def set_data_feed(self):
x = np.random.uniform(size=[3, 4, 2, 2])
target = np.random.uniform(size=[3, 4, 2, 2])
self.feed_fp32 = {
"x": x.astype(np.float32),
"target": target.astype(np.float32),
}
self.feed_fp16 = {
"x": x.astype(np.float16),
"target": target.astype(np.float16),
}
def set_feed_attr(self):
self.feed_shape = [x.shape for x in self.feed_fp32.values()]
self.feed_list = list(self.feed_fp32.keys())
def set_op_attrs(self):
self.attrs = {
'delta': 1.0,
}
@IPUOpTest.static_graph
def build_model(self, on_ipu):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32"
)
target = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
out = paddle.fluid.layers.huber_loss(x, target, **self.attrs)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
self.run_op_test(exec_mode)
def test(self):
for m in IPUOpTest.ExecutionMode:
if not self.skip_mode(m):
self.build_model(self.is_ipu_mode(m))
self.run_model(m)
self.check()
class TestCase1(TestBase):
def set_op_attrs(self):
self.attrs = {
'delta': 0.5,
}
class TestCase2(TestBase):
def set_op_attrs(self):
self.attrs = {
'delta': 0.0,
}
if __name__ == "__main__":
unittest.main()
......@@ -63,7 +63,7 @@ class TestBase(IPUOpTest):
right = paddle.static.data(
name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32'
)
out = paddle.fluid.layers.margin_rank_loss(label, left, right)
out = paddle.nn.functional.margin_ranking_loss(left, right, label)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.static
from paddle.fluid.tests.unittests.ipu.op_test_ipu import IPUOpTest
class TestBase(IPUOpTest):
def setUp(self):
self.set_atol()
self.set_training()
self.set_data_feed()
self.set_feed_attr()
def set_data_feed(self):
label = np.random.uniform(size=[3, 1])
left = np.random.uniform(size=[3, 1])
right = np.random.uniform(size=[3, 1])
self.feed_fp32 = {
"label": label.astype(np.float32),
"left": left.astype(np.float32),
"right": right.astype(np.float32),
}
self.feed_fp16 = {
"label": label.astype(np.float16),
"left": left.astype(np.float16),
"right": right.astype(np.float16),
}
def set_feed_attr(self):
self.feed_shape = [x.shape for x in self.feed_fp32.values()]
self.feed_list = list(self.feed_fp32.keys())
@IPUOpTest.static_graph
def build_model(self, on_ipu):
label = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype="float32"
)
left = paddle.static.data(
name=self.feed_list[1], shape=self.feed_shape[1], dtype='float32'
)
right = paddle.static.data(
name=self.feed_list[2], shape=self.feed_shape[2], dtype='float32'
)
out = paddle.fluid.layers.rank_loss(label, left, right)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
self.run_op_test(exec_mode)
def test(self):
for m in IPUOpTest.ExecutionMode:
if not self.skip_mode(m):
self.build_model(self.is_ipu_mode(m))
self.run_model(m)
self.check()
if __name__ == "__main__":
unittest.main()
......@@ -37,7 +37,6 @@ class TestHuberLossOp(OpTest):
def setUp(self):
self.op_type = 'huber_loss'
self.set_mlu()
self.python_api = paddle.fluid.layers.huber_loss
self.python_out_sig = ["Out"]
self.delta = 1.0
self.init_input()
......@@ -103,28 +102,5 @@ def TestHuberLossOp3(TestHuberLossOp):
return (6, 6, 1)
class TestHuberLossOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input and label must be Variable
xw = np.random.random((6, 6)).astype("float32")
xr = fluid.data(name='xr', shape=[None, 6], dtype="float32")
lw = np.random.random((6, 6)).astype("float32")
lr = fluid.data(name='lr', shape=[None, 6], dtype="float32")
delta = 1.0
self.assertRaises(TypeError, fluid.layers.huber_loss, xr, lw, delta)
self.assertRaises(TypeError, fluid.layers.huber_loss, xw, lr, delta)
# the dtype of input and label must be float32 or float64
xw2 = fluid.data(name='xw2', shape=[None, 6], dtype="int32")
lw2 = fluid.data(name='lw2', shape=[None, 6], dtype="int32")
self.assertRaises(
TypeError, fluid.layers.huber_loss, xw2, lr, delta
)
self.assertRaises(
TypeError, fluid.layers.huber_loss, xr, lw2, delta
)
if __name__ == '__main__':
unittest.main()
......@@ -126,27 +126,6 @@ def TestHuberLossOpFP16(TestHuberLossOp):
@unittest.skipIf(
not paddle.is_compiled_with_npu(), "core is not compiled with NPU"
)
class TestHuberLossOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input and label must be Variable
xw = np.random.random((6, 6)).astype("float32")
xr = fluid.data(name='xr', shape=[None, 6], dtype="float32")
lw = np.random.random((6, 6)).astype("float32")
lr = fluid.data(name='lr', shape=[None, 6], dtype="float32")
delta = 1.0
self.assertRaises(TypeError, fluid.layers.huber_loss, xr, lw, delta)
self.assertRaises(TypeError, fluid.layers.huber_loss, xw, lr, delta)
# the dtype of input and label must be float32 or float64
xw2 = fluid.data(name='xw2', shape=[None, 6], dtype="int32")
lw2 = fluid.data(name='lw2', shape=[None, 6], dtype="int32")
self.assertRaises(
TypeError, fluid.layers.huber_loss, xw2, lr, delta
)
self.assertRaises(
TypeError, fluid.layers.huber_loss, xr, lw2, delta
)
if __name__ == '__main__':
......
......@@ -15,7 +15,7 @@
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
import paddle
class TestCenterLossOp(OpTest):
......@@ -89,72 +89,6 @@ class TestCenterLossOpNoUpdate(TestCenterLossOp):
self.need_update = False
class BadInputTestCenterLoss(unittest.TestCase):
def test_error(self):
with fluid.program_guard(fluid.Program()):
def test_bad_x():
data = [[1, 2, 3, 4], [5, 6, 7, 8]]
label = fluid.layers.data(
name='label', shape=[2, 1], dtype='int32'
)
res = fluid.layers.center_loss(
data,
label,
num_classes=1000,
alpha=0.2,
param_attr=fluid.initializer.Xavier(uniform=False),
update_center=True,
)
self.assertRaises(TypeError, test_bad_x)
def test_bad_y():
data = fluid.layers.data(
name='data', shape=[2, 32], dtype='float32'
)
label = [[2], [3]]
res = fluid.layers.center_loss(
data,
label,
num_classes=1000,
alpha=0.2,
param_attr=fluid.initializer.Xavier(uniform=False),
update_center=True,
)
self.assertRaises(TypeError, test_bad_y)
def test_bad_alpha():
data = fluid.layers.data(
name='data2',
shape=[2, 32],
dtype='float32',
append_batch_size=False,
)
label = fluid.layers.data(
name='label2',
shape=[2, 1],
dtype='int32',
append_batch_size=False,
)
alpha = fluid.layers.data(
name='alpha',
shape=[1],
dtype='int64',
append_batch_size=False,
)
res = fluid.layers.center_loss(
data,
label,
num_classes=1000,
alpha=alpha,
param_attr=fluid.initializer.Xavier(uniform=False),
update_center=True,
)
self.assertRaises(TypeError, test_bad_alpha)
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -1440,14 +1440,18 @@ class TestRemoteHsigmoid(TestDistLookupTableBase):
),
)
cost = fluid.layers.hsigmoid(
loss = paddle.nn.HSigmoidLoss(
feature_size=emb.shape[1],
num_classes=num_total_classes,
is_custom=True,
is_sparse=is_sparse,
)
cost = loss(
input=emb,
label=label,
num_classes=num_total_classes,
path_table=path_table,
path_code=path_code,
is_custom=True,
is_sparse=is_sparse,
)
avg_cost = paddle.mean(cost)
# optimizer
......
......@@ -17,7 +17,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.nn.functional as F
from paddle.fluid import Program, program_guard
import paddle.fluid.initializer as I
import math
from op_test import OpTest, skip_check_grad_ci
......@@ -305,15 +304,19 @@ class TestHSigmoidOpWithSparseGrad(unittest.TestCase):
),
)
cost = fluid.layers.hsigmoid(
loss = paddle.nn.HSigmoidLoss(
feature_size=emb.shape[1],
num_classes=3,
bias_attr=True,
is_custom=True,
is_sparse=is_sparse,
)
cost = loss(
input=emb,
label=label,
bias_attr=True,
num_classes=3,
path_table=path_table,
path_code=path_code,
is_custom=True,
is_sparse=is_sparse,
)
avg_cost = fluid.layers.reduce_mean(cost)
......@@ -633,16 +636,19 @@ class TestHSigmoidLossAPI(unittest.TestCase):
path_code = fluid.data('path_code', [-1, -1], 'int64')
weight_attr = I.NumpyArrayInitializer(self.weight_np)
bias_attr = I.NumpyArrayInitializer(self.bias_np)
out = fluid.layers.hsigmoid(
x,
labels,
self.num_classes,
weight_attr,
bias_attr,
'out',
path_table,
path_code,
self.is_custom,
loss = paddle.nn.HSigmoidLoss(
feature_size=x.shape[1],
num_classes=self.num_classes,
weight_attr=weight_attr,
bias_attr=bias_attr,
is_custom=self.is_custom,
name='out',
)
out = loss(
input=x,
label=labels,
path_table=path_table,
path_code=path_code,
)
exe = fluid.Executor(self.place)
......@@ -730,28 +736,6 @@ class TestHSigmoidLossAPI(unittest.TestCase):
self.assertRaises(ValueError, F.hsigmoid_loss, x, label, 0, weight)
paddle.enable_static()
# test paddle.fluid.layers.hsigmoid
with program_guard(Program()):
label = fluid.data('label', [4, 1], 'int64')
# The input type must be Variable.
self.assertRaises(TypeError, fluid.layers.hsigmoid, 1, label, 2)
# The input dtype must be float16, float32, float64.
x_int32 = fluid.data(name='x_int32', shape=[4, 3], dtype='int32')
self.assertRaises(
TypeError, fluid.layers.hsigmoid, x_int32, label, 2
)
# support the input dtype is float32
x_fp32 = fluid.data(name='x_fp32', shape=[4, 3], dtype='float32')
fluid.layers.hsigmoid(x_fp32, label, 2)
# The label type must be Variable.
self.assertRaises(TypeError, fluid.layers.hsigmoid, x_fp32, 1, 2)
# The label dtype must be int64.
label_int32 = fluid.data('label_int32', [4, 1], 'int32')
self.assertRaises(
TypeError, fluid.layers.hsigmoid, x_fp32, label_int32, 2
)
class TestHSigmoidLossAPICustom(TestHSigmoidLossAPI):
def set_attrs(self):
......
......@@ -15,9 +15,7 @@
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
import paddle
from paddle.fluid import Program, program_guard
def huber_loss_forward(val, delta):
......@@ -31,7 +29,6 @@ def huber_loss_forward(val, delta):
class TestHuberLossOp(OpTest):
def setUp(self):
self.op_type = 'huber_loss'
self.python_api = paddle.fluid.layers.huber_loss
self.python_out_sig = ["Out"]
self.delta = 1.0
self.init_input()
......@@ -54,10 +51,10 @@ class TestHuberLossOp(OpTest):
return (100, 1)
def test_check_output(self):
self.check_output(check_eager=True)
self.check_output(check_eager=False)
def test_check_grad_normal(self):
self.check_grad(['X', 'Y'], 'Out', check_eager=True)
self.check_grad(['X', 'Y'], 'Out', check_eager=False)
def test_check_grad_ingore_x(self):
self.check_grad(
......@@ -85,29 +82,6 @@ def TestHuberLossOp3(TestHuberLossOp):
return (6, 6, 1)
class TestHuberLossOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
# the input and label must be Variable
xw = np.random.random((6, 6)).astype("float32")
xr = fluid.data(name='xr', shape=[None, 6], dtype="float32")
lw = np.random.random((6, 6)).astype("float32")
lr = fluid.data(name='lr', shape=[None, 6], dtype="float32")
delta = 1.0
self.assertRaises(TypeError, fluid.layers.huber_loss, xr, lw, delta)
self.assertRaises(TypeError, fluid.layers.huber_loss, xw, lr, delta)
# the dtype of input and label must be float32 or float64
xw2 = fluid.data(name='xw2', shape=[None, 6], dtype="int32")
lw2 = fluid.data(name='lw2', shape=[None, 6], dtype="int32")
self.assertRaises(
TypeError, fluid.layers.huber_loss, xw2, lr, delta
)
self.assertRaises(
TypeError, fluid.layers.huber_loss, xr, lw2, delta
)
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -2984,7 +2984,6 @@ class TestBook(LayerTest):
"make_gaussian_random_batch_size_like",
"make_kldiv_loss",
"make_prelu",
"make_sampled_softmax_with_cross_entropy",
"make_sampling_id",
"make_uniform_random_batch_size_like",
}
......@@ -3091,18 +3090,6 @@ class TestBook(LayerTest):
append_batch_size=append_batch_size,
)
def make_sampled_softmax_with_cross_entropy(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
logits = self._get_data(name='Logits', shape=[256], dtype='float32')
label = self._get_data(name='Label', shape=[1], dtype='int64')
num_samples = 25
output = layers.sampled_softmax_with_cross_entropy(
logits, label, num_samples
)
return output
def make_fit_a_line(self):
with program_guard(
fluid.default_main_program(),
......@@ -3237,33 +3224,6 @@ class TestBook(LayerTest):
x=dat, label=lbl, ignore_index=ignore_index
)
def make_hsigmoid(self):
self._force_to_use_cpu = True
with fluid.framework._dygraph_place_guard(place=fluid.CPUPlace()):
x = self._get_data(name='x', shape=[2], dtype='float32')
y = self._get_data(name='y', shape=[2], dtype='int64')
return layers.hsigmoid(input=x, label=y, num_classes=2)
# test hsigmoid with custom tree structure
program2 = Program()
with program_guard(program2):
x2 = self._get_data(name='x2', shape=[4, 8], dtype='float32')
y2 = self._get_data(name='y2', shape=[4], dtype='int64')
path_table = self._get_data(
name='path_table', shape=[4, 6], dtype='int64'
)
path_code = self._get_data(
name='path_code', shape=[4, 6], dtype='int64'
)
return layers.hsigmoid(
input=x2,
label=y2,
num_classes=6,
path_table=path_table,
path_code=path_code,
is_custom=True,
)
def make_pool2d(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
......@@ -3597,31 +3557,6 @@ class TestBook(LayerTest):
return out
return ids
def make_rank_loss(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
label = self._get_data(
name='label',
append_batch_size=False,
shape=[16, 1],
dtype="float32",
)
left = self._get_data(
name='left',
append_batch_size=False,
shape=[16, 1],
dtype="float32",
)
right = self._get_data(
name='right',
append_batch_size=False,
shape=[16, 1],
dtype="float32",
)
out = layers.rank_loss(label, left, right, name="rank_loss")
return out
def make_shape(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
......@@ -3691,14 +3626,6 @@ class TestBook(LayerTest):
out = layers.cross_entropy(x, label, False, 4)
return out
def make_bpr_loss(self):
self._force_to_use_cpu = True
with fluid.framework._dygraph_place_guard(place=fluid.CPUPlace()):
x = self._get_data(name="x", shape=[30, 10], dtype="float32")
label = self._get_data(name="label", shape=[30, 1], dtype="int64")
out = layers.bpr_loss(x, label)
return out
def make_expand(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
......@@ -4585,17 +4512,6 @@ class TestBook(LayerTest):
)
return output
def test_edit_distance(self):
with self.static_graph():
predict = layers.data(
name='predict', shape=[-1, 1], dtype='int64', lod_level=1
)
label = layers.data(
name='label', shape=[-1, 1], dtype='int64', lod_level=1
)
evaluator = fluid.evaluator.EditDistance(predict, label)
return evaluator.metrics
def test_basic_gru(self):
input_size = 128
hidden_size = 256
......
......@@ -16,6 +16,7 @@ import unittest
import numpy as np
from op_test import OpTest
from paddle import fluid
import paddle
class TestMarginRankLossOp(OpTest):
......@@ -87,7 +88,9 @@ class TestMarginRankLossLayer(unittest.TestCase):
label = fluid.data("label", (self.batch_size, 1), "float32")
x1 = fluid.data("x1", (self.batch_size, 1), "float32")
x2 = fluid.data("x2", (self.batch_size, 1), "float32")
out = fluid.layers.margin_rank_loss(label, x1, x2, self.margin)
out = paddle.nn.functional.margin_ranking_loss(
x1, x2, label, self.margin, 'none'
)
exe = fluid.Executor(place)
exe.run(start)
......
......@@ -15,8 +15,6 @@
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
class TestRankLossOp(OpTest):
......@@ -84,31 +82,5 @@ class TestRankLossOp5(TestRankLossOp):
return (batch_size), (batch_size), (batch_size)
class TestRankLossOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
label = fluid.data(name="label", shape=[16, 1], dtype="float32")
left = fluid.data(name="left", shape=[16, 1], dtype="float32")
right = fluid.data(name="right", shape=[16, 1], dtype="float32")
def test_label_Variable():
label_data = np.random.rand(16, 1).astype("float32")
out = fluid.layers.rank_loss(label_data, left, right)
self.assertRaises(TypeError, test_label_Variable)
def test_left_Variable():
left_data = np.random.rand(16, 1).astype("float32")
out = fluid.layers.rank_loss(label, left_data, right)
self.assertRaises(TypeError, test_left_Variable)
def test_right_Variable():
right_data = np.random.rand(16, 1).astype("float32")
out = fluid.layers.rank_loss(label, left, right_data)
self.assertRaises(TypeError, test_right_Variable)
if __name__ == '__main__':
unittest.main()
......@@ -17,8 +17,6 @@ from math import log
from math import exp
from op_test import OpTest
from scipy.special import logit
import unittest
import paddle.fluid as fluid
class TestTeacherStudentSigmoidLossOp(OpTest):
......@@ -71,20 +69,3 @@ class TestTeacherStudentSigmoidLossOp(OpTest):
def test_check_grad(self):
self.check_grad(["X"], "Y", numeric_grad_delta=0.005)
class TestTeacherStudentSigmoidLossInvalidInput(unittest.TestCase):
def test_error(self):
def test_invalid_input():
input = [512, 1]
label = fluid.data(name='label', shape=[None, 1], dtype='float32')
loss = fluid.layers.teacher_student_sigmoid_loss(input, label)
self.assertRaises(TypeError, test_invalid_input)
def test_invalid_label():
input = fluid.data(name='input1', shape=[None, 1], dtype='float32')
label = [512, 1]
loss = fluid.layers.teacher_student_sigmoid_loss(input, label)
self.assertRaises(TypeError, test_invalid_label)
......@@ -506,7 +506,6 @@ def edit_distance(
input_length(Tensor): The length for each sequence in `input`; if provided, it should have shape `(batch_size, )` and its data type should be int64.
label_length(Tensor): The length for each sequence in `label`; if provided, it should have shape `(batch_size, )` and its data type should be int64.
NOTE: To avoid unexpected results, the value of every element in input_length and label_length should be equal to the value of the second dimension of input and label. For example, given the input [[1,2,3,4],[5,6,7,8],[9,10,11,12]] of shape [3,4], the input_length should be [4,4,4].
NOTE: This API is different from fluid.metrics.EditDistance.
Returns:
Tuple:
......