diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py
index 96a15cb0ec906c9d7d4b4d55154a8683033dc6c3..c33406dba8e6a4b69ef312cdb18f1d9be3c29fa8 100644
--- a/python/paddle/fluid/__init__.py
+++ b/python/paddle/fluid/__init__.py
@@ -59,8 +59,6 @@ from . import optimizer
 from . import backward
 from .backward import gradients
 from . import regularizer
-from . import average
-from . import metrics
 from . import incubate
 from .param_attr import ParamAttr, WeightNormParamAttr
 from .data_feeder import DataFeeder
diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py
deleted file mode 100644
index 32db4ba0a9093b5376861e016c891acd545bcc44..0000000000000000000000000000000000000000
--- a/python/paddle/fluid/average.py
+++ /dev/null
@@ -1,96 +0,0 @@
-# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import warnings
-
-"""
-    Class of all kinds of Average.
-
-    All Averages are accomplished via Python totally.
-    They do not change Paddle's Program, nor do anything to
-    modify NN model's configuration. They are completely
-    wrappers of Python functions.
-"""
-
-__all__ = ["WeightedAverage"]
-
-
-def _is_number_(var):
-    return (
-        isinstance(var, int)
-        or isinstance(var, float)
-        or (isinstance(var, np.ndarray) and var.shape == (1,))
-    )
-
-
-def _is_number_or_matrix_(var):
-    return _is_number_(var) or isinstance(var, np.ndarray)
-
-
-class WeightedAverage:
-    """
-    Calculate weighted average.
-
-    The average calculating is accomplished via Python totally.
-    They do not change Paddle's Program, nor do anything to
-    modify NN model's configuration. They are completely
-    wrappers of Python functions.
-
-    Examples:
-      .. code-block:: python
-
-        import paddle.fluid as fluid
-        avg = fluid.average.WeightedAverage()
-        avg.add(value=2.0, weight=1)
-        avg.add(value=4.0, weight=2)
-        avg.eval()
-
-        # The result is 3.333333333.
-        # For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333
-    """
-
-    def __init__(self):
-        warnings.warn(
-            "The %s is deprecated, please use fluid.metrics.Accuracy instead."
-            % (self.__class__.__name__),
-            Warning,
-        )
-        self.reset()
-
-    def reset(self):
-        self.numerator = None
-        self.denominator = None
-
-    def add(self, value, weight):
-        if not _is_number_or_matrix_(value):
-            raise ValueError(
-                "The 'value' must be a number(int, float) or a numpy ndarray."
-            )
-        if not _is_number_(weight):
-            raise ValueError("The 'weight' must be a number(int, float).")
-
-        if self.numerator is None or self.denominator is None:
-            self.numerator = value * weight
-            self.denominator = weight
-        else:
-            self.numerator += value * weight
-            self.denominator += weight
-
-    def eval(self):
-        if self.numerator is None or self.denominator is None:
-            raise ValueError(
-                "There is no data to be averaged in WeightedAverage."
- ) - return self.numerator / self.denominator diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py deleted file mode 100644 index b04611db668660ea80eee2aab0674a597bc760a1..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/metrics.py +++ /dev/null @@ -1,815 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fluid Metrics -""" - -import numpy as np -import copy - -from .layer_helper import LayerHelper -from . import unique_name -from .framework import Program, Variable, program_guard -from . import layers - -__all__ = [ - 'MetricBase', - 'CompositeMetric', - 'Precision', - 'Recall', - 'Accuracy', - 'ChunkEvaluator', - 'EditDistance', - 'Auc', -] - - -def _is_numpy_(var): - return isinstance(var, (np.ndarray, np.generic)) - - -def _is_number_(var): - return ( - isinstance(var, int) - or isinstance(var, np.int64) - or isinstance(var, float) - or (isinstance(var, np.ndarray) and var.shape == (1,)) - ) - - -def _is_number_or_matrix_(var): - return _is_number_(var) or isinstance(var, np.ndarray) - - -class MetricBase: - """ - In many cases, we usually have to split the test data into mini-batches for evaluating - deep neural networks, therefore we need to collect the evaluation results of each - mini-batch and aggregate them into the final result. The paddle.fluid.metrics is - designed for a convenient way of deep neural network evaluation. - - The paddle.fluid.metrics contains serval different evaluation metrics - like precision and recall, and most of them have the following functions: - - 1. take the prediction result and the corresponding labels of a mini-batch as input, - then compute the evaluation result for the input mini-batch. - - 2. aggregate the existing evaluation results as the overall performance. - - The class Metric is the base class for all classes in paddle.fluid.metrics, it defines - the fundamental APIs for all metrics classes, including: - - 1. update(preds, labels): given the prediction results (preds) and the labels (labels) - of some mini-batch, compute the evaluation result of that mini-batch, and memorize the - evaluation result. - - 2. eval(): aggregate all existing evaluation result in the memory, and return the overall - performance across different mini-batches. - - 3. reset(): empty the memory. - - """ - - def __init__(self, name): - """ - The constructor of the metric class. - - Args: - name(str): The name of metric instance. such as, "accuracy". - It can be used to distinguish different metric instances in a model. - - Returns: - The constructed class instance. - - Return types: - The MetricBase or its succeed classes - - """ - self._name = str(name) if name is not None else self.__class__.__name__ - - def __str__(self): - return self._name - - def reset(self): - """ - reset function empties the evaluation memory for previous mini-batches. 
- - Args: - None - - Returns: - None - - Return types: - None - - """ - states = { - attr: value - for attr, value in self.__dict__.items() - if not attr.startswith("_") - } - for attr, value in states.items(): - if isinstance(value, int): - setattr(self, attr, 0) - elif isinstance(value, float): - setattr(self, attr, 0.0) - elif isinstance(value, (np.ndarray, np.generic)): - setattr(self, attr, np.zeros_like(value)) - else: - setattr(self, attr, None) - - def get_config(self): - """ - Get the metric and current states. - The states are the members who do not has "_" prefix. - - Args: - None - - Returns: - a python dict, which contains the inner states of the metric instance - - Return types: - a python dict - """ - states = { - attr: value - for attr, value in self.__dict__.items() - if not attr.startswith("_") - } - config = {} - config.update({"name": self._name, "states": copy.deepcopy(states)}) - return config - - def update(self, preds, labels): - """ - Given the prediction results (preds) and the labels (labels) - of some mini-batch, compute the evaluation result of that mini-batch, - and memorize the evaluation result. Please notice that the update function only - memorizes the evaluation result but would not return the score. If you want to - get the evaluation result, please call eval() function. - - Args: - preds(numpy.array): the predictions of current minibatch - labels(numpy.array): the labels of current minibatch. - - Returns: - None - - Return types: - None - - """ - raise NotImplementedError( - "Should not use it directly, please extend it." - ) - - def eval(self): - """ - Aggregate all existing evaluation results in the memory, and return the overall - performance across different mini-batches. - - Args: - None - - Returns: - The overall performance across different mini-batches. - - Return types: - float|list(float)|numpy.array: the metrics via Python. - """ - raise NotImplementedError( - "Should not use it directly, please extend it." - ) - - -class CompositeMetric(MetricBase): - """ - This op creates a container that contains the union of all the added metrics. - After the metrics added in, calling eval() method will compute all the contained metrics automatically. - CAUTION: only metrics with the SAME argument list can be added in a CompositeMetric instance. - - Inherit from: `MetricBase `_ - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. code-block:: python - import paddle.fluid as fluid - import numpy as np - preds = [[0.1], [0.7], [0.8], [0.9], [0.2], - [0.2], [0.3], [0.5], [0.8], [0.6]] - labels = [[0], [1], [1], [1], [1], - [0], [0], [0], [0], [0]] - preds = np.array(preds) - labels = np.array(labels) - comp = fluid.metrics.CompositeMetric() - precision = fluid.metrics.Precision() - recall = fluid.metrics.Recall() - comp.add_metric(precision) - comp.add_metric(recall) - comp.update(preds=preds, labels=labels) - numpy_precision, numpy_recall = comp.eval() - print("expect precision: %.2f, got %.2f" % ( 3. / 5, numpy_precision ) ) - print("expect recall: %.2f, got %.2f" % (3. / 4, numpy_recall ) ) - """ - - def __init__(self, name=None): - super().__init__(name) - self._metrics = [] - - def add_metric(self, metric): - """ - Add a new metric to container. Noted that the argument list - of the added one should be consistent with existed ones. 
- - Args: - metric(MetricBase): a instance of MetricBase - """ - if not isinstance(metric, MetricBase): - raise ValueError("SubMetric should be inherit from MetricBase.") - self._metrics.append(metric) - - def update(self, preds, labels): - """ - Update the metrics of this container. - - Args: - preds(numpy.array): predicted results of current mini-batch, the shape and dtype of which should meet the requirements of the corresponded metric. - labels(numpy.array): ground truth of current mini-batch, the shape and dtype of which should meet the requirements of the corresponded metric. - """ - for m in self._metrics: - m.update(preds, labels) - - def eval(self): - """ - Calculate the results of all metrics sequentially. - - Returns: - list: results of all added metrics. - The shape and dtype of each result depend on the definition of its metric. - """ - ans = [] - for m in self._metrics: - ans.append(m.eval()) - return ans - - -class Precision(MetricBase): - """ - Precision (also called positive predictive value) is the fraction of - relevant instances among the retrieved instances. Refer to - https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers - - Noted that this class manages the precision score only for binary classification task. - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - - metric = fluid.metrics.Precision() - - # generate the preds and labels - - preds = [[0.1], [0.7], [0.8], [0.9], [0.2], - [0.2], [0.3], [0.5], [0.8], [0.6]] - - labels = [[0], [1], [1], [1], [1], - [0], [0], [0], [0], [0]] - - preds = np.array(preds) - labels = np.array(labels) - - metric.update(preds=preds, labels=labels) - numpy_precision = metric.eval() - - print("expect precision: %.2f and got %.2f" % ( 3.0 / 5.0, numpy_precision)) - """ - - def __init__(self, name=None): - super().__init__(name) - self.tp = 0 # true positive - self.fp = 0 # false positive - - def update(self, preds, labels): - """ - Update the precision based on the current mini-batch prediction results . - - Args: - preds(numpy.ndarray): prediction results of current mini-batch, - the output of two-class sigmoid function. - Shape: [batch_size, 1]. Dtype: 'float64' or 'float32'. - labels(numpy.ndarray): ground truth (labels) of current mini-batch, - the shape should keep the same as preds. - Shape: [batch_size, 1], Dtype: 'int32' or 'int64'. - """ - if not _is_numpy_(preds): - raise ValueError("The 'preds' must be a numpy ndarray.") - if not _is_numpy_(labels): - raise ValueError("The 'labels' must be a numpy ndarray.") - sample_num = labels.shape[0] - preds = np.rint(preds).astype("int32") - - for i in range(sample_num): - pred = preds[i] - label = labels[i] - if pred == 1: - if pred == label: - self.tp += 1 - else: - self.fp += 1 - - def eval(self): - """ - Calculate the final precision. - - Returns: - float: Results of the calculated Precision. Scalar output with float dtype. - """ - ap = self.tp + self.fp - return float(self.tp) / ap if ap != 0 else 0.0 - - -class Recall(MetricBase): - """ - Recall (also known as sensitivity) is the fraction of - relevant instances that have been retrieved over the - total amount of relevant instances - - Refer to: - https://en.wikipedia.org/wiki/Precision_and_recall - - Noted that this class manages the recall score only for binary classification task. - - Args: - name (str, optional): Metric name. 
For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - - metric = fluid.metrics.Recall() - - # generate the preds and labels - - preds = [[0.1], [0.7], [0.8], [0.9], [0.2], - [0.2], [0.3], [0.5], [0.8], [0.6]] - - labels = [[0], [1], [1], [1], [1], - [0], [0], [0], [0], [0]] - - preds = np.array(preds) - labels = np.array(labels) - - metric.update(preds=preds, labels=labels) - numpy_recall = metric.eval() - - print("expect recall: %.2f and got %.2f" % ( 3.0 / 4.0, numpy_recall)) - """ - - def __init__(self, name=None): - super().__init__(name) - self.tp = 0 # true positive - self.fn = 0 # false negative - - def update(self, preds, labels): - """ - Update the recall based on the current mini-batch prediction results. - - Args: - preds(numpy.array): prediction results of current mini-batch, - the output of two-class sigmoid function. - Shape: [batch_size, 1]. Dtype: 'float64' or 'float32'. - labels(numpy.array): ground truth (labels) of current mini-batch, - the shape should keep the same as preds. - Shape: [batch_size, 1], Dtype: 'int32' or 'int64'. - """ - if not _is_numpy_(preds): - raise ValueError("The 'preds' must be a numpy ndarray.") - if not _is_numpy_(labels): - raise ValueError("The 'labels' must be a numpy ndarray.") - sample_num = labels.shape[0] - preds = np.rint(preds).astype("int32") - - for i in range(sample_num): - pred = preds[i] - label = labels[i] - if label == 1: - if pred == label: - self.tp += 1 - else: - self.fn += 1 - - def eval(self): - """ - Calculate the final recall. - - Returns: - float: results of the calculated Recall. Scalar output with float dtype. - """ - recall = self.tp + self.fn - return float(self.tp) / recall if recall != 0 else 0.0 - - -class Accuracy(MetricBase): - """ - This interface is used to calculate the mean accuracy over multiple batches. - Accuracy object has two state: value and weight. The definition of Accuracy is available at - https://en.wikipedia.org/wiki/Accuracy_and_precision - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - #suppose we have batch_size = 128 - batch_size=128 - accuracy_manager = fluid.metrics.Accuracy() - - #suppose the accuracy is 0.9 for the 1st batch - batch1_acc = 0.9 - accuracy_manager.update(value = batch1_acc, weight = batch_size) - print("expect accuracy: %.2f, get accuracy: %.2f" % (batch1_acc, accuracy_manager.eval())) - - #suppose the accuracy is 0.8 for the 2nd batch - batch2_acc = 0.8 - - accuracy_manager.update(value = batch2_acc, weight = batch_size) - #the joint acc for batch1 and batch2 is (batch1_acc * batch_size + batch2_acc * batch_size) / batch_size / 2 - print("expect accuracy: %.2f, get accuracy: %.2f" % ((batch1_acc * batch_size + batch2_acc * batch_size) / batch_size / 2, accuracy_manager.eval())) - - #reset the accuracy_manager - accuracy_manager.reset() - #suppose the accuracy is 0.8 for the 3rd batch - batch3_acc = 0.8 - accuracy_manager.update(value = batch3_acc, weight = batch_size) - print("expect accuracy: %.2f, get accuracy: %.2f" % (batch3_acc, accuracy_manager.eval())) - """ - - def __init__(self, name=None): - super().__init__(name) - self.value = 0.0 - self.weight = 0.0 - - def update(self, value, weight): - r""" - This function takes the minibatch states (value, weight) as input, - to accumulate and update the corresponding status of the Accuracy object. The update method is as follows: - - .. math:: - \\\\ \\begin{array}{l}{\\text { self. value }+=\\text { value } * \\text { weight }} \\\\ {\\text { self. weight }+=\\text { weight }}\\end{array} \\\\ - - Args: - value(float|numpy.array): accuracy of one minibatch. - weight(int|float): minibatch size. - """ - if not _is_number_or_matrix_(value): - raise ValueError( - "The 'value' must be a number(int, float) or a numpy ndarray." - ) - if not _is_number_(weight): - raise ValueError("The 'weight' must be a number(int, float).") - if _is_number_(weight) and weight < 0: - raise ValueError("The 'weight' can not be negative") - self.value += value * weight - self.weight += weight - - def eval(self): - """ - This function returns the mean accuracy (float or numpy.array) for all accumulated minibatches. - - Returns: - float or numpy.array: mean accuracy for all accumulated minibatches. - - """ - if self.weight == 0: - raise ValueError( - "There is no data in Accuracy Metrics. \ - Please check layers.accuracy output has added to Accuracy." - ) - return self.value / self.weight - - -class ChunkEvaluator(MetricBase): - """ - Accumulate counter numbers output by chunk_eval from mini-batches and - compute the precision recall and F1-score using the accumulated counter - numbers. - ChunkEvaluator has three states: num_infer_chunks, num_label_chunks and num_correct_chunks, - which correspond to the number of chunks, the number of labeled chunks, and the number of correctly identified chunks. - For some basics of chunking, please refer to - `Chunking with Support Vector Machines `_ . - ChunkEvalEvaluator computes the precision, recall, and F1-score of chunk detection, - and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - # init the chunk-level evaluation manager - metric = fluid.metrics.ChunkEvaluator() - - # suppose the model predict 10 chucks, while 8 ones are correct and the ground truth has 9 chucks. 
- num_infer_chunks = 10 - num_label_chunks = 9 - num_correct_chunks = 8 - - metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks) - numpy_precision, numpy_recall, numpy_f1 = metric.eval() - - print("precision: %.2f, recall: %.2f, f1: %.2f" % (numpy_precision, numpy_recall, numpy_f1)) - - # the next batch, predicting 3 perfectly correct chucks. - num_infer_chunks = 3 - num_label_chunks = 3 - num_correct_chunks = 3 - - metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks) - numpy_precision, numpy_recall, numpy_f1 = metric.eval() - - print("precision: %.2f, recall: %.2f, f1: %.2f" % (numpy_precision, numpy_recall, numpy_f1)) - - """ - - def __init__(self, name=None): - super().__init__(name) - self.num_infer_chunks = 0 - self.num_label_chunks = 0 - self.num_correct_chunks = 0 - - def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks): - r""" - This function takes (num_infer_chunks, num_label_chunks, num_correct_chunks) as input, - to accumulate and update the corresponding status of the ChunkEvaluator object. The update method is as follows: - - .. math:: - \\\\ \\begin{array}{l}{\\text { self. num_infer_chunks }+=\\text { num_infer_chunks }} \\\\ {\\text { self. num_Label_chunks }+=\\text { num_label_chunks }} \\\\ {\\text { self. num_correct_chunks }+=\\text { num_correct_chunks }}\\end{array} \\\\ - - Args: - num_infer_chunks(int|numpy.array): The number of chunks in Inference on the given minibatch. - num_label_chunks(int|numpy.array): The number of chunks in Label on the given mini-batch. - num_correct_chunks(int|float|numpy.array): The number of chunks both in Inference and Label on the - given mini-batch. - """ - if not _is_number_or_matrix_(num_infer_chunks): - raise ValueError( - "The 'num_infer_chunks' must be a number(int) or a numpy ndarray." - ) - if not _is_number_or_matrix_(num_label_chunks): - raise ValueError( - "The 'num_label_chunks' must be a number(int, float) or a numpy ndarray." - ) - if not _is_number_or_matrix_(num_correct_chunks): - raise ValueError( - "The 'num_correct_chunks' must be a number(int, float) or a numpy ndarray." - ) - self.num_infer_chunks += num_infer_chunks - self.num_label_chunks += num_label_chunks - self.num_correct_chunks += num_correct_chunks - - def eval(self): - """ - This function returns the mean precision, recall and f1 score for all accumulated minibatches. - - Returns: - float: mean precision, recall and f1 score. - - """ - precision = ( - float(self.num_correct_chunks) / self.num_infer_chunks - if self.num_infer_chunks - else 0 - ) - recall = ( - float(self.num_correct_chunks) / self.num_label_chunks - if self.num_label_chunks - else 0 - ) - f1_score = ( - float(2 * precision * recall) / (precision + recall) - if self.num_correct_chunks - else 0 - ) - return precision, recall, f1_score - - -class EditDistance(MetricBase): - """ - This API is for the management of edit distances. - Editing distance is a method to quantify the degree of dissimilarity - between two strings, such as words, by calculating the minimum editing - operand (add, delete or replace) required to convert one string into another. - Refer to https://en.wikipedia.org/wiki/Edit_distance. - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - import numpy as np - - # suppose that batch_size is 128 - batch_size = 128 - - # init the edit distance manager - distance_evaluator = fluid.metrics.EditDistance("EditDistance") - - # generate the edit distance across 128 sequence pairs, the max distance is 10 here - edit_distances_batch0 = np.random.randint(low = 0, high = 10, size = (batch_size, 1)) - seq_num_batch0 = batch_size - - distance_evaluator.update(edit_distances_batch0, seq_num_batch0) - avg_distance, wrong_instance_ratio = distance_evaluator.eval() - print("the average edit distance for batch0 is %.2f and the wrong instance ratio is %.2f " % (avg_distance, wrong_instance_ratio)) - - edit_distances_batch1 = np.random.randint(low = 0, high = 10, size = (batch_size, 1)) - seq_num_batch1 = batch_size - - distance_evaluator.update(edit_distances_batch1, seq_num_batch1) - avg_distance, wrong_instance_ratio = distance_evaluator.eval() - print("the average edit distance for batch0 and batch1 is %.2f and the wrong instance ratio is %.2f " % (avg_distance, wrong_instance_ratio)) - - distance_evaluator.reset() - - edit_distances_batch2 = np.random.randint(low = 0, high = 10, size = (batch_size, 1)) - seq_num_batch2 = batch_size - - distance_evaluator.update(edit_distances_batch2, seq_num_batch2) - avg_distance, wrong_instance_ratio = distance_evaluator.eval() - print("the average edit distance for batch2 is %.2f and the wrong instance ratio is %.2f " % (avg_distance, wrong_instance_ratio)) - - """ - - def __init__(self, name): - super().__init__(name) - self.total_distance = 0.0 - self.seq_num = 0 - self.instance_error = 0 - - def update(self, distances, seq_num): - """ - Update the overall edit distance - - Args: - distances(numpy.array): a (batch_size, 1) numpy.array, each element represents the edit distance between two sequences. - seq_num(int|float): standing for the number of sequence pairs. - """ - if not _is_numpy_(distances): - raise ValueError("The 'distances' must be a numpy ndarray.") - if not _is_number_(seq_num): - raise ValueError("The 'seq_num' must be a number(int, float).") - seq_right_count = np.sum(distances == 0) - total_distance = np.sum(distances) - self.seq_num += seq_num - self.instance_error += seq_num - seq_right_count - self.total_distance += total_distance - - def eval(self): - """ - Return two floats: - avg_distance: the average distance for all sequence pairs updated using the update function. - avg_instance_error: the ratio of sequence pairs whose edit distance is not zero. - """ - if self.seq_num == 0: - raise ValueError( - "There is no data in EditDistance Metric. Please check paddle.nn.functional.loss.edit_distance output has been added to EditDistance." - ) - avg_distance = self.total_distance / self.seq_num - avg_instance_error = self.instance_error / float(self.seq_num) - return avg_distance, avg_instance_error - - -class Auc(MetricBase): - """ - The auc metric is for binary classification. - Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve. - Please notice that the auc metric is implemented with python, which may be a little bit slow. - If you concern the speed, please use the paddle.static.auc instead. - - The `auc` function creates four local variables, `true_positives`, - `true_negatives`, `false_positives` and `false_negatives` that are used to - compute the AUC. To discretize the AUC curve, a linearly spaced set of - thresholds is used to compute pairs of recall and precision values. 
The area - under the ROC-curve is therefore computed using the height of the recall - values by the false positive rate, while the area under the PR-curve is the - computed using the height of the precision values by the recall. - - Args: - name (str, optional): Metric name. For details, please refer to :ref:`api_guide_Name`. Default is None. - curve (str): Specifies the name of the curve to be computed, 'ROC' [default] or 'PR' for the Precision-Recall-curve. - - "NOTE: only implement the ROC curve type via Python now." - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - # init the auc metric - auc_metric = fluid.metrics.Auc("ROC") - - # suppose that batch_size is 128 - batch_num = 100 - batch_size = 128 - - for batch_id in range(batch_num): - - class0_preds = np.random.random(size = (batch_size, 1)) - class1_preds = 1 - class0_preds - - preds = np.concatenate((class0_preds, class1_preds), axis=1) - - labels = np.random.randint(2, size = (batch_size, 1)) - auc_metric.update(preds = preds, labels = labels) - - # shall be some score closing to 0.5 as the preds are randomly assigned - print("auc for iteration %d is %.2f" % (batch_id, auc_metric.eval())) - """ - - def __init__(self, name, curve='ROC', num_thresholds=4095): - super().__init__(name=name) - self._curve = curve - self._num_thresholds = num_thresholds - - _num_pred_buckets = num_thresholds + 1 - self._stat_pos = [0] * _num_pred_buckets - self._stat_neg = [0] * _num_pred_buckets - - def update(self, preds, labels): - """ - Update the auc curve with the given predictions and labels. - - Args: - preds (numpy.array): an numpy array in the shape of (batch_size, 2), preds[i][j] denotes the probability of classifying the instance i into the class j. - labels (numpy.array): an numpy array in the shape of (batch_size, 1), labels[i] is either o or 1, representing the label of the instance i. 
- """ - if not _is_numpy_(labels): - raise ValueError("The 'labels' must be a numpy ndarray.") - if not _is_numpy_(preds): - raise ValueError("The 'predictions' must be a numpy ndarray.") - - for i, lbl in enumerate(labels): - value = preds[i, 1] - bin_idx = int(value * self._num_thresholds) - assert bin_idx <= self._num_thresholds - if lbl: - self._stat_pos[bin_idx] += 1.0 - else: - self._stat_neg[bin_idx] += 1.0 - - @staticmethod - def trapezoid_area(x1, x2, y1, y2): - return abs(x1 - x2) * (y1 + y2) / 2.0 - - def eval(self): - """ - Return the area (a float score) under auc curve - - Return: - float: the area under auc curve - """ - tot_pos = 0.0 - tot_neg = 0.0 - auc = 0.0 - - idx = self._num_thresholds - while idx >= 0: - tot_pos_prev = tot_pos - tot_neg_prev = tot_neg - tot_pos += self._stat_pos[idx] - tot_neg += self._stat_neg[idx] - auc += self.trapezoid_area( - tot_neg, tot_neg_prev, tot_pos, tot_pos_prev - ) - idx -= 1 - - return ( - auc / tot_pos / tot_neg if tot_pos > 0.0 and tot_neg > 0.0 else 0.0 - ) diff --git a/python/paddle/fluid/tests/unittests/test_auc_op.py b/python/paddle/fluid/tests/unittests/test_auc_op.py index b0c6dc5b615a63c49c8e47edd7d62740cd448c22..19c7120f858856350ddc80e9cd80d666a8497aea 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_op.py @@ -19,7 +19,6 @@ from eager_op_test import OpTest import paddle from paddle import fluid -from paddle.fluid import metrics class TestAucOp(OpTest): @@ -49,17 +48,17 @@ class TestAucOp(OpTest): "slide_steps": slide_steps, } - python_auc = metrics.Auc( + python_auc = paddle.metric.Auc( name="auc", curve='ROC', num_thresholds=num_thresholds ) python_auc.update(pred, labels) - pos = python_auc._stat_pos * 2 + pos = python_auc._stat_pos.tolist() * 2 pos.append(1) - neg = python_auc._stat_neg * 2 + neg = python_auc._stat_neg.tolist() * 2 neg.append(1) self.outputs = { - 'AUC': np.array(python_auc.eval()), + 'AUC': np.array(python_auc.accumulate()), 'StatPosOut': np.array(pos), 'StatNegOut': np.array(neg), } @@ -91,7 +90,7 @@ class TestGlobalAucOp(OpTest): "slide_steps": slide_steps, } - python_auc = metrics.Auc( + python_auc = paddle.metric.Auc( name="auc", curve='ROC', num_thresholds=num_thresholds ) python_auc.update(pred, labels) @@ -99,7 +98,7 @@ class TestGlobalAucOp(OpTest): pos = python_auc._stat_pos neg = python_auc._stat_neg self.outputs = { - 'AUC': np.array(python_auc.eval()), + 'AUC': np.array(python_auc.accumulate()), 'StatPosOut': np.array(pos), 'StatNegOut': np.array(neg), } diff --git a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py index 1041958395535d0d25755ffa3ba7eebea212370b..d6dd4e9b0b8f4bda56c1d5d7a89b6d3cbd9f2061 100644 --- a/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py +++ b/python/paddle/fluid/tests/unittests/test_auc_single_pred_op.py @@ -17,7 +17,7 @@ import unittest import numpy as np from eager_op_test import OpTest -from paddle.fluid import metrics +import paddle class TestAucSinglePredOp(OpTest): @@ -48,19 +48,19 @@ class TestAucSinglePredOp(OpTest): "slide_steps": slide_steps, } - python_auc = metrics.Auc( + python_auc = paddle.metric.Auc( name="auc", curve='ROC', num_thresholds=num_thresholds ) for i in range(128): pred[i][1] = pred[i][0] python_auc.update(pred, labels) - pos = python_auc._stat_pos * 2 + pos = python_auc._stat_pos.tolist() * 2 pos.append(1) - neg = python_auc._stat_neg * 2 + neg = python_auc._stat_neg.tolist() * 2 
neg.append(1) self.outputs = { - 'AUC': np.array(python_auc.eval()), + 'AUC': np.array(python_auc.accumulate()), 'StatPosOut': np.array(pos), 'StatNegOut': np.array(neg), } @@ -93,7 +93,7 @@ class TestAucGlobalSinglePredOp(OpTest): "slide_steps": slide_steps, } - python_auc = metrics.Auc( + python_auc = paddle.metric.Auc( name="auc", curve='ROC', num_thresholds=num_thresholds ) for i in range(128): @@ -103,7 +103,7 @@ class TestAucGlobalSinglePredOp(OpTest): pos = python_auc._stat_pos neg = python_auc._stat_neg self.outputs = { - 'AUC': np.array(python_auc.eval()), + 'AUC': np.array(python_auc.accumulate()), 'StatPosOut': np.array(pos), 'StatNegOut': np.array(neg), } diff --git a/test/dygraph_to_static/test_lac.py b/test/dygraph_to_static/test_lac.py index 9e0815d1440e1e1c85ff330de9e18f91e2de0e86..9e854f9dda09fc52d2d8154359a798d359cfa059 100644 --- a/test/dygraph_to_static/test_lac.py +++ b/test/dygraph_to_static/test_lac.py @@ -564,8 +564,6 @@ class TestLACModel(unittest.TestCase): ) step = 0 - chunk_evaluator = fluid.metrics.ChunkEvaluator() - chunk_evaluator.reset() loss_data = [] for epoch_id in range(args.epoch): diff --git a/test/dygraph_to_static/test_simnet.py b/test/dygraph_to_static/test_simnet.py index 3fb2302a93c7e97671e19b83cdf19533727da4bf..2fdaa9fc8368dc1e0e00bf1b96a9fd14723a9ed8 100644 --- a/test/dygraph_to_static/test_simnet.py +++ b/test/dygraph_to_static/test_simnet.py @@ -128,7 +128,7 @@ def train(conf_dict, to_static): parameter_list=net.parameters(), ) - metric = fluid.metrics.Auc(name="auc") + metric = paddle.metric.Auc(name="auc") global_step = 0 losses = []
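
Migration note on the test changes above: paddle.metric.Auc keeps the update(preds, labels) interface of the removed fluid.metrics.Auc, but the aggregated score comes from accumulate() rather than eval(), and its internal _stat_pos / _stat_neg buffers are numpy arrays instead of Python lists, which is why the tests now call .tolist() before repeating them. A minimal sketch of that call pattern (random inputs, so the printed score is only illustrative):

# Sketch of the fluid.metrics.Auc -> paddle.metric.Auc call pattern used above.
import numpy as np
import paddle

auc = paddle.metric.Auc(name="auc", curve='ROC', num_thresholds=200)

class0 = np.random.random(size=(128, 1))
preds = np.concatenate((class0, 1 - class0), axis=1)   # shape (batch_size, 2)
labels = np.random.randint(2, size=(128, 1))

auc.update(preds, labels)
score = auc.accumulate()   # was python_auc.eval() with fluid.metrics.Auc

# The stat buffers are numpy arrays now; tolist() keeps the old
# "list * 2" repetition semantics relied on by the AUC op tests
# (array * 2 would multiply element-wise instead).
pos = auc._stat_pos.tolist() * 2
pos.append(1)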
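
The deleted binary-classification metrics have counterparts in paddle.metric with the same update(preds, labels) call but accumulate() instead of eval(). A hedged sketch under that assumption, reusing the example data from the removed docstrings (rounding of borderline scores such as 0.5 may differ slightly between the old and new implementations, so no exact values are asserted):

# Sketch: replacing the removed fluid.metrics.Precision / Recall with paddle.metric.
import numpy as np
import paddle

preds = np.array([[0.1], [0.7], [0.8], [0.9], [0.2],
                  [0.2], [0.3], [0.5], [0.8], [0.6]])
labels = np.array([[0], [1], [1], [1], [1],
                   [0], [0], [0], [0], [0]])

precision = paddle.metric.Precision()
recall = paddle.metric.Recall()

precision.update(preds, labels)
recall.update(preds, labels)

# accumulate() plays the role of the removed eval().
print("precision %.2f, recall %.2f" % (precision.accumulate(), recall.accumulate()))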
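
The removed fluid.average.WeightedAverage was pure Python with no graph-side state, so a small local accumulator (or numpy.average with a weights argument) covers existing uses. The helper below mirrors the deleted class; it is a local sketch, not a Paddle API:

# Local stand-in for the removed fluid.average.WeightedAverage.
class WeightedAverage:
    def __init__(self):
        self.reset()

    def reset(self):
        self.numerator = 0.0
        self.denominator = 0.0

    def add(self, value, weight):
        # value may be a scalar or a numpy ndarray, as in the removed helper
        self.numerator += value * weight
        self.denominator += weight

    def eval(self):
        if self.denominator == 0:
            raise ValueError("There is no data to be averaged.")
        return self.numerator / self.denominator

avg = WeightedAverage()
avg.add(value=2.0, weight=1)
avg.add(value=4.0, weight=2)
print(avg.eval())  # (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333...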