diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 7c6ad6f27dcfd7040f79c72c01413c8cc84a28ba..00ba1a0457583d1cc1fa7136ebd51e9ced167832 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -41,7 +41,12 @@ def _clone_var_(block, var): class Evaluator(object): """ - Base Class for all evaluators + Warning: better to use the fluid.metrics.* things, more + flexible support via pure Python and Operator, and decoupled + with executor. Short doc are intended to urge new user + start from Metrics. + + Base Class for all evaluators. Args: name(str): The name of evaluator. such as, "accuracy". Used for generate @@ -69,6 +74,10 @@ class Evaluator(object): def reset(self, executor, reset_program=None): """ reset metric states at the begin of each pass/user specified batch + + Args: + executor(Executor|ParallelExecutor): a executor for executing the reset_program + reset_program(Program): a single Program for reset process """ if reset_program is None: reset_program = Program() @@ -85,15 +94,16 @@ class Evaluator(object): def eval(self, executor, eval_program=None): """ Evaluate the statistics merged by multiple mini-batches. + Args: + executor(Executor|ParallelExecutor): a executor for executing the eval_program + eval_program(Program): a single Program for eval process """ raise NotImplementedError() - def create_state(self, suffix, dtype, shape): + def _create_state(self, suffix, dtype, shape): """ Create state variable. - NOTE: It is not a public API. - Args: suffix(str): the state suffix. dtype(str|core.VarDesc.VarType): the state data type @@ -113,9 +123,35 @@ class Evaluator(object): class ChunkEvaluator(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator + instead. + Accumulate counter numbers output by chunk_eval from mini-batches and compute the precision recall and F1-score using the accumulated counter numbers. + For some basics of chunking, please refer to + 'Chunking with Support Vector Machines '. + + Args: + input (Variable): prediction output of the network. + label (Variable): label of the test data set. + chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval op for details. + num_chunk_types (int): the number of chunk type. + excluded_chunk_types (list): A list including chunk type ids, indicating chunk types that are not counted. + + Returns: + tuple: tuple containing: precision, recall, f1_score + + Examples: + .. 
code-block:: python + + exe = fluid.executor(place) + evaluator = fluid.Evaluator.ChunkEvaluator(input, label) + for epoch in PASS_NUM: + evaluator.reset(exe) + for data in batches: + loss = exe.run(fetch_list=[cost]) + distance, instance_error = distance_evaluator.eval(exe) """ def __init__( @@ -130,11 +166,11 @@ class ChunkEvaluator(Evaluator): if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.num_infer_chunks = self.create_state( + self.num_infer_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_infer_chunks') - self.num_label_chunks = self.create_state( + self.num_label_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_label_chunks') - self.num_correct_chunks = self.create_state( + self.num_correct_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_correct_chunks') precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval( input=input, @@ -178,6 +214,8 @@ class ChunkEvaluator(Evaluator): class EditDistance(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance + instead. Accumulate edit distance sum and sequence number from mini-batches and compute the average edit_distance and instance error of all batches. @@ -188,15 +226,16 @@ class EditDistance(Evaluator): ignored_tokens(list of int): Tokens that should be removed before calculating edit distance. - Example: + Examples: + .. code-block:: python - exe = fluid.executor(place) - distance_evaluator = fluid.Evaluator.EditDistance(input, label) - for epoch in PASS_NUM: - distance_evaluator.reset(exe) - for data in batches: - loss = exe.run(fetch_list=[cost]) - distance, instance_error = distance_evaluator.eval(exe) + exe = fluid.executor(place) + distance_evaluator = fluid.Evaluator.EditDistance(input, label) + for epoch in PASS_NUM: + distance_evaluator.reset(exe) + for data in batches: + loss = exe.run(fetch_list=[cost]) + distance, instance_error = distance_evaluator.eval(exe) In the above example: 'distance' is the average of the edit distance in a pass. @@ -210,11 +249,11 @@ class EditDistance(Evaluator): if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.total_distance = self.create_state( + self.total_distance = self._create_state( dtype='float32', shape=[1], suffix='total_distance') - self.seq_num = self.create_state( + self.seq_num = self._create_state( dtype='int64', shape=[1], suffix='seq_num') - self.instance_error = self.create_state( + self.instance_error = self._create_state( dtype='int64', shape=[1], suffix='instance_error') distances, seq_num = layers.edit_distance( input=input, label=label, ignored_tokens=ignored_tokens) @@ -256,9 +295,10 @@ class EditDistance(Evaluator): class DetectionMAP(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.DetectionMAP + instead. Calculate the detection mean average precision (mAP). - TODO (Dang Qingqing): update the following doc. The general steps are as follows: 1. calculate the true positive and false positive according to the input of detection and labels. @@ -293,17 +333,18 @@ class DetectionMAP(Evaluator): - 11point: the 11-point interpolated average precision. - integral: the natural integral of the precision-recall curve. - Example: + Examples: + .. 
code-block:: python - exe = fluid.executor(place) - map_evaluator = fluid.Evaluator.DetectionMAP(input, - gt_label, gt_box, gt_difficult) - cur_map, accum_map = map_evaluator.get_map_var() - fetch = [cost, cur_map, accum_map] - for epoch in PASS_NUM: - map_evaluator.reset(exe) - for data in batches: - loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch) + exe = fluid.executor(place) + map_evaluator = fluid.Evaluator.DetectionMAP(input, + gt_label, gt_box, gt_difficult) + cur_map, accum_map = map_evaluator.get_map_var() + fetch = [cost, cur_map, accum_map] + for epoch in PASS_NUM: + map_evaluator.reset(exe) + for data in batches: + loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch) In the above example: @@ -340,9 +381,10 @@ class DetectionMAP(Evaluator): evaluate_difficult=evaluate_difficult, ap_version=ap_version) - self.create_state(dtype='int32', shape=None, suffix='accum_pos_count') - self.create_state(dtype='float32', shape=None, suffix='accum_true_pos') - self.create_state(dtype='float32', shape=None, suffix='accum_false_pos') + self._create_state(dtype='int32', shape=None, suffix='accum_pos_count') + self._create_state(dtype='float32', shape=None, suffix='accum_true_pos') + self._create_state( + dtype='float32', shape=None, suffix='accum_false_pos') self.has_state = None var = self.helper.create_variable( diff --git a/python/paddle/fluid/layers/__init__.py b/python/paddle/fluid/layers/__init__.py index a568f61dcb2da976baa7847ae26281a34d6f88dd..cd1492da24d5e9d09a9eaac0b1b9c7aaffac6250 100644 --- a/python/paddle/fluid/layers/__init__.py +++ b/python/paddle/fluid/layers/__init__.py @@ -28,8 +28,8 @@ import math_op_patch from math_op_patch import * import detection from detection import * -import metric -from metric import * +import metric_op +from metric_op import * from learning_rate_scheduler import * __all__ = [] @@ -41,5 +41,5 @@ __all__ += control_flow.__all__ __all__ += ops.__all__ __all__ += device.__all__ __all__ += detection.__all__ -__all__ += metric.__all__ +__all__ += metric_op.__all__ __all__ += learning_rate_scheduler.__all__ diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric_op.py similarity index 99% rename from python/paddle/fluid/layers/metric.py rename to python/paddle/fluid/layers/metric_op.py index 58de1b6b9fe17a24203e93de6780190b9fc6b3e7..99e82fdd04282177fae63f1fb94b5e32d41c612e 100644 --- a/python/paddle/fluid/layers/metric.py +++ b/python/paddle/fluid/layers/metric_op.py @@ -126,7 +126,7 @@ def auc(input, label, curve='ROC', num_thresholds=200): topk_out, topk_indices = nn.topk(input, k=k) auc_out = helper.create_tmp_variable(dtype="float32") helper.append_op( - type="accuracy", + type="auc", inputs={ "Out": [topk_out], "Indices": [topk_indices], diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 572475b483ff0341a97a91b6c5309fcf337dacbe..c9cd881979a4ea4b14299ce219be4b5bd1f153fc 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -23,6 +23,8 @@ import warnings __all__ = [ 'MetricBase', 'CompositeMetric', + 'Precision', + 'Recall', 'Accuracy', 'ChunkEvaluator', 'EditDistance', @@ -46,33 +48,34 @@ def _is_number_or_matrix_(var): class MetricBase(object): """ - Base Class for all evaluators + Base Class for all Metrics. + MetricBase define a group of interfaces for the + model evaluation methods. Metrics accumulate metric states between + consecutive minibatches, at every minibatch, use update + interface to add current minibatch value to global states. 
+    Use eval to compute the accumulated metric value from the last reset()
+    or from scratch.
+    If you need to customize a new metric, please inherit from MetricBase and
+    provide a custom implementation.
 
     Args:
-        name(str): The name of evaluator. such as, "accuracy". Used for generate
-            temporary variable name.
-    Interface:
-        Note(*) : the states is the attributes who not has _ prefix.
-
-        get_config(): print current states and configuration
-        reset(): clear the states. If the Metrics states type is not (int, float, np.ndarray),
-                Please override this method.
-        update(): update states at every minibatch
-        eval(): get metric evaluation in numpy type.
+        name(str): The name of the metric instance, such as "accuracy".
+            It is needed if you want to distinguish different metrics in a model.
+
     """
 
-    def __init__(self, name, **kwargs):
+    def __init__(self, name):
         self._name = str(name) if name != None else self.__class__.__name__
-        self._kwargs = kwargs if kwargs != None else dict()
-        self.reset()
 
     def __str__(self):
         return self._name
 
     def reset(self):
         """
-        states is the attributes who not has _ prefix.
-        reset the states of metrics.
+        reset clears the states of the metric. By default, the states
+        are the members that do not have a "_" prefix; reset sets them to initial states.
+        If you violate the implicit naming rule, please also customize the reset
+        interface.
         """
         states = {
             attr: value
@@ -90,61 +93,231 @@ class MetricBase(object):
                 setattr(self, attr, None)
 
     def get_config(self):
+        """
+        Get the metric and current states.
+        The states are the members that do not have a "_" prefix.
+
+        Args:
+            None
+
+        Returns:
+            dict: a dict of the metric name and states
+        """
         states = {
             attr: value
             for attr, value in self.__dict__.iteritems()
             if not attr.startswith("_")
         }
-        config = copy.deepcopy(self._kwargs)
+        config = {}
         config.update({"name": self._name, "states": copy.deepcopy(states)})
         return config
 
-    def update(self):
-        raise NotImplementedError()
+    def update(self, preds, labels):
+        """
+        Updates the metric states at every minibatch.
+        One can compute the minibatch metric via pure Python, or
+        via a C++ operator.
+
+        Args:
+            preds(numpy.array): the predictions of the current minibatch
+            labels(numpy.array): the labels of the current minibatch; if the label is
+                one-hot or soft-label, customize the corresponding update rule.
+        """
+        raise NotImplementedError(
+            "Should not use it directly, please extend it.")
 
     def eval(self):
-        raise NotImplementedError()
+        """
+        Evaluate the current metric based on the accumulated states.
+
+        Returns:
+            float|list(float)|numpy.array: the metric value computed in Python.
+        """
+        raise NotImplementedError(
+            "Should not use it directly, please extend it.")
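The implicit state rule above is easiest to see with a concrete subclass. Below is a minimal sketch, not part of this patch (the MeanLoss class and its fields are hypothetical), relying only on the MetricBase contract described here: attributes without a "_" prefix are treated as states and are re-initialized by the default reset().

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    class MeanLoss(fluid.metrics.MetricBase):
        # Attributes without a "_" prefix are states, so the default reset()
        # restores them without any extra code in this subclass.
        def __init__(self, name=None):
            super(MeanLoss, self).__init__(name)
            self.total = .0   # accumulated loss over the pass (state)
            self.count = 0    # number of samples seen (state)

        def update(self, preds, labels):
            # here "preds" carries the per-sample loss values of one minibatch
            self.total += float(np.sum(preds))
            self.count += preds.shape[0]

        def eval(self):
            return self.total / self.count if self.count != 0 else .0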
 
 
 class CompositeMetric(MetricBase):
     """
-    Compute multiple metrics in each minibatch.
+    Combine multiple metrics into one instance.
     for example, merge F1, accuracy, recall into one Metric.
+
+    Examples:
+        .. code-block:: python
+
+            labels = fluid.layers.data(name="label", shape=[1], dtype="int32")
+            data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32")
+            pred = fluid.layers.fc(input=data, size=1000, act="tanh")
+            comp = fluid.metrics.CompositeMetric()
+            acc = fluid.metrics.Precision()
+            recall = fluid.metrics.Recall()
+            comp.add_metric(acc)
+            comp.add_metric(recall)
+            for pass_iter in range(PASSES):
+                comp.reset()
+                for data in train_reader():
+                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
+                    comp.update(preds=preds, labels=labels)
+                numpy_acc, numpy_recall = comp.eval()
     """
 
-    def __init__(self, name=None, **kwargs):
-        super(CompositeMetric, self).__init__(name, kwargs)
+    def __init__(self, name=None):
+        super(CompositeMetric, self).__init__(name)
         self._metrics = []
 
     def add_metric(self, metric):
+        """
+        Add one metric instance to CompositeMetric.
+
+        Args:
+            metric: an instance of MetricBase.
+        """
         if not isinstance(metric, MetricBase):
             raise ValueError("SubMetric should be inherit from MetricBase.")
         self._metrics.append(metric)
 
+    def update(self, preds, labels):
+        """
+        Update every metric in sequence.
+
+        Args:
+            preds(numpy.array): the predictions of the current minibatch
+            labels(numpy.array): the labels of the current minibatch; if the label is
+                one-hot or soft-label, customize the corresponding update rule.
+        """
+        for m in self._metrics:
+            m.update(preds, labels)
+
     def eval(self):
+        """
+        Evaluate every metric in sequence.
+
+        Returns:
+            list(float|numpy.array): a list of metric values in Python.
+        """
         ans = []
         for m in self._metrics:
             ans.append(m.eval())
         return ans
 
 
+class Precision(MetricBase):
+    """
+    Precision (also called positive predictive value) is the fraction of
+    relevant instances among the retrieved instances.
+    https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers
+
+    Note that Precision is different from Accuracy in binary classifiers.
+    accuracy = true positives / total instances
+    precision = true positives / all predicted positive instances
+
+    Examples:
+        .. code-block:: python
+
+            metric = fluid.metrics.Precision()
+            for pass_iter in range(PASSES):
+                metric.reset()
+                for data in train_reader():
+                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
+                    metric.update(preds=preds, labels=labels)
+                numpy_precision = metric.eval()
+    """
+
+    def __init__(self, name=None):
+        super(Precision, self).__init__(name)
+        self.tp = 0  # true positive
+        self.fp = 0  # false positive
+
+    def update(self, preds, labels):
+        if not _is_numpy_(preds):
+            raise ValueError("The 'preds' must be a numpy ndarray.")
+        if not _is_numpy_(labels):
+            raise ValueError("The 'labels' must be a numpy ndarray.")
+        sample_num = labels.shape[0]
+        for i in range(sample_num):
+            pred = preds[i].astype("int32")
+            label = labels[i]
+            if pred == 1:
+                if pred == label:
+                    self.tp += 1
+                else:
+                    self.fp += 1
+
+    def eval(self):
+        ap = self.tp + self.fp
+        return float(self.tp) / ap if ap != 0 else .0
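As a quick sanity check of the counting logic above, Precision can be driven directly with numpy arrays. The sketch below is illustrative only: the values are made up, and it assumes predictions already thresholded to {0, 1} with shape (N, 1).

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    metric = fluid.metrics.Precision()
    # toy minibatch: thresholded predictions and binary ground-truth labels
    preds = np.array([[1], [1], [0], [1]], dtype="int32")
    labels = np.array([[1], [0], [0], [1]], dtype="int64")
    metric.update(preds=preds, labels=labels)
    # 2 true positives out of 3 predicted positives
    print(metric.eval())  # ~0.667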
+
+
+class Recall(MetricBase):
+    """
+    Recall (also known as sensitivity) is the fraction of
+    relevant instances that have been retrieved over the
+    total amount of relevant instances.
+
+    https://en.wikipedia.org/wiki/Precision_and_recall
+
+    Examples:
+        .. code-block:: python
+
+            metric = fluid.metrics.Recall()
+            for pass_iter in range(PASSES):
+                metric.reset()
+                for data in train_reader():
+                    loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
+                    metric.update(preds=preds, labels=labels)
+                numpy_recall = metric.eval()
+    """
+
+    def __init__(self, name=None):
+        super(Recall, self).__init__(name)
+        self.tp = 0  # true positive
+        self.fn = 0  # false negative
+
+    def update(self, preds, labels):
+        if not _is_numpy_(preds):
+            raise ValueError("The 'preds' must be a numpy ndarray.")
+        if not _is_numpy_(labels):
+            raise ValueError("The 'labels' must be a numpy ndarray.")
+        sample_num = labels.shape[0]
+        for i in range(sample_num):
+            pred = preds[i].astype("int32")
+            label = labels[i]
+            if label == 1:
+                if pred == label:
+                    self.tp += 1
+                else:
+                    self.fn += 1
+
+    def eval(self):
+        recall = self.tp + self.fn
+        return float(self.tp) / recall if recall != 0 else .0
+
+
 class Accuracy(MetricBase):
     """
     Accumulate the accuracy from minibatches and compute
     the average accuracy for every pass.
+    https://en.wikipedia.org/wiki/Accuracy_and_precision
 
     Args:
        name: the metrics name
 
-    Example:
-        minibatch_accuracy = fluid.layers.accuracy(pred, label)
-        accuracy_evaluator = fluid.metrics.Accuracy()
-        for epoch in PASS_NUM:
-            accuracy_evaluator.reset()
-            for data in batches:
-                loss = exe.run(fetch_list=[cost, minibatch_accuracy])
-            accuracy_evaluator.update(value=minibatch_accuracy, weight=batches)
-            accuracy = accuracy_evaluator.eval()
+    Examples:
+        .. code-block:: python
+
+            labels = fluid.layers.data(name="label", shape=[1], dtype="int32")
+            data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32")
+            pred = fluid.layers.fc(input=data, size=1000, act="tanh")
+            minibatch_accuracy = fluid.layers.accuracy(pred, labels)
+            accuracy_evaluator = fluid.metrics.Accuracy()
+            for pass_iter in range(PASSES):
+                accuracy_evaluator.reset()
+                for data in train_reader():
+                    batch_size = data[0]
+                    loss = exe.run(fetch_list=[cost, minibatch_accuracy])
+                    accuracy_evaluator.update(value=minibatch_accuracy, weight=batch_size)
+                numpy_acc = accuracy_evaluator.eval()
     """
 
     def __init__(self, name=None):
@@ -153,6 +326,13 @@ class Accuracy(MetricBase):
         self.weight = .0
 
     def update(self, value, weight):
+        """
+        Update minibatch states.
+
+        Args:
+            value(float|numpy.array): accuracy of one minibatch.
+            weight(int|float): batch size.
+        """
         if not _is_number_or_matrix_(value):
             raise ValueError(
                 "The 'value' must be a number(int, float) or a numpy ndarray.")
@@ -163,9 +343,8 @@ class Accuracy(MetricBase):
 
     def eval(self):
         if self.weight == 0:
-            raise ValueError(
-                "There is no data in Accuracy Metrics. Please check layers.accuracy output has added to Accuracy."
-            )
+            raise ValueError("There is no data in Accuracy Metrics. \
+                Please check if the layers.accuracy output has been added to Accuracy.")
         return self.value / self.weight
 
 
@@ -174,6 +353,25 @@ class ChunkEvaluator(MetricBase):
     """
     Accumulate counter numbers output by chunk_eval from mini-batches and
     compute the precision recall and F1-score using the accumulated counter
     numbers.
+    For some basics of chunking, please refer to
+    'Chunking with Support Vector Machines'.
+    ChunkEvaluator computes the precision, recall, and F1-score of chunk detection,
+    and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes.
+
+    Examples:
+        ..
code-block:: python + + labels = fluid.layers.data(name="data", shape=[1], dtype="int32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32") + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval( + input=pred, + label=label) + metric = fluid.metrics.ChunkEvaluator() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks) + numpy_precision, numpy_recall, numpy_f1 = metric.eval() """ def __init__(self, name=None): @@ -183,9 +381,17 @@ class ChunkEvaluator(MetricBase): self.num_correct_chunks = 0 def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks): + """ + Update the states based on the layers.chunk_eval() ouputs. + Args: + num_infer_chunks(int|numpy.array): The number of chunks in Inference on the given minibatch. + num_label_chunks(int|numpy.array): The number of chunks in Label on the given mini-batch. + num_correct_chunks(int|float|numpy.array): The number of chunks both in Inference and Label on the + given mini-batch. + """ if not _is_number_or_matrix_(num_infer_chunks): raise ValueError( - "The 'num_infer_chunks' must be a number(int, float) or a numpy ndarray." + "The 'num_infer_chunks' must be a number(int) or a numpy ndarray." ) if not _is_number_or_matrix_(num_label_chunks): raise ValueError( @@ -212,21 +418,28 @@ class ChunkEvaluator(MetricBase): class EditDistance(MetricBase): """ + Edit distance is a way of quantifying how dissimilar two strings + (e.g., words) are to one another by counting the minimum number + of operations required to transform one string into the other. + Refer to https://en.wikipedia.org/wiki/Edit_distance + Accumulate edit distance sum and sequence number from mini-batches and compute the average edit_distance and instance error of all batches. Args: name: the metrics name - Example: - edit_distance_metrics = fluid.layers.edit_distance(input, label) - distance_evaluator = fluid.metrics.EditDistance() - for epoch in PASS_NUM: - distance_evaluator.reset() - for data in batches: - loss = exe.run(fetch_list=[cost] + list(edit_distance_metrics)) - distance_evaluator.update(*edit_distance_metrics) - distance, instance_error = distance_evaluator.eval() + Examples: + .. code-block:: python + + distances, seq_num = fluid.layers.edit_distance(input, label) + distance_evaluator = fluid.metrics.EditDistance() + for epoch in PASS_NUM: + distance_evaluator.reset() + for data in batches: + loss = exe.run(fetch_list=[cost] + list(edit_distance_metrics)) + distance_evaluator.update(distances, seq_num) + distance, instance_error = distance_evaluator.eval() In the above example: 'distance' is the average of the edit distance in a pass. @@ -264,16 +477,38 @@ class EditDistance(MetricBase): class DetectionMAP(MetricBase): """ Calculate the detection mean average precision (mAP). - - TODO (Dang Qingqing): update the following doc. - The general steps are as follows: - 1. calculate the true positive and false positive according to the input - of detection and labels. - 2. calculate mAP value, support two versions: '11 point' and 'integral'. - + mAP is the metric to measure the accuracy of object detectors + like Faster R-CNN, SSD, etc. + It is the average of the maximum precisions at different recall values. 
Please get more information from the following articles: https://sanchom.wordpress.com/tag/average-precision/ + https://arxiv.org/abs/1512.02325 + + The general steps are as follows: + + 1. calculate the true positive and false positive according to the input + of detection and labels. + 2. calculate mAP value, support two versions: '11 point' and 'integral'. + + Examples: + .. code-block:: python + + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + batch_map = layers.detection_map( + input, + label, + class_num, + background_label, + overlap_threshold=overlap_threshold, + evaluate_difficult=evaluate_difficult, + ap_version=ap_version) + metric = fluid.metrics.DetectionMAP() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, batch_map]) + batch_size = data[0] + metric.update(value=batch_map, weight=batch_size) + numpy_map = metric.eval() """ def __init__(self, name=None): @@ -302,17 +537,18 @@ class DetectionMAP(MetricBase): class Auc(MetricBase): """ - Auc Metrics which adapts to binary classification. - Need to note that auc metrics compute the value via Python natively. + Auc metric adapts to the binary classification. + Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve + Need to note that auc metric compute the value via Python natively. If you concern the speed, please use the fluid.layers.auc instead. The `auc` function creates four local variables, `true_positives`, - `true_negatives`, `false_positives` and `false_negatives` that are used to - compute the AUC. To discretize the AUC curve, a linearly spaced set of - thresholds is used to compute pairs of recall and precision values. The area - under the ROC-curve is therefore computed using the height of the recall - values by the false positive rate, while the area under the PR-curve is the - computed using the height of the precision values by the recall. + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the AUC. To discretize the AUC curve, a linearly spaced set of + thresholds is used to compute pairs of recall and precision values. The area + under the ROC-curve is therefore computed using the height of the recall + values by the false positive rate, while the area under the PR-curve is the + computed using the height of the precision values by the recall. Args: name: metric name @@ -322,6 +558,16 @@ class Auc(MetricBase): curve. "NOTE: only implement the ROC curve type via Python now." + + Examples: + .. code-block:: python + + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + metric = fluid.metrics.Auc() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds, labels) + numpy_auc = metric.eval() """ def __init__(self, name, curve='ROC', num_thresholds=200): @@ -334,10 +580,10 @@ class Auc(MetricBase): self.tn_list = np.zeros((num_thresholds, )) self.fp_list = np.zeros((num_thresholds, )) - def update(self, labels, predictions, axis=1): + def update(self, preds, labels): if not _is_numpy_(labels): raise ValueError("The 'labels' must be a numpy ndarray.") - if not _is_numpy_(predictions): + if not _is_numpy_(preds): raise ValueError("The 'predictions' must be a numpy ndarray.") kepsilon = 1e-7 # to account for floating point imprecisions
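The threshold-based ROC computation that the Auc docstring describes can be sketched in a few lines of pure numpy. The helper below is only an illustration of the idea under stated assumptions: roc_auc_sketch is not part of fluid, and the real fluid.metrics.Auc keeps running tp/tn/fp/fn lists across minibatches instead of recomputing them from scratch.

.. code-block:: python

    import numpy as np

    def roc_auc_sketch(scores, labels, num_thresholds=200):
        """Toy ROC-AUC over a linearly spaced set of thresholds.
        `scores` are positive-class probabilities, `labels` are 0/1; both 1-D arrays."""
        kepsilon = 1e-7  # guards against division by zero, as in the metric above
        thresholds = [(i + 1) * 1.0 / (num_thresholds - 1)
                      for i in range(num_thresholds - 2)]
        thresholds = [0.0 - kepsilon] + thresholds + [1.0 + kepsilon]
        tpr_list, fpr_list = [], []
        for t in thresholds:
            pred_pos = scores >= t
            tp = float(np.sum((labels == 1) & pred_pos))
            fp = float(np.sum((labels == 0) & pred_pos))
            fn = float(np.sum((labels == 1) & ~pred_pos))
            tn = float(np.sum((labels == 0) & ~pred_pos))
            tpr_list.append(tp / (tp + fn + kepsilon))
            fpr_list.append(fp / (fp + tn + kepsilon))
        auc = 0.0
        for i in range(num_thresholds - 1):
            # area of one trapezoid under the ROC curve
            auc += abs(fpr_list[i + 1] - fpr_list[i]) * \
                   (tpr_list[i] + tpr_list[i + 1]) / 2.0
        return auc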