Override getValueImpl and revise document

f3eb9cb3 · yangyaming · a74060d4 · f3eb9cb3 · f3eb9cb3
Showing 2 changed files with 56 additions and 37 deletions

paddle/gserver/evaluators/ChunkEvaluator.cpp paddle/gserver/evaluators/ChunkEvaluator.cpp +15 -21

python/paddle/trainer_config_helpers/evaluators.py python/paddle/trainer_config_helpers/evaluators.py +41 -16

未找到文件。
--- a/paddle/gserver/evaluators/ChunkEvaluator.cpp
+++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp
@@ -75,6 +75,7 @@ class ChunkEvaluator : public Evaluator {
  std::vector<Segment> labelSegments_;
  std::vector<Segment> outputSegments_;
  std::set<int> excludedChunkTypes_;
+  mutable std::unordered_map<std::string, real> values_;

 public:
  virtual void init(const EvaluatorConfig& config) {
@@ -243,23 +244,22 @@ public:
    return false;
  }

-public:
  // three metrics: precision, recall and F1-score
  void getNames(std::vector<std::string>* names) {
-    this->storeLocalValues();
-    names->reserve(this->values_.size());
-    for (auto it = this->values_.begin(); it != this->values_.end(); ++it) {
-      names->push_back(this->config_.name() + "." + it->first);
+    storeLocalValues();
+    names->reserve(names->size() + values_.size());
+    for (auto it = values_.begin(); it != values_.end(); ++it) {
+      names->push_back(config_.name() + "." + it->first);
    }
  }

  // get value by field name
  real getValue(const std::string& name, Error* err) const {
-    this->storeLocalValues();
+    storeLocalValues();
    std::vector<std::string> buffers;
    paddle::str::split(name, '.', &buffers);
-    auto it = this->values_.find(buffers[buffers.size() - 1]);
-    if (it == this->values_.end()) {  // not found
+    auto it = values_.find(buffers.back());
+    if (it == values_.end()) {  // not found
      *err = Error("No such key %s", name.c_str());
      return 0.0f;
    }
@@ -268,27 +268,21 @@ public:
  }

  // get type of evaluator
-  std::string getType(const std::string& name, Error* err) const {
-    this->getValue(name, err);
-    if (!err->isOK()) {
-      return std::string();
-    }
-    return "chunk";
-  }
+  std::string getTypeImpl() const { return "chunk"; }

 private:
  void storeLocalValues() const {
-    CHECK_GT(numOutputSegments_, 0);
-    CHECK_GT(numLabelSegments_, 0);
-    double precision = (double)numCorrect_ / numOutputSegments_;
-    double recall = (double)numCorrect_ / numLabelSegments_;
+    CHECK_GE(numOutputSegments_, 0);
+    CHECK_GE(numLabelSegments_, 0);
+    double precision =
+        !numOutputSegments_ ? 0 : (double)numCorrect_ / numOutputSegments_;
+    double recall =
+        !numLabelSegments_ ? 0 : (double)numCorrect_ / numLabelSegments_;
    values_["precision"] = precision;
    values_["recall"] = recall;
    values_["F1-score"] =
        !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall);
  }
-
-  mutable std::unordered_map<std::string, real> values_;
 };

 REGISTER_EVALUATOR(chunk, ChunkEvaluator);

--- a/python/paddle/trainer_config_helpers/evaluators.py
+++ b/python/paddle/trainer_config_helpers/evaluators.py
@@ -347,27 +347,34 @@ def chunk_evaluator(
        excluded_chunk_types=None, ):
    """
    Chunk evaluator is used to evaluate segment labelling accuracy for a
-    sequence. It calculates precision, recall and F1 score of the chunk detection.
+    sequence. It calculates precision, recall and F1 scores for the chunk detection.

-    To use chunk evaluator, the construction of label dict should obey the following rules:
+    To use the chunk evaluator, several concepts need to be clarified first.
+    Chunk type is the type of the whole chunk, and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name, etc.)
+    Tag indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single)
+    We can name a label by combining tag type and chunk type. (e.g. B-ORG for the beginning of an organization name)
+
+    The construction of label dict should obey the following rules:
    (1) Use one of the listed labelling schemes. These schemes differ in how they indicate chunk boundaries.

    .. code-block:: python
-     Scheme Begin Inside End   Single
-      plain  0     -      -     -
-      IOB    0     1      -     -
-      IOE    -     0      1     -
-      IOBES  0     1      2     3
+     Scheme    Description
+      plain    Use the same label for the whole chunk.
+      IOB      Two labels for chunk type X, B-X for chunk beginning and I-X for chunk inside.
+      IOE      Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside.
+      IOBES    Four labels for chunk type X, B-X for chunk beginning, I-X for chunk inside, E-X for chunk end and S-X for single word chunk.
    .. code-block:: python
   
-    To make it clear, let's illustrate by a NER example.
-    Assuming that there are two named entity types including ORG and PER which are called 'chunk type' here,
-    if 'IOB' scheme were used, the label set will be extended to a set including B-ORG, I-ORG, B-PER, I-PER and O,
-    in which B-ORG for begining of ORG and I-ORG for end of ORG.
+    To make it clear, let's illustrate by an NER example.
+    Assuming that there are three named entity types including ORG, PER and LOC which are called 'chunk type' here,
+    if 'IOB' scheme were used, the label set will be extended to a set including B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O,
+    in which B-ORG stands for the beginning of ORG and I-ORG for the inside of ORG.
    Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I.
    Of course, the training data should be labeled accordingly.

-    (2) Map can be done correctly by the listed equations.
+    (2) Mapping is done correctly by the listed equations and assigning protocol.
+
+    The following equations extract the tag type and chunk type from a label.

    .. code-block:: python
    tagType = label % numTagType
@@ -375,17 +382,33 @@ def chunk_evaluator(
    otherChunkType = numChunkTypes
    .. code-block:: python
    
-    Continue the NER example, and the label dict should like this to satify above equations:
+    The following table shows the mapping rule between tagType and tag type in each scheme.
+
+    .. code-block:: python
+     Scheme Begin Inside End   Single
+      plain  0     -      -     -
+      IOB    0     1      -     -
+      IOE    -     0      1     -
+      IOBES  0     1      2     3
+    .. code-block:: python
+
+    Continuing the NER example, the label dict should look like this to satisfy the above equations:

    .. code-block:: python
      B-ORG  0
      I-ORG  1
      B-PER  2
      I-PER  3
-      O      4
+      B-LOC  4
+      I-LOC  5
+      O      6
    .. code-block:: python

-    Realizing that the number of is chunk type is 2 and number of tag type is 2, it is easy to validate this.
+    In this example, chunkType has three values: 0 for ORG, 1 for PER and 2 for LOC. Because the scheme is
+    "IOB", tagType has two values: 0 for B and 1 for I.
+    Here we will use I-LOC to explain the above mapping rules in detail.
+    For I-LOC, the label id is 5, so we get tagType=1 and chunkType=2, which means I-LOC is a part of the NER chunk LOC
+    and the tag is I.

    The simple usage is:

@@ -393,6 +416,8 @@ def chunk_evaluator(

       eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types)

+    .. code-block:: python
+    
    :param input: The input layers.
    :type input: LayerOutput
    :param label: An input layer containing the ground truth label.