diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/gserver/evaluators/ChunkEvaluator.cpp index 13f02e51fe9e3831103982130bfdaa3255e1d174..1658282f3a5f79b128ce8685e92fd5cf9db2e41a 100644 --- a/paddle/gserver/evaluators/ChunkEvaluator.cpp +++ b/paddle/gserver/evaluators/ChunkEvaluator.cpp @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "paddle/math/Vector.h" +#include "paddle/utils/StringUtil.h" #include "Evaluator.h" @@ -74,6 +75,7 @@ class ChunkEvaluator : public Evaluator { std::vector labelSegments_; std::vector outputSegments_; std::set excludedChunkTypes_; + mutable std::unordered_map values_; public: virtual void init(const EvaluatorConfig& config) { @@ -121,11 +123,9 @@ public: } virtual void printStats(std::ostream& os) const { - double precision = (double)numCorrect_ / numOutputSegments_; - double recall = (double)numCorrect_ / numLabelSegments_; - double f1 = - !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall); - os << config_.name() << "=" << f1 << " true_chunks=" << numLabelSegments_ + storeLocalValues(); + os << config_.name() << "=" << values_["F1-score"] + << " true_chunks=" << numLabelSegments_ << " result_chunks=" << numOutputSegments_ << " correct_chunks=" << numCorrect_; } @@ -243,6 +243,46 @@ public: if (tag == tagSingle_) return true; return false; } + + // three metrics: precision, recall and F1-score + void getNames(std::vector* names) { + storeLocalValues(); + names->reserve(names->size() + values_.size()); + for (auto it = values_.begin(); it != values_.end(); ++it) { + names->push_back(config_.name() + "." 
+ it->first); + } + } + + // get value by field name + real getValue(const std::string& name, Error* err) const { + storeLocalValues(); + std::vector buffers; + paddle::str::split(name, '.', &buffers); + auto it = values_.find(buffers.back()); + if (it == values_.end()) { // not found + *err = Error("No such key %s", name.c_str()); + return 0.0f; + } + + return it->second; + } + + // get type of evaluator + std::string getTypeImpl() const { return "chunk"; } + +private: + void storeLocalValues() const { + CHECK_GE(numOutputSegments_, 0); + CHECK_GE(numLabelSegments_, 0); + double precision = + !numOutputSegments_ ? 0 : (double)numCorrect_ / numOutputSegments_; + double recall = + !numLabelSegments_ ? 0 : (double)numCorrect_ / numLabelSegments_; + values_["precision"] = precision; + values_["recall"] = recall; + values_["F1-score"] = + !numCorrect_ ? 0 : 2 * precision * recall / (precision + recall); + } }; REGISTER_EVALUATOR(chunk, ChunkEvaluator); diff --git a/python/paddle/trainer_config_helpers/evaluators.py b/python/paddle/trainer_config_helpers/evaluators.py index 567521ee9dbadb7a2502cfb9972ef0940e1e410a..a5234f3e47f6caa4b365de593648e0ee5ad6e4a2 100644 --- a/python/paddle/trainer_config_helpers/evaluators.py +++ b/python/paddle/trainer_config_helpers/evaluators.py @@ -347,32 +347,71 @@ def chunk_evaluator( excluded_chunk_types=None, ): """ Chunk evaluator is used to evaluate segment labelling accuracy for a - sequence. It calculates the chunk detection F1 score. + sequence. It calculates precision, recall and F1 scores for the chunk detection. - A chunk is correctly detected if its beginning, end and type are correct. - Other chunk type is ignored. + To use chunk evaluator, several concepts need to be clarified firstly. - For each label in the label sequence, we have: + * **Chunk type** is the type of the whole chunk and a chunk consists of one or several words. (For example in NER, ORG for organization name, PER for person name etc.) - .. 
code-block:: python + * **Tag type** indicates the position of a word in a chunk. (B for begin, I for inside, E for end, S for single) + We can name a label by combining tag type and chunk type. (i.e. B-ORG for beginning of an organization name) - tagType = label % numTagType - chunkType = label / numTagType - otherChunkType = numChunkTypes + The construction of label dictionary should obey the following rules: - The total number of different labels is numTagType*numChunkTypes+1. - We support 4 labelling scheme. - The tag type for each of the scheme is shown as follows: + - Use one of the listed labelling schemes. These schemes differ in how they indicate the chunk boundary. - .. code-block:: python + .. code-block:: text + + Scheme Description + plain Use the same label for the whole chunk. + IOB Two labels for chunk type X, B-X for chunk beginning and I-X for chunk inside. + IOE Two labels for chunk type X, E-X for chunk ending and I-X for chunk inside. + IOBES Four labels for chunk type X, B-X for chunk beginning, I-X for chunk inside, E-X for chunk end and S-X for single word chunk. + + To make it clear, let's illustrate by an NER example. + Assuming that there are three named entity types including ORG, PER and LOC which are called 'chunk type' here, + if 'IOB' scheme were used, the label set will be extended to a set including B-ORG, I-ORG, B-PER, I-PER, B-LOC, I-LOC and O, + in which B-ORG stands for the beginning of ORG and I-ORG for the inside of ORG. + Prefixes which are called 'tag type' here are added to chunk types and there are two tag types including B and I. + Of course, the training data should be labeled accordingly. + + - Mapping is done correctly by the listed equations and assigning protocol. + + The following equations are used to extract tag type and chunk type from a label. + + ..
code-block:: text + + tagType = label % numTagType + chunkType = label / numTagType + otherChunkType = numChunkTypes + + The following table shows the mapping rule between tagType and tag type in each scheme. + + .. code-block:: text + + Scheme Begin Inside End Single + plain 0 - - - + IOB 0 1 - - + IOE - 0 1 - + IOBES 0 1 2 3 + + Continuing the NER example, the label dict should look like this to satisfy the above equations: + + .. code-block:: text - Scheme Begin Inside End Single - plain 0 - - - - IOB 0 1 - - - IOE - 0 1 - - IOBES 0 1 2 3 + B-ORG 0 + I-ORG 1 + B-PER 2 + I-PER 3 + B-LOC 4 + I-LOC 5 + O 6 - 'plain' means the whole chunk must contain exactly the same chunk label. + In this example, chunkType has three values: 0 for ORG, 1 for PER, 2 for LOC. Because the scheme is + "IOB", tagType has two values: 0 for B and 1 for I. + Here we will use I-LOC to explain the above mapping rules in detail. + For I-LOC, the label id is 5, so we can get tagType=1 and chunkType=2, which means I-LOC is a part of NER chunk LOC + and the tag is I. The simple usage is: @@ -380,6 +419,7 @@ def chunk_evaluator( eval = chunk_evaluator(input, label, chunk_scheme, num_chunk_types) + :param input: The input layers. :type input: LayerOutput :param label: An input layer containing the ground truth label.