evaluator.py 14.7 KB
Newer Older
1
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
D
dzhwinter 已提交
2
#
D
dzhwinter 已提交
3 4 5
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
D
dzhwinter 已提交
6
#
D
dzhwinter 已提交
7
#     http://www.apache.org/licenses/LICENSE-2.0
D
dzhwinter 已提交
8
#
D
dzhwinter 已提交
9 10 11 12 13 14
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

D
dzhwinter 已提交
15
import warnings
D
Dong Zhihong 已提交
16
import numpy as np
武毅 已提交
17

18
import layers
Y
Yu Yang 已提交
19 20
from framework import Program, Variable, program_guard
import unique_name
21
from layer_helper import LayerHelper
22
from initializer import Constant
武毅 已提交
23

24 25
# Names exported on a star-import of this module.
__all__ = ['ChunkEvaluator', 'EditDistance', 'DetectionMAP']
Y
Yu Yang 已提交
29 30 31


def _clone_var_(block, var):
    """
    Declare a persistable copy of `var` inside `block`.

    The copy reuses the name, shape, dtype, type and lod_level of `var` and
    is always created with persistable=True.

    Args:
        block: the block whose `create_var` is used to declare the copy.
        var(Variable): the variable to mirror.

    Returns: the result of `block.create_var` for the copy.
    """
    assert isinstance(var, Variable)
    clone_attrs = dict(
        name=var.name,
        shape=var.shape,
        dtype=var.dtype,
        type=var.type,
        lod_level=var.lod_level,
        persistable=True)
    return block.create_var(**clone_attrs)


D
Dong Zhihong 已提交
42 43
class Evaluator(object):
    """
    Common base class of the evaluators in this module.

    Constructing any evaluator emits a deprecation warning that points the
    user to the class of the same name under fluid.metrics.

    Args:
        name(str): The name of evaluator, such as "accuracy". It is handed to
            LayerHelper and used as the prefix of generated state names.
        **kwargs: Forwarded unchanged to LayerHelper (presumably where
            main_program / startup_program are supplied -- confirm against
            LayerHelper).

    Attributes:
        states(list): The state variables created through `create_state`.
            `reset` fills every one of them with zero.
        metrics(list): The metric variables registered by subclasses.
        helper(LayerHelper): The helper used to create the state variables.
    """

    def __init__(self, name, **kwargs):
        cls_name = self.__class__.__name__
        warnings.warn(
            "The %s is deprecated, because maintain a modified program inside evaluator cause bug easily, please use fluid.metrics.%s instead."
            % (cls_name, cls_name), Warning)
        self.states = []
        self.metrics = []
        self.helper = LayerHelper(name, **kwargs)

    def reset(self, executor, reset_program=None):
        """
        Zero every state variable, e.g. at the beginning of a pass.

        Args:
            executor: the executor used to run the reset program.
            reset_program(Program, optional): the program that receives the
                fill ops. A fresh Program is created when it is None.
        """
        if reset_program is None:
            reset_program = Program()

        with program_guard(main_program=reset_program):
            for state in self.states:
                assert isinstance(state, Variable)
                # Re-declare the state in the reset program so the fill op
                # writes to the variable of the same name.
                target = _clone_var_(reset_program.current_block(), state)
                layers.fill_constant(
                    shape=target.shape,
                    value=0.0,
                    dtype=target.dtype,
                    out=target)

        executor.run(reset_program)

    def eval(self, executor, eval_program=None):
        """
        Evaluate the statistics merged over multiple mini-batches.

        Subclasses must override this method; the base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError()

    def create_state(self, suffix, dtype, shape):
        """
        Create a persistable state variable and record it in `self.states`.

        NOTE: It is not a public API.

        Args:
            suffix(str): the state suffix, appended to a generated unique
                name with an underscore.
            dtype(str|core.VarDesc.VarType): the state data type
            shape(tuple|list): the shape of state

        Returns: State variable
        """
        state_name = "_".join(
            [unique_name.generate(self.helper.name), suffix])
        state = self.helper.create_variable(
            name=state_name, persistable=True, dtype=dtype, shape=shape)
        self.states.append(state)
        return state
D
Dong Zhihong 已提交
112

D
Dong Zhihong 已提交
113

G
guosheng 已提交
114 115
class ChunkEvaluator(Evaluator):
    """
    Sum the chunk counters produced by chunk_eval over mini-batches and
    derive precision, recall and F1-score from the accumulated counters.

    The arguments input, label, chunk_scheme, num_chunk_types and
    excluded_chunk_types are passed straight to layers.chunk_eval.
    """

    def __init__(
            self,
            input,
            label,
            chunk_scheme,
            num_chunk_types,
            excluded_chunk_types=None, ):
        super(ChunkEvaluator, self).__init__("chunk_eval")
        if self.helper.main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # One int64 accumulator per counter. The creation order fixes the
        # order of self.states, which eval() relies on when unpacking.
        counter_names = ('num_infer_chunks', 'num_label_chunks',
                         'num_correct_chunks')
        for counter in counter_names:
            setattr(self, counter,
                    self.create_state(
                        dtype='int64', shape=[1], suffix=counter))

        (precision, recall, f1_score, batch_infer, batch_label,
         batch_correct) = layers.chunk_eval(
             input=input,
             label=label,
             chunk_scheme=chunk_scheme,
             num_chunk_types=num_chunk_types,
             excluded_chunk_types=excluded_chunk_types, )

        # Add the counters of the current mini-batch onto the running totals.
        accumulations = (
            (self.num_infer_chunks, batch_infer),
            (self.num_label_chunks, batch_label),
            (self.num_correct_chunks, batch_correct), )
        for total, batch_value in accumulations:
            layers.sums(input=[total, batch_value], out=total)

        self.metrics.extend([precision, recall, f1_score])

    def eval(self, executor, eval_program=None):
        """
        Fetch the accumulated counters and compute the overall metrics.

        Args:
            executor: the executor used to fetch the state variables.
            eval_program(Program, optional): the program to run. A fresh
                Program is created when it is None.

        Returns: a tuple (precision, recall, f1_score), each a float32 numpy
            array holding one element. A metric is 0 when its denominator
            counter is 0.
        """
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        fetched = executor.run(
            eval_program,
            fetch_list=[_clone_var_(block, state) for state in self.states])
        num_infer_chunks, num_label_chunks, num_correct_chunks = fetched
        num_infer_chunks = num_infer_chunks[0]
        num_label_chunks = num_label_chunks[0]
        num_correct_chunks = num_correct_chunks[0]

        if num_infer_chunks:
            precision = float(num_correct_chunks) / num_infer_chunks
        else:
            precision = 0

        if num_label_chunks:
            recall = float(num_correct_chunks) / num_label_chunks
        else:
            recall = 0

        # With at least one correct chunk both precision and recall are
        # non-zero, so the denominator below cannot be zero.
        if num_correct_chunks:
            f1_score = float(2 * precision * recall) / (precision + recall)
        else:
            f1_score = 0

        precision_out = np.array([precision], dtype='float32')
        recall_out = np.array([recall], dtype='float32')
        f1_out = np.array([f1_score], dtype='float32')
        return precision_out, recall_out, f1_out
177 178 179 180


class EditDistance(Evaluator):
    """
    Accumulate edit distance sum and sequence number from mini-batches and
    compute the average edit_distance and instance error of all batches.

    Args:
        input: the sequences predicted by network.
        label: the target sequences, which must have the same sequence count
            as input.
        ignored_tokens(list of int): Tokens that should be removed before
            calculating edit distance.
        **kwargs: forwarded to the Evaluator base constructor.

    Example:

        exe = fluid.Executor(place)
        distance_evaluator = fluid.evaluator.EditDistance(input, label)
        for epoch in PASS_NUM:
            distance_evaluator.reset(exe)
            for data in batches:
                loss = exe.run(fetch_list=[cost])
            distance, instance_error = distance_evaluator.eval(exe)

        In the above example:
        'distance' is the average of the edit distance in a pass.
        'instance_error' is the instance error rate in a pass.

    """

    def __init__(self, input, label, ignored_tokens=None, **kwargs):
        super(EditDistance, self).__init__("edit_distance", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # Running totals over all mini-batches since the last reset().
        self.total_distance = self.create_state(
            dtype='float32', shape=[1], suffix='total_distance')
        self.seq_num = self.create_state(
            dtype='int64', shape=[1], suffix='seq_num')
        self.instance_error = self.create_state(
            dtype='int64', shape=[1], suffix='instance_error')

        distances, seq_num = layers.edit_distance(
            input=input, label=label, ignored_tokens=ignored_tokens)

        # A sequence is counted as correct when its edit distance equals 0.
        zero = layers.fill_constant(shape=[1], value=0.0, dtype='float32')
        compare_result = layers.equal(distances, zero)
        # Cast explicitly to int64: the count is subtracted from seq_num and
        # summed into the int64 `instance_error` state, whereas a bare 'int'
        # has a platform-dependent width.
        compare_result_int = layers.cast(x=compare_result, dtype='int64')
        seq_right_count = layers.reduce_sum(compare_result_int)
        instance_error_count = layers.elementwise_sub(
            x=seq_num, y=seq_right_count)
        total_distance = layers.reduce_sum(distances)

        # Fold the statistics of the current mini-batch into the states.
        layers.sums(
            input=[self.total_distance, total_distance],
            out=self.total_distance)
        layers.sums(input=[self.seq_num, seq_num], out=self.seq_num)
        layers.sums(
            input=[self.instance_error, instance_error_count],
            out=self.instance_error)

        # Per-batch values exposed as metrics.
        self.metrics.append(total_distance)
        self.metrics.append(instance_error_count)

    def eval(self, executor, eval_program=None):
        """
        Compute the averages over everything accumulated since the last reset.

        Args:
            executor: the executor used to run the evaluation program.
            eval_program(Program, optional): the program that receives the
                averaging ops. A fresh Program is created when it is None.

        Returns: a tuple (avg_distance, avg_instance_error) of numpy arrays,
            i.e. total_distance / seq_num and instance_error / seq_num.
        """
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        with program_guard(main_program=eval_program):
            total_distance = _clone_var_(block, self.total_distance)
            seq_num = _clone_var_(block, self.seq_num)
            instance_error = _clone_var_(block, self.instance_error)
            # Both integer counters are cast to float32 before the divisions.
            seq_num = layers.cast(x=seq_num, dtype='float32')
            instance_error = layers.cast(x=instance_error, dtype='float32')
            avg_distance = layers.elementwise_div(x=total_distance, y=seq_num)
            avg_instance_error = layers.elementwise_div(
                x=instance_error, y=seq_num)
            result = executor.run(
                eval_program, fetch_list=[avg_distance, avg_instance_error])
        return np.array(result[0]), np.array(result[1])
255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274


class DetectionMAP(Evaluator):
    """
    Calculate the detection mean average precision (mAP).

    TODO (Dang Qingqing): update the following doc.
    The general steps are as follows:
    1. calculate the true positive and false positive according to the input
        of detection and labels.
    2. calculate mAP value, support two versions: '11 point' and 'integral'.

    Please get more information from the following articles:
      https://sanchom.wordpress.com/tag/average-precision/
      https://arxiv.org/abs/1512.02325

    Args:
        input (Variable): The detection results, which is a LoDTensor with shape
            [M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax].
        gt_label (Variable): The ground truth label index, which is a LoDTensor
            with shape [N, 1].
        gt_box (Variable): The ground truth bounding box (bbox), which is a
            LoDTensor with shape [N, 4]. The layout is [xmin, ymin, xmax, ymax].
        gt_difficult (Variable|None): Whether this ground truth is a difficult
            bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
            it means all the ground truth labels are not difficult bbox.
        class_num (int): The class number.
        background_label (int): The index of background label, the background
            label will be ignored. If set to -1, then all categories will be
            considered, 0 by default.
        overlap_threshold (float): The threshold for deciding true/false
            positive, 0.5 by default.
        evaluate_difficult (bool): Whether to consider difficult ground truth
            for evaluation, True by default. This argument does not work when
            gt_difficult is None.
        ap_version (string): The average precision calculation ways, it must be
            'integral' or '11point'. Please check
            https://sanchom.wordpress.com/tag/average-precision/ for details.
            - 11point: the 11-point interpolated average precision.
            - integral: the natural integral of the precision-recall curve.

    Example:

        exe = fluid.Executor(place)
        map_evaluator = fluid.evaluator.DetectionMAP(input,
            gt_label, gt_box, gt_difficult)
        cur_map, accum_map = map_evaluator.get_map_var()
        fetch = [cost, cur_map, accum_map]
        for epoch in PASS_NUM:
            map_evaluator.reset(exe)
            for data in batches:
                loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch)

        In the above example:

        'cur_map_v' is the mAP of current mini-batch.
        'accum_map_v' is the accumulative mAP of one pass.
    """

    def __init__(self,
                 input,
                 gt_label,
                 gt_box,
                 gt_difficult=None,
                 class_num=None,
                 background_label=0,
                 overlap_threshold=0.5,
                 evaluate_difficult=True,
                 ap_version='integral'):
        super(DetectionMAP, self).__init__("map_eval")

        # Build one label tensor by concatenating along axis 1; every part is
        # cast to the dtype of gt_box first.
        gt_label = layers.cast(x=gt_label, dtype=gt_box.dtype)
        # Compare against None explicitly: the documented contract is
        # "Variable or None", and a Variable should not be truth-tested.
        if gt_difficult is not None:
            gt_difficult = layers.cast(x=gt_difficult, dtype=gt_box.dtype)
            label = layers.concat([gt_label, gt_difficult, gt_box], axis=1)
        else:
            label = layers.concat([gt_label, gt_box], axis=1)

        # calculate mean average precision (mAP) of current mini-batch
        cur_map = layers.detection_map(
            input,
            label,
            class_num,
            background_label,
            overlap_threshold=overlap_threshold,
            evaluate_difficult=evaluate_difficult,
            ap_version=ap_version)

        # Accumulators handed to the second detection_map call below as both
        # input_states and out_states, in this creation order.
        self.create_state(dtype='int32', shape=None, suffix='accum_pos_count')
        self.create_state(dtype='float32', shape=None, suffix='accum_true_pos')
        self.create_state(dtype='float32', shape=None, suffix='accum_false_pos')

        # Persistable int32 flag, initialised to 0 and set to 1 by the
        # fill_constant op appended after the accumulating detection_map op.
        has_state = self.helper.create_variable(
            persistable=True, dtype='int32', shape=[1])
        self.helper.set_variable_initializer(
            has_state, initializer=Constant(value=int(0)))
        self.has_state = has_state

        # calculate accumulative mAP
        accum_map = layers.detection_map(
            input,
            label,
            class_num,
            background_label,
            overlap_threshold=overlap_threshold,
            evaluate_difficult=evaluate_difficult,
            has_state=self.has_state,
            input_states=self.states,
            out_states=self.states,
            ap_version=ap_version)

        layers.fill_constant(
            shape=self.has_state.shape,
            value=1,
            dtype=self.has_state.dtype,
            out=self.has_state)

        self.cur_map = cur_map
        self.accum_map = accum_map

    def get_map_var(self):
        """
        Returns: a tuple (cur_map, accum_map) holding the mAP variable of the
            current mini-batch and the accumulative mAP variable.
        """
        return self.cur_map, self.accum_map

    def reset(self, executor, reset_program=None):
        """
        Set the `has_state` flag back to 0.

        Only the flag is written here; the accumulator states themselves are
        not re-filled by this method (presumably detection_map ignores its
        input states while the flag is 0 -- confirm against the op).

        Args:
            executor: the executor used to run the reset program.
            reset_program(Program, optional): the program that receives the
                fill op. A fresh Program is created when it is None.
        """
        if reset_program is None:
            reset_program = Program()
        with program_guard(main_program=reset_program):
            var = _clone_var_(reset_program.current_block(), self.has_state)
            layers.fill_constant(
                shape=var.shape, value=0, dtype=var.dtype, out=var)
        executor.run(reset_program)