#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

D
Dong Zhihong 已提交
15
import numpy as np
武毅 已提交
16

17
import layers
Y
Yu Yang 已提交
18 19
from framework import Program, Variable, program_guard
import unique_name
20
from layer_helper import LayerHelper
武毅 已提交
21

22 23 24
# Names exported by a star-import of this module.
__all__ = ['Accuracy', 'ChunkEvaluator', 'EditDistance']
Y
Yu Yang 已提交
27 28 29


def _clone_var_(block, var):
    """
    Declare in `block` a persistable variable that mirrors `var`.

    The variable is created through `block.create_var` with the same name,
    shape, dtype, type and lod_level as `var`, and with `persistable=True`.

    Args:
        block: the block whose `create_var` method is called.
        var(Variable): the variable whose attributes are copied.

    Returns: The value returned by `block.create_var`.
    """
    assert isinstance(var, Variable)
    mirrored_attrs = dict(
        name=var.name,
        shape=var.shape,
        dtype=var.dtype,
        type=var.type,
        lod_level=var.lod_level,
        persistable=True)
    return block.create_var(**mirrored_attrs)


D
Dong Zhihong 已提交
40 41
class Evaluator(object):
    """
    Base class for all evaluators.

    Args:
        name(str): The name of the evaluator, such as "accuracy". Used to
            generate temporary variable names.
        main_program(Program, optional): The evaluator should be added to this
            main_program. Default default_main_program()
        startup_program(Program, optional): The parameter should be added to
            this startup_program. Default default_startup_program()

    Attributes:
        states(list): The list of state variables. States will be reset to
            zero when `reset` is invoked.
        metrics(list): The list of metric variables. They will be calculated
            every mini-batch.
    """

    def __init__(self, name, **kwargs):
        self.states = []
        self.metrics = []
        # Extra keyword arguments are forwarded unchanged to LayerHelper.
        self.helper = LayerHelper(name, **kwargs)

    def reset(self, executor, reset_program=None):
        """
        Reset metric states at the beginning of each pass or of a
        user-specified batch.

        Args:
            executor: the executor whose `run` method executes the reset
                program.
            reset_program(Program, optional): the program that receives the
                zero-fill operators. A new Program is created when omitted.
        """
        if reset_program is None:
            reset_program = Program()

        with program_guard(main_program=reset_program):
            for var in self.states:
                assert isinstance(var, Variable)
                # Re-declare the state in the reset program, then overwrite
                # it with zeros.
                g_var = _clone_var_(reset_program.current_block(), var)
                layers.fill_constant(
                    shape=g_var.shape, value=0.0, dtype=g_var.dtype, out=g_var)

        executor.run(reset_program)

    def eval(self, executor, eval_program=None):
        """
        Evaluate the statistics merged by multiple mini-batches.

        Subclasses must override this method.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()

    def create_state(self, suffix, dtype, shape):
        """
        Create a state variable.

        NOTE: It is not a public API.

        The variable is persistable, is named by joining a unique name
        generated from the helper name with `suffix`, and is appended to
        `self.states`.

        Args:
            suffix(str): the state suffix.
            dtype(str|core.VarDesc.VarType): the state data type
            shape(tuple|list): the shape of state

        Returns: State variable

        """
        state = self.helper.create_variable(
            name="_".join([unique_name.generate(self.helper.name), suffix]),
            persistable=True,
            dtype=dtype,
            shape=shape)
        self.states.append(state)
        return state
D
Dong Zhihong 已提交
107

D
Dong Zhihong 已提交
108 109

class Accuracy(Evaluator):
    """
    Average accuracy over multiple mini-batches.

    Two int64 states of shape [1], `total` and `correct`, accumulate the
    per-batch counters filled in by `layers.accuracy`. The per-batch accuracy
    variable is appended to `metrics`.

    Args:
        input: forwarded to `layers.accuracy` as `input`.
        label: forwarded to `layers.accuracy` as `label`.
        k(int): forwarded to `layers.accuracy` as `k`. Default 1.
        **kwargs: forwarded to the `Evaluator` constructor.

    Raises:
        ValueError: if the current block of the main program is not the
            root block (idx 0).
    """

    def __init__(self, input, label, k=1, **kwargs):
        super(Accuracy, self).__init__("accuracy", **kwargs)
        if self.helper.main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # Accumulators; statement order is kept because it drives name
        # generation and operator order.
        self.total = self.create_state(dtype='int64', shape=[1], suffix='total')
        self.correct = self.create_state(
            dtype='int64', shape=[1], suffix='correct')

        # Per-batch counters handed to the accuracy layer as outputs.
        batch_total = self.helper.create_tmp_variable(dtype='int')
        batch_correct = self.helper.create_tmp_variable(dtype='int')
        batch_acc = layers.accuracy(
            input=input,
            label=label,
            k=k,
            total=batch_total,
            correct=batch_correct)

        # Cast to the accumulator dtype before summing into the states.
        batch_total = layers.cast(x=batch_total, dtype='int64')
        batch_correct = layers.cast(x=batch_correct, dtype='int64')
        layers.sums(input=[self.total, batch_total], out=self.total)
        layers.sums(input=[self.correct, batch_correct], out=self.correct)

        self.metrics.append(batch_acc)

    def eval(self, executor, eval_program=None):
        """
        Compute `correct / total` from the accumulated states.

        Args:
            executor: the executor whose `run` method executes the
                evaluation program.
            eval_program(Program, optional): the program that receives the
                evaluation operators. A new Program is created when omitted.

        Returns: numpy array built from the first fetched result.
        """
        if eval_program is None:
            eval_program = Program()
        target_block = eval_program.current_block()
        with program_guard(main_program=eval_program):
            total_state = _clone_var_(target_block, self.total)
            correct_state = _clone_var_(target_block, self.correct)
            total_f32 = layers.cast(total_state, dtype='float32')
            correct_f32 = layers.cast(correct_state, dtype='float32')
            ratio = layers.elementwise_div(x=correct_f32, y=total_f32)
        fetched = executor.run(eval_program, fetch_list=[ratio])
        return np.array(fetched[0])
G
guosheng 已提交
145 146 147 148


class ChunkEvaluator(Evaluator):
    """
    Accumulate the counter numbers output by chunk_eval over mini-batches and
    compute precision, recall and F1-score from the accumulated counters.

    Args:
        input: forwarded to `layers.chunk_eval` as `input`.
        label: forwarded to `layers.chunk_eval` as `label`.
        chunk_scheme: forwarded to `layers.chunk_eval` as `chunk_scheme`.
        num_chunk_types: forwarded to `layers.chunk_eval` as
            `num_chunk_types`.
        excluded_chunk_types: forwarded to `layers.chunk_eval` as
            `excluded_chunk_types`. Default None.
        **kwargs: forwarded to the `Evaluator` constructor, as the other
            evaluators in this module do.

    Raises:
        ValueError: if the current block of the main program is not the
            root block (idx 0).
    """

    def __init__(self,
                 input,
                 label,
                 chunk_scheme,
                 num_chunk_types,
                 excluded_chunk_types=None,
                 **kwargs):
        super(ChunkEvaluator, self).__init__("chunk_eval", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # Accumulators; `eval` unpacks `self.states` in this creation order.
        self.num_infer_chunks = self.create_state(
            dtype='int64', shape=[1], suffix='num_infer_chunks')
        self.num_label_chunks = self.create_state(
            dtype='int64', shape=[1], suffix='num_label_chunks')
        self.num_correct_chunks = self.create_state(
            dtype='int64', shape=[1], suffix='num_correct_chunks')
        (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
         num_correct_chunks) = layers.chunk_eval(
             input=input,
             label=label,
             chunk_scheme=chunk_scheme,
             num_chunk_types=num_chunk_types,
             excluded_chunk_types=excluded_chunk_types)
        layers.sums(
            input=[self.num_infer_chunks, num_infer_chunks],
            out=self.num_infer_chunks)
        layers.sums(
            input=[self.num_label_chunks, num_label_chunks],
            out=self.num_label_chunks)
        layers.sums(
            input=[self.num_correct_chunks, num_correct_chunks],
            out=self.num_correct_chunks)

        self.metrics.extend([precision, recall, f1_score])

    def eval(self, executor, eval_program=None):
        """
        Compute precision, recall and F1-score from the accumulated counters.

        Each ratio falls back to 0 when its guard counter is zero, so no
        division by zero occurs.

        Args:
            executor: the executor whose `run` method fetches the states.
            eval_program(Program, optional): the program in which the states
                are re-declared. A new Program is created when omitted.

        Returns: A tuple (precision, recall, f1_score); each element is a
            float32 numpy array holding one value.
        """
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        num_infer_chunks, num_label_chunks, num_correct_chunks = executor.run(
            eval_program,
            fetch_list=[_clone_var_(block, state) for state in self.states])
        num_infer_chunks = num_infer_chunks[0]
        num_label_chunks = num_label_chunks[0]
        num_correct_chunks = num_correct_chunks[0]
        precision = (float(num_correct_chunks) / num_infer_chunks
                     if num_infer_chunks else 0)
        recall = (float(num_correct_chunks) / num_label_chunks
                  if num_label_chunks else 0)
        # A non-zero correct count implies precision + recall > 0.
        f1_score = (float(2 * precision * recall) / (precision + recall)
                    if num_correct_chunks else 0)
        return (np.array([precision], dtype='float32'),
                np.array([recall], dtype='float32'),
                np.array([f1_score], dtype='float32'))
210 211 212 213


class EditDistance(Evaluator):
    """
    Accumulate edit distance sum and sequence number from mini-batches and
    compute the average edit_distance and instance error of all batches.

    Args:
        input: the sequences predicted by network.
        label: the target sequences, which must have the same sequence count
            as input.
        ignored_tokens(list of int): Tokens that should be removed before
            calculating edit distance.
        **kwargs: forwarded to the `Evaluator` constructor.

    Raises:
        ValueError: if the current block of the main program is not the
            root block (idx 0).

    Example:

        exe = fluid.Executor(place)
        distance_evaluator = fluid.evaluator.EditDistance(input, label)
        for epoch in range(PASS_NUM):
            distance_evaluator.reset(exe)
            for data in batches:
                loss = exe.run(fetch_list=[cost])
            distance, instance_error = distance_evaluator.eval(exe)

        In the above example:
        'distance' is the average of the edit distance rate in a pass.
        'instance_error' is the instance error rate in a pass.

    """

    def __init__(self, input, label, ignored_tokens=None, **kwargs):
        super(EditDistance, self).__init__("edit_distance", **kwargs)
        main_program = self.helper.main_program
        if main_program.current_block().idx != 0:
            raise ValueError("You can only invoke Evaluator in root block")

        # Accumulators that persist across mini-batches.
        self.total_distance = self.create_state(
            dtype='float32', shape=[1], suffix='total_distance')
        self.seq_num = self.create_state(
            dtype='int64', shape=[1], suffix='seq_num')
        self.instance_error = self.create_state(
            dtype='int64', shape=[1], suffix='instance_error')
        distances, seq_num = layers.edit_distance(
            input=input, label=label, ignored_tokens=ignored_tokens)

        # Count the sequences whose distance equals zero; the rest of the
        # batch is counted as instance errors.
        zero = layers.fill_constant(shape=[1], value=0.0, dtype='float32')
        compare_result = layers.equal(distances, zero)
        compare_result_int = layers.cast(x=compare_result, dtype='int')
        seq_right_count = layers.reduce_sum(compare_result_int)
        instance_error_count = layers.elementwise_sub(
            x=seq_num, y=seq_right_count)
        total_distance = layers.reduce_sum(distances)
        layers.sums(
            input=[self.total_distance, total_distance],
            out=self.total_distance)
        layers.sums(input=[self.seq_num, seq_num], out=self.seq_num)
        layers.sums(
            input=[self.instance_error, instance_error_count],
            out=self.instance_error)
        self.metrics.append(total_distance)
        self.metrics.append(instance_error_count)

    def eval(self, executor, eval_program=None):
        """
        Compute the average edit distance and the instance error rate from
        the accumulated states.

        Both values are the corresponding accumulated state divided by the
        accumulated sequence number.

        Args:
            executor: the executor whose `run` method executes the
                evaluation program.
            eval_program(Program, optional): the program that receives the
                evaluation operators. A new Program is created when omitted.

        Returns: A tuple (avg_distance, avg_instance_error) of numpy arrays.
        """
        if eval_program is None:
            eval_program = Program()
        block = eval_program.current_block()
        with program_guard(main_program=eval_program):
            total_distance = _clone_var_(block, self.total_distance)
            seq_num = _clone_var_(block, self.seq_num)
            instance_error = _clone_var_(block, self.instance_error)
            # The counters are int64 states; cast them before the float
            # division.
            seq_num = layers.cast(x=seq_num, dtype='float32')
            instance_error = layers.cast(x=instance_error, dtype='float32')
            avg_distance = layers.elementwise_div(x=total_distance, y=seq_num)
            avg_instance_error = layers.elementwise_div(
                x=instance_error, y=seq_num)
            result = executor.run(
                eval_program, fetch_list=[avg_distance, avg_instance_error])
        return np.array(result[0]), np.array(result[1])