From 445310c03df3f27f7fae2384cd81dc3ce80d5881 Mon Sep 17 00:00:00 2001
From: Zeyu Chen
Date: Thu, 4 Apr 2019 14:43:40 +0800
Subject: [PATCH] add evaluation code

---
 paddlehub/finetune/evaluate.py | 230 +++++++++++++++++++++++++++++++
 1 file changed, 230 insertions(+)
 create mode 100644 paddlehub/finetune/evaluate.py

diff --git a/paddlehub/finetune/evaluate.py b/paddlehub/finetune/evaluate.py
new file mode 100644
index 00000000..31f16f30
--- /dev/null
+++ b/paddlehub/finetune/evaluate.py
@@ -0,0 +1,230 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import multiprocessing
+import os
+import time
+
+import numpy as np
+import paddle.fluid as fluid
+
+from paddlehub.common.logger import logger
+
+
+def _get_running_device_info(config):
+    # Helper assumed by the evaluation functions below: the standard
+    # PaddlePaddle resolution of execution place and device count.
+    if config.use_cuda:
+        place = fluid.CUDAPlace(0)
+        dev_count = fluid.core.get_cuda_device_count()
+    else:
+        place = fluid.CPUPlace()
+        dev_count = int(
+            os.environ.get("CPU_NUM", multiprocessing.cpu_count()))
+    return place, dev_count
+
+
+def evaluate_cls_task(task, data_reader, feed_list, phase="test", config=None):
+    logger.info("Evaluation on {} dataset start".format(phase))
+    inference_program = task.inference_program()
+    loss = task.variable("loss")
+    accuracy = task.variable("accuracy")
+    batch_size = config.batch_size
+    place, dev_count = _get_running_device_info(config)
+    exe = fluid.Executor(place=place)
+    with fluid.program_guard(inference_program):
+        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
+        num_eval_examples = acc_sum = loss_sum = 0
+        test_reader = data_reader.data_generator(
+            batch_size=batch_size, phase=phase)
+        eval_time_begin = time.time()
+        eval_step = 0
+        for batch in test_reader():
+            num_batch_examples = len(batch)
+            eval_step += 1
+            loss_v, accuracy_v = exe.run(
+                feed=data_feeder.feed(batch),
+                fetch_list=[loss.name, accuracy.name])
+            # Weight each batch by its size so the final averages stay
+            # exact even when the last batch is smaller.
+            num_eval_examples += num_batch_examples
+            acc_sum += accuracy_v * num_batch_examples
+            loss_sum += loss_v * num_batch_examples
+        eval_time_used = time.time() - eval_time_begin
+
+    avg_loss = loss_sum / num_eval_examples
+    avg_acc = acc_sum / num_eval_examples
+    eval_speed = eval_step / eval_time_used
+    logger.info(
+        "[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]" %
+        (phase, avg_loss, avg_acc, eval_speed))
+
+    return avg_loss, avg_acc, eval_speed
+
+
+def evaluate_seq_labeling_task(task,
+                               data_reader,
+                               feed_list,
+                               phase="test",
+                               config=None):
+    fetch_list = [
+        task.variable("labels").name,
+        task.variable("infers").name,
+        task.variable("seq_len").name,
+        task.variable("loss").name
+    ]
+    logger.info("Evaluation on {} dataset start".format(phase))
+    inference_program = task.inference_program()
+    batch_size = config.batch_size
+    place, dev_count = _get_running_device_info(config)
+    exe = fluid.Executor(place=place)
+    num_labels = len(data_reader.get_labels())
+    with fluid.program_guard(inference_program):
+        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
+        test_reader = data_reader.data_generator(
+            batch_size=batch_size, phase=phase)
+        eval_time_begin = time.time()
+        eval_step = 0
+        total_label, total_infer, total_correct = 0.0, 0.0, 0.0
+        for batch in test_reader():
+            eval_step += 1
+            np_labels, np_infers, np_lens, _ = exe.run(
+                feed=data_feeder.feed(batch),
+                fetch_list=fetch_list)
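+            # Convert the fetched token-level labels/predictions into
+            # chunk-level counts so F1 is measured over whole entities.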
+            label_num, infer_num, correct_num = chunk_eval(
+                np_labels, np_infers, np_lens, num_labels, dev_count)
+            total_infer += infer_num
+            total_label += label_num
+            total_correct += correct_num
+
+    precision, recall, f1 = calculate_f1(total_label, total_infer,
+                                         total_correct)
+    eval_time_used = time.time() - eval_time_begin
+    eval_speed = eval_step / eval_time_used
+    logger.info(
+        "[%s evaluation] F1-Score=%f, precision=%f, recall=%f [step/sec: %.2f]"
+        % (phase, f1, precision, recall, eval_speed))
+
+
+# Sequence labeling evaluation helpers
+def chunk_eval(np_labels, np_infers, np_lens, tag_num, dev_count=1):
+    def extract_bio_chunk(seq):
+        # Decode a BIO tag id sequence into chunks. Tag ids are laid out
+        # as [B-0, I-0, B-1, I-1, ..., O]: tag // 2 is the chunk type,
+        # tag % 2 distinguishes B (0) from I (1), and the last id
+        # (tag_num - 1) is the "O" (outside) tag.
+        chunks = []
+        cur_chunk = None
+        null_index = tag_num - 1
+        for index in range(len(seq)):
+            tag = seq[index]
+            tag_type = tag // 2
+            tag_pos = tag % 2
+
+            if tag == null_index:
+                # An "O" tag closes any open chunk.
+                if cur_chunk is not None:
+                    chunks.append(cur_chunk)
+                    cur_chunk = None
+                continue
+
+            if tag_pos == 0:
+                # A "B-" tag always starts a new chunk.
+                if cur_chunk is not None:
+                    chunks.append(cur_chunk)
+                cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
+            else:
+                # An "I-" tag extends the current chunk when the type
+                # matches; otherwise it implicitly starts a new one.
+                if cur_chunk is None:
+                    cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
+                    continue
+
+                if cur_chunk["type"] == tag_type:
+                    cur_chunk["en"] = index + 1
+                else:
+                    chunks.append(cur_chunk)
+                    cur_chunk = {"st": index, "en": index + 1, "type": tag_type}
+
+        if cur_chunk is not None:
+            chunks.append(cur_chunk)
+        return chunks
+
+    num_label = 0
+    num_infer = 0
+    num_correct = 0
+    labels = np_labels.reshape([-1]).astype(np.int32).tolist()
+    infers = np_infers.reshape([-1]).astype(np.int32).tolist()
+    all_lens = np_lens.reshape([dev_count, -1]).astype(np.int32).tolist()
+
+    base_index = 0
+    for dev_index in range(dev_count):
+        lens = all_lens[dev_index]
+        max_len = max(lens) if lens else 0
+
+        for i in range(len(lens)):
+            # Skip the [CLS]/[SEP] positions at both ends of the sequence.
+            seq_st = base_index + i * max_len + 1
+            seq_en = seq_st + (lens[i] - 2)
+            infer_chunks = extract_bio_chunk(infers[seq_st:seq_en])
+            label_chunks = extract_bio_chunk(labels[seq_st:seq_en])
+            num_infer += len(infer_chunks)
+            num_label += len(label_chunks)
+
+            # Merge-walk both chunk lists; a prediction is correct only
+            # when its span and type both match a gold chunk.
+            infer_index = 0
+            label_index = 0
+            while label_index < len(label_chunks) \
+                    and infer_index < len(infer_chunks):
+                if infer_chunks[infer_index]["st"] \
+                        < label_chunks[label_index]["st"]:
+                    infer_index += 1
+                elif infer_chunks[infer_index]["st"] \
+                        > label_chunks[label_index]["st"]:
+                    label_index += 1
+                else:
+                    if infer_chunks[infer_index]["en"] \
+                            == label_chunks[label_index]["en"] \
+                            and infer_chunks[infer_index]["type"] \
+                            == label_chunks[label_index]["type"]:
+                        num_correct += 1
+
+                    infer_index += 1
+                    label_index += 1
+
+        base_index += max_len * len(lens)
+
+    return num_label, num_infer, num_correct
+
+
+def calculate_f1(num_label, num_infer, num_correct):
+    # Guard against zero denominators when nothing is predicted or labeled.
+    if num_infer == 0:
+        precision = 0.0
+    else:
+        precision = num_correct * 1.0 / num_infer
+
+    if num_label == 0:
+        recall = 0.0
+    else:
+        recall = num_correct * 1.0 / num_label
+
+    if num_correct == 0:
+        f1 = 0.0
+    else:
+        f1 = 2 * precision * recall / (precision + recall)
+    return precision, recall, f1
--
GitLab
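
The helpers above reduce to precision = num_correct / num_infer, recall = num_correct / num_label, and F1 = 2 * P * R / (P + R), computed over chunks rather than tokens. A minimal, runnable sketch of how chunk_eval and calculate_f1 compose once the patch is applied; the tag scheme is an assumption for illustration (tag_num = 5 mapping to B-PER=0, I-PER=1, B-LOC=2, I-LOC=3, O=4), and the toy sequences are padded with O at both ends because chunk_eval skips the first and last position of every sequence:

    import numpy as np

    from paddlehub.finetune.evaluate import calculate_f1, chunk_eval

    # Assumed toy scheme: tag_num=5 -> B-PER=0, I-PER=1, B-LOC=2, I-LOC=3, O=4.
    np_labels = np.array([4, 0, 1, 4, 2, 4])  # gold: one PER chunk, one LOC chunk
    np_infers = np.array([4, 0, 1, 4, 0, 4])  # PER predicted correctly, LOC mistyped
    np_lens = np.array([6])                   # a single sequence of 6 tokens

    num_label, num_infer, num_correct = chunk_eval(
        np_labels, np_infers, np_lens, tag_num=5, dev_count=1)
    print(num_label, num_infer, num_correct)  # -> 2 2 1

    precision, recall, f1 = calculate_f1(num_label, num_infer, num_correct)
    print("P=%.2f R=%.2f F1=%.2f" % (precision, recall, f1))  # -> 0.50 0.50 0.50

Both predicted chunks have the right spans, but the second has the wrong type, so only one of the two counts as correct, giving 0.5 for all three metrics.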