From 26eb54eb37e0515f863243c133fe0a72bfd5c6af Mon Sep 17 00:00:00 2001 From: yangyaming Date: Sun, 18 Jun 2017 14:31:57 +0800 Subject: [PATCH] Follow comments. --- error_rate.py | 16 ++++++++------- tests/test_error_rate.py | 44 +++++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/error_rate.py b/error_rate.py index 2bb63711..08fe1255 100644 --- a/error_rate.py +++ b/error_rate.py @@ -1,9 +1,11 @@ # -*- coding: utf-8 -*- -""" - This module provides functions to calculate error rate in different level. - e.g. wer for word-level, cer for char-level. +"""This module provides functions to calculate error rate at different levels. +e.g. wer for word-level, cer for char-level. """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import numpy as np @@ -42,8 +44,7 @@ def levenshtein_distance(ref, hyp): def wer(reference, hypothesis, ignore_case=False, delimiter=' '): - """ - Calculate word error rate (WER). WER compares reference text and + """Calculate word error rate (WER). WER compares reference text and hypothesis text in word-level. WER is defined as: .. math:: @@ -71,6 +72,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): :type delimiter: char :return: Word error rate. :rtype: float + :raises ValueError: If reference length is zero. """ if ignore_case == True: reference = reference.lower() @@ -88,8 +90,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): def cer(reference, hypothesis, ignore_case=False): - """ - Calculate charactor error rate (CER). CER compares reference text and + """Calculate character error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: .. math:: @@ -117,6 +118,7 @@ def cer(reference, hypothesis, ignore_case=False): :type ignore_case: bool :return: Character error rate. :rtype: float + :raises ValueError: If reference length is zero. 
""" if ignore_case == True: reference = reference.lower() diff --git a/tests/test_error_rate.py b/tests/test_error_rate.py index bb6dca30..57a6ccd6 100644 --- a/tests/test_error_rate.py +++ b/tests/test_error_rate.py @@ -1,29 +1,63 @@ # -*- coding: utf-8 -*- +"""Test error rate.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import unittest -import sys -sys.path.append('..') import error_rate class TestParse(unittest.TestCase): - def test_wer(self): + def test_wer_1(self): ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' hyp = 'i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last night' word_error_rate = error_rate.wer(ref, hyp) self.assertTrue(abs(word_error_rate - 0.769230769231) < 1e-6) - def test_cer_en(self): + def test_wer_2(self): + ref = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night' + word_error_rate = error_rate.wer(ref, ref) + self.assertEqual(word_error_rate, 0.0) + + def test_wer_3(self): + ref = ' ' + hyp = 'Hypothesis sentence' + try: + word_error_rate = error_rate.wer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + + def test_cer_1(self): ref = 'werewolf' hyp = 'weae wolf' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.25) < 1e-6) - def test_cer_zh(self): + def test_cer_2(self): + ref = 'werewolf' + char_error_rate = error_rate.cer(ref, ref) + self.assertEqual(char_error_rate, 0.0) + + def test_cer_3(self): ref = u'我是中国人' hyp = u'我是 美洲人' char_error_rate = error_rate.cer(ref, hyp) self.assertTrue(abs(char_error_rate - 0.6) < 1e-6) + def test_cer_4(self): + ref = u'我是中国人' + char_error_rate = error_rate.cer(ref, ref) + self.assertFalse(char_error_rate, 0.0) + + def test_cer_5(self): + ref = '' + hyp = 'Hypothesis' + try: + char_error_rate = error_rate.cer(ref, hyp) + except Exception as e: + self.assertTrue(isinstance(e, ValueError)) + if __name__ == '__main__': 
unittest.main() -- GitLab