# Measure-notation normalisation for the zh_normalization TTS front end.
#
# Recovered from the patch "Revised TN qualifier for measure notation" against
# paddlespeech/t2s/frontend/zh_normalization/quantifier.py.  In that patch,
# TextNormalizer calls replace_measure(sentence) right after
# RE_TEMPERATURE.sub(...) and before RE_FRAC.sub(...), i.e. before the generic
# number verbalisation, so the digits in front of a unit are still ASCII here.
#
# NOTE(review): the patch's companion test data contains `25m|米`; judging by
# the sibling case `25cm²|二十五平方厘米` the expected value was presumably
# meant to be `二十五米` -- confirm and fix in textnorm_test_cases.txt.
import re

# Unit symbol -> spoken Chinese name.  Insertion order is irrelevant: the
# matcher below always tries longer symbols first.
measure_dict = {
    "cm2": "平方厘米",
    "cm²": "平方厘米",
    "cm3": "立方厘米",
    "cm³": "立方厘米",
    "cm": "厘米",
    "db": "分贝",
    # NOTE(review): "ds" is the SI symbol for decisecond, not millisecond.
    # Kept as-is for backward compatibility; it looks like a typo for "ms",
    # which is added right below -- confirm whether "ds" should be dropped.
    "ds": "毫秒",
    "ms": "毫秒",
    "kg": "千克",
    "km": "千米",
    "m2": "平方米",
    "m²": "平方米",
    "m³": "立方米",
    "m3": "立方米",
    "ml": "毫升",
    "m": "米",
    "mm": "毫米",
    "s": "秒",
}

# One alternation over every known symbol, longest first, so that "mm" wins
# over "m" and "cm²" wins over "cm".  A symbol only counts as a unit when it
# directly follows a digit (one optional space allowed) and is not the start
# of a longer Latin word ("12 mini" must stay untouched).
_RE_MEASURE = re.compile(
    r"(?<=\d)( ?)("
    + "|".join(
        re.escape(symbol)
        for symbol in sorted(measure_dict, key=len, reverse=True))
    + r")(?![A-Za-z])")


def replace_measure(sentence: str) -> str:
    """Spell out measurement units that follow a number.

    Every unit symbol listed in ``measure_dict`` that appears immediately
    after a digit (optionally separated by a single space) is replaced by its
    Chinese name.  The digits themselves are left for the later number
    normalisation step.  Symbols that are not attached to a number, or that
    are merely a prefix of a Latin word, are left unchanged; this is what
    keeps ordinary English words containing ``m`` or ``s`` intact.

    Args:
        sentence: Text to normalise.

    Returns:
        The text with number-attached unit symbols replaced.
    """

    def _spell_out(match) -> str:
        # group(1) is the optional space between number and unit; keep it.
        return match.group(1) + measure_dict[match.group(2)]

    return _RE_MEASURE.sub(_spell_out, sentence)