提交 ae92fa74 编写于 作者: C chenfeiyu

format code

上级 7779f33e
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse import argparse
from text_processing import normalization from text_processing import normalization
parser = argparse.ArgumentParser(description="Normalize text in Chinese with some rules.") parser = argparse.ArgumentParser(
description="Normalize text in Chinese with some rules.")
parser.add_argument("input", type=str, help="the input sentences") parser.add_argument("input", type=str, help="the input sentences")
parser.add_argument("output", type=str, help="path to save the output file.") parser.add_argument("output", type=str, help="path to save the output file.")
args = parser.parse_args() args = parser.parse_args()
......
export MAIN_ROOT=${PWD}/../../ export MAIN_ROOT=${PWD}/../../
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C export LC_ALL=C
......
#!/usr/bin/env bash #!/usr/bin/env bash
source path.sh source path.sh
stage=-1 stage=-1
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
`opencc <https://github.com/BYVoid/OpenCC>`_. `opencc <https://github.com/BYVoid/OpenCC>`_.
""" """
import opencc import opencc
_t2s_converter = opencc.OpenCC("t2s.json") _t2s_converter = opencc.OpenCC("t2s.json")
...@@ -11,4 +12,4 @@ def tranditional_to_simplified(text: str) -> str: ...@@ -11,4 +12,4 @@ def tranditional_to_simplified(text: str) -> str:
return _t2s_converter.convert(text) return _t2s_converter.convert(text)
def simplified_to_traditional(text: str) -> str:
    """Convert ``text`` using the module-level ``_s2t_converter``.

    Args:
        text: The string to convert.

    Returns:
        The result of ``_s2t_converter.convert(text)``.
    """
    # NOTE(review): ``_s2t_converter`` is defined outside the visible hunk;
    # presumably an ``opencc.OpenCC`` instance built like ``_t2s_converter``
    # but with the simplified-to-traditional config -- confirm.
    return _s2t_converter.convert(text)
\ No newline at end of file
import re import re
from .num import verbalize_cardinal, verbalize_digit, num2str, DIGITS from .num import verbalize_cardinal, verbalize_digit, num2str, DIGITS
def _time_num2str(num_string: str) -> str: def _time_num2str(num_string: str) -> str:
"""A special case for verbalizing number in time.""" """A special case for verbalizing number in time."""
result = num2str(num_string.lstrip('0')) result = num2str(num_string.lstrip('0'))
...@@ -60,4 +61,4 @@ def replace_date2(match: re.Match) -> str: ...@@ -60,4 +61,4 @@ def replace_date2(match: re.Match) -> str:
result += f"{verbalize_cardinal(month)}月" result += f"{verbalize_cardinal(month)}月"
if day: if day:
result += f"{verbalize_cardinal(day)}日" result += f"{verbalize_cardinal(day)}日"
return result return result
\ No newline at end of file
...@@ -2,6 +2,7 @@ import string ...@@ -2,6 +2,7 @@ import string
import re import re
from pypinyin.constants import SUPPORT_UCS4 from pypinyin.constants import SUPPORT_UCS4
# 全角半角转换 # 全角半角转换
# 英文字符全角 -> 半角映射表 (num: 52) # 英文字符全角 -> 半角映射表 (num: 52)
F2H_ASCII_LETTERS = { F2H_ASCII_LETTERS = {
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
Rules to verbalize numbers into Chinese characters. Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文 https://zh.wikipedia.org/wiki/中文数字#現代中文
""" """
import re import re
from typing import List from typing import List
from collections import OrderedDict from collections import OrderedDict
......
import re import re
from .num import verbalize_digit from .num import verbalize_digit
# 规范化固话/手机号码 # 规范化固话/手机号码
# 手机 # 手机
# http://www.jihaoba.com/news/show/13680 # http://www.jihaoba.com/news/show/13680
...@@ -27,4 +28,4 @@ def phone2str(phone_string: str, mobile=True) -> str: ...@@ -27,4 +28,4 @@ def phone2str(phone_string: str, mobile=True) -> str:
def replace_phone(match: re.Match) -> str:
    """Verbalize the phone number captured by a regex match.

    Args:
        match: A regex match object; its entire matched text (group 0) is
            taken as the phone-number string.

    Returns:
        The string produced by ``phone2str`` for the matched text, with
        ``phone2str``'s ``mobile`` argument left at its default.
    """
    matched_text = match.group(0)
    return phone2str(matched_text)
\ No newline at end of file
import re import re
from .num import num2str from .num import num2str
# 温度表达式,温度会影响负号的读法 # 温度表达式,温度会影响负号的读法
# -3°C 零下三度 # -3°C 零下三度
RE_TEMPERATURE = re.compile( RE_TEMPERATURE = re.compile(
...@@ -14,4 +15,4 @@ def replace_temperature(match: re.Match) -> str: ...@@ -14,4 +15,4 @@ def replace_temperature(match: re.Match) -> str:
temperature: str = num2str(temperature) temperature: str = num2str(temperature)
unit: str = "摄氏度" if unit == "摄氏度" else "度" unit: str = "摄氏度" if unit == "摄氏度" else "度"
result = f"{sign}{temperature}{unit}" result = f"{sign}{temperature}{unit}"
return result return result
\ No newline at end of file
import re import re
from typing import List from typing import List
SENTENCE_SPLITOR = re.compile(r'([。!?][”’]?)') SENTENCE_SPLITOR = re.compile(r'([。!?][”’]?)')
def split(text: str) -> List[str]: def split(text: str) -> List[str]:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册