“0d3490e45c56270e3813dba956093827c48e48b9”上不存在“mobile/src/operators/fusion_deconv_add_relu_op.cpp”
未验证 提交 1c3d2cb8 编写于 作者: D David An (An Hongliang) 提交者: GitHub

add double byte char for zh normalization (#2661)

上级 94a487bd
......@@ -19,7 +19,7 @@ from pypinyin.constants import SUPPORT_UCS4
# Full-width <-> half-width conversion tables.
# Full-width -> half-width map for ASCII letters (52 entries).
# Keys and values are integer code points, the form ``str.translate`` accepts;
# 65248 (0xFEE0) is the offset added to each half-width code point to form
# the corresponding full-width key.
F2H_ASCII_LETTERS = {
    ord(char) + 65248: ord(char)
    for char in string.ascii_letters
}
......@@ -27,12 +27,12 @@ F2H_ASCII_LETTERS = {
# Half-width -> full-width map for ASCII letters (inverse of F2H_ASCII_LETTERS).
H2F_ASCII_LETTERS = dict(
    zip(F2H_ASCII_LETTERS.values(), F2H_ASCII_LETTERS.keys()))
# Full-width -> half-width map for digits (10 entries), keyed by integer
# code point so it can be passed to ``str.translate``.
F2H_DIGITS = {ord(char) + 65248: ord(char) for char in string.digits}
# Half-width -> full-width map for digits (inverse of F2H_DIGITS).
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}
# Full-width -> half-width map for punctuation (32 entries), keyed by integer
# code point so it can be passed to ``str.translate``.
F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
# Half-width -> full-width map for punctuation (inverse of F2H_PUNCTUATIONS).
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}
......
......@@ -74,6 +74,17 @@ class TextNormalizer():
def _post_replace(self, sentence: str) -> str:
    """Replace leftover symbols with their spoken Chinese equivalents.

    Maps ``/`` to ``每``, both the ASCII tilde ``~`` and the full-width
    tilde (U+FF5E) to ``至``, and the circled digits ``①``-``⑩`` to the
    Chinese numerals ``一``-``十``.

    Args:
        sentence: Text to post-process.

    Returns:
        A copy of ``sentence`` with every listed symbol substituted.
    """
    # None of the replacement outputs is itself one of the replaced symbols,
    # so one ``str.translate`` pass gives the same result as chaining
    # ``str.replace`` calls, without rescanning the string per symbol.
    table = str.maketrans({
        '/': '每',
        '~': '至',
        '\uff5e': '至',  # full-width (double-byte) tilde
        '①': '一',
        '②': '二',
        '③': '三',
        '④': '四',
        '⑤': '五',
        '⑥': '六',
        '⑦': '七',
        '⑧': '八',
        '⑨': '九',
        '⑩': '十',
    })
    return sentence.translate(table)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册