Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
ae92fa74
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ae92fa74
编写于
6月 04, 2021
作者:
C
chenfeiyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format code
上级
7779f33e
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
28 addition
and
7 deletion
+28
-7
examples/text_normalization/local/test_normalization.py
examples/text_normalization/local/test_normalization.py
+16
-1
examples/text_normalization/path.sh
examples/text_normalization/path.sh
+0
-1
examples/text_normalization/run.sh
examples/text_normalization/run.sh
+0
-1
third_party/text_processing/__ini__.py
third_party/text_processing/__ini__.py
+1
-0
third_party/text_processing/normalization/char_convert.py
third_party/text_processing/normalization/char_convert.py
+2
-1
third_party/text_processing/normalization/chronology.py
third_party/text_processing/normalization/chronology.py
+2
-1
third_party/text_processing/normalization/constants.py
third_party/text_processing/normalization/constants.py
+1
-0
third_party/text_processing/normalization/num.py
third_party/text_processing/normalization/num.py
+1
-0
third_party/text_processing/normalization/phone.py
third_party/text_processing/normalization/phone.py
+2
-1
third_party/text_processing/normalization/quantifier.py
third_party/text_processing/normalization/quantifier.py
+2
-1
third_party/text_processing/normalization/sentence_split.py
third_party/text_processing/normalization/sentence_split.py
+1
-0
未找到文件。
examples/text_normalization/local/test_normalization.py
浏览文件 @
ae92fa74
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
from
text_processing
import
normalization
parser
=
argparse
.
ArgumentParser
(
description
=
"Normalize text in Chinese with some rules."
)
parser
=
argparse
.
ArgumentParser
(
description
=
"Normalize text in Chinese with some rules."
)
parser
.
add_argument
(
"input"
,
type
=
str
,
help
=
"the input sentences"
)
parser
.
add_argument
(
"output"
,
type
=
str
,
help
=
"path to save the output file."
)
args
=
parser
.
parse_args
()
...
...
examples/text_normalization/path.sh
浏览文件 @
ae92fa74
export
MAIN_ROOT
=
${
PWD
}
/../../
export
PATH
=
${
MAIN_ROOT
}
:
${
MAIN_ROOT
}
/utils:
${
PATH
}
export
LC_ALL
=
C
...
...
examples/text_normalization/run.sh
浏览文件 @
ae92fa74
#!/usr/bin/env bash
source
path.sh
stage
=
-1
...
...
third_party/text_processing/__ini__.py
0 → 100644
浏览文件 @
ae92fa74
third_party/text_processing/normalization/char_convert.py
浏览文件 @
ae92fa74
...
...
@@ -2,6 +2,7 @@
`opencc <https://github.com/BYVoid/OpenCC>`_.
"""
import
opencc
_t2s_converter
=
opencc
.
OpenCC
(
"t2s.json"
)
...
...
@@ -11,4 +12,4 @@ def tranditional_to_simplified(text: str) -> str:
return
_t2s_converter
.
convert
(
text
)
def
simplified_to_traditional
(
text
:
str
)
->
str
:
return
_s2t_converter
.
convert
(
text
)
\ No newline at end of file
return
_s2t_converter
.
convert
(
text
)
third_party/text_processing/normalization/chronology.py
浏览文件 @
ae92fa74
import
re
from
.num
import
verbalize_cardinal
,
verbalize_digit
,
num2str
,
DIGITS
def
_time_num2str
(
num_string
:
str
)
->
str
:
"""A special case for verbalizing number in time."""
result
=
num2str
(
num_string
.
lstrip
(
'0'
))
...
...
@@ -60,4 +61,4 @@ def replace_date2(match: re.Match) -> str:
result
+=
f
"
{
verbalize_cardinal
(
month
)
}
月"
if
day
:
result
+=
f
"
{
verbalize_cardinal
(
day
)
}
日"
return
result
\ No newline at end of file
return
result
third_party/text_processing/normalization/constants.py
浏览文件 @
ae92fa74
...
...
@@ -2,6 +2,7 @@ import string
import
re
from
pypinyin.constants
import
SUPPORT_UCS4
# 全角半角转换
# 英文字符全角 -> 半角映射表 (num: 52)
F2H_ASCII_LETTERS
=
{
...
...
third_party/text_processing/normalization/num.py
浏览文件 @
ae92fa74
...
...
@@ -2,6 +2,7 @@
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import
re
from
typing
import
List
from
collections
import
OrderedDict
...
...
third_party/text_processing/normalization/phone.py
浏览文件 @
ae92fa74
import
re
from
.num
import
verbalize_digit
# 规范化固话/手机号码
# 手机
# http://www.jihaoba.com/news/show/13680
...
...
@@ -27,4 +28,4 @@ def phone2str(phone_string: str, mobile=True) -> str:
def
replace_phone
(
match
:
re
.
Match
)
->
str
:
return
phone2str
(
match
.
group
(
0
))
\ No newline at end of file
return
phone2str
(
match
.
group
(
0
))
third_party/text_processing/normalization/quantifier.py
浏览文件 @
ae92fa74
import
re
from
.num
import
num2str
# 温度表达式,温度会影响负号的读法
# -3°C 零下三度
RE_TEMPERATURE
=
re
.
compile
(
...
...
@@ -14,4 +15,4 @@ def replace_temperature(match: re.Match) -> str:
temperature
:
str
=
num2str
(
temperature
)
unit
:
str
=
"摄氏度"
if
unit
==
"摄氏度"
else
"度"
result
=
f
"
{
sign
}{
temperature
}{
unit
}
"
return
result
\ No newline at end of file
return
result
third_party/text_processing/normalization/sentence_split.py
浏览文件 @
ae92fa74
import
re
from
typing
import
List
SENTENCE_SPLITOR
=
re
.
compile
(
r
'([。!?][”’]?)'
)
def
split
(
text
:
str
)
->
List
[
str
]:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录