PaddlePaddle/DeepSpeech, commit 13a7fa98 (parent b76968e6)
Authored Oct 14, 2022 by david.95

enable chinese words' pinyin specified in text of ssml formats, test=tts
Showing 3 changed files with 323 additions and 2 deletions (+323, -2):

paddlespeech/t2s/exps/syn_utils.py (+4, -2)
paddlespeech/t2s/frontend/zh_frontend.py (+156, -0)
paddlespeech/t2s/ssml/xml_processor.py (+163, -0)
paddlespeech/t2s/exps/syn_utils.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import math
 import os
+import re
 from pathlib import Path
 from typing import Any
 from typing import Dict
@@ -33,6 +34,7 @@ from paddlespeech.t2s.frontend.mix_frontend import MixFrontend
 from paddlespeech.t2s.frontend.zh_frontend import Frontend
 from paddlespeech.t2s.modules.normalizer import ZScore
 from paddlespeech.utils.dynamic_import import dynamic_import

 # remove [W:onnxruntime: xxx] from ort
 ort.set_default_logger_severity(3)
@@ -103,7 +105,7 @@ def get_sentences(text_file: Optional[os.PathLike], lang: str='zh'):
     sentences = []
     with open(text_file, 'rt') as f:
         for line in f:
-            items = line.strip().split()
+            items = re.split(r"\s+", line.strip(), 1)
             utt_id = items[0]
             if lang == 'zh':
                 sentence = "".join(items[1:])
@@ -180,7 +182,7 @@ def run_frontend(frontend: object,
                  to_tensor: bool=True):
     outs = dict()
     if lang == 'zh':
-        input_ids = frontend.get_input_ids(
+        input_ids = frontend.get_input_ids_ssml(
             text,
             merge_sentences=merge_sentences,
             get_tone_ids=get_tone_ids,
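The change in get_sentences matters for SSML input: str.split() breaks the sentence at every space, and those spaces are lost when the items are re-joined, which destroys XML attributes such as pinyin="...". Splitting with re.split(r"\s+", line.strip(), 1) cuts only at the first whitespace run, so the utterance ID is separated and the rest of the line survives intact. A small standalone sketch (the ID, the say-as tag name and the pinyin value are made-up examples):

import re

# One line of a TTS text file: "<utt_id> <sentence>", where the sentence may be SSML.
line = '001 <speak>今天天气真<say-as pinyin="hao3">好</say-as></speak>'

print(line.strip().split())
# ['001', '<speak>今天天气真<say-as', 'pinyin="hao3">好</say-as></speak>']  (attribute split apart)

print(re.split(r"\s+", line.strip(), 1))
# ['001', '<speak>今天天气真<say-as pinyin="hao3">好</say-as></speak>']  (sentence kept whole)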
paddlespeech/t2s/frontend/zh_frontend.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import re
+from operator import itemgetter
 from typing import Dict
 from typing import List
@@ -31,6 +32,7 @@ from paddlespeech.t2s.frontend.g2pw import G2PWOnnxConverter
 from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon
 from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi
 from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer
+from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor

 INITIALS = [
     'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
@@ -81,6 +83,7 @@ class Frontend():
                  g2p_model="g2pW",
                  phone_vocab_path=None,
                  tone_vocab_path=None):
+        self.mix_ssml_processor = MixTextProcessor()
         self.tone_modifier = ToneSandhi()
         self.text_normalizer = TextNormalizer()
         self.punc = ":,;。?!“”‘’':,;.?!"
@@ -143,6 +146,7 @@ class Frontend():
                 tone_id = [line.strip().split() for line in f.readlines()]
             for tone, id in tone_id:
                 self.vocab_tones[tone] = int(id)
+        self.mix_ssml_processor.__repr__()

     def _init_pypinyin(self):
         large_pinyin.load()
@@ -281,6 +285,65 @@ class Frontend():
             phones_list.append(merge_list)
         return phones_list

+    def _split_word_to_char(self, words):
+        res = []
+        for x in words:
+            res.append(x)
+        return res
+
+    # if using ssml, have pingyin specified, assign pinyin to words
+    def _g2p_assign(self,
+                    words: List[str],
+                    pinyin_spec: List[str],
+                    merge_sentences: bool=True) -> List[List[str]]:
+        phones_list = []
+        initials = []
+        finals = []
+
+        words = self._split_word_to_char(words[0])
+        for pinyin, char in zip(pinyin_spec, words):
+            sub_initials = []
+            sub_finals = []
+            pinyin = pinyin.replace("u:", "v")
+            # self.pinyin2phone: is a dict with all pinyin mapped with sheng_mu yun_mu
+            if pinyin in self.pinyin2phone:
+                initial_final_list = self.pinyin2phone[pinyin].split(" ")
+                if len(initial_final_list) == 2:
+                    sub_initials.append(initial_final_list[0])
+                    sub_finals.append(initial_final_list[1])
+                elif len(initial_final_list) == 1:
+                    sub_initials.append('')
+                    sub_finals.append(initial_final_list[1])
+            else:
+                # If it's not pinyin (possibly punctuation) or no conversion is required
+                sub_initials.append(pinyin)
+                sub_finals.append(pinyin)
+            initials.append(sub_initials)
+            finals.append(sub_finals)
+
+        initials = sum(initials, [])
+        finals = sum(finals, [])
+        phones = []
+        for c, v in zip(initials, finals):
+            # NOTE: post process for pypinyin outputs
+            # we discriminate i, ii and iii
+            if c and c not in self.punc:
+                phones.append(c)
+            if c and c in self.punc:
+                phones.append('sp')
+            if v and v not in self.punc:
+                phones.append(v)
+        phones_list.append(phones)
+        if merge_sentences:
+            merge_list = sum(phones_list, [])
+            # rm the last 'sp' to avoid the noise at the end
+            # cause in the training data, no 'sp' in the end
+            if merge_list[-1] == 'sp':
+                merge_list = merge_list[:-1]
+            phones_list = []
+            phones_list.append(merge_list)
+        return phones_list
+
     def _merge_erhua(self,
                      initials: List[str],
                      finals: List[str],
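_g2p_assign leans on self.pinyin2phone, a table that maps a toned pinyin syllable to its initial and final ("sheng_mu yun_mu") separated by a space; input that is not in the table (typically punctuation) passes through unchanged. A minimal standalone sketch of that lookup, with a hypothetical two-entry table (the real one is built from the frontend's lexicon):

# Hypothetical subset of self.pinyin2phone, for illustration only.
pinyin2phone = {"hao3": "h ao3", "an1": " an1"}

def pinyin_to_phones(pinyin):
    # Mirrors the per-syllable step in _g2p_assign.
    pinyin = pinyin.replace("u:", "v")        # normalise "u:" to "v", e.g. "lu:4" -> "lv4"
    if pinyin in pinyin2phone:
        initial, final = pinyin2phone[pinyin].split(" ")
        return initial, final                 # the initial may be empty for zero-initial syllables
    return pinyin, pinyin                     # punctuation / unmapped input is passed through

print(pinyin_to_phones("hao3"))   # ('h', 'ao3')
print(pinyin_to_phones("an1"))    # ('', 'an1')
print(pinyin_to_phones(","))      # (',', ',')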
@@ -396,6 +459,52 @@ class Frontend():
             print("----------------------------")
         return phonemes

+    #@an added for ssml pinyin
+    def get_phonemes_ssml(self,
+                          ssml_inputs: list,
+                          merge_sentences: bool=True,
+                          with_erhua: bool=True,
+                          robot: bool=False,
+                          print_info: bool=False) -> List[List[str]]:
+        all_phonemes = []
+        for word_pinyin_item in ssml_inputs:
+            phonemes = []
+            sentence, pinyin_spec = itemgetter(0, 1)(word_pinyin_item)
+            sentences = self.text_normalizer.normalize(sentence)
+            if len(pinyin_spec) == 0:
+                phonemes = self._g2p(
+                    sentences,
+                    merge_sentences=merge_sentences,
+                    with_erhua=with_erhua)
+            else:
+                # phonemes should be pinyin_spec
+                phonemes = self._g2p_assign(
+                    sentences, pinyin_spec, merge_sentences=merge_sentences)
+            all_phonemes = all_phonemes + phonemes
+
+        if robot:
+            new_phonemes = []
+            for sentence in all_phonemes:
+                new_sentence = []
+                for item in sentence:
+                    # `er` only have tone `2`
+                    if item[-1] in "12345" and item != "er2":
+                        item = item[:-1] + "1"
+                    new_sentence.append(item)
+                new_phonemes.append(new_sentence)
+            all_phonemes = new_phonemes
+
+        if print_info:
+            print("----------------------------")
+            print("text norm results:")
+            print(sentences)
+            print("----------------------------")
+            print("g2p results:")
+            print(all_phonemes[0])
+            print("----------------------------")
+        return [sum(all_phonemes, [])]
+
     def get_input_ids(self,
                       sentence: str,
                       merge_sentences: bool=True,
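The robot flag in get_phonemes_ssml flattens every tone to tone 1, except er2 (since er only occurs with tone 2), which is what produces the flat robot voice. A standalone sketch of that pass over a phone sequence (the phone labels are only illustrative):

def robotize(phones):
    # Force every toned phone to tone 1, as get_phonemes_ssml does when robot=True;
    # "er2" is kept as-is because `er` only has tone `2`.
    out = []
    for item in phones:
        if item[-1] in "12345" and item != "er2":
            item = item[:-1] + "1"
        out.append(item)
    return out

print(robotize(["n", "i3", "h", "ao3", "er2", "sp"]))
# ['n', 'i1', 'h', 'ao1', 'er2', 'sp']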
@@ -405,6 +514,7 @@ class Frontend():
                       add_blank: bool=False,
                       blank_token: str="<pad>",
                       to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
         phonemes = self.get_phonemes(
             sentence,
             merge_sentences=merge_sentences,
@@ -437,3 +547,49 @@ class Frontend():
         if temp_phone_ids:
             result["phone_ids"] = temp_phone_ids
         return result
+
+    # @an added for ssml
+    def get_input_ids_ssml(
+            self,
+            sentence: str,
+            merge_sentences: bool=True,
+            get_tone_ids: bool=False,
+            robot: bool=False,
+            print_info: bool=False,
+            add_blank: bool=False,
+            blank_token: str="<pad>",
+            to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
+
+        l_inputs = MixTextProcessor.get_pinyin_split(sentence)
+        phonemes = self.get_phonemes_ssml(
+            l_inputs,
+            merge_sentences=merge_sentences,
+            print_info=print_info,
+            robot=robot)
+        result = {}
+        phones = []
+        tones = []
+        temp_phone_ids = []
+        temp_tone_ids = []
+
+        for part_phonemes in phonemes:
+            phones, tones = self._get_phone_tone(
+                part_phonemes, get_tone_ids=get_tone_ids)
+            if add_blank:
+                phones = insert_after_character(phones, blank_token)
+            if tones:
+                tone_ids = self._t2id(tones)
+                if to_tensor:
+                    tone_ids = paddle.to_tensor(tone_ids)
+                temp_tone_ids.append(tone_ids)
+            if phones:
+                phone_ids = self._p2id(phones)
+                # if use paddle.to_tensor() in onnxruntime, the first time will be too low
+                if to_tensor:
+                    phone_ids = paddle.to_tensor(phone_ids)
+                temp_phone_ids.append(phone_ids)
+        if temp_tone_ids:
+            result["tone_ids"] = temp_tone_ids
+        if temp_phone_ids:
+            result["phone_ids"] = temp_phone_ids
+        return result
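get_input_ids_ssml is the entry point that run_frontend now calls for Chinese: it splits the raw string into [text, pinyin_spec] pairs via MixTextProcessor.get_pinyin_split, converts them with get_phonemes_ssml, and then maps phones (and optionally tones) to ids the same way get_input_ids does. A hedged usage sketch; it assumes a working PaddleSpeech install, the vocabulary file path is a placeholder, and the say-as tag name is illustrative (only the pinyin attribute is inspected):

from paddlespeech.t2s.frontend.zh_frontend import Frontend

# "phone_id_map.txt" stands in for a trained model's phone vocabulary file.
frontend = Frontend(phone_vocab_path="phone_id_map.txt")

# Mixed input: plain text plus a <speak> block that pins the pinyin of "好" to hao3.
text = '今天<speak>天气真<say-as pinyin="hao3">好</say-as></speak>,出去玩吧。'

out = frontend.get_input_ids_ssml(text, merge_sentences=True, print_info=True)
print(out["phone_ids"])   # a list of paddle.Tensor phone-id sequences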
paddlespeech/t2s/ssml/xml_processor.py (new file, mode 100644)

# -*- coding: utf-8 -*-
import re
import xml.dom.minidom
import xml.parsers.expat
from xml.dom.minidom import Node
from xml.dom.minidom import parseString
'''
Note: xml has five special characters: & < > " '
Option 1: wrap strings containing special characters in a <![CDATA[ ]]> section, e.g.:
    <TitleName><![CDATA["姓名"]]></TitleName>
Option 2: represent the special characters with XML escape sequences; the escapes for
the five characters are:
    &   &amp;
    <   &lt;
    >   &gt;
    "   &quot;
    '   &apos;
e.g.:
    <TitleName>&quot;姓名&quot;</TitleName>
'''


class MixTextProcessor():
    def __repr__(self):
        print("@an MixTextProcessor class")

    def get_xml_content(self, mixstr):
        '''Return the xml content of the string.'''
        xmlptn = re.compile(r"<speak>.*?</speak>", re.M | re.S)
        ctn = re.search(xmlptn, mixstr)
        if ctn:
            return ctn.group(0)
        else:
            return None

    def get_content_split(self, mixstr):
        '''Break the text into a list, in order, separating the non-xml and xml parts
        into their corresponding strings (punctuation included). Whitespace must not be
        stripped, because xml tag attributes contain spaces.
        '''
        ctlist = []
        # print("Testing:",mixstr[:20])
        patn = re.compile(r'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$', re.M | re.S)
        mat = re.match(patn, mixstr)
        if mat:
            pre_xml = mat.group(1)
            in_xml = mat.group(2)
            after_xml = mat.group(3)

            ctlist.append(pre_xml)
            ctlist.append(in_xml)
            ctlist.append(after_xml)
            return ctlist
        else:
            ctlist.append(mixstr)
        return ctlist

    @classmethod
    def get_pinyin_split(self, mixstr):
        ctlist = []
        patn = re.compile(r'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$', re.M | re.S)
        mat = re.match(patn, mixstr)
        if mat:
            pre_xml = mat.group(1)
            in_xml = mat.group(2)
            after_xml = mat.group(3)

            ctlist.append([pre_xml, []])
            dom = DomXml(in_xml)
            pinyinlist = dom.get_pinyins_for_xml()
            ctlist = ctlist + pinyinlist
            ctlist.append([after_xml, []])
        else:
            ctlist.append([mixstr, []])
        return ctlist
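get_pinyin_split returns a list of [text, pinyin_list] pairs: text outside the <speak> block and text nodes without a pinyin attribute get an empty list, while the text under a tag that carries a pinyin attribute gets the attribute value split on spaces. A hedged sketch of the expected shape (the say-as tag name is illustrative, and the commented output is the expected structure rather than a captured run):

from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor

text = '今天<speak>天气真<say-as pinyin="hao3">好</say-as></speak>,出去玩吧。'
pairs = MixTextProcessor.get_pinyin_split(text)

# Expected shape, roughly:
# [['今天', []],           # text before the <speak> block
#  ['天气真', []],         # text node inside <speak> without a pinyin attribute
#  ['好', ['hao3']],       # text under a tag with pinyin="hao3"
#  [',出去玩吧。', []]]    # text after the <speak> block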
class DomXml():
    def __init__(self, xmlstr):
        print("Parse xml str:", xmlstr)
        self.tdom = parseString(xmlstr)  # Document
        # print("tdom:",type(self.tdom))
        self.root = self.tdom.documentElement  # Element
        # print("root:",type(self.root))
        self.rnode = self.tdom.childNodes  # NodeList
        # print("rnode:",type(self.rnode))
        pass

    def get_text(self):
        '''Return a list of all the text content in the xml.'''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                res.append(x1.value)
            else:
                for x2 in x1.childNodes:
                    if isinstance(x2, xml.dom.minidom.Text):
                        res.append(x2.data)
                    else:
                        for x3 in x2.childNodes:
                            if isinstance(x3, xml.dom.minidom.Text):
                                res.append(x3.data)
                            else:
                                print("len(nodes of x3):", len(x3.childNodes))

        return res

    def get_xmlchild_list(self):
        '''Return a list of the xml content, including all text content (without tags).'''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                res.append(x1.value)
            else:
                for x2 in x1.childNodes:
                    if isinstance(x2, xml.dom.minidom.Text):
                        res.append(x2.data)
                    else:
                        for x3 in x2.childNodes:
                            if isinstance(x3, xml.dom.minidom.Text):
                                res.append(x3.data)
                            else:
                                print("len(nodes of x3):", len(x3.childNodes))
        print(res)
        return res

    def get_pinyins_for_xml(self):
        '''Return the xml content as a list of strings and their pinyins, e.g. ['''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                t = re.sub(r"\s+", "", x1.value)
                res.append([t, []])
            else:
                for x2 in x1.childNodes:
                    if isinstance(x2, xml.dom.minidom.Text):
                        t = re.sub(r"\s+", "", x2.data)
                        res.append([t, []])
                    else:
                        # print("x2",x2,x2.tagName)
                        if x2.hasAttribute('pinyin'):
                            pinyin_value = x2.getAttribute("pinyin")
                            pinyins = pinyin_value.split(" ")
                        for x3 in x2.childNodes:
                            # print('x3',x3)
                            if isinstance(x3, xml.dom.minidom.Text):
                                t = re.sub(r"\s+", "", x3.data)
                                res.append([t, pinyins])
                            else:
                                print("len(nodes of x3):", len(x3.childNodes))

        return res

    def get_all_tags(self, tag_name):
        '''Get all tags with the given name and their attribute values.'''
        alltags = self.root.getElementsByTagName(tag_name)
        for x in alltags:
            if x.hasAttribute('pinyin'):  # pinyin
                print(x.tagName, 'pinyin',
                      x.getAttribute('pinyin'), x.firstChild.data)
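For readers who want to see how the pinyin attribute is picked up without installing PaddleSpeech, the following self-contained sketch walks a <speak> fragment with xml.dom.minidom in the same way get_pinyins_for_xml does (the say-as tag name is illustrative; only the pinyin attribute matters):

import re
import xml.dom.minidom
from xml.dom.minidom import parseString

xmlstr = '<speak>天气真<say-as pinyin="hao3">好</say-as></speak>'
root = parseString(xmlstr).documentElement   # the <speak> element

pairs = []
for node in root.childNodes:
    if isinstance(node, xml.dom.minidom.Text):
        # plain text inside <speak>: no pinyin specified
        pairs.append([re.sub(r"\s+", "", node.data), []])
    elif node.hasAttribute('pinyin'):
        # an element carrying a pinyin attribute: attach its pinyins to the inner text
        pinyins = node.getAttribute('pinyin').split(" ")
        for child in node.childNodes:
            if isinstance(child, xml.dom.minidom.Text):
                pairs.append([re.sub(r"\s+", "", child.data), pinyins])

print(pairs)   # [['天气真', []], ['好', ['hao3']]]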