Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
9727e67a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9727e67a
编写于
6月 07, 2023
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ssml unit test
上级
4d867700
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
101 addition
and
15 deletion
+101
-15
.pre-commit-config.yaml
.pre-commit-config.yaml
+6
-6
paddlespeech/t2s/exps/synthesize_e2e.py
paddlespeech/t2s/exps/synthesize_e2e.py
+1
-1
paddlespeech/t2s/frontend/canton_frontend.py
paddlespeech/t2s/frontend/canton_frontend.py
+13
-3
paddlespeech/t2s/frontend/ssml/xml_processor.py
paddlespeech/t2s/frontend/ssml/xml_processor.py
+9
-1
paddlespeech/t2s/frontend/zh_frontend.py
paddlespeech/t2s/frontend/zh_frontend.py
+11
-4
tests/unit/tts/test_ssml.py
tests/unit/tts/test_ssml.py
+61
-0
未找到文件。
.pre-commit-config.yaml
浏览文件 @
9727e67a
...
@@ -26,12 +26,12 @@ repos:
...
@@ -26,12 +26,12 @@ repos:
-
--no-sort-keys
-
--no-sort-keys
-
--autofix
-
--autofix
-
id
:
check-merge-conflict
-
id
:
check-merge-conflict
-
id
:
flake8
#
- id: flake8
aergs
:
#
aergs:
-
--ignore=E501,E228,E226,E261,E266,E128,E402,W503
#
- --ignore=E501,E228,E226,E261,E266,E128,E402,W503
-
--builtins=G,request
#
- --builtins=G,request
-
--jobs=1
#
- --jobs=1
exclude
:
(?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
#
exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
-
repo
:
https://github.com/Lucas-C/pre-commit-hooks
-
repo
:
https://github.com/Lucas-C/pre-commit-hooks
rev
:
v1.0.1
rev
:
v1.0.1
...
...
paddlespeech/t2s/exps/synthesize_e2e.py
浏览文件 @
9727e67a
...
@@ -115,9 +115,9 @@ def evaluate(args):
...
@@ -115,9 +115,9 @@ def evaluate(args):
sentences
=
get_sentences_svs
(
text_file
=
args
.
text
)
sentences
=
get_sentences_svs
(
text_file
=
args
.
text
)
else
:
else
:
sentences
=
get_sentences
(
text_file
=
args
.
text
,
lang
=
args
.
lang
)
sentences
=
get_sentences
(
text_file
=
args
.
text
,
lang
=
args
.
lang
)
pprint
(
f
"inputs:
{
sentences
}
"
)
for
utt_id
,
sentence
in
sentences
:
for
utt_id
,
sentence
in
sentences
:
print
(
f
"
{
utt_id
}
{
sentence
}
..."
)
with
timer
()
as
t
:
with
timer
()
as
t
:
if
am_name
==
"diffsinger"
:
if
am_name
==
"diffsinger"
:
text
=
""
text
=
""
...
...
paddlespeech/t2s/frontend/canton_frontend.py
浏览文件 @
9727e67a
...
@@ -29,7 +29,8 @@ INITIALS = [
...
@@ -29,7 +29,8 @@ INITIALS = [
INITIALS
+=
[
'sp'
,
'spl'
,
'spn'
,
'sil'
]
INITIALS
+=
[
'sp'
,
'spl'
,
'spn'
,
'sil'
]
def
get_lines
(
cantons
:
List
[
str
]):
def
jyuping_to_phonemes
(
cantons
:
List
[
str
]):
# jyuping to inital and final
phones
=
[]
phones
=
[]
for
canton
in
cantons
:
for
canton
in
cantons
:
for
consonant
in
INITIALS
:
for
consonant
in
INITIALS
:
...
@@ -61,8 +62,11 @@ class CantonFrontend():
...
@@ -61,8 +62,11 @@ class CantonFrontend():
merge_sentences
:
bool
=
True
)
->
List
[
List
[
str
]]:
merge_sentences
:
bool
=
True
)
->
List
[
List
[
str
]]:
phones_list
=
[]
phones_list
=
[]
for
sentence
in
sentences
:
for
sentence
in
sentences
:
# jyuping
# 'gam3 ngaam1 lou5 sai3 jiu1 kau4 keoi5 dang2 zan6 jiu3 hoi1 wui2, zing6 dai1 ge2 je5 ngo5 wui5 gaau2 dim6 ga3 laa3.'
phones_str
=
ToJyutping
.
get_jyutping_text
(
sentence
)
phones_str
=
ToJyutping
.
get_jyutping_text
(
sentence
)
phones_split
=
get_lines
(
phones_str
.
split
(
' '
))
# phonemes
phones_split
=
jyuping_to_phonemes
(
phones_str
.
split
(
' '
))
phones_list
.
append
(
phones_split
)
phones_list
.
append
(
phones_split
)
return
phones_list
return
phones_list
...
@@ -78,8 +82,11 @@ class CantonFrontend():
...
@@ -78,8 +82,11 @@ class CantonFrontend():
sentence
:
str
,
sentence
:
str
,
merge_sentences
:
bool
=
True
,
merge_sentences
:
bool
=
True
,
print_info
:
bool
=
False
)
->
List
[
List
[
str
]]:
print_info
:
bool
=
False
)
->
List
[
List
[
str
]]:
# TN & Text Segmentation
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
# G2P
phonemes
=
self
.
_g2p
(
sentences
,
merge_sentences
=
merge_sentences
)
phonemes
=
self
.
_g2p
(
sentences
,
merge_sentences
=
merge_sentences
)
if
print_info
:
if
print_info
:
print
(
"----------------------------"
)
print
(
"----------------------------"
)
print
(
"text norm results:"
)
print
(
"text norm results:"
)
...
@@ -88,6 +95,7 @@ class CantonFrontend():
...
@@ -88,6 +95,7 @@ class CantonFrontend():
print
(
"g2p results:"
)
print
(
"g2p results:"
)
print
(
phonemes
)
print
(
phonemes
)
print
(
"----------------------------"
)
print
(
"----------------------------"
)
return
phonemes
return
phonemes
def
get_input_ids
(
self
,
def
get_input_ids
(
self
,
...
@@ -98,9 +106,9 @@ class CantonFrontend():
...
@@ -98,9 +106,9 @@ class CantonFrontend():
phonemes
=
self
.
get_phonemes
(
phonemes
=
self
.
get_phonemes
(
sentence
,
merge_sentences
=
merge_sentences
,
print_info
=
print_info
)
sentence
,
merge_sentences
=
merge_sentences
,
print_info
=
print_info
)
result
=
{}
result
=
{}
temp_phone_ids
=
[]
temp_phone_ids
=
[]
for
phones
in
phonemes
:
for
phones
in
phonemes
:
if
phones
:
if
phones
:
phone_ids
=
self
.
_p2id
(
phones
)
phone_ids
=
self
.
_p2id
(
phones
)
...
@@ -108,6 +116,8 @@ class CantonFrontend():
...
@@ -108,6 +116,8 @@ class CantonFrontend():
if
to_tensor
:
if
to_tensor
:
phone_ids
=
paddle
.
to_tensor
(
phone_ids
)
phone_ids
=
paddle
.
to_tensor
(
phone_ids
)
temp_phone_ids
.
append
(
phone_ids
)
temp_phone_ids
.
append
(
phone_ids
)
if
temp_phone_ids
:
if
temp_phone_ids
:
result
[
"phone_ids"
]
=
temp_phone_ids
result
[
"phone_ids"
]
=
temp_phone_ids
return
result
return
result
paddlespeech/t2s/frontend/ssml/xml_processor.py
浏览文件 @
9727e67a
...
@@ -17,7 +17,6 @@ Note: xml 有5种特殊字符, &<>"'
...
@@ -17,7 +17,6 @@ Note: xml 有5种特殊字符, &<>"'
' '
' '
例如:
例如:
<TitleName>"姓名"</TitleName>
<TitleName>"姓名"</TitleName>
'''
'''
...
@@ -61,14 +60,23 @@ class MixTextProcessor():
...
@@ -61,14 +60,23 @@ class MixTextProcessor():
patn
=
re
.
compile
(
r
'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$'
,
re
.
M
|
re
.
S
)
patn
=
re
.
compile
(
r
'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$'
,
re
.
M
|
re
.
S
)
mat
=
re
.
match
(
patn
,
mixstr
)
mat
=
re
.
match
(
patn
,
mixstr
)
if
mat
:
if
mat
:
# pre <speak>
pre_xml
=
mat
.
group
(
1
)
pre_xml
=
mat
.
group
(
1
)
# between <speak> ... </speak>
in_xml
=
mat
.
group
(
2
)
in_xml
=
mat
.
group
(
2
)
# post </speak>
after_xml
=
mat
.
group
(
3
)
after_xml
=
mat
.
group
(
3
)
# pre with none syllable
ctlist
.
append
([
pre_xml
,
[]])
ctlist
.
append
([
pre_xml
,
[]])
# between with syllable
# [(sub sentence, [syllables]), ...]
dom
=
DomXml
(
in_xml
)
dom
=
DomXml
(
in_xml
)
pinyinlist
=
dom
.
get_pinyins_for_xml
()
pinyinlist
=
dom
.
get_pinyins_for_xml
()
ctlist
=
ctlist
+
pinyinlist
ctlist
=
ctlist
+
pinyinlist
# post with none syllable
ctlist
.
append
([
after_xml
,
[]])
ctlist
.
append
([
after_xml
,
[]])
else
:
else
:
ctlist
.
append
([
mixstr
,
[]])
ctlist
.
append
([
mixstr
,
[]])
...
...
paddlespeech/t2s/frontend/zh_frontend.py
浏览文件 @
9727e67a
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
import
os
import
os
import
re
import
re
from
operator
import
itemgetter
from
operator
import
itemgetter
from
pprint
import
pprint
from
typing
import
Dict
from
typing
import
Dict
from
typing
import
List
from
typing
import
List
...
@@ -41,6 +42,9 @@ INITIALS = [
...
@@ -41,6 +42,9 @@ INITIALS = [
]
]
INITIALS
+=
[
'y'
,
'w'
,
'sp'
,
'spl'
,
'spn'
,
'sil'
]
INITIALS
+=
[
'y'
,
'w'
,
'sp'
,
'spl'
,
'spn'
,
'sil'
]
# 0 for None, 5 for neutral
TONES
=
[
"0"
,
"1"
,
"2"
,
"3"
,
"4"
,
"5"
]
def
intersperse
(
lst
,
item
):
def
intersperse
(
lst
,
item
):
result
=
[
item
]
*
(
len
(
lst
)
*
2
+
1
)
result
=
[
item
]
*
(
len
(
lst
)
*
2
+
1
)
...
@@ -597,11 +601,13 @@ class Frontend():
...
@@ -597,11 +601,13 @@ class Frontend():
all_phonemes
=
[]
all_phonemes
=
[]
for
word_pinyin_item
in
ssml_inputs
:
for
word_pinyin_item
in
ssml_inputs
:
phonemes
=
[]
phonemes
=
[]
print
(
"ssml inputs:"
,
word_pinyin_item
)
# ['你喜欢', []] -> 你喜欢 []
sentence
,
pinyin_spec
=
itemgetter
(
0
,
1
)(
word_pinyin_item
)
sentence
,
pinyin_spec
=
itemgetter
(
0
,
1
)(
word_pinyin_item
)
print
(
'ssml g2p:'
,
sentence
,
pinyin_spec
)
# TN & Text Segmentation
# TN & Text Segmentation
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
if
len
(
pinyin_spec
)
==
0
:
if
len
(
pinyin_spec
)
==
0
:
# g2p word w/o specified <say-as>
# g2p word w/o specified <say-as>
phonemes
=
self
.
_g2p
(
phonemes
=
self
.
_g2p
(
...
@@ -635,6 +641,7 @@ class Frontend():
...
@@ -635,6 +641,7 @@ class Frontend():
print
(
"g2p results:"
)
print
(
"g2p results:"
)
print
(
all_phonemes
[
0
])
print
(
all_phonemes
[
0
])
print
(
"----------------------------"
)
print
(
"----------------------------"
)
return
[
sum
(
all_phonemes
,
[])]
return
[
sum
(
all_phonemes
,
[])]
def
add_sp_if_no
(
self
,
phonemes
):
def
add_sp_if_no
(
self
,
phonemes
):
...
@@ -711,10 +718,10 @@ class Frontend():
...
@@ -711,10 +718,10 @@ class Frontend():
to_tensor
:
bool
=
True
)
->
Dict
[
str
,
List
[
paddle
.
Tensor
]]:
to_tensor
:
bool
=
True
)
->
Dict
[
str
,
List
[
paddle
.
Tensor
]]:
# split setence by SSML tag.
# split setence by SSML tag.
l_inpu
ts
=
MixTextProcessor
.
get_pinyin_split
(
sentence
)
tex
ts
=
MixTextProcessor
.
get_pinyin_split
(
sentence
)
phonemes
=
self
.
get_phonemes_ssml
(
phonemes
=
self
.
get_phonemes_ssml
(
l_inpu
ts
,
tex
ts
,
merge_sentences
=
merge_sentences
,
merge_sentences
=
merge_sentences
,
print_info
=
print_info
,
print_info
=
print_info
,
robot
=
robot
)
robot
=
robot
)
...
...
tests/unit/tts/test_ssml.py
0 → 100644
浏览文件 @
9727e67a
from
paddlespeech.t2s.frontend.ssml.xml_processor
import
MixTextProcessor
if
__name__
==
'__main__'
:
text
=
"你好吗,<speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>thank you."
# SSML: 13
# 0 ['你好吗,', []]
# 1 ['我们的声学模型使用了FastSpeechTwo。前浪', []]
# 2 ['倒', ['dao3']]
# 3 ['在沙滩上,沙滩上倒了一堆', []]
# 4 ['土', ['tu3']]
# 5 ['。想象', []]
# 6 ['干干', ['gan1', 'gan1']]
# 7 ['的树干', []]
# 8 ['倒', ['dao3']]
# 9 ['了,里面有个干尸,不知是被谁', []]
# 10 ['干', ['gan4']]
# 11 ['死的。', []]
# 12 ['thank you.', []]
inputs
=
MixTextProcessor
.
get_pinyin_split
(
text
)
print
(
f
"SSML get_pinyin_split:
{
len
(
inputs
)
}
"
)
for
i
,
sub
in
enumerate
(
inputs
):
print
(
i
,
sub
)
print
()
# SSML get_dom_split: 13
# 0 你好吗,
# 1 我们的声学模型使用了 Fast Speech Two。前浪
# 2 <say-as pinyin="dao3">倒</say-as>
# 3 在沙滩上,沙滩上倒了一堆
# 4 <say-as pinyin="tu3">土</say-as>
# 5 。 想象
# 6 <say-as pinyin="gan1 gan1">干干</say-as>
# 7 的树干
# 8 <say-as pinyin="dao3">倒</say-as>
# 9 了, 里面有个干尸,不知是被谁
# 10 <say-as pinyin="gan4">干</say-as>
# 11 死的。
# 12 thank you.
inputs
=
MixTextProcessor
.
get_dom_split
(
text
)
print
(
f
"SSML get_dom_split:
{
len
(
inputs
)
}
"
)
for
i
,
sub
in
enumerate
(
inputs
):
print
(
i
,
sub
)
print
()
# SSML object.get_pinyin_split: 246
# <speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>
outs
=
MixTextProcessor
().
get_xml_content
(
text
)
print
(
f
"SSML object.get_pinyin_split:
{
len
(
outs
)
}
"
)
print
(
outs
)
print
()
# SSML object.get_content_split: 30 你好吗,
# 1 <speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>
# 倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>
# 2 thank you.
outs
=
MixTextProcessor
().
get_content_split
(
text
)
print
(
f
"SSML object.get_content_split:
{
len
(
outs
)
}
"
)
for
i
,
sub
in
enumerate
(
outs
):
print
(
i
,
sub
)
print
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录