Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
9727e67a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9727e67a
编写于
6月 07, 2023
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ssml unit test
上级
4d867700
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
101 addition
and
15 deletion
+101
-15
.pre-commit-config.yaml
.pre-commit-config.yaml
+6
-6
paddlespeech/t2s/exps/synthesize_e2e.py
paddlespeech/t2s/exps/synthesize_e2e.py
+1
-1
paddlespeech/t2s/frontend/canton_frontend.py
paddlespeech/t2s/frontend/canton_frontend.py
+13
-3
paddlespeech/t2s/frontend/ssml/xml_processor.py
paddlespeech/t2s/frontend/ssml/xml_processor.py
+9
-1
paddlespeech/t2s/frontend/zh_frontend.py
paddlespeech/t2s/frontend/zh_frontend.py
+11
-4
tests/unit/tts/test_ssml.py
tests/unit/tts/test_ssml.py
+61
-0
未找到文件。
.pre-commit-config.yaml
浏览文件 @
9727e67a
...
...
@@ -26,12 +26,12 @@ repos:
-
--no-sort-keys
-
--autofix
-
id
:
check-merge-conflict
-
id
:
flake8
aergs
:
-
--ignore=E501,E228,E226,E261,E266,E128,E402,W503
-
--builtins=G,request
-
--jobs=1
exclude
:
(?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
#
- id: flake8
#
aergs:
#
- --ignore=E501,E228,E226,E261,E266,E128,E402,W503
#
- --builtins=G,request
#
- --jobs=1
#
exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$
-
repo
:
https://github.com/Lucas-C/pre-commit-hooks
rev
:
v1.0.1
...
...
paddlespeech/t2s/exps/synthesize_e2e.py
浏览文件 @
9727e67a
...
...
@@ -115,9 +115,9 @@ def evaluate(args):
sentences
=
get_sentences_svs
(
text_file
=
args
.
text
)
else
:
sentences
=
get_sentences
(
text_file
=
args
.
text
,
lang
=
args
.
lang
)
pprint
(
f
"inputs:
{
sentences
}
"
)
for
utt_id
,
sentence
in
sentences
:
print
(
f
"
{
utt_id
}
{
sentence
}
..."
)
with
timer
()
as
t
:
if
am_name
==
"diffsinger"
:
text
=
""
...
...
paddlespeech/t2s/frontend/canton_frontend.py
浏览文件 @
9727e67a
...
...
@@ -29,7 +29,8 @@ INITIALS = [
INITIALS
+=
[
'sp'
,
'spl'
,
'spn'
,
'sil'
]
def
get_lines
(
cantons
:
List
[
str
]):
def
jyuping_to_phonemes
(
cantons
:
List
[
str
]):
# jyuping to initial and final
phones
=
[]
for
canton
in
cantons
:
for
consonant
in
INITIALS
:
...
...
@@ -61,8 +62,11 @@ class CantonFrontend():
merge_sentences
:
bool
=
True
)
->
List
[
List
[
str
]]:
phones_list
=
[]
for
sentence
in
sentences
:
# jyuping
# 'gam3 ngaam1 lou5 sai3 jiu1 kau4 keoi5 dang2 zan6 jiu3 hoi1 wui2, zing6 dai1 ge2 je5 ngo5 wui5 gaau2 dim6 ga3 laa3.'
phones_str
=
ToJyutping
.
get_jyutping_text
(
sentence
)
phones_split
=
get_lines
(
phones_str
.
split
(
' '
))
# phonemes
phones_split
=
jyuping_to_phonemes
(
phones_str
.
split
(
' '
))
phones_list
.
append
(
phones_split
)
return
phones_list
...
...
@@ -78,8 +82,11 @@ class CantonFrontend():
sentence
:
str
,
merge_sentences
:
bool
=
True
,
print_info
:
bool
=
False
)
->
List
[
List
[
str
]]:
# TN & Text Segmentation
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
# G2P
phonemes
=
self
.
_g2p
(
sentences
,
merge_sentences
=
merge_sentences
)
if
print_info
:
print
(
"----------------------------"
)
print
(
"text norm results:"
)
...
...
@@ -88,6 +95,7 @@ class CantonFrontend():
print
(
"g2p results:"
)
print
(
phonemes
)
print
(
"----------------------------"
)
return
phonemes
def
get_input_ids
(
self
,
...
...
@@ -98,9 +106,9 @@ class CantonFrontend():
phonemes
=
self
.
get_phonemes
(
sentence
,
merge_sentences
=
merge_sentences
,
print_info
=
print_info
)
result
=
{}
temp_phone_ids
=
[]
for
phones
in
phonemes
:
if
phones
:
phone_ids
=
self
.
_p2id
(
phones
)
...
...
@@ -108,6 +116,8 @@ class CantonFrontend():
if
to_tensor
:
phone_ids
=
paddle
.
to_tensor
(
phone_ids
)
temp_phone_ids
.
append
(
phone_ids
)
if
temp_phone_ids
:
result
[
"phone_ids"
]
=
temp_phone_ids
return
result
paddlespeech/t2s/frontend/ssml/xml_processor.py
浏览文件 @
9727e67a
...
...
@@ -17,7 +17,6 @@ Note: xml 有5种特殊字符, &<>"'
' '
例如:
<TitleName>"姓名"</TitleName>
'''
...
...
@@ -61,14 +60,23 @@ class MixTextProcessor():
patn
=
re
.
compile
(
r
'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$'
,
re
.
M
|
re
.
S
)
mat
=
re
.
match
(
patn
,
mixstr
)
if
mat
:
# pre <speak>
pre_xml
=
mat
.
group
(
1
)
# between <speak> ... </speak>
in_xml
=
mat
.
group
(
2
)
# post </speak>
after_xml
=
mat
.
group
(
3
)
# pre with none syllable
ctlist
.
append
([
pre_xml
,
[]])
# between with syllable
# [(sub sentence, [syllables]), ...]
dom
=
DomXml
(
in_xml
)
pinyinlist
=
dom
.
get_pinyins_for_xml
()
ctlist
=
ctlist
+
pinyinlist
# post with none syllable
ctlist
.
append
([
after_xml
,
[]])
else
:
ctlist
.
append
([
mixstr
,
[]])
...
...
paddlespeech/t2s/frontend/zh_frontend.py
浏览文件 @
9727e67a
...
...
@@ -14,6 +14,7 @@
import
os
import
re
from
operator
import
itemgetter
from
pprint
import
pprint
from
typing
import
Dict
from
typing
import
List
...
...
@@ -41,6 +42,9 @@ INITIALS = [
]
INITIALS
+=
[
'y'
,
'w'
,
'sp'
,
'spl'
,
'spn'
,
'sil'
]
# 0 for None, 5 for neutral
TONES
=
[
"0"
,
"1"
,
"2"
,
"3"
,
"4"
,
"5"
]
def
intersperse
(
lst
,
item
):
result
=
[
item
]
*
(
len
(
lst
)
*
2
+
1
)
...
...
@@ -597,11 +601,13 @@ class Frontend():
all_phonemes
=
[]
for
word_pinyin_item
in
ssml_inputs
:
phonemes
=
[]
print
(
"ssml inputs:"
,
word_pinyin_item
)
# ['你喜欢', []] -> 你喜欢 []
sentence
,
pinyin_spec
=
itemgetter
(
0
,
1
)(
word_pinyin_item
)
print
(
'ssml g2p:'
,
sentence
,
pinyin_spec
)
# TN & Text Segmentation
sentences
=
self
.
text_normalizer
.
normalize
(
sentence
)
if
len
(
pinyin_spec
)
==
0
:
# g2p word w/o specified <say-as>
phonemes
=
self
.
_g2p
(
...
...
@@ -635,6 +641,7 @@ class Frontend():
print
(
"g2p results:"
)
print
(
all_phonemes
[
0
])
print
(
"----------------------------"
)
return
[
sum
(
all_phonemes
,
[])]
def
add_sp_if_no
(
self
,
phonemes
):
...
...
@@ -711,10 +718,10 @@ class Frontend():
to_tensor
:
bool
=
True
)
->
Dict
[
str
,
List
[
paddle
.
Tensor
]]:
# split sentence by SSML tag.
l_inpu
ts
=
MixTextProcessor
.
get_pinyin_split
(
sentence
)
tex
ts
=
MixTextProcessor
.
get_pinyin_split
(
sentence
)
phonemes
=
self
.
get_phonemes_ssml
(
l_inpu
ts
,
tex
ts
,
merge_sentences
=
merge_sentences
,
print_info
=
print_info
,
robot
=
robot
)
...
...
tests/unit/tts/test_ssml.py
0 → 100644
浏览文件 @
9727e67a
from
paddlespeech.t2s.frontend.ssml.xml_processor
import
MixTextProcessor
if __name__ == '__main__':
    # Manual smoke script: push one sentence that mixes plain text with a
    # <speak>...</speak> SSML section through four MixTextProcessor helpers
    # and print each result so it can be compared by eye with the sample
    # output recorded in the comments below.

    def _print_numbered(items):
        """Print every element of *items* prefixed by its index."""
        for position, element in enumerate(items):
            print(position, element)

    mixed_text = "你好吗,<speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>thank you."

    # --- get_pinyin_split -------------------------------------------------
    # Sample output recorded by the original author (13 entries, each a
    # [sub-sentence, [pinyin, ...]] pair):
    # 0 ['你好吗,', []]
    # 1 ['我们的声学模型使用了FastSpeechTwo。前浪', []]
    # 2 ['倒', ['dao3']]
    # 3 ['在沙滩上,沙滩上倒了一堆', []]
    # 4 ['土', ['tu3']]
    # 5 ['。想象', []]
    # 6 ['干干', ['gan1', 'gan1']]
    # 7 ['的树干', []]
    # 8 ['倒', ['dao3']]
    # 9 ['了,里面有个干尸,不知是被谁', []]
    # 10 ['干', ['gan4']]
    # 11 ['死的。', []]
    # 12 ['thank you.', []]
    pinyin_parts = MixTextProcessor.get_pinyin_split(mixed_text)
    print(f"SSML get_pinyin_split: {len(pinyin_parts)}")
    _print_numbered(pinyin_parts)
    print()

    # --- get_dom_split ----------------------------------------------------
    # Sample output recorded by the original author (13 entries; <say-as>
    # elements are kept as markup):
    # 0 你好吗,
    # 1 我们的声学模型使用了 Fast Speech Two。前浪
    # 2 <say-as pinyin="dao3">倒</say-as>
    # 3 在沙滩上,沙滩上倒了一堆
    # 4 <say-as pinyin="tu3">土</say-as>
    # 5 。 想象
    # 6 <say-as pinyin="gan1 gan1">干干</say-as>
    # 7 的树干
    # 8 <say-as pinyin="dao3">倒</say-as>
    # 9 了, 里面有个干尸,不知是被谁
    # 10 <say-as pinyin="gan4">干</say-as>
    # 11 死的。
    # 12 thank you.
    dom_parts = MixTextProcessor.get_dom_split(mixed_text)
    print(f"SSML get_dom_split: {len(dom_parts)}")
    _print_numbered(dom_parts)
    print()

    # --- get_xml_content (instance call) ------------------------------------
    # Sample output recorded by the original author: a length of 246 followed
    # by the <speak>...</speak> section of the input:
    # <speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>
    # NOTE: the printed label says "get_pinyin_split" although the call is
    # get_xml_content; the label is kept exactly as the original prints it.
    xml_section = MixTextProcessor().get_xml_content(mixed_text)
    print(f"SSML object.get_pinyin_split: {len(xml_section)}")
    print(xml_section)
    print()

    # --- get_content_split (instance call) ----------------------------------
    # Sample output recorded by the original author (3 entries: text before,
    # the <speak> section, text after):
    # 0 你好吗,
    # 1 <speak>我们的声学模型使用了 Fast Speech Two。前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上,沙滩上倒了一堆<say-as pinyin='tu3'>土</say-as>。 想象<say-as pinyin='gan1 gan1'>干干</say-as>的树干<say-as pinyin='dao3'>倒</say-as>了, 里面有个干尸,不知是被谁<say-as pinyin='gan4'>干</say-as>死的。</speak>
    # 2 thank you.
    content_parts = MixTextProcessor().get_content_split(mixed_text)
    print(f"SSML object.get_content_split: {len(content_parts)}")
    _print_numbered(content_parts)
    print()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录