Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
742523fb
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
742523fb
编写于
1月 13, 2023
作者:
晋
晋东毅
提交者:
GitHub
1月 13, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[tts]For mixed Chinese and English speech synthesis, add SSML support for Chinese (#2830)
* 添加.history * [tts]添加中英混合语音合成时对中文SSML的支持
上级
a99244d8
变更
3
显示空白变更内容
内联
并排
Showing
3 changed files
with
86 additions
and
6 deletions
+86
-6
.gitignore
.gitignore
+1
-0
paddlespeech/t2s/frontend/mix_frontend.py
paddlespeech/t2s/frontend/mix_frontend.py
+51
-6
paddlespeech/t2s/ssml/xml_processor.py
paddlespeech/t2s/ssml/xml_processor.py
+34
-0
未找到文件。
.gitignore
浏览文件 @
742523fb
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
*.egg-info
*.egg-info
build
build
*output/
*output/
.history
audio/dist/
audio/dist/
audio/fc_patch/
audio/fc_patch/
...
...
paddlespeech/t2s/frontend/mix_frontend.py
浏览文件 @
742523fb
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
re
from
typing
import
Dict
from
typing
import
Dict
from
typing
import
List
from
typing
import
List
...
@@ -18,6 +19,7 @@ import paddle
...
@@ -18,6 +19,7 @@ import paddle
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.ssml.xml_processor
import
MixTextProcessor
class
MixFrontend
():
class
MixFrontend
():
...
@@ -107,7 +109,40 @@ class MixFrontend():
...
@@ -107,7 +109,40 @@ class MixFrontend():
add_sp
:
bool
=
True
,
add_sp
:
bool
=
True
,
to_tensor
:
bool
=
True
)
->
Dict
[
str
,
List
[
paddle
.
Tensor
]]:
to_tensor
:
bool
=
True
)
->
Dict
[
str
,
List
[
paddle
.
Tensor
]]:
segments
=
self
.
get_segment
(
sentence
)
''' 1. 添加SSML支持,先列出 文字 和 <say-as>标签内容,
然后添加到tmpSegments数组里
'''
d_inputs
=
MixTextProcessor
.
get_dom_split
(
sentence
)
tmpSegments
=
[]
for
instr
in
d_inputs
:
''' 暂时只支持 say-as '''
if
instr
.
lower
().
startswith
(
"<say-as"
):
tmpSegments
.
append
((
instr
,
"zh"
))
else
:
tmpSegments
.
extend
(
self
.
get_segment
(
instr
))
''' 2. 把zh的merge到一起,避免合成结果中间停顿
'''
segments
=
[]
currentSeg
=
[
""
,
""
]
for
seg
in
tmpSegments
:
if
seg
[
1
]
==
"en"
or
seg
[
1
]
==
"other"
:
if
currentSeg
[
0
]
==
''
:
segments
.
append
(
seg
)
else
:
currentSeg
[
0
]
=
"<speak>"
+
currentSeg
[
0
]
+
"</speak>"
segments
.
append
(
tuple
(
currentSeg
))
segments
.
append
(
seg
)
currentSeg
=
[
""
,
""
]
else
:
if
currentSeg
[
0
]
==
''
:
currentSeg
[
0
]
=
seg
[
0
]
currentSeg
[
1
]
=
seg
[
1
]
else
:
currentSeg
[
0
]
=
currentSeg
[
0
]
+
seg
[
0
]
if
currentSeg
[
0
]
!=
''
:
currentSeg
[
0
]
=
"<speak>"
+
currentSeg
[
0
]
+
"</speak>"
segments
.
append
(
tuple
(
currentSeg
))
phones_list
=
[]
phones_list
=
[]
result
=
{}
result
=
{}
...
@@ -119,6 +154,16 @@ class MixFrontend():
...
@@ -119,6 +154,16 @@ class MixFrontend():
if
lang
==
"en"
:
if
lang
==
"en"
:
input_ids
=
self
.
en_frontend
.
get_input_ids
(
input_ids
=
self
.
en_frontend
.
get_input_ids
(
content
,
merge_sentences
=
False
,
to_tensor
=
to_tensor
)
content
,
merge_sentences
=
False
,
to_tensor
=
to_tensor
)
else
:
''' 3. 把带speak tag的中文和普通文字分开处理
'''
if
content
.
strip
()
!=
""
and
\
re
.
match
(
r
".*?<speak>.*?</speak>.*"
,
content
,
re
.
DOTALL
):
input_ids
=
self
.
zh_frontend
.
get_input_ids_ssml
(
content
,
merge_sentences
=
False
,
get_tone_ids
=
get_tone_ids
,
to_tensor
=
to_tensor
)
else
:
else
:
input_ids
=
self
.
zh_frontend
.
get_input_ids
(
input_ids
=
self
.
zh_frontend
.
get_input_ids
(
content
,
content
,
...
...
paddlespeech/t2s/ssml/xml_processor.py
浏览文件 @
742523fb
...
@@ -74,6 +74,28 @@ class MixTextProcessor():
...
@@ -74,6 +74,28 @@ class MixTextProcessor():
ctlist
.
append
([
mixstr
,
[]])
ctlist
.
append
([
mixstr
,
[]])
return
ctlist
return
ctlist
@classmethod
def get_dom_split(cls, mixstr):
    """Split mixed text into an ordered list of plain text and SSML pieces.

    Every ``<speak>...</speak>`` block found in ``mixstr`` is parsed with
    ``DomXml`` and replaced by the items returned from its
    ``get_text_and_sayas_tags()``. The text before, between and after the
    blocks is kept verbatim (possibly as empty strings), so the pieces
    stay in their original order.

    Args:
        mixstr: Input text that may embed ``<speak>`` blocks.

    Returns:
        list: ``[mixstr]`` when no ``<speak>`` block is present, otherwise
        ``[prefix, *block_items, ..., suffix]``.
    """
    # Scan for every block rather than using one anchored match: a greedy
    # prefix group would swallow all but the last <speak> block and hand
    # the earlier ones downstream as if they were plain text.
    blocks = list(re.finditer(r'<speak>.*?</speak>', mixstr, re.S))
    if not blocks:
        return [mixstr]

    ctlist = []
    cursor = 0
    for block in blocks:
        # Plain text preceding this block (empty when the block is at the
        # very start or directly follows the previous one).
        ctlist.append(mixstr[cursor:block.start()])
        ctlist.extend(DomXml(block.group(0)).get_text_and_sayas_tags())
        cursor = block.end()
    # Whatever follows the last block.
    ctlist.append(mixstr[cursor:])
    return ctlist
class
DomXml
():
class
DomXml
():
def
__init__
(
self
,
xmlstr
):
def
__init__
(
self
,
xmlstr
):
...
@@ -156,3 +178,15 @@ class DomXml():
...
@@ -156,3 +178,15 @@ class DomXml():
if
x
.
hasAttribute
(
'pinyin'
):
# pinyin
if
x
.
hasAttribute
(
'pinyin'
):
# pinyin
print
(
x
.
tagName
,
'pinyin'
,
print
(
x
.
tagName
,
'pinyin'
,
x
.
getAttribute
(
'pinyin'
),
x
.
firstChild
.
data
)
x
.
getAttribute
(
'pinyin'
),
x
.
firstChild
.
data
)
def get_text_and_sayas_tags(self):
    """Return the XML content as a flat list of strings.

    Iterates over ``self.rnode``. A text node contributes its character
    data; any other node contributes the serialized form (``toxml()``) of
    each of its direct children, so text fragments and child tags such as
    ``<say-as>`` come back in document order.

    Returns:
        list: text content and serialized child nodes.
    """
    res = []
    for node in self.rnode:
        if node.nodeType == Node.TEXT_NODE:
            # minidom Text nodes expose their content via ``nodeValue``
            # (alias of ``data``); they have no ``value`` attribute, so
            # reading ``.value`` here would raise AttributeError.
            res.append(node.nodeValue)
        else:
            res.extend(child.toxml() for child in node.childNodes)
    return res
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录