Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
80b18021
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
80b18021
编写于
9月 14, 2022
作者:
小湉湉
提交者:
GitHub
9月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[TTS] fix some bugs of ERNIE-SAT (#2378)
* fix ernie_sat, test=tts * fix for comments, test=tts
上级
ec571bb0
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
27 addition
and
23 deletion
+27
-23
examples/aishell3/ernie_sat/local/synthesize_e2e.sh
examples/aishell3/ernie_sat/local/synthesize_e2e.sh
+3
-3
examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
+3
-3
examples/vctk/ernie_sat/local/synthesize_e2e.sh
examples/vctk/ernie_sat/local/synthesize_e2e.sh
+3
-3
paddlespeech/t2s/exps/ernie_sat/align.py
paddlespeech/t2s/exps/ernie_sat/align.py
+2
-2
paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
+16
-12
未找到文件。
examples/aishell3/ernie_sat/local/synthesize_e2e.sh
浏览文件 @
80b18021
...
...
@@ -13,9 +13,9 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
FLAGS_fraction_of_gpu_memory_to_use
=
0.01
\
python3
${
BIN_DIR
}
/synthesize_e2e.py
\
--task_name
=
synthesize
\
--wav_path
=
source
/SSB03540307.wav
\
--old_str
=
'请播放歌曲小苹果
。
'
\
--new_str
=
'歌曲真好听
。
'
\
--wav_path
=
source
/SSB03540307.wav
\
--old_str
=
'请播放歌曲小苹果'
\
--new_str
=
'歌曲真好听'
\
--source_lang
=
zh
\
--target_lang
=
zh
\
--erniesat_config
=
${
config_path
}
\
...
...
examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
浏览文件 @
80b18021
...
...
@@ -15,7 +15,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
python3
${
BIN_DIR
}
/synthesize_e2e.py
\
--task_name
=
synthesize
\
--wav_path
=
source
/p243_313.wav
\
--old_str
=
'For that reason cover should not be given
.
'
\
--old_str
=
'For that reason cover should not be given'
\
--new_str
=
'今天天气很好'
\
--source_lang
=
en
\
--target_lang
=
zh
\
...
...
@@ -36,8 +36,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
BIN_DIR
}
/synthesize_e2e.py
\
--task_name
=
synthesize
\
--wav_path
=
source
/SSB03540307.wav
\
--old_str
=
'请播放歌曲小苹果
。
'
\
--new_str
=
"Thank you
!
"
\
--old_str
=
'请播放歌曲小苹果'
\
--new_str
=
"Thank you"
\
--source_lang
=
zh
\
--target_lang
=
en
\
--erniesat_config
=
${
config_path
}
\
...
...
examples/vctk/ernie_sat/local/synthesize_e2e.sh
浏览文件 @
80b18021
...
...
@@ -14,7 +14,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
python3
${
BIN_DIR
}
/synthesize_e2e.py
\
--task_name
=
synthesize
\
--wav_path
=
source
/p243_313.wav
\
--old_str
=
'For that reason cover should not be given
.
'
\
--old_str
=
'For that reason cover should not be given'
\
--new_str
=
'I love you very much do you love me'
\
--source_lang
=
en
\
--target_lang
=
en
\
...
...
@@ -36,8 +36,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python3
${
BIN_DIR
}
/synthesize_e2e.py
\
--task_name
=
edit
\
--wav_path
=
source
/p243_313.wav
\
--old_str
=
'For that reason cover should not be given
.
'
\
--new_str
=
'For that reason cover is not impossible to be given
.
'
\
--old_str
=
'For that reason cover should not be given'
\
--new_str
=
'For that reason cover is not impossible to be given'
\
--source_lang
=
en
\
--target_lang
=
en
\
--erniesat_config
=
${
config_path
}
\
...
...
paddlespeech/t2s/exps/ernie_sat/align.py
浏览文件 @
80b18021
...
...
@@ -58,7 +58,7 @@ def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300):
durations
[
-
2
]
+=
durations
[
-
1
]
durations
=
durations
[:
-
1
]
# replace ' and 'sil' with 'sp'
# replace '
'
and 'sil' with 'sp'
phones
=
[
'sp'
if
(
phn
==
''
or
phn
==
'sil'
)
else
phn
for
phn
in
phones
]
if
lang
==
'en'
:
...
...
@@ -195,7 +195,7 @@ def words2phns(text: str, lang='en'):
wrd
=
wrd
.
upper
()
if
(
wrd
not
in
ds
):
wrd2phns
[
str
(
index
)
+
'_'
+
wrd
]
=
'spn'
phns
.
extend
(
'spn'
)
phns
.
extend
(
[
'spn'
]
)
else
:
wrd2phns
[
str
(
index
)
+
'_'
+
wrd
]
=
word2phns_dict
[
wrd
].
split
()
phns
.
extend
(
word2phns_dict
[
wrd
].
split
())
...
...
paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
浏览文件 @
80b18021
...
...
@@ -137,9 +137,6 @@ def prep_feats_with_dur(wav_path: str,
new_wav
=
np
.
concatenate
(
[
wav_org
[:
wav_left_idx
],
blank_wav
,
wav_org
[
wav_right_idx
:]])
# 音频是正常遮住了
sf
.
write
(
str
(
"mask_wav.wav"
),
new_wav
,
samplerate
=
fs
)
# 4. get old and new mel span to be mask
old_span_bdy
=
get_span_bdy
(
mfa_start
=
mfa_start
,
mfa_end
=
mfa_end
,
span_to_repl
=
span_to_repl
)
...
...
@@ -274,7 +271,8 @@ def get_wav(wav_path: str,
new_str
:
str
=
''
,
duration_adjust
:
bool
=
True
,
fs
:
int
=
24000
,
n_shift
:
int
=
300
):
n_shift
:
int
=
300
,
task_name
:
str
=
'synthesize'
):
outs
=
get_mlm_output
(
wav_path
=
wav_path
,
...
...
@@ -298,9 +296,11 @@ def get_wav(wav_path: str,
alt_wav
=
np
.
squeeze
(
alt_wav
)
old_time_bdy
=
[
n_shift
*
x
for
x
in
old_span_bdy
]
if
task_name
==
'edit'
:
wav_replaced
=
np
.
concatenate
(
[
wav_org
[:
old_time_bdy
[
0
]],
alt_wav
,
wav_org
[
old_time_bdy
[
1
]:]])
else
:
wav_replaced
=
alt_wav
wav_dict
=
{
"origin"
:
wav_org
,
"output"
:
wav_replaced
}
return
wav_dict
...
...
@@ -356,7 +356,11 @@ def parse_args():
"--ngpu"
,
type
=
int
,
default
=
1
,
help
=
"if ngpu == 0, use cpu."
)
# ernie sat related
parser
.
add_argument
(
"--task_name"
,
type
=
str
,
help
=
"task name"
)
parser
.
add_argument
(
"--task_name"
,
type
=
str
,
choices
=
[
'edit'
,
'synthesize'
],
help
=
"task name."
)
parser
.
add_argument
(
"--wav_path"
,
type
=
str
,
help
=
"path of old wav"
)
parser
.
add_argument
(
"--old_str"
,
type
=
str
,
help
=
"old string"
)
parser
.
add_argument
(
"--new_str"
,
type
=
str
,
help
=
"new string"
)
...
...
@@ -410,10 +414,9 @@ if __name__ == '__main__':
if
args
.
task_name
==
'edit'
:
new_str
=
new_str
elif
args
.
task_name
==
'synthesize'
:
new_str
=
old_str
+
new_str
new_str
=
old_str
+
' '
+
new_str
else
:
new_str
=
old_str
+
new_str
print
(
"new_str:"
,
new_str
)
new_str
=
old_str
+
' '
+
new_str
# Extractor
mel_extractor
=
LogMelFBank
(
...
...
@@ -467,7 +470,8 @@ if __name__ == '__main__':
new_str
=
new_str
,
duration_adjust
=
args
.
duration_adjust
,
fs
=
erniesat_config
.
fs
,
n_shift
=
erniesat_config
.
n_shift
)
n_shift
=
erniesat_config
.
n_shift
,
task_name
=
args
.
task_name
)
sf
.
write
(
args
.
output_name
,
wav_dict
[
'output'
],
samplerate
=
erniesat_config
.
fs
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录