Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
004ab8d0
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
004ab8d0
编写于
5月 31, 2022
作者:
小湉湉
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
reneame chunk to block in streaming tts, test=tts
上级
0fa32e4a
变更
4
显示空白变更内容
内联
并排
Showing 4 changed files with 16 additions and 16 deletions
+16
-16
paddlespeech/t2s/exps/inference_streaming.py
paddlespeech/t2s/exps/inference_streaming.py
+4
-4
paddlespeech/t2s/exps/ort_predict_streaming.py
paddlespeech/t2s/exps/ort_predict_streaming.py
+4
-4
paddlespeech/t2s/exps/syn_utils.py
paddlespeech/t2s/exps/syn_utils.py
+4
-4
paddlespeech/t2s/exps/synthesize_streaming.py
paddlespeech/t2s/exps/synthesize_streaming.py
+4
-4
未找到文件。
paddlespeech/t2s/exps/inference_streaming.py
浏览文件 @
004ab8d0
...
...
@@ -90,7 +90,7 @@ def parse_args():
         default=False,
         help="whether use streaming acoustic model")
     parser.add_argument(
-        "--chunk_size", type=int, default=42, help="chunk size of am streaming")
+        "--block_size", type=int, default=42, help="block size of am streaming")
     parser.add_argument(
         "--pad_size", type=int, default=12, help="pad size of am streaming")
...
...
@@ -169,7 +169,7 @@ def main():
     N = 0
     T = 0
-    chunk_size = args.chunk_size
+    block_size = args.block_size
     pad_size = args.pad_size
     get_tone_ids = False
     for utt_id, sentence in sentences:
...
...
@@ -189,7 +189,7 @@ def main():
                 am_encoder_infer_predictor, input=phones)
             if args.am_streaming:
-                hss = get_chunks(orig_hs, chunk_size, pad_size)
+                hss = get_chunks(orig_hs, block_size, pad_size)
                 chunk_num = len(hss)
                 mel_list = []
                 for i, hs in enumerate(hss):
...
...
@@ -211,7 +211,7 @@ def main():
                         sub_mel = sub_mel[pad_size:]
                     else:
                         # 倒数几块的右侧也可能没有 pad 够
-                        sub_mel = sub_mel[pad_size:(chunk_size + pad_size) -
+                        sub_mel = sub_mel[pad_size:(block_size + pad_size) -
                                           sub_mel.shape[0]]
                     mel_list.append(sub_mel)
                 mel = np.concatenate(mel_list, axis=0)
...
...
paddlespeech/t2s/exps/ort_predict_streaming.py
浏览文件 @
004ab8d0
...
...
@@ -97,7 +97,7 @@ def ort_predict(args):
     T = 0
     merge_sentences = True
     get_tone_ids = False
-    chunk_size = args.chunk_size
+    block_size = args.block_size
     pad_size = args.pad_size
     for utt_id, sentence in sentences:
...
...
@@ -115,7 +115,7 @@ def ort_predict(args):
             orig_hs = am_encoder_infer_sess.run(
                 None, input_feed={'text': phone_ids})
             if args.am_streaming:
-                hss = get_chunks(orig_hs[0], chunk_size, pad_size)
+                hss = get_chunks(orig_hs[0], block_size, pad_size)
                 chunk_num = len(hss)
                 mel_list = []
                 for i, hs in enumerate(hss):
...
...
@@ -139,7 +139,7 @@ def ort_predict(args):
                         sub_mel = sub_mel[pad_size:]
                     else:
                         # 倒数几块的右侧也可能没有 pad 够
-                        sub_mel = sub_mel[pad_size:(chunk_size + pad_size) -
+                        sub_mel = sub_mel[pad_size:(block_size + pad_size) -
                                           sub_mel.shape[0]]
                     mel_list.append(sub_mel)
                 mel = np.concatenate(mel_list, axis=0)
...
...
@@ -236,7 +236,7 @@ def parse_args():
         default=False,
         help="whether use streaming acoustic model")
     parser.add_argument(
-        "--chunk_size", type=int, default=42, help="chunk size of am streaming")
+        "--block_size", type=int, default=42, help="block size of am streaming")
     parser.add_argument(
         "--pad_size", type=int, default=12, help="pad size of am streaming")
...
...
paddlespeech/t2s/exps/syn_utils.py
浏览文件 @
004ab8d0
...
...
@@ -75,13 +75,13 @@ def denorm(data, mean, std):
     return data * std + mean
-def get_chunks(data, chunk_size: int, pad_size: int):
+def get_chunks(data, block_size: int, pad_size: int):
     data_len = data.shape[1]
     chunks = []
-    n = math.ceil(data_len / chunk_size)
+    n = math.ceil(data_len / block_size)
     for i in range(n):
-        start = max(0, i * chunk_size - pad_size)
-        end = min((i + 1) * chunk_size + pad_size, data_len)
+        start = max(0, i * block_size - pad_size)
+        end = min((i + 1) * block_size + pad_size, data_len)
         chunks.append(data[:, start:end, :])
     return chunks
...
...
paddlespeech/t2s/exps/synthesize_streaming.py
浏览文件 @
004ab8d0
...
...
@@ -133,7 +133,7 @@ def evaluate(args):
     N = 0
     T = 0
-    chunk_size = args.chunk_size
+    block_size = args.block_size
     pad_size = args.pad_size
     for utt_id, sentence in sentences:
...
...
@@ -153,7 +153,7 @@ def evaluate(args):
             # acoustic model
             orig_hs = am_encoder_infer(phone_ids)
             if args.am_streaming:
-                hss = get_chunks(orig_hs, chunk_size, pad_size)
+                hss = get_chunks(orig_hs, block_size, pad_size)
                 chunk_num = len(hss)
                 mel_list = []
                 for i, hs in enumerate(hss):
...
...
@@ -171,7 +171,7 @@ def evaluate(args):
                         sub_mel = sub_mel[pad_size:]
                     else:
                         # 倒数几块的右侧也可能没有 pad 够
-                        sub_mel = sub_mel[pad_size:(chunk_size + pad_size) -
+                        sub_mel = sub_mel[pad_size:(block_size + pad_size) -
                                           sub_mel.shape[0]]
                     mel_list.append(sub_mel)
                 mel = paddle.concat(mel_list, axis=0)
...
...
@@ -277,7 +277,7 @@ def parse_args():
         default=False,
         help="whether use streaming acoustic model")
     parser.add_argument(
-        "--chunk_size", type=int, default=42, help="chunk size of am streaming")
+        "--block_size", type=int, default=42, help="block size of am streaming")
     parser.add_argument(
         "--pad_size", type=int, default=12, help="pad size of am streaming")
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录