Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
8b1c1ec4
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
10 个月 前同步成功
通知
200
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
8b1c1ec4
编写于
5月 31, 2022
作者:
L
liangym
提交者:
GitHub
5月 31, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'PaddlePaddle:develop' into update_engine
上级
4a11257d
1a6df85f
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
27 addition
and
27 deletion
+27
-27
paddlespeech/resource/resource.py
paddlespeech/resource/resource.py
+11
-11
paddlespeech/t2s/exps/inference_streaming.py
paddlespeech/t2s/exps/inference_streaming.py
+4
-4
paddlespeech/t2s/exps/ort_predict_streaming.py
paddlespeech/t2s/exps/ort_predict_streaming.py
+4
-4
paddlespeech/t2s/exps/syn_utils.py
paddlespeech/t2s/exps/syn_utils.py
+4
-4
paddlespeech/t2s/exps/synthesize_streaming.py
paddlespeech/t2s/exps/synthesize_streaming.py
+4
-4
未找到文件。
paddlespeech/resource/resource.py
浏览文件 @
8b1c1ec4
...
...
@@ -82,7 +82,7 @@ class CommonTaskResource:
self
.
model_tag
=
model_tag
self
.
version
=
version
self
.
res_dict
=
self
.
pretrained_models
[
model_tag
][
version
]
self
.
format_path
(
self
.
res_dict
)
self
.
_
format_path
(
self
.
res_dict
)
self
.
res_dir
=
self
.
_fetch
(
self
.
res_dict
,
self
.
_get_model_dir
(
model_type
))
else
:
...
...
@@ -90,19 +90,10 @@ class CommonTaskResource:
self
.
voc_model_tag
=
model_tag
self
.
voc_version
=
version
self
.
voc_res_dict
=
self
.
pretrained_models
[
model_tag
][
version
]
self
.
format_path
(
self
.
voc_res_dict
)
self
.
_
format_path
(
self
.
voc_res_dict
)
self
.
voc_res_dir
=
self
.
_fetch
(
self
.
voc_res_dict
,
self
.
_get_model_dir
(
model_type
))
@
staticmethod
def
format_path
(
res_dict
:
Dict
[
str
,
str
]):
for
k
,
v
in
res_dict
.
items
():
if
'/'
in
v
:
if
v
.
startswith
(
'https://'
)
or
v
.
startswith
(
'http://'
):
continue
else
:
res_dict
[
k
]
=
os
.
path
.
join
(
*
(
v
.
split
(
'/'
)))
@
staticmethod
def
get_model_class
(
model_name
)
->
List
[
object
]:
"""Dynamic import model class.
...
...
@@ -231,3 +222,12 @@ class CommonTaskResource:
os.PathLike: Directory of model resource.
"""
return
download_and_decompress
(
res_dict
,
target_dir
)
@
staticmethod
def
_format_path
(
res_dict
:
Dict
[
str
,
str
]):
for
k
,
v
in
res_dict
.
items
():
if
isinstance
(
v
,
str
)
and
'/'
in
v
:
if
v
.
startswith
(
'https://'
)
or
v
.
startswith
(
'http://'
):
continue
else
:
res_dict
[
k
]
=
os
.
path
.
join
(
*
(
v
.
split
(
'/'
)))
paddlespeech/t2s/exps/inference_streaming.py
浏览文件 @
8b1c1ec4
...
...
@@ -90,7 +90,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
@@ -169,7 +169,7 @@ def main():
N
=
0
T
=
0
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
get_tone_ids
=
False
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -189,7 +189,7 @@ def main():
am_encoder_infer_predictor
,
input
=
phones
)
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
,
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
,
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -211,7 +211,7 @@ def main():
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
np
.
concatenate
(
mel_list
,
axis
=
0
)
...
...
paddlespeech/t2s/exps/ort_predict_streaming.py
浏览文件 @
8b1c1ec4
...
...
@@ -97,7 +97,7 @@ def ort_predict(args):
T
=
0
merge_sentences
=
True
get_tone_ids
=
False
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -115,7 +115,7 @@ def ort_predict(args):
orig_hs
=
am_encoder_infer_sess
.
run
(
None
,
input_feed
=
{
'text'
:
phone_ids
})
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
[
0
],
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
[
0
],
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -139,7 +139,7 @@ def ort_predict(args):
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
np
.
concatenate
(
mel_list
,
axis
=
0
)
...
...
@@ -236,7 +236,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
paddlespeech/t2s/exps/syn_utils.py
浏览文件 @
8b1c1ec4
...
...
@@ -75,13 +75,13 @@ def denorm(data, mean, std):
return
data
*
std
+
mean
def
get_chunks
(
data
,
chun
k_size
:
int
,
pad_size
:
int
):
def
get_chunks
(
data
,
bloc
k_size
:
int
,
pad_size
:
int
):
data_len
=
data
.
shape
[
1
]
chunks
=
[]
n
=
math
.
ceil
(
data_len
/
chun
k_size
)
n
=
math
.
ceil
(
data_len
/
bloc
k_size
)
for
i
in
range
(
n
):
start
=
max
(
0
,
i
*
chun
k_size
-
pad_size
)
end
=
min
((
i
+
1
)
*
chun
k_size
+
pad_size
,
data_len
)
start
=
max
(
0
,
i
*
bloc
k_size
-
pad_size
)
end
=
min
((
i
+
1
)
*
bloc
k_size
+
pad_size
,
data_len
)
chunks
.
append
(
data
[:,
start
:
end
,
:])
return
chunks
...
...
paddlespeech/t2s/exps/synthesize_streaming.py
浏览文件 @
8b1c1ec4
...
...
@@ -133,7 +133,7 @@ def evaluate(args):
N
=
0
T
=
0
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -153,7 +153,7 @@ def evaluate(args):
# acoustic model
orig_hs
=
am_encoder_infer
(
phone_ids
)
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
,
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
,
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -171,7 +171,7 @@ def evaluate(args):
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
paddle
.
concat
(
mel_list
,
axis
=
0
)
...
...
@@ -277,7 +277,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录