Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
4b81cd0f
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4b81cd0f
编写于
6月 01, 2022
作者:
Y
Yang Zhou
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:SmileGoat/PaddleSpeech into refactor_file_struct
上级
156ccfe4
0d34c624
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
43 addition
and
28 deletion
+43
-28
docker/ubuntu18-cpu/Dockerfile
docker/ubuntu18-cpu/Dockerfile
+15
-0
paddlespeech/resource/resource.py
paddlespeech/resource/resource.py
+11
-11
paddlespeech/server/engine/tts/online/python/tts_engine.py
paddlespeech/server/engine/tts/online/python/tts_engine.py
+1
-1
paddlespeech/t2s/exps/inference_streaming.py
paddlespeech/t2s/exps/inference_streaming.py
+4
-4
paddlespeech/t2s/exps/ort_predict_streaming.py
paddlespeech/t2s/exps/ort_predict_streaming.py
+4
-4
paddlespeech/t2s/exps/syn_utils.py
paddlespeech/t2s/exps/syn_utils.py
+4
-4
paddlespeech/t2s/exps/synthesize_streaming.py
paddlespeech/t2s/exps/synthesize_streaming.py
+4
-4
未找到文件。
docker/ubuntu18-cpu/Dockerfile
0 → 100644
浏览文件 @
4b81cd0f
FROM
registry.baidubce.com/paddlepaddle/paddle:2.2.2
LABEL
maintainer="paddlesl@baidu.com"
RUN
git clone
--depth
1 https://github.com/PaddlePaddle/PaddleSpeech.git /home/PaddleSpeech
RUN
pip3 uninstall mccabe
-y
;
exit
0
;
RUN
pip3
install
multiprocess
==
0.70.12 importlib-metadata
==
4.2.0
dill
==
0.3.4
RUN
cd
/home/PaddleSpeech/audio
RUN
python setup.py bdist_wheel
RUN
cd
/home/PaddleSpeech
RUN
python setup.py bdist_wheel
RUN
pip
install
audio/dist/
*
.whl dist/
*
.whl
WORKDIR
/home/PaddleSpeech/
paddlespeech/resource/resource.py
浏览文件 @
4b81cd0f
...
...
@@ -82,7 +82,7 @@ class CommonTaskResource:
self
.
model_tag
=
model_tag
self
.
version
=
version
self
.
res_dict
=
self
.
pretrained_models
[
model_tag
][
version
]
self
.
format_path
(
self
.
res_dict
)
self
.
_
format_path
(
self
.
res_dict
)
self
.
res_dir
=
self
.
_fetch
(
self
.
res_dict
,
self
.
_get_model_dir
(
model_type
))
else
:
...
...
@@ -90,19 +90,10 @@ class CommonTaskResource:
self
.
voc_model_tag
=
model_tag
self
.
voc_version
=
version
self
.
voc_res_dict
=
self
.
pretrained_models
[
model_tag
][
version
]
self
.
format_path
(
self
.
voc_res_dict
)
self
.
_
format_path
(
self
.
voc_res_dict
)
self
.
voc_res_dir
=
self
.
_fetch
(
self
.
voc_res_dict
,
self
.
_get_model_dir
(
model_type
))
@
staticmethod
def
format_path
(
res_dict
:
Dict
[
str
,
str
]):
for
k
,
v
in
res_dict
.
items
():
if
'/'
in
v
:
if
v
.
startswith
(
'https://'
)
or
v
.
startswith
(
'http://'
):
continue
else
:
res_dict
[
k
]
=
os
.
path
.
join
(
*
(
v
.
split
(
'/'
)))
@
staticmethod
def
get_model_class
(
model_name
)
->
List
[
object
]:
"""Dynamic import model class.
...
...
@@ -231,3 +222,12 @@ class CommonTaskResource:
os.PathLike: Directory of model resource.
"""
return
download_and_decompress
(
res_dict
,
target_dir
)
@
staticmethod
def
_format_path
(
res_dict
:
Dict
[
str
,
str
]):
for
k
,
v
in
res_dict
.
items
():
if
isinstance
(
v
,
str
)
and
'/'
in
v
:
if
v
.
startswith
(
'https://'
)
or
v
.
startswith
(
'http://'
):
continue
else
:
res_dict
[
k
]
=
os
.
path
.
join
(
*
(
v
.
split
(
'/'
)))
paddlespeech/server/engine/tts/online/python/tts_engine.py
浏览文件 @
4b81cd0f
...
...
@@ -44,7 +44,7 @@ class TTSServerExecutor(TTSExecutor):
self
.
voc_block
=
voc_block
self
.
voc_pad
=
voc_pad
self
.
task_resource
=
CommonTaskResource
(
task
=
'tts'
,
model_format
=
'
stat
ic'
,
inference_mode
=
'online'
)
task
=
'tts'
,
model_format
=
'
dynam
ic'
,
inference_mode
=
'online'
)
def
get_model_info
(
self
,
field
:
str
,
...
...
paddlespeech/t2s/exps/inference_streaming.py
浏览文件 @
4b81cd0f
...
...
@@ -90,7 +90,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
@@ -169,7 +169,7 @@ def main():
N
=
0
T
=
0
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
get_tone_ids
=
False
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -189,7 +189,7 @@ def main():
am_encoder_infer_predictor
,
input
=
phones
)
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
,
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
,
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -211,7 +211,7 @@ def main():
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
np
.
concatenate
(
mel_list
,
axis
=
0
)
...
...
paddlespeech/t2s/exps/ort_predict_streaming.py
浏览文件 @
4b81cd0f
...
...
@@ -97,7 +97,7 @@ def ort_predict(args):
T
=
0
merge_sentences
=
True
get_tone_ids
=
False
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -115,7 +115,7 @@ def ort_predict(args):
orig_hs
=
am_encoder_infer_sess
.
run
(
None
,
input_feed
=
{
'text'
:
phone_ids
})
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
[
0
],
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
[
0
],
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -139,7 +139,7 @@ def ort_predict(args):
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
np
.
concatenate
(
mel_list
,
axis
=
0
)
...
...
@@ -236,7 +236,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
paddlespeech/t2s/exps/syn_utils.py
浏览文件 @
4b81cd0f
...
...
@@ -75,13 +75,13 @@ def denorm(data, mean, std):
return
data
*
std
+
mean
def
get_chunks
(
data
,
chun
k_size
:
int
,
pad_size
:
int
):
def
get_chunks
(
data
,
bloc
k_size
:
int
,
pad_size
:
int
):
data_len
=
data
.
shape
[
1
]
chunks
=
[]
n
=
math
.
ceil
(
data_len
/
chun
k_size
)
n
=
math
.
ceil
(
data_len
/
bloc
k_size
)
for
i
in
range
(
n
):
start
=
max
(
0
,
i
*
chun
k_size
-
pad_size
)
end
=
min
((
i
+
1
)
*
chun
k_size
+
pad_size
,
data_len
)
start
=
max
(
0
,
i
*
bloc
k_size
-
pad_size
)
end
=
min
((
i
+
1
)
*
bloc
k_size
+
pad_size
,
data_len
)
chunks
.
append
(
data
[:,
start
:
end
,
:])
return
chunks
...
...
paddlespeech/t2s/exps/synthesize_streaming.py
浏览文件 @
4b81cd0f
...
...
@@ -133,7 +133,7 @@ def evaluate(args):
N
=
0
T
=
0
chunk_size
=
args
.
chun
k_size
block_size
=
args
.
bloc
k_size
pad_size
=
args
.
pad_size
for
utt_id
,
sentence
in
sentences
:
...
...
@@ -153,7 +153,7 @@ def evaluate(args):
# acoustic model
orig_hs
=
am_encoder_infer
(
phone_ids
)
if
args
.
am_streaming
:
hss
=
get_chunks
(
orig_hs
,
chun
k_size
,
pad_size
)
hss
=
get_chunks
(
orig_hs
,
bloc
k_size
,
pad_size
)
chunk_num
=
len
(
hss
)
mel_list
=
[]
for
i
,
hs
in
enumerate
(
hss
):
...
...
@@ -171,7 +171,7 @@ def evaluate(args):
sub_mel
=
sub_mel
[
pad_size
:]
else
:
# 倒数几块的右侧也可能没有 pad 够
sub_mel
=
sub_mel
[
pad_size
:(
chun
k_size
+
pad_size
)
-
sub_mel
=
sub_mel
[
pad_size
:(
bloc
k_size
+
pad_size
)
-
sub_mel
.
shape
[
0
]]
mel_list
.
append
(
sub_mel
)
mel
=
paddle
.
concat
(
mel_list
,
axis
=
0
)
...
...
@@ -277,7 +277,7 @@ def parse_args():
default
=
False
,
help
=
"whether use streaming acoustic model"
)
parser
.
add_argument
(
"--
chunk_size"
,
type
=
int
,
default
=
42
,
help
=
"chun
k size of am streaming"
)
"--
block_size"
,
type
=
int
,
default
=
42
,
help
=
"bloc
k size of am streaming"
)
parser
.
add_argument
(
"--pad_size"
,
type
=
int
,
default
=
12
,
help
=
"pad size of am streaming"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录