Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
fa6e44e4
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fa6e44e4
编写于
5月 17, 2022
作者:
K
KP
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add paddlespeech.resource.
上级
775c4bef
变更
31
展开全部
显示空白变更内容
内联
并排
Showing
31 changed file
with
1382 addition
and
1507 deletion
+1382
-1507
paddlespeech/cli/asr/infer.py
paddlespeech/cli/asr/infer.py
+13
-18
paddlespeech/cli/asr/pretrained_models.py
paddlespeech/cli/asr/pretrained_models.py
+0
-151
paddlespeech/cli/base_commands.py
paddlespeech/cli/base_commands.py
+59
-4
paddlespeech/cli/cls/infer.py
paddlespeech/cli/cls/infer.py
+14
-17
paddlespeech/cli/cls/pretrained_models.py
paddlespeech/cli/cls/pretrained_models.py
+0
-47
paddlespeech/cli/executor.py
paddlespeech/cli/executor.py
+16
-33
paddlespeech/cli/st/infer.py
paddlespeech/cli/st/infer.py
+20
-16
paddlespeech/cli/st/pretrained_models.py
paddlespeech/cli/st/pretrained_models.py
+0
-35
paddlespeech/cli/stats/infer.py
paddlespeech/cli/stats/infer.py
+0
-146
paddlespeech/cli/text/infer.py
paddlespeech/cli/text/infer.py
+13
-17
paddlespeech/cli/text/pretrained_models.py
paddlespeech/cli/text/pretrained_models.py
+0
-54
paddlespeech/cli/tts/infer.py
paddlespeech/cli/tts/infer.py
+36
-34
paddlespeech/cli/tts/pretrained_models.py
paddlespeech/cli/tts/pretrained_models.py
+0
-300
paddlespeech/cli/vector/infer.py
paddlespeech/cli/vector/infer.py
+14
-15
paddlespeech/cli/vector/pretrained_models.py
paddlespeech/cli/vector/pretrained_models.py
+0
-36
paddlespeech/resource/__init__.py
paddlespeech/resource/__init__.py
+2
-2
paddlespeech/resource/pretrained_models.py
paddlespeech/resource/pretrained_models.py
+822
-0
paddlespeech/resource/resource.py
paddlespeech/resource/resource.py
+222
-0
paddlespeech/server/bin/paddlespeech_server.py
paddlespeech/server/bin/paddlespeech_server.py
+20
-90
paddlespeech/server/engine/asr/online/asr_engine.py
paddlespeech/server/engine/asr/online/asr_engine.py
+20
-20
paddlespeech/server/engine/asr/online/pretrained_models.py
paddlespeech/server/engine/asr/online/pretrained_models.py
+0
-70
paddlespeech/server/engine/asr/paddleinference/asr_engine.py
paddlespeech/server/engine/asr/paddleinference/asr_engine.py
+13
-12
paddlespeech/server/engine/asr/paddleinference/pretrained_models.py
...ch/server/engine/asr/paddleinference/pretrained_models.py
+0
-34
paddlespeech/server/engine/cls/paddleinference/cls_engine.py
paddlespeech/server/engine/cls/paddleinference/cls_engine.py
+10
-8
paddlespeech/server/engine/cls/paddleinference/pretrained_models.py
...ch/server/engine/cls/paddleinference/pretrained_models.py
+0
-58
paddlespeech/server/engine/tts/online/onnx/pretrained_models.py
...speech/server/engine/tts/online/onnx/pretrained_models.py
+0
-69
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
+25
-16
paddlespeech/server/engine/tts/online/python/pretrained_models.py
...eech/server/engine/tts/online/python/pretrained_models.py
+0
-73
paddlespeech/server/engine/tts/online/python/tts_engine.py
paddlespeech/server/engine/tts/online/python/tts_engine.py
+33
-24
paddlespeech/server/engine/tts/paddleinference/pretrained_models.py
...ch/server/engine/tts/paddleinference/pretrained_models.py
+0
-87
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
+30
-21
未找到文件。
paddlespeech/cli/asr/infer.py
浏览文件 @
fa6e44e4
...
@@ -33,11 +33,8 @@ from ..utils import CLI_TIMER
...
@@ -33,11 +33,8 @@ from ..utils import CLI_TIMER
from
..utils
import
MODEL_HOME
from
..utils
import
MODEL_HOME
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
..utils
import
timer_register
from
..utils
import
timer_register
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.transform.transformation
import
Transformation
from
paddlespeech.s2t.transform.transformation
import
Transformation
from
paddlespeech.s2t.utils.dynamic_import
import
dynamic_import
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
__all__
=
[
'ASRExecutor'
]
__all__
=
[
'ASRExecutor'
]
...
@@ -46,10 +43,7 @@ __all__ = ['ASRExecutor']
...
@@ -46,10 +43,7 @@ __all__ = ['ASRExecutor']
@
timer_register
@
timer_register
class
ASRExecutor
(
BaseExecutor
):
class
ASRExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
task
=
'asr'
,
inference_type
=
'offline'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.asr'
,
add_help
=
True
)
prog
=
'paddlespeech.asr'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -59,7 +53,8 @@ class ASRExecutor(BaseExecutor):
...
@@ -59,7 +53,8 @@ class ASRExecutor(BaseExecutor):
type
=
str
,
type
=
str
,
default
=
'conformer_wenetspeech'
,
default
=
'conformer_wenetspeech'
,
choices
=
[
choices
=
[
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
pretrained_models
.
keys
()
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
task_resource
.
pretrained_models
.
keys
()
],
],
help
=
'Choose model type of asr task.'
)
help
=
'Choose model type of asr task.'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -141,14 +136,14 @@ class ASRExecutor(BaseExecutor):
...
@@ -141,14 +136,14 @@ class ASRExecutor(BaseExecutor):
if
cfg_path
is
None
or
ckpt_path
is
None
:
if
cfg_path
is
None
or
ckpt_path
is
None
:
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
res_path
=
self
.
_get_pretrained_path
(
tag
)
# wenetspeech_zh
self
.
task_resource
.
set_task_model
(
tag
,
version
=
None
)
self
.
res_path
=
res_path
self
.
res_path
=
self
.
task_resource
.
res_dir
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
res_path
,
self
.
pretrained_models
[
tag
]
[
'cfg_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
ckpt_path
=
os
.
path
.
join
(
res_path
,
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'ckpt_path'
]
+
".pdparams"
)
self
.
task_resource
.
res_dict
[
'ckpt_path'
]
+
".pdparams"
)
logger
.
info
(
res_path
)
logger
.
info
(
self
.
res_path
)
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
...
@@ -172,8 +167,8 @@ class ASRExecutor(BaseExecutor):
...
@@ -172,8 +167,8 @@ class ASRExecutor(BaseExecutor):
self
.
collate_fn_test
=
SpeechCollator
.
from_config
(
self
.
config
)
self
.
collate_fn_test
=
SpeechCollator
.
from_config
(
self
.
config
)
self
.
text_feature
=
TextFeaturizer
(
self
.
text_feature
=
TextFeaturizer
(
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
lm_url
=
self
.
pretrained_models
[
tag
]
[
'lm_url'
]
lm_url
=
self
.
resource
.
res_dict
[
'lm_url'
]
lm_md5
=
self
.
pretrained_models
[
tag
]
[
'lm_md5'
]
lm_md5
=
self
.
resource
.
res_dict
[
'lm_md5'
]
self
.
download_lm
(
self
.
download_lm
(
lm_url
,
lm_url
,
os
.
path
.
dirname
(
self
.
config
.
decode
.
lang_model_path
),
lm_md5
)
os
.
path
.
dirname
(
self
.
config
.
decode
.
lang_model_path
),
lm_md5
)
...
@@ -191,7 +186,7 @@ class ASRExecutor(BaseExecutor):
...
@@ -191,7 +186,7 @@ class ASRExecutor(BaseExecutor):
raise
Exception
(
"wrong type"
)
raise
Exception
(
"wrong type"
)
model_name
=
model_type
[:
model_type
.
rindex
(
model_name
=
model_type
[:
model_type
.
rindex
(
'_'
)]
# model_type: {model_name}_{dataset}
'_'
)]
# model_type: {model_name}_{dataset}
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_class
=
self
.
task_resource
.
get_model_class
(
model_name
)
model_conf
=
self
.
config
model_conf
=
self
.
config
model
=
model_class
.
from_config
(
model_conf
)
model
=
model_class
.
from_config
(
model_conf
)
self
.
model
=
model
self
.
model
=
model
...
@@ -438,7 +433,7 @@ class ASRExecutor(BaseExecutor):
...
@@ -438,7 +433,7 @@ class ASRExecutor(BaseExecutor):
if
not
parser_args
.
verbose
:
if
not
parser_args
.
verbose
:
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
task_source
=
self
.
get_
task
_source
(
parser_args
.
input
)
task_source
=
self
.
get_
input
_source
(
parser_args
.
input
)
task_results
=
OrderedDict
()
task_results
=
OrderedDict
()
has_exceptions
=
False
has_exceptions
=
False
...
...
paddlespeech/cli/asr/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
# The tags for pretrained_models should be "{model_name}[_{dataset}][-{lang}][-...]".
# e.g. "conformer_wenetspeech-zh-16k" and "panns_cnn6-32k".
# Command line and python api use "{model_name}[_{dataset}]" as --model, usage:
# "paddlespeech asr --model conformer_wenetspeech --lang zh --sr 16000 --input ./input.wav"
"conformer_wenetspeech-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1_conformer_wenetspeech_ckpt_0.1.1.model.tar.gz'
,
'md5'
:
'76cb19ed857e6623856b7cd7ebbfeda4'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/conformer/checkpoints/wenetspeech'
,
},
"conformer_online_wenetspeech-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz'
,
'md5'
:
'b8c02632b04da34aca88459835be54a6'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/chunk_conformer/checkpoints/avg_10'
,
},
"conformer_online_multicn-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/multi_cn/asr1/asr1_chunk_conformer_multi_cn_ckpt_0.2.0.model.tar.gz'
,
'md5'
:
'7989b3248c898070904cf042fd656003'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/chunk_conformer/checkpoints/multi_cn'
,
},
"conformer_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz'
,
'md5'
:
'3f073eccfa7bb14e0c6867d65fc0dc3a'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/conformer/checkpoints/avg_30'
,
},
"conformer_online_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz'
,
'md5'
:
'b374cfb93537761270b6224fb0bfc26a'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/chunk_conformer/checkpoints/avg_30'
,
},
"transformer_librispeech-en-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr1/asr1_transformer_librispeech_ckpt_0.1.1.model.tar.gz'
,
'md5'
:
'2c667da24922aad391eacafe37bc1660'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/transformer/checkpoints/avg_10'
,
},
"deepspeech2online_wenetspeech-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz'
,
'md5'
:
'e393d4d274af0f6967db24fc146e8074'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2_online/checkpoints/avg_10'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
"deepspeech2offline_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz'
,
'md5'
:
'932c3593d62fe5c741b59b31318aa314'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2/checkpoints/avg_1'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
"deepspeech2online_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz'
,
'md5'
:
'98b87b171b7240b7cae6e07d8d0bc9be'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2_online/checkpoints/avg_1'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
"deepspeech2offline_librispeech-en-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr0/asr0_deepspeech2_librispeech_ckpt_0.1.1.model.tar.gz'
,
'md5'
:
'f5666c81ad015c8de03aac2bc92e5762'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2/checkpoints/avg_1'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm'
,
'lm_md5'
:
'099a601759d467cd0a8523ff939819c5'
},
}
model_alias
=
{
"deepspeech2offline"
:
"paddlespeech.s2t.models.ds2:DeepSpeech2Model"
,
"deepspeech2online"
:
"paddlespeech.s2t.models.ds2_online:DeepSpeech2ModelOnline"
,
"conformer"
:
"paddlespeech.s2t.models.u2:U2Model"
,
"conformer_online"
:
"paddlespeech.s2t.models.u2:U2Model"
,
"transformer"
:
"paddlespeech.s2t.models.u2:U2Model"
,
"wenetspeech"
:
"paddlespeech.s2t.models.u2:U2Model"
,
}
paddlespeech/cli/base_commands.py
浏览文件 @
fa6e44e4
...
@@ -11,17 +11,18 @@
...
@@ -11,17 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
argparse
from
typing
import
List
from
typing
import
List
from
prettytable
import
PrettyTable
from
..resource
import
CommonTaskResource
from
.entry
import
commands
from
.entry
import
commands
from
.utils
import
cli_register
from
.utils
import
cli_register
from
.utils
import
explicit_command_register
from
.utils
import
explicit_command_register
from
.utils
import
get_command
from
.utils
import
get_command
__all__
=
[
__all__
=
[
'BaseCommand'
,
'HelpCommand'
,
'StatsCommand'
]
'BaseCommand'
,
'HelpCommand'
,
]
@
cli_register
(
name
=
'paddlespeech'
)
@
cli_register
(
name
=
'paddlespeech'
)
...
@@ -76,6 +77,59 @@ class VersionCommand:
...
@@ -76,6 +77,59 @@ class VersionCommand:
return
True
return
True
model_name_format
=
{
'asr'
:
'Model-Language-Sample Rate'
,
'cls'
:
'Model-Sample Rate'
,
'st'
:
'Model-Source language-Target language'
,
'text'
:
'Model-Task-Language'
,
'tts'
:
'Model-Language'
,
'vector'
:
'Model-Sample Rate'
}
@
cli_register
(
name
=
'paddlespeech.stats'
,
description
=
'Get speech tasks support models list.'
)
class
StatsCommand
:
def
__init__
(
self
):
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.stats'
,
add_help
=
True
)
self
.
task_choices
=
[
'asr'
,
'cls'
,
'st'
,
'text'
,
'tts'
,
'vector'
]
self
.
parser
.
add_argument
(
'--task'
,
type
=
str
,
default
=
'asr'
,
choices
=
self
.
task_choices
,
help
=
'Choose speech task.'
,
required
=
True
)
def
show_support_models
(
self
,
pretrained_models
:
dict
):
fields
=
model_name_format
[
self
.
task
].
split
(
"-"
)
table
=
PrettyTable
(
fields
)
for
key
in
pretrained_models
:
table
.
add_row
(
key
.
split
(
"-"
))
print
(
table
)
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
parser_args
=
self
.
parser
.
parse_args
(
argv
)
self
.
task
=
parser_args
.
task
if
self
.
task
not
in
self
.
task_choices
:
print
(
"Please input correct speech task, choices = "
+
str
(
self
.
task_choices
))
return
pretrained_models
=
CommonTaskResource
(
task
=
self
.
task
).
pretrained_models
try
:
print
(
"Here is the list of {} pretrained models released by PaddleSpeech that can be used by command line and python API"
.
format
(
self
.
task
.
upper
()))
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of {} pretrained models."
.
format
(
self
.
task
.
upper
()))
# Dynamic import when running specific command
# Dynamic import when running specific command
_commands
=
{
_commands
=
{
'asr'
:
[
'Speech to text infer command.'
,
'ASRExecutor'
],
'asr'
:
[
'Speech to text infer command.'
,
'ASRExecutor'
],
...
@@ -91,3 +145,4 @@ for com, info in _commands.items():
...
@@ -91,3 +145,4 @@ for com, info in _commands.items():
name
=
'paddlespeech.{}'
.
format
(
com
),
name
=
'paddlespeech.{}'
.
format
(
com
),
description
=
info
[
0
],
description
=
info
[
0
],
cls
=
'paddlespeech.cli.{}.{}'
.
format
(
com
,
info
[
1
]))
cls
=
'paddlespeech.cli.{}.{}'
.
format
(
com
,
info
[
1
]))
\ No newline at end of file
paddlespeech/cli/cls/infer.py
浏览文件 @
fa6e44e4
...
@@ -21,26 +21,19 @@ from typing import Union
...
@@ -21,26 +21,19 @@ from typing import Union
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
import
yaml
import
yaml
from
paddleaudio
import
load
from
paddleaudio.features
import
LogMelSpectrogram
from
paddlespeech.utils.dynamic_import
import
dynamic_import
from
..executor
import
BaseExecutor
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..log
import
logger
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
paddleaudio
import
load
from
.pretrained_models
import
pretrained_models
from
paddleaudio.features
import
LogMelSpectrogram
__all__
=
[
'CLSExecutor'
]
__all__
=
[
'CLSExecutor'
]
class
CLSExecutor
(
BaseExecutor
):
class
CLSExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
task
=
'cls'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.cls'
,
add_help
=
True
)
prog
=
'paddlespeech.cls'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -50,7 +43,8 @@ class CLSExecutor(BaseExecutor):
...
@@ -50,7 +43,8 @@ class CLSExecutor(BaseExecutor):
type
=
str
,
type
=
str
,
default
=
'panns_cnn14'
,
default
=
'panns_cnn14'
,
choices
=
[
choices
=
[
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
pretrained_models
.
keys
()
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
task_resource
.
pretrained_models
.
keys
()
],
],
help
=
'Choose model type of cls task.'
)
help
=
'Choose model type of cls task.'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -103,13 +97,16 @@ class CLSExecutor(BaseExecutor):
...
@@ -103,13 +97,16 @@ class CLSExecutor(BaseExecutor):
if
label_file
is
None
or
ckpt_path
is
None
:
if
label_file
is
None
or
ckpt_path
is
None
:
tag
=
model_type
+
'-'
+
'32k'
# panns_cnn14-32k
tag
=
model_type
+
'-'
+
'32k'
# panns_cnn14-32k
self
.
res_path
=
self
.
_get_pretrained_path
(
tag
)
self
.
task_resource
.
set_task_model
(
tag
,
version
=
None
)
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'cfg_path'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
label_file
=
os
.
path
.
join
(
self
.
label_file
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'label_file'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'label_file'
])
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'ckpt_path'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'ckpt_path'
])
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
label_file
=
os
.
path
.
abspath
(
label_file
)
self
.
label_file
=
os
.
path
.
abspath
(
label_file
)
...
@@ -126,7 +123,7 @@ class CLSExecutor(BaseExecutor):
...
@@ -126,7 +123,7 @@ class CLSExecutor(BaseExecutor):
self
.
_label_list
.
append
(
line
.
strip
())
self
.
_label_list
.
append
(
line
.
strip
())
# model
# model
model_class
=
dynamic_import
(
model_type
,
self
.
model_alias
)
model_class
=
self
.
task_resource
.
get_model_class
(
model_type
)
model_dict
=
paddle
.
load
(
self
.
ckpt_path
)
model_dict
=
paddle
.
load
(
self
.
ckpt_path
)
self
.
model
=
model_class
(
extract_embedding
=
False
)
self
.
model
=
model_class
(
extract_embedding
=
False
)
self
.
model
.
set_state_dict
(
model_dict
)
self
.
model
.
set_state_dict
(
model_dict
)
...
@@ -203,7 +200,7 @@ class CLSExecutor(BaseExecutor):
...
@@ -203,7 +200,7 @@ class CLSExecutor(BaseExecutor):
if
not
parser_args
.
verbose
:
if
not
parser_args
.
verbose
:
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
task_source
=
self
.
get_
task
_source
(
parser_args
.
input
)
task_source
=
self
.
get_
input
_source
(
parser_args
.
input
)
task_results
=
OrderedDict
()
task_results
=
OrderedDict
()
has_exceptions
=
False
has_exceptions
=
False
...
...
paddlespeech/cli/cls/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
# The tags for pretrained_models should be "{model_name}[_{dataset}][-{lang}][-...]".
# e.g. "conformer_wenetspeech-zh-16k", "transformer_aishell-zh-16k" and "panns_cnn6-32k".
# Command line and python api use "{model_name}[_{dataset}]" as --model, usage:
# "paddlespeech asr --model conformer_wenetspeech --lang zh --sr 16000 --input ./input.wav"
"panns_cnn6-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/panns_cnn6.tar.gz'
,
'md5'
:
'4cf09194a95df024fd12f84712cf0f9c'
,
'cfg_path'
:
'panns.yaml'
,
'ckpt_path'
:
'cnn6.pdparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
"panns_cnn10-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/panns_cnn10.tar.gz'
,
'md5'
:
'cb8427b22176cc2116367d14847f5413'
,
'cfg_path'
:
'panns.yaml'
,
'ckpt_path'
:
'cnn10.pdparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
"panns_cnn14-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/panns_cnn14.tar.gz'
,
'md5'
:
'e3b9b5614a1595001161d0ab95edee97'
,
'cfg_path'
:
'panns.yaml'
,
'ckpt_path'
:
'cnn14.pdparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
}
model_alias
=
{
"panns_cnn6"
:
"paddlespeech.cls.models.panns:CNN6"
,
"panns_cnn10"
:
"paddlespeech.cls.models.panns:CNN10"
,
"panns_cnn14"
:
"paddlespeech.cls.models.panns:CNN14"
,
}
paddlespeech/cli/executor.py
浏览文件 @
fa6e44e4
...
@@ -24,9 +24,8 @@ from typing import Union
...
@@ -24,9 +24,8 @@ from typing import Union
import
paddle
import
paddle
from
..resource
import
CommonTaskResource
from
.log
import
logger
from
.log
import
logger
from
.utils
import
download_and_decompress
from
.utils
import
MODEL_HOME
class
BaseExecutor
(
ABC
):
class
BaseExecutor
(
ABC
):
...
@@ -34,11 +33,10 @@ class BaseExecutor(ABC):
...
@@ -34,11 +33,10 @@ class BaseExecutor(ABC):
An abstract executor of paddlespeech tasks.
An abstract executor of paddlespeech tasks.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
,
task
:
str
,
**
kwargs
):
self
.
_inputs
=
OrderedDict
()
self
.
_inputs
=
OrderedDict
()
self
.
_outputs
=
OrderedDict
()
self
.
_outputs
=
OrderedDict
()
self
.
pretrained_models
=
OrderedDict
()
self
.
task_resource
=
CommonTaskResource
(
task
=
task
,
**
kwargs
)
self
.
model_alias
=
OrderedDict
()
@
abstractmethod
@
abstractmethod
def
_init_from_path
(
self
,
*
args
,
**
kwargs
):
def
_init_from_path
(
self
,
*
args
,
**
kwargs
):
...
@@ -98,7 +96,7 @@ class BaseExecutor(ABC):
...
@@ -98,7 +96,7 @@ class BaseExecutor(ABC):
"""
"""
pass
pass
def
get_
task
_source
(
self
,
input_
:
Union
[
str
,
os
.
PathLike
,
None
]
def
get_
input
_source
(
self
,
input_
:
Union
[
str
,
os
.
PathLike
,
None
]
)
->
Dict
[
str
,
Union
[
str
,
os
.
PathLike
]]:
)
->
Dict
[
str
,
Union
[
str
,
os
.
PathLike
]]:
"""
"""
Get task input source from command line input.
Get task input source from command line input.
...
@@ -115,6 +113,8 @@ class BaseExecutor(ABC):
...
@@ -115,6 +113,8 @@ class BaseExecutor(ABC):
ret
=
OrderedDict
()
ret
=
OrderedDict
()
if
input_
is
None
:
# Take input from stdin
if
input_
is
None
:
# Take input from stdin
if
not
sys
.
stdin
.
isatty
(
):
# Avoid getting stuck when stdin is empty.
for
i
,
line
in
enumerate
(
sys
.
stdin
):
for
i
,
line
in
enumerate
(
sys
.
stdin
):
line
=
line
.
strip
()
line
=
line
.
strip
()
if
len
(
line
.
split
(
' '
))
==
1
:
if
len
(
line
.
split
(
' '
))
==
1
:
...
@@ -219,23 +219,6 @@ class BaseExecutor(ABC):
...
@@ -219,23 +219,6 @@ class BaseExecutor(ABC):
for
l
in
loggers
:
for
l
in
loggers
:
l
.
disabled
=
True
l
.
disabled
=
True
def
_get_pretrained_path
(
self
,
tag
:
str
)
->
os
.
PathLike
:
"""
Download and returns pretrained resources path of current task.
"""
support_models
=
list
(
self
.
pretrained_models
.
keys
())
assert
tag
in
self
.
pretrained_models
,
'The model "{}" you want to use has not been supported, please choose other models.
\n
The support models includes:
\n\t\t
{}
\n
'
.
format
(
tag
,
'
\n\t\t
'
.
join
(
support_models
))
res_path
=
os
.
path
.
join
(
MODEL_HOME
,
tag
)
decompressed_path
=
download_and_decompress
(
self
.
pretrained_models
[
tag
],
res_path
)
decompressed_path
=
os
.
path
.
abspath
(
decompressed_path
)
logger
.
info
(
'Use pretrained model stored in: {}'
.
format
(
decompressed_path
))
return
decompressed_path
def
show_rtf
(
self
,
info
:
Dict
[
str
,
List
[
float
]]):
def
show_rtf
(
self
,
info
:
Dict
[
str
,
List
[
float
]]):
"""
"""
Calculate rft of current task and show results.
Calculate rft of current task and show results.
...
...
paddlespeech/cli/st/infer.py
浏览文件 @
fa6e44e4
...
@@ -31,21 +31,22 @@ from ..log import logger
...
@@ -31,21 +31,22 @@ from ..log import logger
from
..utils
import
download_and_decompress
from
..utils
import
download_and_decompress
from
..utils
import
MODEL_HOME
from
..utils
import
MODEL_HOME
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
.pretrained_models
import
kaldi_bins
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.utils.dynamic_import
import
dynamic_import
__all__
=
[
"STExecutor"
]
__all__
=
[
"STExecutor"
]
kaldi_bins
=
{
"url"
:
"https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/kaldi_bins.tar.gz"
,
"md5"
:
"c0682303b3f3393dbf6ed4c4e35a53eb"
,
}
class
STExecutor
(
BaseExecutor
):
class
STExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
task
=
'st'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
kaldi_bins
=
kaldi_bins
self
.
kaldi_bins
=
kaldi_bins
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
...
@@ -57,7 +58,8 @@ class STExecutor(BaseExecutor):
...
@@ -57,7 +58,8 @@ class STExecutor(BaseExecutor):
type
=
str
,
type
=
str
,
default
=
"fat_st_ted"
,
default
=
"fat_st_ted"
,
choices
=
[
choices
=
[
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
pretrained_models
.
keys
()
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
task_resource
.
pretrained_models
.
keys
()
],
],
help
=
"Choose model type of st task."
)
help
=
"Choose model type of st task."
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -131,14 +133,16 @@ class STExecutor(BaseExecutor):
...
@@ -131,14 +133,16 @@ class STExecutor(BaseExecutor):
if
cfg_path
is
None
or
ckpt_path
is
None
:
if
cfg_path
is
None
or
ckpt_path
is
None
:
tag
=
model_type
+
"-"
+
src_lang
+
"-"
+
tgt_lang
tag
=
model_type
+
"-"
+
src_lang
+
"-"
+
tgt_lang
res_path
=
self
.
_get_pretrained_path
(
tag
)
self
.
task_resource
.
set_task_model
(
tag
,
version
=
None
)
self
.
cfg_path
=
os
.
path
.
join
(
res_path
,
self
.
cfg_path
=
os
.
path
.
join
(
pretrained_models
[
tag
][
"cfg_path"
])
self
.
task_resource
.
res_dir
,
self
.
ckpt_path
=
os
.
path
.
join
(
res_path
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
pretrained_models
[
tag
][
"ckpt_path"
])
self
.
ckpt_path
=
os
.
path
.
join
(
logger
.
info
(
res_path
)
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'ckpt_path'
])
logger
.
info
(
self
.
cfg_path
)
logger
.
info
(
self
.
cfg_path
)
logger
.
info
(
self
.
ckpt_path
)
logger
.
info
(
self
.
ckpt_path
)
res_path
=
self
.
task_resource
.
res_dir
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
ckpt_path
=
os
.
path
.
abspath
(
ckpt_path
)
self
.
ckpt_path
=
os
.
path
.
abspath
(
ckpt_path
)
...
@@ -163,7 +167,7 @@ class STExecutor(BaseExecutor):
...
@@ -163,7 +167,7 @@ class STExecutor(BaseExecutor):
model_conf
=
self
.
config
model_conf
=
self
.
config
model_name
=
model_type
[:
model_type
.
rindex
(
model_name
=
model_type
[:
model_type
.
rindex
(
'_'
)]
# model_type: {model_name}_{dataset}
'_'
)]
# model_type: {model_name}_{dataset}
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_class
=
self
.
task_resource
.
get_model_class
(
model_name
)
self
.
model
=
model_class
.
from_config
(
model_conf
)
self
.
model
=
model_class
.
from_config
(
model_conf
)
self
.
model
.
eval
()
self
.
model
.
eval
()
...
@@ -301,7 +305,7 @@ class STExecutor(BaseExecutor):
...
@@ -301,7 +305,7 @@ class STExecutor(BaseExecutor):
if
not
parser_args
.
verbose
:
if
not
parser_args
.
verbose
:
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
task_source
=
self
.
get_
task
_source
(
parser_args
.
input
)
task_source
=
self
.
get_
input
_source
(
parser_args
.
input
)
task_results
=
OrderedDict
()
task_results
=
OrderedDict
()
has_exceptions
=
False
has_exceptions
=
False
...
...
paddlespeech/cli/st/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
"fat_st_ted-en-zh"
:
{
"url"
:
"https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/st1_transformer_mtl_noam_ted-en-zh_ckpt_0.1.1.model.tar.gz"
,
"md5"
:
"d62063f35a16d91210a71081bd2dd557"
,
"cfg_path"
:
"model.yaml"
,
"ckpt_path"
:
"exp/transformer_mtl_noam/checkpoints/fat_st_ted-en-zh.pdparams"
,
}
}
model_alias
=
{
"fat_st"
:
"paddlespeech.s2t.models.u2_st:U2STModel"
}
kaldi_bins
=
{
"url"
:
"https://paddlespeech.bj.bcebos.com/s2t/ted_en_zh/st1/kaldi_bins.tar.gz"
,
"md5"
:
"c0682303b3f3393dbf6ed4c4e35a53eb"
,
}
paddlespeech/cli/stats/infer.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
from
typing
import
List
from
prettytable
import
PrettyTable
from
..utils
import
cli_register
from
..utils
import
stats_wrapper
__all__
=
[
'StatsExecutor'
]
model_name_format
=
{
'asr'
:
'Model-Language-Sample Rate'
,
'cls'
:
'Model-Sample Rate'
,
'st'
:
'Model-Source language-Target language'
,
'text'
:
'Model-Task-Language'
,
'tts'
:
'Model-Language'
,
'vector'
:
'Model-Sample Rate'
}
@
cli_register
(
name
=
'paddlespeech.stats'
,
description
=
'Get speech tasks support models list.'
)
class
StatsExecutor
():
def
__init__
(
self
):
super
().
__init__
()
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.stats'
,
add_help
=
True
)
self
.
task_choices
=
[
'asr'
,
'cls'
,
'st'
,
'text'
,
'tts'
,
'vector'
]
self
.
parser
.
add_argument
(
'--task'
,
type
=
str
,
default
=
'asr'
,
choices
=
self
.
task_choices
,
help
=
'Choose speech task.'
,
required
=
True
)
def
show_support_models
(
self
,
pretrained_models
:
dict
):
fields
=
model_name_format
[
self
.
task
].
split
(
"-"
)
table
=
PrettyTable
(
fields
)
for
key
in
pretrained_models
:
table
.
add_row
(
key
.
split
(
"-"
))
print
(
table
)
def
execute
(
self
,
argv
:
List
[
str
])
->
bool
:
"""
Command line entry.
"""
parser_args
=
self
.
parser
.
parse_args
(
argv
)
has_exceptions
=
False
try
:
self
(
parser_args
.
task
)
except
Exception
as
e
:
has_exceptions
=
True
if
has_exceptions
:
return
False
else
:
return
True
@
stats_wrapper
def
__call__
(
self
,
task
:
str
=
None
,
):
"""
Python API to call an executor.
"""
self
.
task
=
task
if
self
.
task
not
in
self
.
task_choices
:
print
(
"Please input correct speech task, choices = "
+
str
(
self
.
task_choices
))
elif
self
.
task
==
'asr'
:
try
:
from
..asr.pretrained_models
import
pretrained_models
print
(
"Here is the list of ASR pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of ASR pretrained models."
)
elif
self
.
task
==
'cls'
:
try
:
from
..cls.pretrained_models
import
pretrained_models
print
(
"Here is the list of CLS pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of CLS pretrained models."
)
elif
self
.
task
==
'st'
:
try
:
from
..st.pretrained_models
import
pretrained_models
print
(
"Here is the list of ST pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of ST pretrained models."
)
elif
self
.
task
==
'text'
:
try
:
from
..text.pretrained_models
import
pretrained_models
print
(
"Here is the list of TEXT pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of TEXT pretrained models."
)
elif
self
.
task
==
'tts'
:
try
:
from
..tts.pretrained_models
import
pretrained_models
print
(
"Here is the list of TTS pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of TTS pretrained models."
)
elif
self
.
task
==
'vector'
:
try
:
from
..vector.pretrained_models
import
pretrained_models
print
(
"Here is the list of Speaker Recognition pretrained models released by PaddleSpeech that can be used by command line and python API"
)
self
.
show_support_models
(
pretrained_models
)
except
BaseException
:
print
(
"Failed to get the list of Speaker Recognition pretrained models."
)
paddlespeech/cli/text/infer.py
浏览文件 @
fa6e44e4
...
@@ -24,21 +24,13 @@ import paddle
...
@@ -24,21 +24,13 @@ import paddle
from
..executor
import
BaseExecutor
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..log
import
logger
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
.pretrained_models
import
tokenizer_alias
from
paddlespeech.utils.dynamic_import
import
dynamic_import
__all__
=
[
'TextExecutor'
]
__all__
=
[
'TextExecutor'
]
class
TextExecutor
(
BaseExecutor
):
class
TextExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
task
=
'text'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
tokenizer_alias
=
tokenizer_alias
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.text'
,
add_help
=
True
)
prog
=
'paddlespeech.text'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -54,7 +46,8 @@ class TextExecutor(BaseExecutor):
...
@@ -54,7 +46,8 @@ class TextExecutor(BaseExecutor):
type
=
str
,
type
=
str
,
default
=
'ernie_linear_p7_wudao'
,
default
=
'ernie_linear_p7_wudao'
,
choices
=
[
choices
=
[
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
pretrained_models
.
keys
()
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
task_resource
.
pretrained_models
.
keys
()
],
],
help
=
'Choose model type of text task.'
)
help
=
'Choose model type of text task.'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -112,13 +105,16 @@ class TextExecutor(BaseExecutor):
...
@@ -112,13 +105,16 @@ class TextExecutor(BaseExecutor):
if
cfg_path
is
None
or
ckpt_path
is
None
or
vocab_file
is
None
:
if
cfg_path
is
None
or
ckpt_path
is
None
or
vocab_file
is
None
:
tag
=
'-'
.
join
([
model_type
,
task
,
lang
])
tag
=
'-'
.
join
([
model_type
,
task
,
lang
])
self
.
res_path
=
self
.
_get_pretrained_path
(
tag
)
self
.
task_resource
.
set_task_model
(
tag
,
version
=
None
)
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'cfg_path'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'ckpt_path'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'ckpt_path'
])
self
.
vocab_file
=
os
.
path
.
join
(
self
.
vocab_file
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
][
'vocab_file'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'vocab_file'
])
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
ckpt_path
=
os
.
path
.
abspath
(
ckpt_path
)
self
.
ckpt_path
=
os
.
path
.
abspath
(
ckpt_path
)
...
@@ -133,8 +129,8 @@ class TextExecutor(BaseExecutor):
...
@@ -133,8 +129,8 @@ class TextExecutor(BaseExecutor):
self
.
_punc_list
.
append
(
line
.
strip
())
self
.
_punc_list
.
append
(
line
.
strip
())
# model
# model
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_class
,
tokenizer_class
=
self
.
task_resource
.
get_model_class
(
tokenizer_class
=
dynamic_import
(
model_name
,
self
.
tokenizer_alias
)
model_name
)
self
.
model
=
model_class
(
self
.
model
=
model_class
(
cfg_path
=
self
.
cfg_path
,
ckpt_path
=
self
.
ckpt_path
)
cfg_path
=
self
.
cfg_path
,
ckpt_path
=
self
.
ckpt_path
)
self
.
tokenizer
=
tokenizer_class
.
from_pretrained
(
'ernie-1.0'
)
self
.
tokenizer
=
tokenizer_class
.
from_pretrained
(
'ernie-1.0'
)
...
@@ -224,7 +220,7 @@ class TextExecutor(BaseExecutor):
...
@@ -224,7 +220,7 @@ class TextExecutor(BaseExecutor):
if
not
parser_args
.
verbose
:
if
not
parser_args
.
verbose
:
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
task_source
=
self
.
get_
task
_source
(
parser_args
.
input
)
task_source
=
self
.
get_
input
_source
(
parser_args
.
input
)
task_results
=
OrderedDict
()
task_results
=
OrderedDict
()
has_exceptions
=
False
has_exceptions
=
False
...
...
paddlespeech/cli/text/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
# The tags for pretrained_models should be "{model_name}[_{dataset}][-{lang}][-...]".
# e.g. "conformer_wenetspeech-zh-16k", "transformer_aishell-zh-16k" and "panns_cnn6-32k".
# Command line and python api use "{model_name}[_{dataset}]" as --model, usage:
# "paddlespeech asr --model conformer_wenetspeech --lang zh --sr 16000 --input ./input.wav"
"ernie_linear_p7_wudao-punc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/text/ernie_linear_p7_wudao-punc-zh.tar.gz'
,
'md5'
:
'12283e2ddde1797c5d1e57036b512746'
,
'cfg_path'
:
'ckpt/model_config.json'
,
'ckpt_path'
:
'ckpt/model_state.pdparams'
,
'vocab_file'
:
'punc_vocab.txt'
,
},
"ernie_linear_p3_wudao-punc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/text/ernie_linear_p3_wudao-punc-zh.tar.gz'
,
'md5'
:
'448eb2fdf85b6a997e7e652e80c51dd2'
,
'cfg_path'
:
'ckpt/model_config.json'
,
'ckpt_path'
:
'ckpt/model_state.pdparams'
,
'vocab_file'
:
'punc_vocab.txt'
,
},
}
model_alias
=
{
"ernie_linear_p7"
:
"paddlespeech.text.models:ErnieLinear"
,
"ernie_linear_p3"
:
"paddlespeech.text.models:ErnieLinear"
,
}
tokenizer_alias
=
{
"ernie_linear_p7"
:
"paddlenlp.transformers:ErnieTokenizer"
,
"ernie_linear_p3"
:
"paddlenlp.transformers:ErnieTokenizer"
,
}
paddlespeech/cli/tts/infer.py
浏览文件 @
fa6e44e4
...
@@ -29,22 +29,16 @@ from yacs.config import CfgNode
...
@@ -29,22 +29,16 @@ from yacs.config import CfgNode
from
..executor
import
BaseExecutor
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..log
import
logger
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
.pretrained_models
import
pretrained_models
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.modules.normalizer
import
ZScore
from
paddlespeech.t2s.modules.normalizer
import
ZScore
from
paddlespeech.utils.dynamic_import
import
dynamic_import
__all__
=
[
'TTSExecutor'
]
__all__
=
[
'TTSExecutor'
]
class
TTSExecutor
(
BaseExecutor
):
class
TTSExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
'tts'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
'paddlespeech.tts'
,
add_help
=
True
)
prog
=
'paddlespeech.tts'
,
add_help
=
True
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
...
@@ -183,19 +177,23 @@ class TTSExecutor(BaseExecutor):
...
@@ -183,19 +177,23 @@ class TTSExecutor(BaseExecutor):
return
return
# am
# am
am_tag
=
am
+
'-'
+
lang
am_tag
=
am
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
am_tag
,
model_type
=
0
,
# am
version
=
None
,
# default version
)
if
am_ckpt
is
None
or
am_config
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
if
am_ckpt
is
None
or
am_config
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
am_res_path
=
self
.
_get_pretrained_path
(
am_tag
)
self
.
am_res_path
=
self
.
task_resource
.
res_dir
self
.
am_res_path
=
am_res_path
self
.
am_config
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_config
=
os
.
path
.
join
(
self
.
task_resource
.
res_dict
[
'config'
])
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'config'
])
self
.
am_ckpt
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_ckpt
=
os
.
path
.
join
(
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
])
self
.
pretrained_models
[
am_tag
][
'ckpt'
])
self
.
am_stat
=
os
.
path
.
join
(
self
.
am_stat
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'speech_stats'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'speech_stats'
])
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'phones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'phones_dict'
])
logger
.
info
(
am_res_path
)
logger
.
info
(
self
.
am_res_path
)
logger
.
info
(
self
.
am_config
)
logger
.
info
(
self
.
am_config
)
logger
.
info
(
self
.
am_ckpt
)
logger
.
info
(
self
.
am_ckpt
)
else
:
else
:
...
@@ -207,33 +205,37 @@ class TTSExecutor(BaseExecutor):
...
@@ -207,33 +205,37 @@ class TTSExecutor(BaseExecutor):
# for speedyspeech
# for speedyspeech
self
.
tones_dict
=
None
self
.
tones_dict
=
None
if
'tones_dict'
in
self
.
pretrained_models
[
am_tag
]
:
if
'tones_dict'
in
self
.
task_resource
.
res_dict
:
self
.
tones_dict
=
os
.
path
.
join
(
self
.
tones_dict
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'tones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'tones_dict'
])
if
tones_dict
:
if
tones_dict
:
self
.
tones_dict
=
tones_dict
self
.
tones_dict
=
tones_dict
# for multi speaker fastspeech2
# for multi speaker fastspeech2
self
.
speaker_dict
=
None
self
.
speaker_dict
=
None
if
'speaker_dict'
in
self
.
pretrained_models
[
am_tag
]
:
if
'speaker_dict'
in
self
.
task_resource
.
res_dict
:
self
.
speaker_dict
=
os
.
path
.
join
(
self
.
speaker_dict
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'speaker_dict'
])
self
.
pretrained_models
[
am_tag
][
'speaker_dict'
])
if
speaker_dict
:
if
speaker_dict
:
self
.
speaker_dict
=
speaker_dict
self
.
speaker_dict
=
speaker_dict
# voc
# voc
voc_tag
=
voc
+
'-'
+
lang
voc_tag
=
voc
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
voc_tag
,
model_type
=
1
,
# vocoder
version
=
None
,
# default version
)
if
voc_ckpt
is
None
or
voc_config
is
None
or
voc_stat
is
None
:
if
voc_ckpt
is
None
or
voc_config
is
None
or
voc_stat
is
None
:
voc_res_path
=
self
.
_get_pretrained_path
(
voc_tag
)
self
.
voc_res_path
=
self
.
task_resource
.
voc_res_dir
self
.
voc_res_path
=
voc_res_path
self
.
voc_config
=
os
.
path
.
join
(
self
.
voc_config
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'config'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'config'
])
self
.
voc_ckpt
=
os
.
path
.
join
(
self
.
voc_ckpt
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'ckpt'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'ckpt'
])
self
.
voc_stat
=
os
.
path
.
join
(
self
.
voc_stat
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
][
'speech_stats'
])
self
.
voc_res_path
,
logger
.
info
(
voc_res_path
)
self
.
task_resource
.
voc_res_dict
[
'speech_stats'
])
logger
.
info
(
self
.
voc_res_path
)
logger
.
info
(
self
.
voc_config
)
logger
.
info
(
self
.
voc_config
)
logger
.
info
(
self
.
voc_ckpt
)
logger
.
info
(
self
.
voc_ckpt
)
else
:
else
:
...
@@ -283,9 +285,9 @@ class TTSExecutor(BaseExecutor):
...
@@ -283,9 +285,9 @@ class TTSExecutor(BaseExecutor):
# model: {model_name}_{dataset}
# model: {model_name}_{dataset}
am_name
=
am
[:
am
.
rindex
(
'_'
)]
am_name
=
am
[:
am
.
rindex
(
'_'
)]
am_class
=
dynamic_import
(
am_name
,
self
.
model_alias
)
am_class
=
self
.
task_resource
.
get_model_class
(
am_name
)
am_inference_class
=
dynamic_import
(
am_name
+
'_inference'
,
am_inference_class
=
self
.
task_resource
.
get_model_class
(
am_name
+
self
.
model_alias
)
'_inference'
)
if
am_name
==
'fastspeech2'
:
if
am_name
==
'fastspeech2'
:
am
=
am_class
(
am
=
am_class
(
...
@@ -314,9 +316,9 @@ class TTSExecutor(BaseExecutor):
...
@@ -314,9 +316,9 @@ class TTSExecutor(BaseExecutor):
# vocoder
# vocoder
# model: {model_name}_{dataset}
# model: {model_name}_{dataset}
voc_name
=
voc
[:
voc
.
rindex
(
'_'
)]
voc_name
=
voc
[:
voc
.
rindex
(
'_'
)]
voc_class
=
dynamic_import
(
voc_name
,
self
.
model_alias
)
voc_class
=
self
.
task_resource
.
get_model_class
(
voc_name
)
voc_inference_class
=
dynamic_import
(
voc_name
+
'_inference'
,
voc_inference_class
=
self
.
task_resource
.
get_model_class
(
voc_name
+
self
.
model_alias
)
'_inference'
)
if
voc_name
!=
'wavernn'
:
if
voc_name
!=
'wavernn'
:
voc
=
voc_class
(
**
self
.
voc_config
[
"generator_params"
])
voc
=
voc_class
(
**
self
.
voc_config
[
"generator_params"
])
voc
.
set_state_dict
(
paddle
.
load
(
self
.
voc_ckpt
)[
"generator_params"
])
voc
.
set_state_dict
(
paddle
.
load
(
self
.
voc_ckpt
)[
"generator_params"
])
...
@@ -444,7 +446,7 @@ class TTSExecutor(BaseExecutor):
...
@@ -444,7 +446,7 @@ class TTSExecutor(BaseExecutor):
if
not
args
.
verbose
:
if
not
args
.
verbose
:
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
task_source
=
self
.
get_
task
_source
(
args
.
input
)
task_source
=
self
.
get_
input
_source
(
args
.
input
)
task_results
=
OrderedDict
()
task_results
=
OrderedDict
()
has_exceptions
=
False
has_exceptions
=
False
...
...
paddlespeech/cli/tts/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
# speedyspeech
"speedyspeech_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_ckpt_0.2.0.zip'
,
'md5'
:
'6f6fa967b408454b6662c8c00c0027cb'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_30600.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
'tones_dict'
:
'tone_id_map.txt'
,
},
# fastspeech2
"fastspeech2_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip'
,
'md5'
:
'637d28a5e53aa60275612ba4393d5f22'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_76000.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
"fastspeech2_ljspeech-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip'
,
'md5'
:
'ffed800c93deaf16ca9b3af89bfcd747'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_100000.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
"fastspeech2_aishell3-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip'
,
'md5'
:
'f4dd4a5f49a4552b77981f544ab3392e'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_96400.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
'speaker_dict'
:
'speaker_id_map.txt'
,
},
"fastspeech2_vctk-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_vctk_ckpt_0.5.zip'
,
'md5'
:
'743e5024ca1e17a88c5c271db9779ba4'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_66200.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
'speaker_dict'
:
'speaker_id_map.txt'
,
},
# tacotron2
"tacotron2_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip'
,
'md5'
:
'0df4b6f0bcbe0d73c5ed6df8867ab91a'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_30600.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
"tacotron2_ljspeech-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.2.0.zip'
,
'md5'
:
'6a5eddd81ae0e81d16959b97481135f3'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_60300.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
# pwgan
"pwgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip'
,
'md5'
:
'2e481633325b5bdf0a3823c714d2c117'
,
'config'
:
'pwg_default.yaml'
,
'ckpt'
:
'pwg_snapshot_iter_400000.pdz'
,
'speech_stats'
:
'pwg_stats.npy'
,
},
"pwgan_ljspeech-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip'
,
'md5'
:
'53610ba9708fd3008ccaf8e99dacbaf0'
,
'config'
:
'pwg_default.yaml'
,
'ckpt'
:
'pwg_snapshot_iter_400000.pdz'
,
'speech_stats'
:
'pwg_stats.npy'
,
},
"pwgan_aishell3-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip'
,
'md5'
:
'd7598fa41ad362d62f85ffc0f07e3d84'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_1000000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
"pwgan_vctk-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip'
,
'md5'
:
'b3da1defcde3e578be71eb284cb89f2c'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_1500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
# mb_melgan
"mb_melgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip'
,
'md5'
:
'ee5f0604e20091f0d495b6ec4618b90d'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_1000000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
# style_melgan
"style_melgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip'
,
'md5'
:
'5de2d5348f396de0c966926b8c462755'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_1500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
# hifigan
"hifigan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip'
,
'md5'
:
'dd40a3d88dfcf64513fba2f0f961ada6'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_2500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
"hifigan_ljspeech-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip'
,
'md5'
:
'70e9131695decbca06a65fe51ed38a72'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_2500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
"hifigan_aishell3-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip'
,
'md5'
:
'3bb49bc75032ed12f79c00c8cc79a09a'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_2500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
"hifigan_vctk-en"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip'
,
'md5'
:
'7da8f88359bca2457e705d924cf27bd4'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_2500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
# wavernn
"wavernn_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip'
,
'md5'
:
'ee37b752f09bcba8f2af3b777ca38e13'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_400000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
}
}
model_alias
=
{
# acoustic model
"speedyspeech"
:
"paddlespeech.t2s.models.speedyspeech:SpeedySpeech"
,
"speedyspeech_inference"
:
"paddlespeech.t2s.models.speedyspeech:SpeedySpeechInference"
,
"fastspeech2"
:
"paddlespeech.t2s.models.fastspeech2:FastSpeech2"
,
"fastspeech2_inference"
:
"paddlespeech.t2s.models.fastspeech2:FastSpeech2Inference"
,
"tacotron2"
:
"paddlespeech.t2s.models.tacotron2:Tacotron2"
,
"tacotron2_inference"
:
"paddlespeech.t2s.models.tacotron2:Tacotron2Inference"
,
# voc
"pwgan"
:
"paddlespeech.t2s.models.parallel_wavegan:PWGGenerator"
,
"pwgan_inference"
:
"paddlespeech.t2s.models.parallel_wavegan:PWGInference"
,
"mb_melgan"
:
"paddlespeech.t2s.models.melgan:MelGANGenerator"
,
"mb_melgan_inference"
:
"paddlespeech.t2s.models.melgan:MelGANInference"
,
"style_melgan"
:
"paddlespeech.t2s.models.melgan:StyleMelGANGenerator"
,
"style_melgan_inference"
:
"paddlespeech.t2s.models.melgan:StyleMelGANInference"
,
"hifigan"
:
"paddlespeech.t2s.models.hifigan:HiFiGANGenerator"
,
"hifigan_inference"
:
"paddlespeech.t2s.models.hifigan:HiFiGANInference"
,
"wavernn"
:
"paddlespeech.t2s.models.wavernn:WaveRNN"
,
"wavernn_inference"
:
"paddlespeech.t2s.models.wavernn:WaveRNNInference"
,
}
paddlespeech/cli/vector/infer.py
浏览文件 @
fa6e44e4
...
@@ -22,26 +22,20 @@ from typing import Union
...
@@ -22,26 +22,20 @@ from typing import Union
import
paddle
import
paddle
import
soundfile
import
soundfile
from
paddleaudio.backends
import
load
as
load_audio
from
paddleaudio.compliance.librosa
import
melspectrogram
from
yacs.config
import
CfgNode
from
yacs.config
import
CfgNode
from
..executor
import
BaseExecutor
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..log
import
logger
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
.pretrained_models
import
model_alias
from
paddleaudio.backends
import
load
as
load_audio
from
.pretrained_models
import
pretrained_models
from
paddleaudio.compliance.librosa
import
melspectrogram
from
paddlespeech.utils.dynamic_import
import
dynamic_import
from
paddlespeech.vector.io.batch
import
feature_normalize
from
paddlespeech.vector.io.batch
import
feature_normalize
from
paddlespeech.vector.modules.sid_model
import
SpeakerIdetification
from
paddlespeech.vector.modules.sid_model
import
SpeakerIdetification
class
VectorExecutor
(
BaseExecutor
):
class
VectorExecutor
(
BaseExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
(
'vector'
)
self
.
model_alias
=
model_alias
self
.
pretrained_models
=
pretrained_models
self
.
parser
=
argparse
.
ArgumentParser
(
self
.
parser
=
argparse
.
ArgumentParser
(
prog
=
"paddlespeech.vector"
,
add_help
=
True
)
prog
=
"paddlespeech.vector"
,
add_help
=
True
)
...
@@ -49,7 +43,10 @@ class VectorExecutor(BaseExecutor):
...
@@ -49,7 +43,10 @@ class VectorExecutor(BaseExecutor):
"--model"
,
"--model"
,
type
=
str
,
type
=
str
,
default
=
"ecapatdnn_voxceleb12"
,
default
=
"ecapatdnn_voxceleb12"
,
choices
=
[
"ecapatdnn_voxceleb12"
],
choices
=
[
tag
[:
tag
.
index
(
'-'
)]
for
tag
in
self
.
task_resource
.
pretrained_models
.
keys
()
],
help
=
"Choose model type of vector task."
)
help
=
"Choose model type of vector task."
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
"--task"
,
"--task"
,
...
@@ -119,7 +116,7 @@ class VectorExecutor(BaseExecutor):
...
@@ -119,7 +116,7 @@ class VectorExecutor(BaseExecutor):
self
.
disable_task_loggers
()
self
.
disable_task_loggers
()
# stage 2: read the input data and store them as a list
# stage 2: read the input data and store them as a list
task_source
=
self
.
get_
task
_source
(
parser_args
.
input
)
task_source
=
self
.
get_
input
_source
(
parser_args
.
input
)
logger
.
info
(
f
"task source:
{
task_source
}
"
)
logger
.
info
(
f
"task source:
{
task_source
}
"
)
# stage 3: process the audio one by one
# stage 3: process the audio one by one
...
@@ -296,6 +293,7 @@ class VectorExecutor(BaseExecutor):
...
@@ -296,6 +293,7 @@ class VectorExecutor(BaseExecutor):
# get the mode from pretrained list
# get the mode from pretrained list
sample_rate_str
=
"16k"
if
sample_rate
==
16000
else
"8k"
sample_rate_str
=
"16k"
if
sample_rate
==
16000
else
"8k"
tag
=
model_type
+
"-"
+
sample_rate_str
tag
=
model_type
+
"-"
+
sample_rate_str
self
.
task_resource
.
set_task_model
(
tag
,
version
=
None
)
logger
.
info
(
f
"load the pretrained model:
{
tag
}
"
)
logger
.
info
(
f
"load the pretrained model:
{
tag
}
"
)
# get the model from the pretrained list
# get the model from the pretrained list
# we download the pretrained model and store it in the res_path
# we download the pretrained model and store it in the res_path
...
@@ -303,10 +301,11 @@ class VectorExecutor(BaseExecutor):
...
@@ -303,10 +301,11 @@ class VectorExecutor(BaseExecutor):
self
.
res_path
=
res_path
self
.
res_path
=
res_path
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
res_path
,
self
.
pretrained_models
[
tag
][
'cfg_path'
])
self
.
task_resource
.
res_dir
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
ckpt_path
=
os
.
path
.
join
(
self
.
ckpt_path
=
os
.
path
.
join
(
res_path
,
self
.
task_resource
.
res_dir
,
self
.
pretrained_models
[
tag
]
[
'ckpt_path'
]
+
'.pdparams'
)
self
.
task_resource
.
res_dict
[
'ckpt_path'
]
+
'.pdparams'
)
else
:
else
:
# get the model from disk
# get the model from disk
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
...
@@ -325,8 +324,8 @@ class VectorExecutor(BaseExecutor):
...
@@ -325,8 +324,8 @@ class VectorExecutor(BaseExecutor):
# stage 3: get the model name to instance the model network with dynamic_import
# stage 3: get the model name to instance the model network with dynamic_import
logger
.
info
(
"start to dynamic import the model class"
)
logger
.
info
(
"start to dynamic import the model class"
)
model_name
=
model_type
[:
model_type
.
rindex
(
'_'
)]
model_name
=
model_type
[:
model_type
.
rindex
(
'_'
)]
model_class
=
self
.
task_resource
.
get_model_class
(
model_name
)
logger
.
info
(
f
"model name
{
model_name
}
"
)
logger
.
info
(
f
"model name
{
model_name
}
"
)
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_conf
=
self
.
config
.
model
model_conf
=
self
.
config
.
model
backbone
=
model_class
(
**
model_conf
)
backbone
=
model_class
(
**
model_conf
)
model
=
SpeakerIdetification
(
model
=
SpeakerIdetification
(
...
...
paddlespeech/cli/vector/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
# The tags for pretrained_models should be "{model_name}[-{dataset}][-{sr}][-...]".
# e.g. "ecapatdnn_voxceleb12-16k".
# Command line and python api use "{model_name}[-{dataset}]" as --model, usage:
# "paddlespeech vector --task spk --model ecapatdnn_voxceleb12-16k --sr 16000 --input ./input.wav"
"ecapatdnn_voxceleb12-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz'
,
'md5'
:
'67c7ff8885d5246bd16e0f5ac1cba99f'
,
'cfg_path'
:
'conf/model.yaml'
,
# the yaml config path
'ckpt_path'
:
'model/model'
,
# the format is ${dir}/{model_name},
# so the first 'model' is dir, the second 'model' is the name
# this means we have a model stored as model/model.pdparams
},
}
model_alias
=
{
"ecapatdnn"
:
"paddlespeech.vector.models.ecapa_tdnn:EcapaTdnn"
,
}
paddlespeech/
cli/stats
/__init__.py
→
paddlespeech/
resource
/__init__.py
浏览文件 @
fa6e44e4
# Copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -11,4 +11,4 @@
...
@@ -11,4 +11,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.
infer
import
StatsExecutor
from
.
resource
import
CommonTaskResource
paddlespeech/resource/pretrained_models.py
0 → 100644
浏览文件 @
fa6e44e4
此差异已折叠。
点击以展开。
paddlespeech/resource/resource.py
0 → 100644
浏览文件 @
fa6e44e4
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
from
collections
import
OrderedDict
from
typing
import
Dict
from
typing
import
List
from
typing
import
Optional
from
..cli.utils
import
download_and_decompress
from
..cli.utils
import
MODEL_HOME
from
..utils.dynamic_import
import
dynamic_import
from
.model_alias
import
model_alias
# Tasks for which pretrained-model resources are registered.
task_supported = ['asr', 'cls', 'st', 'text', 'tts', 'vector']

# Model serialization formats a task resource may be queried for.
model_format_supported = ['dynamic', 'static', 'onnx']

# Inference modes used to filter the pretrained-model tables.
inference_mode_supported = ['online', 'offline']
class CommonTaskResource:
    """Registry of pretrained-model resources for a single speech task.

    Looks up the pretrained-model table for ``(task, model_format)``,
    optionally filters it by inference mode, and downloads/extracts the
    archive of a selected model into ``MODEL_HOME``.
    """

    def __init__(self, task: str, model_format: str='dynamic', **kwargs):
        assert task in task_supported, 'Arg "task" must be one of {}.'.format(
            task_supported)
        assert model_format in model_format_supported, \
            'Arg "model_format" must be one of {}.'.format(
                model_format_supported)
        self.task = task
        self.model_format = model_format
        self.pretrained_models = self._get_pretrained_models()

        if 'inference_mode' in kwargs:
            assert kwargs['inference_mode'] in inference_mode_supported, \
                'Arg "inference_mode" must be one of {}.'.format(
                    inference_mode_supported)
            self._inference_mode_filter(kwargs['inference_mode'])

        # Initialize after model and version had been set.
        self.model_tag = None
        self.version = None
        self.res_dict = None
        self.res_dir = None

        if self.task == 'tts':
            # For vocoder
            self.voc_model_tag = None
            self.voc_version = None
            self.voc_res_dict = None
            self.voc_res_dir = None

    def set_task_model(self,
                       model_tag: str,
                       model_type: int=0,
                       version: Optional[str]=None):
        """Set model tag and version of current task.

        Args:
            model_tag (str): Model tag.
            model_type (int): 0 for acoustic model otherwise vocoder in tts task.
            version (Optional[str], optional): Version of pretrained model. Defaults to None.
        """
        assert model_tag in self.pretrained_models, \
            "Can't find \"{}\" in resource. Model name must be one of {}".format(
                model_tag, list(self.pretrained_models.keys()))

        if version is None:
            # Fall back to the newest registered version.
            version = self._get_default_version(model_tag)

        assert version in self.pretrained_models[model_tag], \
            "Can't find version \"{}\" in \"{}\". Model name must be one of {}".format(
                version, model_tag,
                list(self.pretrained_models[model_tag].keys()))

        if model_type == 0:
            self.model_tag = model_tag
            self.version = version
            self.res_dict = self.pretrained_models[model_tag][version]
            self.res_dir = self._fetch(self.res_dict,
                                       self._get_model_dir(model_type))
        else:
            assert self.task == 'tts', 'Vocoder will only be used in tts task.'
            self.voc_model_tag = model_tag
            self.voc_version = version
            self.voc_res_dict = self.pretrained_models[model_tag][version]
            self.voc_res_dir = self._fetch(self.voc_res_dict,
                                           self._get_model_dir(model_type))

    @staticmethod
    def get_model_class(model_name) -> List[object]:
        """Dynamic import model class.

        Args:
            model_name (str): Model name.

        Returns:
            List[object]: Return a list of model class.
        """
        assert model_name in model_alias, \
            'No model classes found for "{}"'.format(model_name)

        ret = []
        for import_path in model_alias[model_name]:
            ret.append(dynamic_import(import_path))

        # Keep backward compatibility: a single class is returned bare.
        if len(ret) == 1:
            return ret[0]
        else:
            return ret

    def get_versions(self, model_tag: str) -> List[str]:
        """List all available versions.

        Args:
            model_tag (str): Model tag.

        Returns:
            List[str]: Version list of model.
        """
        return list(self.pretrained_models[model_tag].keys())

    def _get_default_version(self, model_tag: str) -> str:
        """Get default version of model.

        Args:
            model_tag (str): Model tag.

        Returns:
            str: Default version.
        """
        return self.get_versions(model_tag)[-1]  # get latest version

    def _get_model_dir(self, model_type: int=0) -> os.PathLike:
        """Get resource directory.

        Args:
            model_type (int): 0 for acoustic model otherwise vocoder in tts task.

        Returns:
            os.PathLike: Directory of model resource.
        """
        if model_type == 0:
            model_tag = self.model_tag
            version = self.version
        else:
            model_tag = self.voc_model_tag
            version = self.voc_version
        return os.path.join(MODEL_HOME, model_tag, version)

    def _get_pretrained_models(self) -> Dict[str, str]:
        """Get all available models for current task.

        Returns:
            Dict[str, str]: A dictionary with model tag and resources info.
        """
        import_models = '{}_{}_pretrained_models'.format(self.task,
                                                         self.model_format)
        try:
            # NOTE: exec() cannot create real function locals in Python 3, so
            # bind the imported name into an explicit namespace dict instead of
            # relying on the fragile locals() round-trip. Module globals() are
            # passed so the relative import resolves against this package.
            namespace = {}
            exec('from .pretrained_models import {}'.format(import_models),
                 globals(), namespace)
            models = OrderedDict(namespace[import_models])
        except ImportError:
            models = OrderedDict()  # No models for this (task, format) pair.
        # Plain return (not inside finally) so unexpected errors propagate.
        return models

    def _inference_mode_filter(self, inference_mode: Optional[str]):
        """Filter models dict based on inference_mode.

        Args:
            inference_mode (Optional[str]): 'online', 'offline' or None.
        """
        if inference_mode is None:
            return

        if self.task == 'asr':
            # ASR online models are identified by 'online' in their tag.
            online_flags = [
                'online' in model_tag
                for model_tag in self.pretrained_models.keys()
            ]
            for online_flag, model_tag in zip(
                    online_flags, list(self.pretrained_models.keys())):
                if inference_mode == 'online' and online_flag:
                    continue
                elif inference_mode == 'offline' and not online_flag:
                    continue
                else:
                    del self.pretrained_models[model_tag]
        elif self.task == 'tts':
            # Hardcode for tts online models.
            tts_online_models = [
                'fastspeech2_csmsc-zh', 'fastspeech2_cnndecoder_csmsc-zh',
                'mb_melgan_csmsc-zh', 'hifigan_csmsc-zh'
            ]
            for model_tag in list(self.pretrained_models.keys()):
                if inference_mode == 'online' and model_tag in tts_online_models:
                    continue
                elif inference_mode == 'offline':
                    continue
                else:
                    del self.pretrained_models[model_tag]
        else:
            raise NotImplementedError('Only supports asr and tts task.')

    @staticmethod
    def _fetch(res_dict: Dict[str, str],
               target_dir: os.PathLike) -> os.PathLike:
        """Fetch archive from url.

        Args:
            res_dict (Dict[str, str]): Info dict of a resource.
            target_dir (os.PathLike): Directory to save archives.

        Returns:
            os.PathLike: Directory of model resource.
        """
        return download_and_decompress(res_dict, target_dir)
paddlespeech/server/bin/paddlespeech_server.py
浏览文件 @
fa6e44e4
...
@@ -25,6 +25,7 @@ from ..executor import BaseExecutor
...
@@ -25,6 +25,7 @@ from ..executor import BaseExecutor
from
..util
import
cli_server_register
from
..util
import
cli_server_register
from
..util
import
stats_wrapper
from
..util
import
stats_wrapper
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.server.engine.engine_pool
import
init_engine_pool
from
paddlespeech.server.engine.engine_pool
import
init_engine_pool
from
paddlespeech.server.restful.api
import
setup_router
as
setup_http_router
from
paddlespeech.server.restful.api
import
setup_router
as
setup_http_router
from
paddlespeech.server.utils.config
import
get_config
from
paddlespeech.server.utils.config
import
get_config
...
@@ -152,101 +153,30 @@ class ServerStatsExecutor():
...
@@ -152,101 +153,30 @@ class ServerStatsExecutor():
"Please input correct speech task, choices = ['asr', 'tts']"
)
"Please input correct speech task, choices = ['asr', 'tts']"
)
return
False
return
False
elif
self
.
task
.
lower
()
==
'asr'
:
try
:
try
:
from
paddlespeech.cli.asr.infer
import
pretrained_models
# Dynamic models
logger
.
info
(
dynamic_pretrained_models
=
CommonTaskResource
(
"Here is the table of ASR pretrained models supported in the service."
task
=
self
.
task
,
model_format
=
'dynamic'
).
pretrained_models
)
self
.
show_support_models
(
pretrained_models
)
# show ASR static pretrained model
from
paddlespeech.server.engine.asr.paddleinference.asr_engine
import
pretrained_models
logger
.
info
(
"Here is the table of ASR static pretrained models supported in the service."
)
self
.
show_support_models
(
pretrained_models
)
return
True
if
len
(
dynamic_pretrained_models
)
>
0
:
except
BaseException
:
logger
.
error
(
"Failed to get the table of ASR pretrained models supported in the service."
)
return
False
elif
self
.
task
.
lower
()
==
'tts'
:
try
:
from
paddlespeech.cli.tts.infer
import
pretrained_models
logger
.
info
(
logger
.
info
(
"Here is the table of TTS pretrained models supported in the service."
"Here is the table of {} pretrained models supported in the service."
.
)
format
(
self
.
task
.
upper
()))
self
.
show_support_models
(
pretrained_models
)
self
.
show_support_models
(
dynamic_pretrained_models
)
# show TTS static pretrained model
# Static models
from
paddlespeech.server.engine.tts.paddleinference.tts_engine
import
pretrained_models
static_pretrained_models
=
CommonTaskResource
(
task
=
self
.
task
,
model_format
=
'static'
).
pretrained_models
if
len
(
static_pretrained_models
)
>
0
:
logger
.
info
(
logger
.
info
(
"Here is the table of
TTS static pretrained models supported in the service."
"Here is the table of
{} static pretrained models supported in the service."
.
)
format
(
self
.
task
.
upper
())
)
self
.
show_support_models
(
pretrained_models
)
self
.
show_support_models
(
pretrained_models
)
return
True
return
True
except
BaseException
:
logger
.
error
(
"Failed to get the table of TTS pretrained models supported in the service."
)
return
False
elif
self
.
task
.
lower
()
==
'cls'
:
try
:
from
paddlespeech.cli.cls.infer
import
pretrained_models
logger
.
info
(
"Here is the table of CLS pretrained models supported in the service."
)
self
.
show_support_models
(
pretrained_models
)
# show CLS static pretrained model
from
paddlespeech.server.engine.cls.paddleinference.cls_engine
import
pretrained_models
logger
.
info
(
"Here is the table of CLS static pretrained models supported in the service."
)
self
.
show_support_models
(
pretrained_models
)
return
True
except
BaseException
:
except
BaseException
:
logger
.
error
(
logger
.
error
(
"Failed to get the table of CLS pretrained models supported in the service."
"Failed to get the table of {} pretrained models supported in the service."
.
)
format
(
self
.
task
.
upper
()))
return
False
elif
self
.
task
.
lower
()
==
'text'
:
try
:
from
paddlespeech.cli.text.infer
import
pretrained_models
logger
.
info
(
"Here is the table of Text pretrained models supported in the service."
)
self
.
show_support_models
(
pretrained_models
)
return
True
except
BaseException
:
logger
.
error
(
"Failed to get the table of Text pretrained models supported in the service."
)
return
False
elif
self
.
task
.
lower
()
==
'vector'
:
try
:
from
paddlespeech.cli.vector.infer
import
pretrained_models
logger
.
info
(
"Here is the table of Vector pretrained models supported in the service."
)
self
.
show_support_models
(
pretrained_models
)
return
True
except
BaseException
:
logger
.
error
(
"Failed to get the table of Vector pretrained models supported in the service."
)
return
False
else
:
logger
.
error
(
f
"Failed to get the table of
{
self
.
task
}
pretrained models supported in the service."
)
return
False
return
False
paddlespeech/server/engine/asr/online/asr_engine.py
浏览文件 @
fa6e44e4
...
@@ -11,7 +11,6 @@
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
copy
import
os
import
os
import
sys
import
sys
from
typing
import
Optional
from
typing
import
Optional
...
@@ -21,15 +20,14 @@ import paddle
...
@@ -21,15 +20,14 @@ import paddle
from
numpy
import
float32
from
numpy
import
float32
from
yacs.config
import
CfgNode
from
yacs.config
import
CfgNode
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.asr.infer
import
ASRExecutor
from
paddlespeech.cli.asr.infer
import
ASRExecutor
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.utils
import
MODEL_HOME
from
paddlespeech.cli.utils
import
MODEL_HOME
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.speech
import
SpeechSegment
from
paddlespeech.s2t.frontend.speech
import
SpeechSegment
from
paddlespeech.s2t.modules.ctc
import
CTCDecoder
from
paddlespeech.s2t.modules.ctc
import
CTCDecoder
from
paddlespeech.s2t.transform.transformation
import
Transformation
from
paddlespeech.s2t.transform.transformation
import
Transformation
from
paddlespeech.s2t.utils.dynamic_import
import
dynamic_import
from
paddlespeech.s2t.utils.tensor_utils
import
add_sos_eos
from
paddlespeech.s2t.utils.tensor_utils
import
add_sos_eos
from
paddlespeech.s2t.utils.tensor_utils
import
pad_sequence
from
paddlespeech.s2t.utils.tensor_utils
import
pad_sequence
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
...
@@ -251,10 +249,12 @@ class PaddleASRConnectionHanddler:
...
@@ -251,10 +249,12 @@ class PaddleASRConnectionHanddler:
# for deepspeech2
# for deepspeech2
# init state
# init state
self
.
chunk_state_h_box
=
np
.
zeros
(
self
.
chunk_state_h_box
=
np
.
zeros
(
(
self
.
model_config
.
num_rnn_layers
,
1
,
self
.
model_config
.
rnn_layer_size
),
(
self
.
model_config
.
num_rnn_layers
,
1
,
self
.
model_config
.
rnn_layer_size
),
dtype
=
float32
)
dtype
=
float32
)
self
.
chunk_state_c_box
=
np
.
zeros
(
self
.
chunk_state_c_box
=
np
.
zeros
(
(
self
.
model_config
.
num_rnn_layers
,
1
,
self
.
model_config
.
rnn_layer_size
),
(
self
.
model_config
.
num_rnn_layers
,
1
,
self
.
model_config
.
rnn_layer_size
),
dtype
=
float32
)
dtype
=
float32
)
self
.
decoder
.
reset_decoder
(
batch_size
=
1
)
self
.
decoder
.
reset_decoder
(
batch_size
=
1
)
...
@@ -699,7 +699,8 @@ class PaddleASRConnectionHanddler:
...
@@ -699,7 +699,8 @@ class PaddleASRConnectionHanddler:
class
ASRServerExecutor
(
ASRExecutor
):
class
ASRServerExecutor
(
ASRExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'asr'
,
model_format
=
'dynamic'
,
inference_mode
=
'online'
)
def
_init_from_path
(
self
,
def
_init_from_path
(
self
,
model_type
:
str
=
None
,
model_type
:
str
=
None
,
...
@@ -723,20 +724,19 @@ class ASRServerExecutor(ASRExecutor):
...
@@ -723,20 +724,19 @@ class ASRServerExecutor(ASRExecutor):
self
.
sample_rate
=
sample_rate
self
.
sample_rate
=
sample_rate
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
self
.
task_resource
.
set_task_model
(
model_tag
=
tag
)
if
cfg_path
is
None
or
am_model
is
None
or
am_params
is
None
:
if
cfg_path
is
None
or
am_model
is
None
or
am_params
is
None
:
logger
.
info
(
f
"Load the pretrained model, tag =
{
tag
}
"
)
logger
.
info
(
f
"Load the pretrained model, tag =
{
tag
}
"
)
res_path
=
self
.
_get_pretrained_path
(
tag
)
# wenetspeech_zh
self
.
res_path
=
self
.
task_resource
.
res_dir
self
.
res_path
=
res_path
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
res_path
,
self
.
pretrained_models
[
tag
]
[
'cfg_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
am_model
=
os
.
path
.
join
(
res_path
,
self
.
am_model
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'model'
])
self
.
task_resource
.
res_dict
[
'model'
])
self
.
am_params
=
os
.
path
.
join
(
res_path
,
self
.
am_params
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'params'
])
self
.
task_resource
.
res_dict
[
'params'
])
logger
.
info
(
res_path
)
logger
.
info
(
self
.
res_path
)
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
am_model
=
os
.
path
.
abspath
(
am_model
)
self
.
am_model
=
os
.
path
.
abspath
(
am_model
)
...
@@ -763,8 +763,8 @@ class ASRServerExecutor(ASRExecutor):
...
@@ -763,8 +763,8 @@ class ASRServerExecutor(ASRExecutor):
self
.
text_feature
=
TextFeaturizer
(
self
.
text_feature
=
TextFeaturizer
(
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
lm_url
=
self
.
pretrained_models
[
tag
]
[
'lm_url'
]
lm_url
=
self
.
task_resource
.
res_dict
[
'lm_url'
]
lm_md5
=
self
.
pretrained_models
[
tag
]
[
'lm_md5'
]
lm_md5
=
self
.
task_resource
.
res_dict
[
'lm_md5'
]
logger
.
info
(
f
"Start to load language model
{
lm_url
}
"
)
logger
.
info
(
f
"Start to load language model
{
lm_url
}
"
)
self
.
download_lm
(
self
.
download_lm
(
lm_url
,
lm_url
,
...
@@ -810,7 +810,7 @@ class ASRServerExecutor(ASRExecutor):
...
@@ -810,7 +810,7 @@ class ASRServerExecutor(ASRExecutor):
model_name
=
model_type
[:
model_type
.
rindex
(
model_name
=
model_type
[:
model_type
.
rindex
(
'_'
)]
# model_type: {model_name}_{dataset}
'_'
)]
# model_type: {model_name}_{dataset}
logger
.
info
(
f
"model name:
{
model_name
}
"
)
logger
.
info
(
f
"model name:
{
model_name
}
"
)
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_class
=
self
.
task_resource
.
get_model_class
(
model_name
)
model_conf
=
self
.
config
model_conf
=
self
.
config
model
=
model_class
.
from_config
(
model_conf
)
model
=
model_class
.
from_config
(
model_conf
)
self
.
model
=
model
self
.
model
=
model
...
...
paddlespeech/server/engine/asr/online/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
"deepspeech2online_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz'
,
'md5'
:
'98b87b171b7240b7cae6e07d8d0bc9be'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2_online/checkpoints/avg_1'
,
'model'
:
'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel'
,
'params'
:
'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
"conformer_online_multicn-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/multi_cn/asr1/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz'
,
'md5'
:
'0ac93d390552336f2a906aec9e33c5fa'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/chunk_conformer/checkpoints/multi_cn'
,
'model'
:
'exp/chunk_conformer/checkpoints/multi_cn.pdparams'
,
'params'
:
'exp/chunk_conformer/checkpoints/multi_cn.pdparams'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
"conformer_online_wenetspeech-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz'
,
'md5'
:
'b8c02632b04da34aca88459835be54a6'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/chunk_conformer/checkpoints/avg_10'
,
'model'
:
'exp/chunk_conformer/checkpoints/avg_10.pdparams'
,
'params'
:
'exp/chunk_conformer/checkpoints/avg_10.pdparams'
,
'lm_url'
:
''
,
'lm_md5'
:
''
,
},
}
paddlespeech/server/engine/asr/paddleinference/asr_engine.py
浏览文件 @
fa6e44e4
...
@@ -19,10 +19,10 @@ from typing import Optional
...
@@ -19,10 +19,10 @@ from typing import Optional
import
paddle
import
paddle
from
yacs.config
import
CfgNode
from
yacs.config
import
CfgNode
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.asr.infer
import
ASRExecutor
from
paddlespeech.cli.asr.infer
import
ASRExecutor
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.utils
import
MODEL_HOME
from
paddlespeech.cli.utils
import
MODEL_HOME
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.modules.ctc
import
CTCDecoder
from
paddlespeech.s2t.modules.ctc
import
CTCDecoder
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
...
@@ -36,7 +36,8 @@ __all__ = ['ASREngine']
...
@@ -36,7 +36,8 @@ __all__ = ['ASREngine']
class
ASRServerExecutor
(
ASRExecutor
):
class
ASRServerExecutor
(
ASRExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'asr'
,
model_format
=
'static'
,
inference_mode
=
'online'
)
def
_init_from_path
(
self
,
def
_init_from_path
(
self
,
model_type
:
str
=
'wenetspeech'
,
model_type
:
str
=
'wenetspeech'
,
...
@@ -53,17 +54,17 @@ class ASRServerExecutor(ASRExecutor):
...
@@ -53,17 +54,17 @@ class ASRServerExecutor(ASRExecutor):
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
sample_rate_str
=
'16k'
if
sample_rate
==
16000
else
'8k'
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
tag
=
model_type
+
'-'
+
lang
+
'-'
+
sample_rate_str
self
.
task_resource
.
set_task_model
(
model_tag
=
tag
)
if
cfg_path
is
None
or
am_model
is
None
or
am_params
is
None
:
if
cfg_path
is
None
or
am_model
is
None
or
am_params
is
None
:
res_path
=
self
.
_get_pretrained_path
(
tag
)
# wenetspeech_zh
self
.
res_path
=
self
.
task_resource
.
res_dir
self
.
res_path
=
res_path
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
res_path
,
self
.
pretrained_models
[
tag
]
[
'cfg_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
am_model
=
os
.
path
.
join
(
res_path
,
self
.
am_model
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'model'
])
self
.
task_resource
.
res_dict
[
'model'
])
self
.
am_params
=
os
.
path
.
join
(
res_path
,
self
.
am_params
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'params'
])
self
.
task_resource
.
res_dict
[
'params'
])
logger
.
info
(
res_path
)
logger
.
info
(
self
.
res_path
)
logger
.
info
(
self
.
cfg_path
)
logger
.
info
(
self
.
cfg_path
)
logger
.
info
(
self
.
am_model
)
logger
.
info
(
self
.
am_model
)
logger
.
info
(
self
.
am_params
)
logger
.
info
(
self
.
am_params
)
...
@@ -89,8 +90,8 @@ class ASRServerExecutor(ASRExecutor):
...
@@ -89,8 +90,8 @@ class ASRServerExecutor(ASRExecutor):
self
.
text_feature
=
TextFeaturizer
(
self
.
text_feature
=
TextFeaturizer
(
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
unit_type
=
self
.
config
.
unit_type
,
vocab
=
self
.
vocab
)
lm_url
=
self
.
pretrained_models
[
tag
]
[
'lm_url'
]
lm_url
=
self
.
task_resource
.
res_dict
[
'lm_url'
]
lm_md5
=
self
.
pretrained_models
[
tag
]
[
'lm_md5'
]
lm_md5
=
self
.
task_resource
.
res_dict
[
'lm_md5'
]
self
.
download_lm
(
self
.
download_lm
(
lm_url
,
lm_url
,
os
.
path
.
dirname
(
self
.
config
.
decode
.
lang_model_path
),
lm_md5
)
os
.
path
.
dirname
(
self
.
config
.
decode
.
lang_model_path
),
lm_md5
)
...
...
paddlespeech/server/engine/asr/paddleinference/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
"deepspeech2offline_aishell-zh-16k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_aishell_ckpt_0.1.1.model.tar.gz'
,
'md5'
:
'932c3593d62fe5c741b59b31318aa314'
,
'cfg_path'
:
'model.yaml'
,
'ckpt_path'
:
'exp/deepspeech2/checkpoints/avg_1'
,
'model'
:
'exp/deepspeech2/checkpoints/avg_1.jit.pdmodel'
,
'params'
:
'exp/deepspeech2/checkpoints/avg_1.jit.pdiparams'
,
'lm_url'
:
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
,
'lm_md5'
:
'29e02312deb2e59b3c8686c7966d4fe3'
},
}
paddlespeech/server/engine/cls/paddleinference/cls_engine.py
浏览文件 @
fa6e44e4
...
@@ -20,9 +20,9 @@ import numpy as np
...
@@ -20,9 +20,9 @@ import numpy as np
import
paddle
import
paddle
import
yaml
import
yaml
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.cls.infer
import
CLSExecutor
from
paddlespeech.cli.cls.infer
import
CLSExecutor
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.utils.paddle_predictor
import
init_predictor
from
paddlespeech.server.utils.paddle_predictor
import
init_predictor
from
paddlespeech.server.utils.paddle_predictor
import
run_model
from
paddlespeech.server.utils.paddle_predictor
import
run_model
...
@@ -33,11 +33,12 @@ __all__ = ['CLSEngine']
...
@@ -33,11 +33,12 @@ __all__ = ['CLSEngine']
class
CLSServerExecutor
(
CLSExecutor
):
class
CLSServerExecutor
(
CLSExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'cls'
,
model_format
=
'static'
)
def
_init_from_path
(
def
_init_from_path
(
self
,
self
,
model_type
:
str
=
'panns_cnn14'
,
model_type
:
str
=
'panns_cnn14
_audioset
'
,
cfg_path
:
Optional
[
os
.
PathLike
]
=
None
,
cfg_path
:
Optional
[
os
.
PathLike
]
=
None
,
model_path
:
Optional
[
os
.
PathLike
]
=
None
,
model_path
:
Optional
[
os
.
PathLike
]
=
None
,
params_path
:
Optional
[
os
.
PathLike
]
=
None
,
params_path
:
Optional
[
os
.
PathLike
]
=
None
,
...
@@ -49,15 +50,16 @@ class CLSServerExecutor(CLSExecutor):
...
@@ -49,15 +50,16 @@ class CLSServerExecutor(CLSExecutor):
if
cfg_path
is
None
or
model_path
is
None
or
params_path
is
None
or
label_file
is
None
:
if
cfg_path
is
None
or
model_path
is
None
or
params_path
is
None
or
label_file
is
None
:
tag
=
model_type
+
'-'
+
'32k'
tag
=
model_type
+
'-'
+
'32k'
self
.
res_path
=
self
.
_get_pretrained_path
(
tag
)
self
.
task_resource
.
set_task_model
(
model_tag
=
tag
)
self
.
res_path
=
self
.
task_resource
.
res_dir
self
.
cfg_path
=
os
.
path
.
join
(
self
.
cfg_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'cfg_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'cfg_path'
])
self
.
model_path
=
os
.
path
.
join
(
self
.
model_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'model_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'model_path'
])
self
.
params_path
=
os
.
path
.
join
(
self
.
params_path
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'params_path'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'params_path'
])
self
.
label_file
=
os
.
path
.
join
(
self
.
label_file
=
os
.
path
.
join
(
self
.
res_path
,
self
.
pretrained_models
[
tag
]
[
'label_file'
])
self
.
res_path
,
self
.
task_resource
.
res_dict
[
'label_file'
])
else
:
else
:
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
cfg_path
=
os
.
path
.
abspath
(
cfg_path
)
self
.
model_path
=
os
.
path
.
abspath
(
model_path
)
self
.
model_path
=
os
.
path
.
abspath
(
model_path
)
...
...
paddlespeech/server/engine/cls/paddleinference/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
pretrained_models
=
{
"panns_cnn6-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz'
,
'md5'
:
'da087c31046d23281d8ec5188c1967da'
,
'cfg_path'
:
'panns.yaml'
,
'model_path'
:
'inference.pdmodel'
,
'params_path'
:
'inference.pdiparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
"panns_cnn10-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz'
,
'md5'
:
'5460cc6eafbfaf0f261cc75b90284ae1'
,
'cfg_path'
:
'panns.yaml'
,
'model_path'
:
'inference.pdmodel'
,
'params_path'
:
'inference.pdiparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
"panns_cnn14-32k"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz'
,
'md5'
:
'ccc80b194821274da79466862b2ab00f'
,
'cfg_path'
:
'panns.yaml'
,
'model_path'
:
'inference.pdmodel'
,
'params_path'
:
'inference.pdiparams'
,
'label_file'
:
'audioset_labels.txt'
,
},
}
paddlespeech/server/engine/tts/online/onnx/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# support online model
pretrained_models
=
{
# fastspeech2
"fastspeech2_csmsc_onnx-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip'
,
'md5'
:
'fd3ad38d83273ad51f0ea4f4abf3ab4e'
,
'ckpt'
:
[
'fastspeech2_csmsc.onnx'
],
'phones_dict'
:
'phone_id_map.txt'
,
'sample_rate'
:
24000
,
},
"fastspeech2_cnndecoder_csmsc_onnx-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip'
,
'md5'
:
'5f70e1a6bcd29d72d54e7931aa86f266'
,
'ckpt'
:
[
'fastspeech2_csmsc_am_encoder_infer.onnx'
,
'fastspeech2_csmsc_am_decoder.onnx'
,
'fastspeech2_csmsc_am_postnet.onnx'
,
],
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
'sample_rate'
:
24000
,
},
# mb_melgan
"mb_melgan_csmsc_onnx-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip'
,
'md5'
:
'5b83ec746e8414bc29032d954ffd07ec'
,
'ckpt'
:
'mb_melgan_csmsc.onnx'
,
'sample_rate'
:
24000
,
},
# hifigan
"hifigan_csmsc_onnx-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_onnx_0.2.0.zip'
,
'md5'
:
'1a7dc0385875889e46952e50c0994a6b'
,
'ckpt'
:
'hifigan_csmsc.onnx'
,
'sample_rate'
:
24000
,
},
}
paddlespeech/server/engine/tts/online/onnx/tts_engine.py
浏览文件 @
fa6e44e4
...
@@ -20,9 +20,9 @@ from typing import Optional
...
@@ -20,9 +20,9 @@ from typing import Optional
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.utils.audio_process
import
float2pcm
from
paddlespeech.server.utils.audio_process
import
float2pcm
from
paddlespeech.server.utils.onnx_infer
import
get_sess
from
paddlespeech.server.utils.onnx_infer
import
get_sess
...
@@ -43,7 +43,7 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -43,7 +43,7 @@ class TTSServerExecutor(TTSExecutor):
self
.
voc_pad
=
voc_pad
self
.
voc_pad
=
voc_pad
self
.
voc_upsample
=
voc_upsample
self
.
voc_upsample
=
voc_upsample
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'tts'
,
model_format
=
'onnx'
)
def
_init_from_path
(
def
_init_from_path
(
self
,
self
,
...
@@ -72,16 +72,21 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -72,16 +72,21 @@ class TTSServerExecutor(TTSExecutor):
return
return
# am
# am
am_tag
=
am
+
'-'
+
lang
am_tag
=
am
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
am_tag
,
model_type
=
0
,
# am
version
=
None
,
# default version
)
self
.
am_res_path
=
self
.
task_resource
.
res_dir
if
am
==
"fastspeech2_csmsc_onnx"
:
if
am
==
"fastspeech2_csmsc_onnx"
:
# get model info
# get model info
if
am_ckpt
is
None
or
phones_dict
is
None
:
if
am_ckpt
is
None
or
phones_dict
is
None
:
am_res_path
=
self
.
_get_pretrained_path
(
am_tag
)
self
.
am_res_path
=
am_res_path
self
.
am_ckpt
=
os
.
path
.
join
(
self
.
am_ckpt
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'ckpt'
][
0
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
][
0
])
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'phones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'phones_dict'
])
else
:
else
:
self
.
am_ckpt
=
os
.
path
.
abspath
(
am_ckpt
[
0
])
self
.
am_ckpt
=
os
.
path
.
abspath
(
am_ckpt
[
0
])
...
@@ -94,19 +99,19 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -94,19 +99,19 @@ class TTSServerExecutor(TTSExecutor):
elif
am
==
"fastspeech2_cnndecoder_csmsc_onnx"
:
elif
am
==
"fastspeech2_cnndecoder_csmsc_onnx"
:
if
am_ckpt
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
if
am_ckpt
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
am_res_path
=
self
.
_get_pretrained_path
(
am_tag
)
self
.
am_res_path
=
am_res_path
self
.
am_encoder_infer
=
os
.
path
.
join
(
self
.
am_encoder_infer
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'ckpt'
][
0
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
][
0
])
self
.
am_decoder
=
os
.
path
.
join
(
self
.
am_decoder
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'ckpt'
][
1
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
][
1
])
self
.
am_postnet
=
os
.
path
.
join
(
self
.
am_postnet
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'ckpt'
][
2
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
][
2
])
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'phones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'phones_dict'
])
self
.
am_stat
=
os
.
path
.
join
(
self
.
am_stat
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'speech_stats'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'speech_stats'
])
else
:
else
:
self
.
am_encoder_infer
=
os
.
path
.
abspath
(
am_ckpt
[
0
])
self
.
am_encoder_infer
=
os
.
path
.
abspath
(
am_ckpt
[
0
])
...
@@ -131,11 +136,15 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -131,11 +136,15 @@ class TTSServerExecutor(TTSExecutor):
# voc model info
# voc model info
voc_tag
=
voc
+
'-'
+
lang
voc_tag
=
voc
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
voc_tag
,
model_type
=
1
,
# vocoder
version
=
None
,
# default version
)
if
voc_ckpt
is
None
:
if
voc_ckpt
is
None
:
voc_res_path
=
self
.
_get_pretrained_path
(
voc_tag
)
self
.
voc_res_path
=
self
.
task_resource
.
voc_res_dir
self
.
voc_res_path
=
voc_res_path
self
.
voc_ckpt
=
os
.
path
.
join
(
self
.
voc_ckpt
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'ckpt'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'ckpt'
])
else
:
else
:
self
.
voc_ckpt
=
os
.
path
.
abspath
(
voc_ckpt
)
self
.
voc_ckpt
=
os
.
path
.
abspath
(
voc_ckpt
)
self
.
voc_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
voc_ckpt
))
self
.
voc_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
voc_ckpt
))
...
...
paddlespeech/server/engine/tts/online/python/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# support online model
pretrained_models
=
{
# fastspeech2
"fastspeech2_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip'
,
'md5'
:
'637d28a5e53aa60275612ba4393d5f22'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_76000.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
"fastspeech2_cnndecoder_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip'
,
'md5'
:
'6eb28e22ace73e0ebe7845f86478f89f'
,
'config'
:
'cnndecoder.yaml'
,
'ckpt'
:
'snapshot_iter_153000.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
},
# mb_melgan
"mb_melgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip'
,
'md5'
:
'ee5f0604e20091f0d495b6ec4618b90d'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_1000000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
# hifigan
"hifigan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip'
,
'md5'
:
'dd40a3d88dfcf64513fba2f0f961ada6'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_2500000.pdz'
,
'speech_stats'
:
'feats_stats.npy'
,
},
}
paddlespeech/server/engine/tts/online/python/tts_engine.py
浏览文件 @
fa6e44e4
...
@@ -22,9 +22,9 @@ import paddle
...
@@ -22,9 +22,9 @@ import paddle
import
yaml
import
yaml
from
yacs.config
import
CfgNode
from
yacs.config
import
CfgNode
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.utils.audio_process
import
float2pcm
from
paddlespeech.server.utils.audio_process
import
float2pcm
from
paddlespeech.server.utils.util
import
denorm
from
paddlespeech.server.utils.util
import
denorm
...
@@ -32,7 +32,6 @@ from paddlespeech.server.utils.util import get_chunks
...
@@ -32,7 +32,6 @@ from paddlespeech.server.utils.util import get_chunks
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.modules.normalizer
import
ZScore
from
paddlespeech.t2s.modules.normalizer
import
ZScore
from
paddlespeech.utils.dynamic_import
import
dynamic_import
__all__
=
[
'TTSEngine'
]
__all__
=
[
'TTSEngine'
]
...
@@ -44,7 +43,8 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -44,7 +43,8 @@ class TTSServerExecutor(TTSExecutor):
self
.
am_pad
=
am_pad
self
.
am_pad
=
am_pad
self
.
voc_block
=
voc_block
self
.
voc_block
=
voc_block
self
.
voc_pad
=
voc_pad
self
.
voc_pad
=
voc_pad
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'tts'
,
model_format
=
'static'
,
inference_mode
=
'online'
)
def
get_model_info
(
self
,
def
get_model_info
(
self
,
field
:
str
,
field
:
str
,
...
@@ -65,7 +65,7 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -65,7 +65,7 @@ class TTSServerExecutor(TTSExecutor):
[Tensor]: standard deviation
[Tensor]: standard deviation
"""
"""
model_class
=
dynamic_import
(
model_name
,
self
.
model_alias
)
model_class
=
self
.
task_resource
.
get_model_class
(
model_name
)
if
field
==
"am"
:
if
field
==
"am"
:
odim
=
self
.
am_config
.
n_mels
odim
=
self
.
am_config
.
n_mels
...
@@ -110,20 +110,24 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -110,20 +110,24 @@ class TTSServerExecutor(TTSExecutor):
return
return
# am model info
# am model info
am_tag
=
am
+
'-'
+
lang
am_tag
=
am
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
am_tag
,
model_type
=
0
,
# am
version
=
None
,
# default version
)
if
am_ckpt
is
None
or
am_config
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
if
am_ckpt
is
None
or
am_config
is
None
or
am_stat
is
None
or
phones_dict
is
None
:
am_res_path
=
self
.
_get_pretrained_path
(
am_tag
)
self
.
am_res_path
=
self
.
task_resource
.
res_dir
self
.
am_res_path
=
am_res_path
self
.
am_config
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_config
=
os
.
path
.
join
(
self
.
task_resource
.
res_dict
[
'config'
])
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'config'
])
self
.
am_ckpt
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_ckpt
=
os
.
path
.
join
(
am_res_path
,
self
.
task_resource
.
res_dict
[
'ckpt'
])
self
.
pretrained_models
[
am_tag
][
'ckpt'
])
self
.
am_stat
=
os
.
path
.
join
(
self
.
am_stat
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'speech_stats'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'speech_stats'
])
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'phones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'phones_dict'
])
print
(
"self.phones_dict:"
,
self
.
phones_dict
)
print
(
"self.phones_dict:"
,
self
.
phones_dict
)
logger
.
info
(
am_res_path
)
logger
.
info
(
self
.
am_res_path
)
logger
.
info
(
self
.
am_config
)
logger
.
info
(
self
.
am_config
)
logger
.
info
(
self
.
am_ckpt
)
logger
.
info
(
self
.
am_ckpt
)
else
:
else
:
...
@@ -139,16 +143,21 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -139,16 +143,21 @@ class TTSServerExecutor(TTSExecutor):
# voc model info
# voc model info
voc_tag
=
voc
+
'-'
+
lang
voc_tag
=
voc
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
voc_tag
,
model_type
=
1
,
# vocoder
version
=
None
,
# default version
)
if
voc_ckpt
is
None
or
voc_config
is
None
or
voc_stat
is
None
:
if
voc_ckpt
is
None
or
voc_config
is
None
or
voc_stat
is
None
:
voc_res_path
=
self
.
_get_pretrained_path
(
voc_tag
)
self
.
voc_res_path
=
self
.
task_resource
.
voc_res_dir
self
.
voc_res_path
=
voc_res_path
self
.
voc_config
=
os
.
path
.
join
(
self
.
voc_config
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'config'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'config'
])
self
.
voc_ckpt
=
os
.
path
.
join
(
self
.
voc_ckpt
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'ckpt'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'ckpt'
])
self
.
voc_stat
=
os
.
path
.
join
(
self
.
voc_stat
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
][
'speech_stats'
])
self
.
voc_res_path
,
logger
.
info
(
voc_res_path
)
self
.
task_resource
.
voc_res_dict
[
'speech_stats'
])
logger
.
info
(
self
.
voc_res_path
)
logger
.
info
(
self
.
voc_config
)
logger
.
info
(
self
.
voc_config
)
logger
.
info
(
self
.
voc_ckpt
)
logger
.
info
(
self
.
voc_ckpt
)
else
:
else
:
...
@@ -188,8 +197,8 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -188,8 +197,8 @@ class TTSServerExecutor(TTSExecutor):
am
,
am_mu
,
am_std
=
self
.
get_model_info
(
"am"
,
self
.
am_name
,
am
,
am_mu
,
am_std
=
self
.
get_model_info
(
"am"
,
self
.
am_name
,
self
.
am_ckpt
,
self
.
am_stat
)
self
.
am_ckpt
,
self
.
am_stat
)
am_normalizer
=
ZScore
(
am_mu
,
am_std
)
am_normalizer
=
ZScore
(
am_mu
,
am_std
)
am_inference_class
=
dynamic_import
(
self
.
am_name
+
'_inference'
,
am_inference_class
=
self
.
task_resource
.
get_model_class
(
self
.
model_alias
)
self
.
am_name
+
'_inference'
)
self
.
am_inference
=
am_inference_class
(
am_normalizer
,
am
)
self
.
am_inference
=
am_inference_class
(
am_normalizer
,
am
)
self
.
am_inference
.
eval
()
self
.
am_inference
.
eval
()
print
(
"acoustic model done!"
)
print
(
"acoustic model done!"
)
...
@@ -199,8 +208,8 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -199,8 +208,8 @@ class TTSServerExecutor(TTSExecutor):
voc
,
voc_mu
,
voc_std
=
self
.
get_model_info
(
"voc"
,
self
.
voc_name
,
voc
,
voc_mu
,
voc_std
=
self
.
get_model_info
(
"voc"
,
self
.
voc_name
,
self
.
voc_ckpt
,
self
.
voc_stat
)
self
.
voc_ckpt
,
self
.
voc_stat
)
voc_normalizer
=
ZScore
(
voc_mu
,
voc_std
)
voc_normalizer
=
ZScore
(
voc_mu
,
voc_std
)
voc_inference_class
=
dynamic_import
(
self
.
voc_name
+
'_inference'
,
voc_inference_class
=
self
.
task_resource
.
get_model_class
(
self
.
voc_name
+
self
.
model_alias
)
'_inference'
)
self
.
voc_inference
=
voc_inference_class
(
voc_normalizer
,
voc
)
self
.
voc_inference
=
voc_inference_class
(
voc_normalizer
,
voc
)
self
.
voc_inference
.
eval
()
self
.
voc_inference
.
eval
()
print
(
"voc done!"
)
print
(
"voc done!"
)
...
...
paddlespeech/server/engine/tts/paddleinference/pretrained_models.py
已删除
100644 → 0
浏览文件 @
775c4bef
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Static model applied on paddle inference
pretrained_models
=
{
# speedyspeech
"speedyspeech_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip'
,
'md5'
:
'f10cbdedf47dc7a9668d2264494e1823'
,
'model'
:
'speedyspeech_csmsc.pdmodel'
,
'params'
:
'speedyspeech_csmsc.pdiparams'
,
'phones_dict'
:
'phone_id_map.txt'
,
'tones_dict'
:
'tone_id_map.txt'
,
'sample_rate'
:
24000
,
},
# fastspeech2
"fastspeech2_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip'
,
'md5'
:
'9788cd9745e14c7a5d12d32670b2a5a7'
,
'model'
:
'fastspeech2_csmsc.pdmodel'
,
'params'
:
'fastspeech2_csmsc.pdiparams'
,
'phones_dict'
:
'phone_id_map.txt'
,
'sample_rate'
:
24000
,
},
# pwgan
"pwgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip'
,
'md5'
:
'e3504aed9c5a290be12d1347836d2742'
,
'model'
:
'pwgan_csmsc.pdmodel'
,
'params'
:
'pwgan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
# mb_melgan
"mb_melgan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_static_0.1.1.zip'
,
'md5'
:
'ac6eee94ba483421d750433f4c3b8d36'
,
'model'
:
'mb_melgan_csmsc.pdmodel'
,
'params'
:
'mb_melgan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
# hifigan
"hifigan_csmsc-zh"
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip'
,
'md5'
:
'7edd8c436b3a5546b3a7cb8cff9d5a0c'
,
'model'
:
'hifigan_csmsc.pdmodel'
,
'params'
:
'hifigan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
}
paddlespeech/server/engine/tts/paddleinference/tts_engine.py
浏览文件 @
fa6e44e4
...
@@ -23,9 +23,9 @@ import paddle
...
@@ -23,9 +23,9 @@ import paddle
import
soundfile
as
sf
import
soundfile
as
sf
from
scipy.io
import
wavfile
from
scipy.io
import
wavfile
from
.pretrained_models
import
pretrained_models
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.cli.tts.infer
import
TTSExecutor
from
paddlespeech.resource
import
CommonTaskResource
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.engine.base_engine
import
BaseEngine
from
paddlespeech.server.utils.audio_process
import
change_speed
from
paddlespeech.server.utils.audio_process
import
change_speed
from
paddlespeech.server.utils.errors
import
ErrorCode
from
paddlespeech.server.utils.errors
import
ErrorCode
...
@@ -41,7 +41,8 @@ __all__ = ['TTSEngine']
...
@@ -41,7 +41,8 @@ __all__ = ['TTSEngine']
class
TTSServerExecutor
(
TTSExecutor
):
class
TTSServerExecutor
(
TTSExecutor
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
self
.
pretrained_models
=
pretrained_models
self
.
task_resource
=
CommonTaskResource
(
task
=
'tts'
,
model_format
=
'static'
)
def
_init_from_path
(
def
_init_from_path
(
self
,
self
,
...
@@ -67,19 +68,23 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -67,19 +68,23 @@ class TTSServerExecutor(TTSExecutor):
return
return
# am
# am
am_tag
=
am
+
'-'
+
lang
am_tag
=
am
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
am_tag
,
model_type
=
0
,
# am
version
=
None
,
# default version
)
if
am_model
is
None
or
am_params
is
None
or
phones_dict
is
None
:
if
am_model
is
None
or
am_params
is
None
or
phones_dict
is
None
:
am_res_path
=
self
.
_get_pretrained_path
(
am_tag
)
self
.
am_res_path
=
self
.
task_resource
.
res_dir
self
.
am_res_path
=
am_res_path
self
.
am_model
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_model
=
os
.
path
.
join
(
self
.
task_resource
.
res_dict
[
'model'
])
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'model'
])
self
.
am_params
=
os
.
path
.
join
(
self
.
am_res_path
,
self
.
am_params
=
os
.
path
.
join
(
self
.
task_resource
.
res_dict
[
'params'
])
am_res_path
,
self
.
pretrained_models
[
am_tag
][
'params'
])
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'phones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'phones_dict'
])
self
.
am_sample_rate
=
self
.
pretrained_models
[
am_tag
]
[
'sample_rate'
]
self
.
am_sample_rate
=
self
.
task_resource
.
res_dict
[
'sample_rate'
]
logger
.
info
(
am_res_path
)
logger
.
info
(
self
.
am_res_path
)
logger
.
info
(
self
.
am_model
)
logger
.
info
(
self
.
am_model
)
logger
.
info
(
self
.
am_params
)
logger
.
info
(
self
.
am_params
)
else
:
else
:
...
@@ -92,32 +97,36 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -92,32 +97,36 @@ class TTSServerExecutor(TTSExecutor):
# for speedyspeech
# for speedyspeech
self
.
tones_dict
=
None
self
.
tones_dict
=
None
if
'tones_dict'
in
self
.
pretrained_models
[
am_tag
]
:
if
'tones_dict'
in
self
.
task_resource
.
res_dict
:
self
.
tones_dict
=
os
.
path
.
join
(
self
.
tones_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'tones_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'tones_dict'
])
if
tones_dict
:
if
tones_dict
:
self
.
tones_dict
=
tones_dict
self
.
tones_dict
=
tones_dict
# for multi speaker fastspeech2
# for multi speaker fastspeech2
self
.
speaker_dict
=
None
self
.
speaker_dict
=
None
if
'speaker_dict'
in
self
.
pretrained_models
[
am_tag
]
:
if
'speaker_dict'
in
self
.
task_resource
.
res_dict
:
self
.
speaker_dict
=
os
.
path
.
join
(
self
.
speaker_dict
=
os
.
path
.
join
(
am_res_path
,
self
.
pretrained_models
[
am_tag
]
[
'speaker_dict'
])
self
.
am_res_path
,
self
.
task_resource
.
res_dict
[
'speaker_dict'
])
if
speaker_dict
:
if
speaker_dict
:
self
.
speaker_dict
=
speaker_dict
self
.
speaker_dict
=
speaker_dict
# voc
# voc
voc_tag
=
voc
+
'-'
+
lang
voc_tag
=
voc
+
'-'
+
lang
self
.
task_resource
.
set_task_model
(
model_tag
=
voc_tag
,
model_type
=
1
,
# vocoder
version
=
None
,
# default version
)
if
voc_model
is
None
or
voc_params
is
None
:
if
voc_model
is
None
or
voc_params
is
None
:
voc_res_path
=
self
.
_get_pretrained_path
(
voc_tag
)
self
.
voc_res_path
=
self
.
task_resource
.
voc_res_dir
self
.
voc_res_path
=
voc_res_path
self
.
voc_model
=
os
.
path
.
join
(
self
.
voc_model
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'model'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'model'
])
self
.
voc_params
=
os
.
path
.
join
(
self
.
voc_params
=
os
.
path
.
join
(
voc_res_path
,
self
.
pretrained_models
[
voc_tag
]
[
'params'
])
self
.
voc_res_path
,
self
.
task_resource
.
voc_res_dict
[
'params'
])
self
.
voc_sample_rate
=
self
.
pretrained_models
[
voc_tag
]
[
self
.
voc_sample_rate
=
self
.
task_resource
.
voc_res_dict
[
'sample_rate'
]
'sample_rate'
]
logger
.
info
(
voc_res_path
)
logger
.
info
(
self
.
voc_res_path
)
logger
.
info
(
self
.
voc_model
)
logger
.
info
(
self
.
voc_model
)
logger
.
info
(
self
.
voc_params
)
logger
.
info
(
self
.
voc_params
)
else
:
else
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录