Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
2a0e0e9d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
接近 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2a0e0e9d
编写于
6月 25, 2021
作者:
H
Hui Zhang
提交者:
GitHub
6月 25, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #684 from PaddlePaddle/spec_aug
revise from_pretrained function
上级
bb4fd214
3965dbc2
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
51 additions
and
32 deletions
+51
-32
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
+16
-9
deepspeech/exps/deepspeech2/bin/deploy/server.py
deepspeech/exps/deepspeech2/bin/deploy/server.py
+20
-9
deepspeech/exps/deepspeech2/bin/tune.py
deepspeech/exps/deepspeech2/bin/tune.py
+1
-1
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+1
-1
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+1
-1
deepspeech/io/collator.py
deepspeech/io/collator.py
+2
-1
deepspeech/models/deepspeech2.py
deepspeech/models/deepspeech2.py
+4
-4
deepspeech/models/u2.py
deepspeech/models/u2.py
+4
-4
deepspeech/utils/socket_server.py
deepspeech/utils/socket_server.py
+2
-2
未找到文件。
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
浏览文件 @
2a0e0e9d
...
...
@@ -18,8 +18,10 @@ import numpy as np
import
paddle
from
paddle.inference
import
Config
from
paddle.inference
import
create_predictor
from
paddle.io
import
DataLoader
from
deepspeech.exps.deepspeech2.config
import
get_cfg_defaults
from
deepspeech.io.collator
import
SpeechCollator
from
deepspeech.io.dataset
import
ManifestDataset
from
deepspeech.models.deepspeech2
import
DeepSpeech2Model
from
deepspeech.training.cli
import
default_argument_parser
...
...
@@ -78,26 +80,31 @@ def inference(config, args):
def
start_server
(
config
,
args
):
"""Start the ASR server"""
config
.
defrost
()
config
.
data
.
manfiest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
""
config
.
data
.
keep_transcription_text
=
True
config
.
data
.
manifest
=
config
.
data
.
test_manifest
dataset
=
ManifestDataset
.
from_config
(
config
)
model
=
DeepSpeech2Model
.
from_pretrained
(
dataset
,
config
,
config
.
collator
.
augmentation_config
=
""
config
.
collator
.
keep_transcription_text
=
True
config
.
collator
.
batch_size
=
1
config
.
collator
.
num_workers
=
0
collate_fn
=
SpeechCollator
.
from_config
(
config
)
test_loader
=
DataLoader
(
dataset
,
collate_fn
=
collate_fn
,
num_workers
=
0
)
model
=
DeepSpeech2Model
.
from_pretrained
(
test_loader
,
config
,
args
.
checkpoint_path
)
model
.
eval
()
# prepare ASR inference handler
def
file_to_transcript
(
filename
):
feature
=
dataset
.
process_utterance
(
filename
,
""
)
audio
=
np
.
array
([
feature
[
0
]]).
astype
(
'float32'
)
#[1, D, T]
audio_len
=
feature
[
0
].
shape
[
1
]
feature
=
test_loader
.
collate_fn
.
process_utterance
(
filename
,
""
)
audio
=
np
.
array
([
feature
[
0
]]).
astype
(
'float32'
)
#[1, T, D]
audio_len
=
feature
[
0
].
shape
[
0
]
audio_len
=
np
.
array
([
audio_len
]).
astype
(
'int64'
)
# [1]
result_transcript
=
model
.
decode
(
paddle
.
to_tensor
(
audio
),
paddle
.
to_tensor
(
audio_len
),
vocab_list
=
dataset
.
vocab_list
,
vocab_list
=
test_loader
.
collate_fn
.
vocab_list
,
decoding_method
=
config
.
decoding
.
decoding_method
,
lang_model_path
=
config
.
decoding
.
lang_model_path
,
beam_alpha
=
config
.
decoding
.
alpha
,
...
...
@@ -138,7 +145,7 @@ if __name__ == "__main__":
add_arg
(
'host_ip'
,
str
,
'localhost'
,
"Server's IP address."
)
add_arg
(
'host_port'
,
int
,
8086
,
"Server's IP port."
)
add_arg
(
'host_port'
,
int
,
8089
,
"Server's IP port."
)
add_arg
(
'speech_save_dir'
,
str
,
'demo_cache'
,
"Directory to save demo audios."
)
...
...
deepspeech/exps/deepspeech2/bin/deploy/server.py
浏览文件 @
2a0e0e9d
...
...
@@ -16,8 +16,10 @@ import functools
import
numpy
as
np
import
paddle
from
paddle.io
import
DataLoader
from
deepspeech.exps.deepspeech2.config
import
get_cfg_defaults
from
deepspeech.io.collator
import
SpeechCollator
from
deepspeech.io.dataset
import
ManifestDataset
from
deepspeech.models.deepspeech2
import
DeepSpeech2Model
from
deepspeech.training.cli
import
default_argument_parser
...
...
@@ -31,26 +33,35 @@ from deepspeech.utils.utility import print_arguments
def
start_server
(
config
,
args
):
"""Start the ASR server"""
config
.
defrost
()
config
.
data
.
manfiest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
""
config
.
data
.
keep_transcription_text
=
True
config
.
data
.
manifest
=
config
.
data
.
test_manifest
dataset
=
ManifestDataset
.
from_config
(
config
)
model
=
DeepSpeech2Model
.
from_pretrained
(
dataset
,
config
,
config
.
collator
.
augmentation_config
=
""
config
.
collator
.
keep_transcription_text
=
True
config
.
collator
.
batch_size
=
1
config
.
collator
.
num_workers
=
0
collate_fn
=
SpeechCollator
.
from_config
(
config
)
test_loader
=
DataLoader
(
dataset
,
collate_fn
=
collate_fn
,
num_workers
=
0
)
model
=
DeepSpeech2Model
.
from_pretrained
(
test_loader
,
config
,
args
.
checkpoint_path
)
model
.
eval
()
# prepare ASR inference handler
def
file_to_transcript
(
filename
):
feature
=
dataset
.
process_utterance
(
filename
,
""
)
audio
=
np
.
array
([
feature
[
0
]]).
astype
(
'float32'
)
#[1, D, T]
audio_len
=
feature
[
0
].
shape
[
1
]
feature
=
test_loader
.
collate_fn
.
process_utterance
(
filename
,
""
)
audio
=
np
.
array
([
feature
[
0
]]).
astype
(
'float32'
)
#[1, T, D]
# audio = audio.swapaxes(1,2)
print
(
'---file_to_transcript feature----'
)
print
(
audio
.
shape
)
audio_len
=
feature
[
0
].
shape
[
0
]
print
(
audio_len
)
audio_len
=
np
.
array
([
audio_len
]).
astype
(
'int64'
)
# [1]
result_transcript
=
model
.
decode
(
paddle
.
to_tensor
(
audio
),
paddle
.
to_tensor
(
audio_len
),
vocab_list
=
dataset
.
vocab_list
,
vocab_list
=
test_loader
.
collate_fn
.
vocab_list
,
decoding_method
=
config
.
decoding
.
decoding_method
,
lang_model_path
=
config
.
decoding
.
lang_model_path
,
beam_alpha
=
config
.
decoding
.
alpha
,
...
...
@@ -91,7 +102,7 @@ if __name__ == "__main__":
add_arg
(
'host_ip'
,
str
,
'localhost'
,
"Server's IP address."
)
add_arg
(
'host_port'
,
int
,
8086
,
"Server's IP port."
)
add_arg
(
'host_port'
,
int
,
8088
,
"Server's IP port."
)
add_arg
(
'speech_save_dir'
,
str
,
'demo_cache'
,
"Directory to save demo audios."
)
...
...
deepspeech/exps/deepspeech2/bin/tune.py
浏览文件 @
2a0e0e9d
...
...
@@ -47,7 +47,7 @@ def tune(config, args):
drop_last
=
False
,
collate_fn
=
SpeechCollator
(
keep_transcription_text
=
True
))
model
=
DeepSpeech2Model
.
from_pretrained
(
dev_dataset
,
config
,
model
=
DeepSpeech2Model
.
from_pretrained
(
valid_loader
,
config
,
args
.
checkpoint_path
)
model
.
eval
()
...
...
deepspeech/exps/deepspeech2/model.py
浏览文件 @
2a0e0e9d
...
...
@@ -318,7 +318,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
def
export
(
self
):
infer_model
=
DeepSpeech2InferModel
.
from_pretrained
(
self
.
test_loader
.
dataset
,
self
.
config
,
self
.
args
.
checkpoint_path
)
self
.
test_loader
,
self
.
config
,
self
.
args
.
checkpoint_path
)
infer_model
.
eval
()
feat_dim
=
self
.
test_loader
.
collate_fn
.
feature_size
static_model
=
paddle
.
jit
.
to_static
(
...
...
deepspeech/exps/u2/model.py
浏览文件 @
2a0e0e9d
...
...
@@ -506,7 +506,7 @@ class U2Tester(U2Trainer):
List[paddle.static.InputSpec]: input spec.
"""
from
deepspeech.models.u2
import
U2InferModel
infer_model
=
U2InferModel
.
from_pretrained
(
self
.
test_loader
.
dataset
,
infer_model
=
U2InferModel
.
from_pretrained
(
self
.
test_loader
,
self
.
config
.
model
.
clone
(),
self
.
args
.
checkpoint_path
)
feat_dim
=
self
.
test_loader
.
collate_fn
.
feature_size
...
...
deepspeech/io/collator.py
浏览文件 @
2a0e0e9d
...
...
@@ -242,6 +242,7 @@ class SpeechCollator():
# specgram augment
specgram
=
self
.
_augmentation_pipeline
.
transform_feature
(
specgram
)
specgram
=
specgram
.
transpose
([
1
,
0
])
return
specgram
,
transcript_part
def
__call__
(
self
,
batch
):
...
...
@@ -269,7 +270,7 @@ class SpeechCollator():
#utt
utts
.
append
(
utt
)
# audio
audios
.
append
(
audio
.
T
)
# [T, D]
audios
.
append
(
audio
)
# [T, D]
audio_lens
.
append
(
audio
.
shape
[
1
])
# text
# for training, text is token ids
...
...
deepspeech/models/deepspeech2.py
浏览文件 @
2a0e0e9d
...
...
@@ -198,11 +198,11 @@ class DeepSpeech2Model(nn.Layer):
cutoff_top_n
,
num_processes
)
@
classmethod
def
from_pretrained
(
cls
,
dataset
,
config
,
checkpoint_path
):
def
from_pretrained
(
cls
,
dataloader
,
config
,
checkpoint_path
):
"""Build a DeepSpeech2Model model from a pretrained model.
Parameters
----------
dataset: paddle.io.Dataset
dataloader: paddle.io.DataLoader
config: yacs.config.CfgNode
model configs
...
...
@@ -215,8 +215,8 @@ class DeepSpeech2Model(nn.Layer):
DeepSpeech2Model
The model built from pretrained result.
"""
model
=
cls
(
feat_size
=
dataset
.
feature_size
,
dict_size
=
dataset
.
vocab_size
,
model
=
cls
(
feat_size
=
dataloader
.
collate_fn
.
feature_size
,
dict_size
=
dataloader
.
collate_fn
.
vocab_size
,
num_conv_layers
=
config
.
model
.
num_conv_layers
,
num_rnn_layers
=
config
.
model
.
num_rnn_layers
,
rnn_size
=
config
.
model
.
rnn_layer_size
,
...
...
deepspeech/models/u2.py
浏览文件 @
2a0e0e9d
...
...
@@ -876,11 +876,11 @@ class U2Model(U2BaseModel):
return
model
@
classmethod
def
from_pretrained
(
cls
,
dataset
,
config
,
checkpoint_path
):
def
from_pretrained
(
cls
,
dataloader
,
config
,
checkpoint_path
):
"""Build a DeepSpeech2Model model from a pretrained model.
Args:
dataset (paddle.io.Dataset): not used.
dataloader (paddle.io.DataLoader): not used.
config (yacs.config.CfgNode): model configs
checkpoint_path (Path or str): the path of pretrained model checkpoint, without extension name
...
...
@@ -888,8 +888,8 @@ class U2Model(U2BaseModel):
DeepSpeech2Model: The model built from pretrained result.
"""
config
.
defrost
()
config
.
input_dim
=
dataset
.
feature_size
config
.
output_dim
=
dataset
.
vocab_size
config
.
input_dim
=
dataloader
.
collate_fn
.
feature_size
config
.
output_dim
=
dataloader
.
collate_fn
.
vocab_size
config
.
freeze
()
model
=
cls
.
from_config
(
config
)
...
...
deepspeech/utils/socket_server.py
浏览文件 @
2a0e0e9d
...
...
@@ -48,9 +48,9 @@ def warm_up_test(audio_process_handler,
rng
=
random
.
Random
(
random_seed
)
samples
=
rng
.
sample
(
manifest
,
num_test_cases
)
for
idx
,
sample
in
enumerate
(
samples
):
print
(
"Warm-up Test Case %d: %s"
,
idx
,
sample
[
'audio_filepath'
]
)
print
(
"Warm-up Test Case %d: %s"
%
(
idx
,
sample
[
'feat'
])
)
start_time
=
time
.
time
()
transcript
=
audio_process_handler
(
sample
[
'audio_filepath'
])
transcript
=
audio_process_handler
(
sample
[
'feat'
])
finish_time
=
time
.
time
()
print
(
"Response Time: %f, Transcript: %s"
%
(
finish_time
-
start_time
,
transcript
))
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录