PaddlePaddle / DeepSpeech

Commit f8d52e59 (unverified), authored Jun 28, 2021 by Hui Zhang and committed via GitHub on Jun 28, 2021.

Merge branch 'develop' into rsl

Parents: 03e69525, 718bd307
Showing 14 changed files with 71 additions and 47 deletions (+71, -47):
- deepspeech/exps/deepspeech2/bin/deploy/runtime.py (+16, -9)
- deepspeech/exps/deepspeech2/bin/deploy/server.py (+20, -9)
- deepspeech/exps/deepspeech2/bin/tune.py (+1, -1)
- deepspeech/exps/deepspeech2/model.py (+1, -1)
- deepspeech/exps/u2/model.py (+1, -1)
- deepspeech/io/collator.py (+2, -1)
- deepspeech/models/deepspeech2.py (+4, -4)
- deepspeech/models/u2.py (+4, -4)
- deepspeech/utils/socket_server.py (+2, -2)
- examples/aishell/s0/README.md (+7, -7)
- examples/aishell/s0/conf/deepspeech2.yaml (+1, -2)
- examples/aishell/s0/run.sh (+2, -2)
- examples/aishell/s1/README.md (+1, -0)
- examples/librispeech/s0/conf/deepspeech2.yaml (+9, -4)
deepspeech/exps/deepspeech2/bin/deploy/runtime.py

```diff
@@ -18,8 +18,10 @@ import numpy as np
 import paddle
 from paddle.inference import Config
 from paddle.inference import create_predictor
+from paddle.io import DataLoader
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
+from deepspeech.io.collator import SpeechCollator
 from deepspeech.io.dataset import ManifestDataset
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
 from deepspeech.training.cli import default_argument_parser
@@ -78,26 +80,31 @@ def inference(config, args):
 def start_server(config, args):
     """Start the ASR server"""
     config.defrost()
-    config.data.manfiest = config.data.test_manifest
-    config.data.augmentation_config = ""
-    config.data.keep_transcription_text = True
+    config.data.manifest = config.data.test_manifest
     dataset = ManifestDataset.from_config(config)
-    model = DeepSpeech2Model.from_pretrained(dataset, config,
+
+    config.collator.augmentation_config = ""
+    config.collator.keep_transcription_text = True
+    config.collator.batch_size = 1
+    config.collator.num_workers = 0
+    collate_fn = SpeechCollator.from_config(config)
+    test_loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=0)
+
+    model = DeepSpeech2Model.from_pretrained(test_loader, config,
                                              args.checkpoint_path)
     model.eval()
 
     # prepare ASR inference handler
     def file_to_transcript(filename):
-        feature = dataset.process_utterance(filename, "")
-        audio = np.array([feature[0]]).astype('float32')  #[1, D, T]
-        audio_len = feature[0].shape[1]
+        feature = test_loader.collate_fn.process_utterance(filename, "")
+        audio = np.array([feature[0]]).astype('float32')  #[1, T, D]
+        audio_len = feature[0].shape[0]
         audio_len = np.array([audio_len]).astype('int64')  # [1]
 
         result_transcript = model.decode(
             paddle.to_tensor(audio),
             paddle.to_tensor(audio_len),
-            vocab_list=dataset.vocab_list,
+            vocab_list=test_loader.collate_fn.vocab_list,
             decoding_method=config.decoding.decoding_method,
             lang_model_path=config.decoding.lang_model_path,
             beam_alpha=config.decoding.alpha,
@@ -138,7 +145,7 @@ if __name__ == "__main__":
     add_arg('host_ip', str,
             'localhost',
             "Server's IP address.")
-    add_arg('host_port', int, 8086, "Server's IP port.")
+    add_arg('host_port', int, 8089, "Server's IP port.")
     add_arg('speech_save_dir', str,
             'demo_cache',
             "Directory to save demo audios.")
```
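The substance of this change is that model construction now goes through a DataLoader whose collate_fn owns the feature pipeline. Below is a minimal sketch of the new serving-side wiring; the config path and checkpoint prefix are hypothetical placeholders, not values from this commit:

```python
from paddle.io import DataLoader

from deepspeech.exps.deepspeech2.config import get_cfg_defaults
from deepspeech.io.collator import SpeechCollator
from deepspeech.io.dataset import ManifestDataset
from deepspeech.models.deepspeech2 import DeepSpeech2Model

config = get_cfg_defaults()
config.defrost()
config.merge_from_file('conf/deepspeech2.yaml')  # hypothetical config path
config.data.manifest = config.data.test_manifest
config.collator.augmentation_config = ""       # no augmentation when serving
config.collator.keep_transcription_text = True
config.collator.batch_size = 1                 # one utterance per request
config.collator.num_workers = 0

dataset = ManifestDataset.from_config(config)
collate_fn = SpeechCollator.from_config(config)
test_loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=0)

# from_pretrained now takes the loader and reads feature/vocab sizes
# off test_loader.collate_fn instead of the dataset.
model = DeepSpeech2Model.from_pretrained(test_loader, config,
                                         'checkpoints/avg_1')  # hypothetical prefix
model.eval()
```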
deepspeech/exps/deepspeech2/bin/deploy/server.py

```diff
@@ -16,8 +16,10 @@ import functools
 import numpy as np
 import paddle
+from paddle.io import DataLoader
 
 from deepspeech.exps.deepspeech2.config import get_cfg_defaults
+from deepspeech.io.collator import SpeechCollator
 from deepspeech.io.dataset import ManifestDataset
 from deepspeech.models.deepspeech2 import DeepSpeech2Model
 from deepspeech.training.cli import default_argument_parser
@@ -31,26 +33,35 @@ from deepspeech.utils.utility import print_arguments
 def start_server(config, args):
     """Start the ASR server"""
     config.defrost()
-    config.data.manfiest = config.data.test_manifest
-    config.data.augmentation_config = ""
-    config.data.keep_transcription_text = True
+    config.data.manifest = config.data.test_manifest
     dataset = ManifestDataset.from_config(config)
-    model = DeepSpeech2Model.from_pretrained(dataset, config,
+
+    config.collator.augmentation_config = ""
+    config.collator.keep_transcription_text = True
+    config.collator.batch_size = 1
+    config.collator.num_workers = 0
+    collate_fn = SpeechCollator.from_config(config)
+    test_loader = DataLoader(dataset, collate_fn=collate_fn, num_workers=0)
+
+    model = DeepSpeech2Model.from_pretrained(test_loader, config,
                                              args.checkpoint_path)
     model.eval()
 
     # prepare ASR inference handler
     def file_to_transcript(filename):
-        feature = dataset.process_utterance(filename, "")
-        audio = np.array([feature[0]]).astype('float32')  #[1, D, T]
-        audio_len = feature[0].shape[1]
+        feature = test_loader.collate_fn.process_utterance(filename, "")
+        audio = np.array([feature[0]]).astype('float32')  #[1, T, D]
+        # audio = audio.swapaxes(1,2)
+        print('---file_to_transcript feature----')
+        print(audio.shape)
+        audio_len = feature[0].shape[0]
+        print(audio_len)
         audio_len = np.array([audio_len]).astype('int64')  # [1]
 
         result_transcript = model.decode(
             paddle.to_tensor(audio),
             paddle.to_tensor(audio_len),
-            vocab_list=dataset.vocab_list,
+            vocab_list=test_loader.collate_fn.vocab_list,
             decoding_method=config.decoding.decoding_method,
             lang_model_path=config.decoding.lang_model_path,
             beam_alpha=config.decoding.alpha,
@@ -91,7 +102,7 @@ if __name__ == "__main__":
     add_arg('host_ip', str,
             'localhost',
             "Server's IP address.")
-    add_arg('host_port', int, 8086, "Server's IP port.")
+    add_arg('host_port', int, 8088, "Server's IP port.")
     add_arg('speech_save_dir', str,
             'demo_cache',
             "Directory to save demo audios.")
```
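Both deploy scripts also track the collator's new feature layout: process_utterance now hands back a time-major [T, D] spectrogram (see deepspeech/io/collator.py below), so the frame count is read from axis 0 rather than axis 1. A toy numpy check of the shape contract the handler now assumes:

```python
import numpy as np

# Stand-in for what test_loader.collate_fn.process_utterance(...) returns:
# a time-major spectrogram, e.g. 200 frames of a 161-bin linear spectrogram.
feat = np.zeros((200, 161), dtype='float32')           # [T, D]

audio = np.array([feat]).astype('float32')             # [1, T, D], batch of one
audio_len = np.array([feat.shape[0]]).astype('int64')  # [1], frames are axis 0

assert audio.shape == (1, 200, 161)
assert audio_len[0] == 200
```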
deepspeech/exps/deepspeech2/bin/tune.py

```diff
@@ -47,7 +47,7 @@ def tune(config, args):
         drop_last=False,
         collate_fn=SpeechCollator(keep_transcription_text=True))
-    model = DeepSpeech2Model.from_pretrained(dev_dataset, config,
+    model = DeepSpeech2Model.from_pretrained(valid_loader, config,
                                              args.checkpoint_path)
     model.eval()
```
deepspeech/exps/deepspeech2/model.py

```diff
@@ -318,7 +318,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     def export(self):
         infer_model = DeepSpeech2InferModel.from_pretrained(
-            self.test_loader.dataset, self.config, self.args.checkpoint_path)
+            self.test_loader, self.config, self.args.checkpoint_path)
         infer_model.eval()
         feat_dim = self.test_loader.collate_fn.feature_size
         static_model = paddle.jit.to_static(
```
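Export follows the same rule: the inference model is built from the loader, and the feature dimension comes from its collator. The sketch below shows the paddle.jit.to_static pattern around the changed line; the stand-in layer and the input-spec shapes are assumptions for illustration, not this repo's exact spec (the diff is truncated before it):

```python
import paddle
from paddle.static import InputSpec


class TinyInferModel(paddle.nn.Layer):
    """Stand-in for DeepSpeech2InferModel, only to make the sketch runnable."""

    def __init__(self, feat_dim):
        super().__init__()
        self.proj = paddle.nn.Linear(feat_dim, 8)

    def forward(self, audio, audio_len):
        return self.proj(audio), audio_len


feat_dim = 161  # stands in for self.test_loader.collate_fn.feature_size
infer_model = TinyInferModel(feat_dim)
infer_model.eval()

static_model = paddle.jit.to_static(
    infer_model,
    input_spec=[
        InputSpec(shape=[None, None, feat_dim], dtype='float32'),  # audio [B, T, D]
        InputSpec(shape=[None], dtype='int64'),                    # audio_len [B]
    ])
paddle.jit.save(static_model, 'checkpoints/avg_1.jit')  # hypothetical export prefix
```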
deepspeech/exps/u2/model.py

```diff
@@ -506,7 +506,7 @@ class U2Tester(U2Trainer):
             List[paddle.static.InputSpec]: input spec.
         """
         from deepspeech.models.u2 import U2InferModel
-        infer_model = U2InferModel.from_pretrained(self.test_loader.dataset,
+        infer_model = U2InferModel.from_pretrained(self.test_loader,
                                                    self.config.model.clone(),
                                                    self.args.checkpoint_path)
         feat_dim = self.test_loader.collate_fn.feature_size
```
deepspeech/io/collator.py

```diff
@@ -242,6 +242,7 @@ class SpeechCollator():
         # specgram augment
         specgram = self._augmentation_pipeline.transform_feature(specgram)
+        specgram = specgram.transpose([1, 0])
         return specgram, transcript_part
 
     def __call__(self, batch):
@@ -269,7 +270,7 @@ class SpeechCollator():
             #utt
             utts.append(utt)
             # audio
-            audios.append(audio.T)  # [T, D]
+            audios.append(audio)  # [T, D]
             audio_lens.append(audio.shape[1])
             # text
             # for training, text is token ids
```
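The single functional line added here transposes each utterance's spectrogram from [D, T] (feature bins by frames) to [T, D] before it is returned, which is why __call__ can now append audio directly instead of audio.T. A toy check of the axis swap:

```python
import numpy as np

# The augmentation pipeline works on [D, T] (feature bins x frames);
# the new transpose hands [T, D] (frames x feature bins) to the batcher.
specgram = np.zeros((161, 200), dtype='float32')  # [D, T]
specgram = specgram.transpose([1, 0])             # now [T, D]
assert specgram.shape == (200, 161)
```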
deepspeech/models/deepspeech2.py

```diff
@@ -198,11 +198,11 @@ class DeepSpeech2Model(nn.Layer):
             cutoff_top_n, num_processes)
 
     @classmethod
-    def from_pretrained(cls, dataset, config, checkpoint_path):
+    def from_pretrained(cls, dataloader, config, checkpoint_path):
         """Build a DeepSpeech2Model model from a pretrained model.
 
         Parameters
         ----------
-        dataset: paddle.io.Dataset
+        dataloader: paddle.io.DataLoader
         config: yacs.config.CfgNode
             model configs
@@ -215,8 +215,8 @@ class DeepSpeech2Model(nn.Layer):
         DeepSpeech2Model
             The model built from pretrained result.
         """
-        model = cls(feat_size=dataset.feature_size,
-                    dict_size=dataset.vocab_size,
+        model = cls(feat_size=dataloader.collate_fn.feature_size,
+                    dict_size=dataloader.collate_fn.vocab_size,
                     num_conv_layers=config.model.num_conv_layers,
                     num_rnn_layers=config.model.num_rnn_layers,
                     rnn_size=config.model.rnn_layer_size,
```
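With this signature, from_pretrained touches nothing on the loader except collate_fn.feature_size and collate_fn.vocab_size (plus the config and checkpoint), so any object exposing those two attributes through a collate_fn satisfies the new contract. A duck-typing sketch, all names hypothetical:

```python
# Hypothetical stand-ins showing the only attributes from_pretrained
# reads off its new dataloader argument.
class FakeCollator:
    feature_size = 161   # becomes feat_size of the model
    vocab_size = 4233    # becomes dict_size of the model


class FakeLoader:
    collate_fn = FakeCollator()


loader = FakeLoader()
# DeepSpeech2Model.from_pretrained(loader, config, checkpoint_path) builds:
#   cls(feat_size=loader.collate_fn.feature_size,
#       dict_size=loader.collate_fn.vocab_size, ...)
print(loader.collate_fn.feature_size, loader.collate_fn.vocab_size)
```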
deepspeech/models/u2.py

```diff
@@ -876,11 +876,11 @@ class U2Model(U2BaseModel):
         return model
 
     @classmethod
-    def from_pretrained(cls, dataset, config, checkpoint_path):
+    def from_pretrained(cls, dataloader, config, checkpoint_path):
         """Build a DeepSpeech2Model model from a pretrained model.
 
         Args:
-            dataset (paddle.io.Dataset): not used.
+            dataloader (paddle.io.DataLoader): not used.
             config (yacs.config.CfgNode): model configs
             checkpoint_path (Path or str): the path of pretrained model checkpoint, without extension name
@@ -888,8 +888,8 @@ class U2Model(U2BaseModel):
             DeepSpeech2Model: The model built from pretrained result.
         """
         config.defrost()
-        config.input_dim = dataset.feature_size
-        config.output_dim = dataset.vocab_size
+        config.input_dim = dataloader.collate_fn.feature_size
+        config.output_dim = dataloader.collate_fn.vocab_size
         config.freeze()
         model = cls.from_config(config)
```
deepspeech/utils/socket_server.py

```diff
@@ -48,9 +48,9 @@ def warm_up_test(audio_process_handler,
     rng = random.Random(random_seed)
     samples = rng.sample(manifest, num_test_cases)
     for idx, sample in enumerate(samples):
-        print("Warm-up Test Case %d: %s", idx, sample['audio_filepath'])
+        print("Warm-up Test Case %d: %s" % (idx, sample['feat']))
         start_time = time.time()
-        transcript = audio_process_handler(sample['audio_filepath'])
+        transcript = audio_process_handler(sample['feat'])
         finish_time = time.time()
         print("Response Time: %f, Transcript: %s" %
               (finish_time - start_time, transcript))
```
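The first print was passing idx and the path as extra positional arguments; unlike the logging module, print does no %-interpolation and simply writes the raw format string followed by its arguments. The fix applies the % operator explicitly (and switches to the 'feat' manifest key). A quick illustration of the difference:

```python
sample = {'feat': 'data/demo.wav'}  # hypothetical manifest entry
idx = 0

# Old form: prints the format string verbatim, then the arguments.
print("Warm-up Test Case %d: %s", idx, sample['feat'])
# -> Warm-up Test Case %d: %s 0 data/demo.wav

# New form: interpolates before printing.
print("Warm-up Test Case %d: %s" % (idx, sample['feat']))
# -> Warm-up Test Case 0: data/demo.wav
```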
examples/aishell/s0/README.md

```diff
@@ -2,10 +2,10 @@
 ## Deepspeech2
-| Model | release | Config | Test set | Loss | CER |
-| --- | --- | --- | --- | --- | --- |
-| DeepSpeech2 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 ~ 0.073507 |
-| DeepSpeech2 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |
-| DeepSpeech2 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
-| DeepSpeech2 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |
-| DeepSpeech2 58.4M | 1.8.5 | - | test | - | 0.080447 |
+| Model | Params | Release | Config | Test set | Loss | CER |
+| --- | --- | --- | --- | --- | --- | --- |
+| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 |
+| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |
+| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
+| DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |
+| DeepSpeech2 | 58.4M | 1.8.5 | - | test | - | 0.080447 |
```
examples/aishell/s0/conf/deepspeech2.yaml

```diff
@@ -10,8 +10,8 @@ data:
   min_output_input_ratio: 0.00
   max_output_input_ratio: .inf
 
 collator:
+  batch_size: 64  # one gpu
   mean_std_filepath: data/mean_std.json
   unit_type: char
   vocab_filepath: data/vocab.txt
@@ -33,7 +33,6 @@ collator:
   sortagrad: True
   shuffle_method: batch_shuffle
   num_workers: 0
-  batch_size: 64  # one gpu
 
 model:
   num_conv_layers: 2
```
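Moving batch_size and num_workers under collator: changes where downstream code must read them; with yacs, the YAML block name becomes the attribute path. A small sketch against a trimmed stand-in config (the real file has many more keys):

```python
from yacs.config import CfgNode

# Trimmed stand-in for the restructured aishell config.
cfg = CfgNode.load_cfg("""
data:
  test_manifest: data/manifest.test
collator:
  batch_size: 64
  num_workers: 0
""")

# Batching options now live under config.collator, not config.data.
assert cfg.collator.batch_size == 64
assert cfg.collator.num_workers == 0
assert 'batch_size' not in cfg.data
```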
examples/aishell/s0/run.sh

```diff
@@ -31,10 +31,10 @@ fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # export ckpt avg_n
-    CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+    CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
 fi
```
examples/aishell/s1/README.md

```diff
@@ -9,6 +9,7 @@
 | conformer | 47.07M | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | - | 0.062196 |
 | conformer | 47.07M | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | - | 0.054694 |
 
+
 ## Chunk Conformer
 | Model | Params | Config | Augmentation| Test set | Decode method | Chunk | Loss | WER |
```
examples/librispeech/s0/conf/deepspeech2.yaml

```diff
@@ -3,16 +3,21 @@ data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev-clean
   test_manifest: data/manifest.test-clean
-  mean_std_filepath: data/mean_std.json
-  vocab_filepath: data/vocab.txt
-  augmentation_config: conf/augmentation.json
-  batch_size: 20
   min_input_len: 0.0
   max_input_len: 27.0 # second
   min_output_len: 0.0
   max_output_len: .inf
   min_output_input_ratio: 0.00
   max_output_input_ratio: .inf
+
+collator:
+  batch_size: 20
+  mean_std_filepath: data/mean_std.json
+  unit_type: char
+  vocab_filepath: data/vocab.txt
+  augmentation_config: conf/augmentation.json
+  random_seed: 0
+  spm_model_prefix:
   specgram_type: linear
   target_sample_rate: 16000
   max_freq: None
```
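The librispeech config gets the same split, and the new collator: block also carries the collator-only keys (unit_type, spm_model_prefix, random_seed) that the collator factory consumes. A sketch of loading the restructured file, assuming get_cfg_defaults matches it (exactly which keys from_config reads is not shown in this diff):

```python
from deepspeech.exps.deepspeech2.config import get_cfg_defaults
from deepspeech.io.collator import SpeechCollator

config = get_cfg_defaults()
config.defrost()
config.merge_from_file('examples/librispeech/s0/conf/deepspeech2.yaml')
config.freeze()

# The collator is built from the collator: block (mean_std_filepath,
# vocab_filepath, augmentation_config, specgram_type, ...), not from data:.
collate_fn = SpeechCollator.from_config(config)
print(collate_fn.feature_size, collate_fn.vocab_size)
```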