Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
8c5b8e35
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8c5b8e35
编写于
4月 23, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add rtf
上级
9ad706e2
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
46 addition
and
16 deletion
+46
-16
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+14
-6
deepspeech/frontend/featurizer/audio_featurizer.py
deepspeech/frontend/featurizer/audio_featurizer.py
+4
-0
deepspeech/frontend/featurizer/speech_featurizer.py
deepspeech/frontend/featurizer/speech_featurizer.py
+13
-2
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+13
-8
examples/aishell/s1/conf/conformer.yaml
examples/aishell/s1/conf/conformer.yaml
+1
-0
examples/tiny/s1/conf/conformer.yaml
examples/tiny/s1/conf/conformer.yaml
+1
-0
未找到文件。
deepspeech/exps/u2/model.py
浏览文件 @
8c5b8e35
...
...
@@ -362,8 +362,8 @@ class U2Tester(U2Trainer):
errors_func
=
error_rate
.
char_errors
if
cfg
.
error_rate_type
==
'cer'
else
error_rate
.
word_errors
error_rate_func
=
error_rate
.
cer
if
cfg
.
error_rate_type
==
'cer'
else
error_rate
.
wer
start_time
=
time
.
time
()
text_feature
=
self
.
test_loader
.
dataset
.
text_feature
target_transcripts
=
self
.
ordid2token
(
texts
,
texts_len
)
result_transcripts
=
self
.
model
.
decode
(
audio
,
...
...
@@ -381,7 +381,8 @@ class U2Tester(U2Trainer):
decoding_chunk_size
=
cfg
.
decoding_chunk_size
,
num_decoding_left_chunks
=
cfg
.
num_decoding_left_chunks
,
simulate_streaming
=
cfg
.
simulate_streaming
)
decode_time
=
time
.
time
()
for
target
,
result
in
zip
(
target_transcripts
,
result_transcripts
):
errors
,
len_ref
=
errors_func
(
target
,
result
)
errors_sum
+=
errors
...
...
@@ -397,9 +398,11 @@ class U2Tester(U2Trainer):
return
dict
(
errors_sum
=
errors_sum
,
len_refs
=
len_refs
,
num_ins
=
num_ins
,
num_ins
=
num_ins
,
# num examples
error_rate
=
errors_sum
/
len_refs
,
error_rate_type
=
cfg
.
error_rate_type
)
error_rate_type
=
cfg
.
error_rate_type
,
num_frames
=
audio_len
.
sum
().
numpy
().
item
(),
decode_time
=
decode_time
)
@
mp_tools
.
rank_zero_only
@
paddle
.
no_grad
()
...
...
@@ -410,10 +413,13 @@ class U2Tester(U2Trainer):
error_rate_type
=
None
errors_sum
,
len_refs
,
num_ins
=
0.0
,
0
,
0
num_frames
=
0.0
num_time
=
0.0
with
open
(
self
.
args
.
result_file
,
'w'
)
as
fout
:
for
i
,
batch
in
enumerate
(
self
.
test_loader
):
metrics
=
self
.
compute_metrics
(
*
batch
,
fout
=
fout
)
num_frames
+=
metrics
[
'num_frames'
]
num_time
+=
metrics
[
"decode_time"
]
errors_sum
+=
metrics
[
'errors_sum'
]
len_refs
+=
metrics
[
'len_refs'
]
num_ins
+=
metrics
[
'num_ins'
]
...
...
@@ -421,11 +427,13 @@ class U2Tester(U2Trainer):
logger
.
info
(
"Error rate [%s] (%d/?) = %f"
%
(
error_rate_type
,
num_ins
,
errors_sum
/
len_refs
))
rtf
=
num_time
/
(
num_frames
*
self
.
test_loader
.
dataset
.
stride_ms
/
1000.0
)
# logging
msg
=
"Test: "
msg
+=
"epoch: {}, "
.
format
(
self
.
epoch
)
msg
+=
"step: {}, "
.
format
(
self
.
iteration
)
msg
+=
", Final error rate [%s] (%d/%d) = %f"
%
(
msg
+=
"RTF: {}, "
.
format
(
rtf
)
msg
+=
"Final error rate [%s] (%d/%d) = %f"
%
(
error_rate_type
,
num_ins
,
num_ins
,
errors_sum
/
len_refs
)
logger
.
info
(
msg
)
...
...
deepspeech/frontend/featurizer/audio_featurizer.py
浏览文件 @
8c5b8e35
...
...
@@ -105,6 +105,10 @@ class AudioFeaturizer(object):
# extract spectrogram
return
self
.
_compute_specgram
(
audio_segment
)
@
property
def
stride_ms
(
self
):
return
self
.
_stride_ms
@
property
def
feature_size
(
self
):
"""audio feature size"""
...
...
deepspeech/frontend/featurizer/speech_featurizer.py
浏览文件 @
8c5b8e35
...
...
@@ -63,7 +63,8 @@ class SpeechFeaturizer(object):
max_freq
=
None
,
target_sample_rate
=
16000
,
use_dB_normalization
=
True
,
target_dB
=-
20
):
target_dB
=-
20
,
dither
=
1.0
):
self
.
_audio_featurizer
=
AudioFeaturizer
(
specgram_type
=
specgram_type
,
feat_dim
=
feat_dim
,
...
...
@@ -74,7 +75,8 @@ class SpeechFeaturizer(object):
max_freq
=
max_freq
,
target_sample_rate
=
target_sample_rate
,
use_dB_normalization
=
use_dB_normalization
,
target_dB
=
target_dB
)
target_dB
=
target_dB
,
dither
=
dither
)
self
.
_text_featurizer
=
TextFeaturizer
(
unit_type
,
vocab_filepath
,
spm_model_prefix
)
...
...
@@ -138,6 +140,15 @@ class SpeechFeaturizer(object):
"""
return
self
.
_audio_featurizer
.
feature_size
@
property
def
stride_ms
(
self
):
"""time length in `ms` unit per frame
Returns:
float: time(ms)/frame
"""
return
self
.
_audio_featurizer
.
stride_ms
@
property
def
text_feature
(
self
):
"""Return the text feature object.
...
...
deepspeech/io/dataset.py
浏览文件 @
8c5b8e35
...
...
@@ -63,6 +63,7 @@ class ManifestDataset(Dataset):
specgram_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
feat_dim
=
0
,
# 'mfcc', 'fbank'
delta_delta
=
False
,
# 'mfcc', 'fbank'
dither
=
1.0
,
# feature dither
target_sample_rate
=
16000
,
# target sample rate
use_dB_normalization
=
True
,
target_dB
=-
20
,
...
...
@@ -123,6 +124,7 @@ class ManifestDataset(Dataset):
specgram_type
=
config
.
data
.
specgram_type
,
feat_dim
=
config
.
data
.
feat_dim
,
delta_delta
=
config
.
data
.
delta_delta
,
dither
=
config
.
data
.
dither
,
use_dB_normalization
=
config
.
data
.
use_dB_normalization
,
target_dB
=
config
.
data
.
target_dB
,
random_seed
=
config
.
data
.
random_seed
,
...
...
@@ -150,6 +152,7 @@ class ManifestDataset(Dataset):
specgram_type
=
'linear'
,
feat_dim
=
None
,
delta_delta
=
False
,
dither
=
1.0
,
use_dB_normalization
=
True
,
target_dB
=-
20
,
random_seed
=
0
,
...
...
@@ -183,13 +186,10 @@ class ManifestDataset(Dataset):
keep_transcription_text (bool, optional): True, when not in training mode, will not do tokenizer; Defaults to False.
"""
super
().
__init__
()
self
.
_max_input_len
=
max_input_len
,
self
.
_min_input_len
=
min_input_len
,
self
.
_max_output_len
=
max_output_len
,
self
.
_min_output_len
=
min_output_len
,
self
.
_max_output_input_ratio
=
max_output_input_ratio
,
self
.
_min_output_input_ratio
=
min_output_input_ratio
,
self
.
_stride_ms
=
stride_ms
self
.
_target_sample_rate
=
target_sample_rate
self
.
_normalizer
=
FeatureNormalizer
(
mean_std_filepath
)
if
mean_std_filepath
else
None
self
.
_augmentation_pipeline
=
AugmentationPipeline
(
...
...
@@ -207,7 +207,8 @@ class ManifestDataset(Dataset):
max_freq
=
max_freq
,
target_sample_rate
=
target_sample_rate
,
use_dB_normalization
=
use_dB_normalization
,
target_dB
=
target_dB
)
target_dB
=
target_dB
,
dither
=
dither
)
self
.
_rng
=
np
.
random
.
RandomState
(
random_seed
)
self
.
_keep_transcription_text
=
keep_transcription_text
...
...
@@ -250,6 +251,10 @@ class ManifestDataset(Dataset):
@
property
def
feature_size
(
self
):
return
self
.
_speech_featurizer
.
feature_size
@
property
def
stride_ms
(
self
):
return
self
.
_speech_featurizer
.
stride_ms
def
_parse_tar
(
self
,
file
):
"""Parse a tar file to get a tarfile object
...
...
examples/aishell/s1/conf/conformer.yaml
浏览文件 @
8c5b8e35
...
...
@@ -18,6 +18,7 @@ data:
specgram_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
...
...
examples/tiny/s1/conf/conformer.yaml
浏览文件 @
8c5b8e35
...
...
@@ -19,6 +19,7 @@ data:
specgram_type
:
fbank
#linear, mfcc, fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录