Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
7bae32f3
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7bae32f3
编写于
6月 15, 2021
作者:
H
Haoxin Ma
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
revise example/ting/s1
上级
b9110af9
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
38 addition
and
64 deletion
+38
-64
deepspeech/exps/deepspeech2/config.py
deepspeech/exps/deepspeech2/config.py
+1
-1
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+2
-1
deepspeech/exps/u2/config.py
deepspeech/exps/u2/config.py
+7
-0
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+5
-4
deepspeech/frontend/utility.py
deepspeech/frontend/utility.py
+1
-1
deepspeech/io/collator.py
deepspeech/io/collator.py
+4
-19
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+2
-10
examples/tiny/s0/conf/deepspeech2.yaml
examples/tiny/s0/conf/deepspeech2.yaml
+1
-15
examples/tiny/s1/conf/transformer.yaml
examples/tiny/s1/conf/transformer.yaml
+15
-13
未找到文件。
deepspeech/exps/deepspeech2/config.py
浏览文件 @
7bae32f3
...
...
@@ -72,7 +72,7 @@ _C.collator =CN(
use_dB_normalization
=
True
,
target_dB
=-
20
,
dither
=
1.0
,
# feature dither
keep_transcription_text
=
Tru
e
keep_transcription_text
=
Fals
e
))
DeepSpeech2Model
.
params
(
_C
.
model
)
...
...
deepspeech/exps/deepspeech2/model.py
浏览文件 @
7bae32f3
...
...
@@ -336,13 +336,14 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
# config.data.max_output_input_ratio = float('inf')
test_dataset
=
ManifestDataset
.
from_config
(
config
)
config
.
collator
.
keep_transcription_text
=
True
# return text ord id
self
.
test_loader
=
DataLoader
(
test_dataset
,
batch_size
=
config
.
decoding
.
batch_size
,
shuffle
=
False
,
drop_last
=
False
,
collate_fn
=
SpeechCollator
(
config
=
config
,
keep_transcription_text
=
True
))
collate_fn
=
SpeechCollator
.
from_config
(
config
))
logger
.
info
(
"Setup test Dataloader!"
)
def
setup_output_dir
(
self
):
...
...
deepspeech/exps/u2/config.py
浏览文件 @
7bae32f3
...
...
@@ -22,6 +22,13 @@ _C = CfgNode()
_C
.
data
=
ManifestDataset
.
params
()
_C
.
collator
=
CfgNode
(
dict
(
augmentation_config
=
""
,
unit_type
=
"char"
,
keep_transcription_text
=
False
))
_C
.
model
=
U2Model
.
params
()
_C
.
training
=
U2Trainer
.
params
()
...
...
deepspeech/exps/u2/model.py
浏览文件 @
7bae32f3
...
...
@@ -221,7 +221,7 @@ class U2Trainer(Trainer):
config
.
data
.
augmentation_config
=
""
dev_dataset
=
ManifestDataset
.
from_config
(
config
)
collate_fn
=
SpeechCollator
(
keep_transcription_text
=
False
)
collate_fn
=
SpeechCollator
.
from_config
(
config
)
if
self
.
parallel
:
batch_sampler
=
SortagradDistributedBatchSampler
(
train_dataset
,
...
...
@@ -266,12 +266,13 @@ class U2Trainer(Trainer):
# config.data.max_output_input_ratio = float('inf')
test_dataset
=
ManifestDataset
.
from_config
(
config
)
# return text ord id
config
.
collator
.
keep_transcription_text
=
True
self
.
test_loader
=
DataLoader
(
test_dataset
,
batch_size
=
config
.
decoding
.
batch_size
,
shuffle
=
False
,
drop_last
=
False
,
collate_fn
=
SpeechCollator
(
keep_transcription_text
=
True
))
collate_fn
=
SpeechCollator
.
from_config
(
config
))
logger
.
info
(
"Setup train/valid/test Dataloader!"
)
def
setup_model
(
self
):
...
...
@@ -375,7 +376,7 @@ class U2Tester(U2Trainer):
error_rate_func
=
error_rate
.
cer
if
cfg
.
error_rate_type
==
'cer'
else
error_rate
.
wer
start_time
=
time
.
time
()
text_feature
=
self
.
test_loader
.
dataset
.
text_feature
text_feature
=
self
.
test_loader
.
collate_fn
.
text_feature
target_transcripts
=
self
.
ordid2token
(
texts
,
texts_len
)
result_transcripts
=
self
.
model
.
decode
(
audio
,
...
...
@@ -423,7 +424,7 @@ class U2Tester(U2Trainer):
self
.
model
.
eval
()
logger
.
info
(
f
"Test Total Examples:
{
len
(
self
.
test_loader
.
dataset
)
}
"
)
stride_ms
=
self
.
test_loader
.
dataset
.
stride_ms
stride_ms
=
self
.
config
.
collator
.
stride_ms
error_rate_type
=
None
errors_sum
,
len_refs
,
num_ins
=
0.0
,
0
,
0
num_frames
=
0.0
...
...
deepspeech/frontend/utility.py
浏览文件 @
7bae32f3
...
...
@@ -82,7 +82,7 @@ def read_manifest(
]
if
all
(
conditions
):
manifest
.
append
(
json_data
)
return
manifest
,
json_data
[
"feat_shape"
][
-
1
]
return
manifest
def
rms_to_db
(
rms
:
float
):
...
...
deepspeech/io/collator.py
浏览文件 @
7bae32f3
...
...
@@ -56,7 +56,7 @@ class SpeechCollator():
use_dB_normalization
=
True
,
target_dB
=-
20
,
dither
=
1.0
,
# feature dither
keep_transcription_text
=
Tru
e
keep_transcription_text
=
Fals
e
))
if
config
is
not
None
:
...
...
@@ -75,7 +75,7 @@ class SpeechCollator():
"""
assert
'augmentation_config'
in
config
.
collator
assert
'keep_transcription_text'
in
config
.
collator
assert
'mean_std_filepath'
in
config
.
collator
assert
'mean_std_filepath'
in
config
.
data
assert
'vocab_filepath'
in
config
.
data
assert
'specgram_type'
in
config
.
collator
assert
'n_fft'
in
config
.
collator
...
...
@@ -94,7 +94,7 @@ class SpeechCollator():
speech_collator
=
cls
(
aug_file
=
aug_file
,
random_seed
=
0
,
mean_std_filepath
=
config
.
collator
.
mean_std_filepath
,
mean_std_filepath
=
config
.
data
.
mean_std_filepath
,
unit_type
=
config
.
collator
.
unit_type
,
vocab_filepath
=
config
.
data
.
vocab_filepath
,
spm_model_prefix
=
config
.
collator
.
spm_model_prefix
,
...
...
@@ -282,26 +282,11 @@ class SpeechCollator():
text_lens
=
np
.
array
(
text_lens
).
astype
(
np
.
int64
)
return
utts
,
padded_audios
,
audio_lens
,
padded_texts
,
text_lens
@
property
def
vocab_size
(
self
):
return
self
.
_speech_featurizer
.
vocab_size
@
property
def
vocab_list
(
self
):
return
self
.
_speech_featurizer
.
vocab_list
@
property
def
vocab_dict
(
self
):
return
self
.
_speech_featurizer
.
vocab_dict
@
property
def
text_feature
(
self
):
return
self
.
_text_featurizer
self
.
_speech_featurizer
.
text_feature
return
self
.
_speech_featurizer
.
text_feature
@
property
def
feature_size
(
self
):
return
self
.
_speech_featurizer
.
feature_size
@
property
def
stride_ms
(
self
):
...
...
deepspeech/io/dataset.py
浏览文件 @
7bae32f3
...
...
@@ -161,7 +161,7 @@ class ManifestDataset(Dataset):
# self._rng = np.random.RandomState(random_seed)
# read manifest
self
.
_manifest
,
self
.
_feature_size
=
read_manifest
(
self
.
_manifest
=
read_manifest
(
manifest_path
=
manifest_path
,
max_input_len
=
max_input_len
,
min_input_len
=
min_input_len
,
...
...
@@ -213,16 +213,8 @@ class ManifestDataset(Dataset):
Returns:
int: audio feature size.
"""
return
self
.
_
feature_size
return
self
.
_
manifest
[
0
][
"feat_shape"
][
-
1
]
@
property
def
stride_ms
(
self
):
"""time length in `ms` unit per frame
Returns:
float: time(ms)/frame
"""
return
self
.
_audio_featurizer
.
stride_ms
def
__len__
(
self
):
...
...
examples/tiny/s0/conf/deepspeech2.yaml
浏览文件 @
7bae32f3
...
...
@@ -6,7 +6,6 @@ data:
mean_std_filepath
:
data/mean_std.json
unit_type
:
char
vocab_filepath
:
data/vocab.txt
augmentation_config
:
conf/augmentation.json
batch_size
:
4
min_input_len
:
0.0
max_input_len
:
27.0
...
...
@@ -14,18 +13,6 @@ data:
max_output_len
:
400.0
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
specgram_type
:
linear
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
stride_ms
:
10.0
window_ms
:
20.0
delta_delta
:
False
dither
:
1.0
use_dB_normalization
:
True
target_dB
:
-20
random_seed
:
0
keep_transcription_text
:
False
sortagrad
:
True
shuffle_method
:
batch_shuffle
num_workers
:
0
...
...
@@ -33,7 +20,6 @@ data:
collator
:
augmentation_config
:
conf/augmentation.json
random_seed
:
0
mean_std_filepath
:
data/mean_std.json
spm_model_prefix
:
specgram_type
:
linear
feat_dim
:
...
...
@@ -46,7 +32,7 @@ collator:
use_dB_normalization
:
True
target_dB
:
-20
dither
:
1.0
keep_transcription_text
:
Tru
e
keep_transcription_text
:
Fals
e
model
:
num_conv_layers
:
2
...
...
examples/tiny/s1/conf/transformer.yaml
浏览文件 @
7bae32f3
...
...
@@ -7,7 +7,6 @@ data:
unit_type
:
'
spm'
spm_model_prefix
:
'
data/bpe_unigram_200'
mean_std_filepath
:
"
"
augmentation_config
:
conf/augmentation.json
batch_size
:
4
min_input_len
:
0.5
# second
max_input_len
:
20.0
# second
...
...
@@ -16,23 +15,26 @@ data:
min_output_input_ratio
:
0.05
max_output_input_ratio
:
10.0
raw_wav
:
True
# use raw_wav or kaldi feature
specgram_type
:
fbank
#linear, mfcc, fbank
sortagrad
:
True
shuffle_method
:
batch_shuffle
num_workers
:
0
#2
collator
:
augmentation_config
:
conf/augmentation.json
random_seed
:
0
spm_model_prefix
:
specgram_type
:
fbank
feat_dim
:
80
delta_delta
:
False
dither
:
1.0
target_sample_rate
:
16000
max_freq
:
None
n_fft
:
None
stride_ms
:
10.0
window_ms
:
25.0
window_ms
:
20.0
n_fft
:
None
max_freq
:
None
target_sample_rate
:
16000
use_dB_normalization
:
True
target_dB
:
-20
random_seed
:
0
dither
:
1.
0
keep_transcription_text
:
False
sortagrad
:
True
shuffle_method
:
batch_shuffle
num_workers
:
2
# network architecture
model
:
...
...
@@ -70,7 +72,7 @@ model:
training
:
n_epoch
:
2
n_epoch
:
3
accum_grad
:
1
global_grad_clip
:
5.0
optim
:
adam
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录