Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
22fce191
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
22fce191
编写于
4月 12, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
can train
上级
dee672a7
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
71 addition
and
63 deletion
+71
-63
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
+2
-2
deepspeech/exps/deepspeech2/bin/deploy/server.py
deepspeech/exps/deepspeech2/bin/deploy/server.py
+2
-2
deepspeech/exps/deepspeech2/bin/tune.py
deepspeech/exps/deepspeech2/bin/tune.py
+2
-3
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+10
-12
deepspeech/exps/u2/config.py
deepspeech/exps/u2/config.py
+5
-8
deepspeech/exps/u2/model.py
deepspeech/exps/u2/model.py
+21
-14
deepspeech/frontend/normalizer.py
deepspeech/frontend/normalizer.py
+2
-2
deepspeech/frontend/utility.py
deepspeech/frontend/utility.py
+2
-2
deepspeech/io/dataset.py
deepspeech/io/dataset.py
+8
-5
deepspeech/models/u2.py
deepspeech/models/u2.py
+5
-11
deepspeech/training/scheduler.py
deepspeech/training/scheduler.py
+2
-0
examples/tiny/s1/conf/augmentation.config
examples/tiny/s1/conf/augmentation.config
+8
-0
examples/tiny/s1/conf/conformer.yaml
examples/tiny/s1/conf/conformer.yaml
+2
-2
未找到文件。
deepspeech/exps/deepspeech2/bin/deploy/runtime.py
浏览文件 @
22fce191
...
...
@@ -79,9 +79,9 @@ def inference(config, args):
def
start_server
(
config
,
args
):
"""Start the ASR server"""
config
.
defrost
()
config
.
data
.
manfiest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
io
.
StringIO
(
initial_value
=
'{}'
,
newline
=
''
)
config
.
data
.
augmentation_config
=
""
config
.
data
.
keep_transcription_text
=
True
dataset
=
ManifestDataset
.
from_config
(
config
)
...
...
deepspeech/exps/deepspeech2/bin/deploy/server.py
浏览文件 @
22fce191
...
...
@@ -31,9 +31,9 @@ from deepspeech.io.dataset import ManifestDataset
def
start_server
(
config
,
args
):
"""Start the ASR server"""
config
.
defrost
()
config
.
data
.
manfiest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
io
.
StringIO
(
initial_value
=
'{}'
,
newline
=
''
)
config
.
data
.
augmentation_config
=
""
config
.
data
.
keep_transcription_text
=
True
dataset
=
ManifestDataset
.
from_config
(
config
)
...
...
deepspeech/exps/deepspeech2/bin/tune.py
浏览文件 @
22fce191
...
...
@@ -36,10 +36,9 @@ def tune(config, args):
raise
ValueError
(
"num_alphas must be non-negative!"
)
if
not
args
.
num_betas
>=
0
:
raise
ValueError
(
"num_betas must be non-negative!"
)
config
.
defrost
()
config
.
data
.
manfiest
=
config
.
data
.
dev_manifest
config
.
data
.
augmentation_config
=
io
.
StringIO
(
initial_value
=
'{}'
,
newline
=
''
)
config
.
data
.
augmentation_config
=
""
config
.
data
.
keep_transcription_text
=
True
dev_dataset
=
ManifestDataset
.
from_config
(
config
)
...
...
deepspeech/exps/deepspeech2/model.py
浏览文件 @
22fce191
...
...
@@ -13,7 +13,6 @@
# limitations under the License.
"""Contains DeepSpeech2 model."""
import
io
import
time
import
logging
import
numpy
as
np
...
...
@@ -24,7 +23,7 @@ import paddle
from
paddle
import
distributed
as
dist
from
paddle.io
import
DataLoader
from
deepspeech.training
import
Trainer
from
deepspeech.training
.trainer
import
Trainer
from
deepspeech.training.gradclip
import
ClipGradByGlobalNormWithLog
from
deepspeech.utils
import
mp_tools
...
...
@@ -140,15 +139,15 @@ class DeepSpeech2Trainer(Trainer):
self
.
logger
.
info
(
"Setup model/optimizer/lr_scheduler!"
)
def
setup_dataloader
(
self
):
config
=
self
.
config
config
=
self
.
config
.
clone
()
config
.
defrost
()
config
.
data
.
keep_transcription_text
=
False
config
.
data
.
man
fi
est
=
config
.
data
.
train_manifest
config
.
data
.
man
if
est
=
config
.
data
.
train_manifest
train_dataset
=
ManifestDataset
.
from_config
(
config
)
config
.
data
.
manfiest
=
config
.
data
.
dev_manifest
config
.
data
.
augmentation_config
=
io
.
StringIO
(
initial_value
=
'{}'
,
newline
=
''
)
config
.
data
.
manifest
=
config
.
data
.
dev_manifest
config
.
data
.
augmentation_config
=
""
dev_dataset
=
ManifestDataset
.
from_config
(
config
)
if
self
.
parallel
:
...
...
@@ -324,13 +323,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
self
.
logger
.
info
(
"Setup model!"
)
def
setup_dataloader
(
self
):
config
=
self
.
config
config
=
self
.
config
.
clone
()
config
.
defrost
()
# return raw text
config
.
data
.
manfiest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
io
.
StringIO
(
initial_value
=
'{}'
,
newline
=
''
)
config
.
data
.
keep_transcription_text
=
True
config
.
data
.
manifest
=
config
.
data
.
test_manifest
config
.
data
.
augmentation_config
=
""
test_dataset
=
ManifestDataset
.
from_config
(
config
)
# return text ord id
...
...
deepspeech/exps/u2/config.py
浏览文件 @
22fce191
...
...
@@ -14,23 +14,20 @@
from
yacs.config
import
CfgNode
from
deepspeech.io.dataset
import
ManifestDataset
from
deepspeech.models.u2
import
U2Model
from
deepspeech.exps.u2.model
import
U2Trainer
from
deepspeech.exps.u2.model
import
U2Tester
_C
=
CfgNode
()
_C
.
data
=
CfgNode
()
ManifestDataset
.
params
(
_C
.
data
)
_C
.
data
=
ManifestDataset
.
params
()
_C
.
model
=
CfgNode
()
U2Model
.
params
(
_C
.
model
)
_C
.
model
=
U2Model
.
params
()
_C
.
training
=
CfgNode
()
U2Trainer
.
params
(
_C
.
training
)
_C
.
training
=
U2Trainer
.
params
()
_C
.
decoding
=
CfgNode
()
U2Tester
.
params
(
_C
.
training
)
_C
.
decoding
=
U2Tester
.
params
()
def
get_cfg_defaults
():
...
...
deepspeech/exps/u2/model.py
浏览文件 @
22fce191
...
...
@@ -18,12 +18,14 @@ import logging
import
numpy
as
np
from
collections
import
defaultdict
from
pathlib
import
Path
from
typing
import
Optional
from
yacs.config
import
CfgNode
import
paddle
from
paddle
import
distributed
as
dist
from
paddle.io
import
DataLoader
from
deepspeech.training
import
Trainer
from
deepspeech.training
.trainer
import
Trainer
from
deepspeech.training.gradclip
import
ClipGradByGlobalNormWithLog
from
deepspeech.training.scheduler
import
WarmupLR
...
...
@@ -77,7 +79,7 @@ class U2Trainer(Trainer):
self
.
model
.
train
()
start
=
time
.
time
()
loss
=
self
.
model
(
*
batch_data
)
loss
,
attention_loss
,
ctc_loss
=
self
.
model
(
*
batch_data
)
loss
.
backward
()
layer_tools
.
print_grads
(
self
.
model
,
print_func
=
None
)
if
self
.
iteration
%
train_conf
.
accum_grad
==
0
:
...
...
@@ -88,13 +90,15 @@ class U2Trainer(Trainer):
losses_np
=
{
'train_loss'
:
float
(
loss
),
'train_
loss_div_batchsize'
:
float
(
loss
)
/
self
.
config
.
data
.
batch_size
'train_
att_loss'
:
float
(
attention_loss
),
'train_ctc_loss'
:
float
(
ctc_loss
),
}
msg
=
"Train: Rank: {}, "
.
format
(
dist
.
get_rank
())
msg
+=
"epoch: {}, "
.
format
(
self
.
epoch
)
msg
+=
"step: {}, "
.
format
(
self
.
iteration
)
msg
+=
"time: {:>.3f}s, "
.
format
(
iteration_time
)
msg
+=
f
"batch size:
{
self
.
config
.
data
.
batch_size
}
, "
msg
+=
f
"accum:
{
train_config
.
accum_grad
}
, "
msg
+=
', '
.
join
(
'{}: {:>.6f}'
.
format
(
k
,
v
)
for
k
,
v
in
losses_np
.
items
())
if
self
.
iteration
%
train_conf
.
log_interval
==
0
:
...
...
@@ -113,11 +117,11 @@ class U2Trainer(Trainer):
f
"Valid Total Examples:
{
len
(
self
.
valid_loader
.
dataset
)
}
"
)
valid_losses
=
defaultdict
(
list
)
for
i
,
batch
in
enumerate
(
self
.
valid_loader
):
loss
=
self
.
model
(
*
batch
)
total_loss
,
attention_loss
,
ctc_
loss
=
self
.
model
(
*
batch
)
valid_losses
[
'val_loss'
].
append
(
float
(
loss
))
valid_losses
[
'val_
loss_div_batchsize'
].
append
(
float
(
loss
)
/
self
.
config
.
data
.
batch_size
)
valid_losses
[
'val_loss'
].
append
(
float
(
total_
loss
))
valid_losses
[
'val_
att_loss'
].
append
(
float
(
attention_loss
))
valid_losses
[
'val_ctc_loss'
].
append
(
float
(
ctc_loss
)
)
# write visual log
valid_losses
=
{
k
:
np
.
mean
(
v
)
for
k
,
v
in
valid_losses
.
items
()}
...
...
@@ -137,13 +141,14 @@ class U2Trainer(Trainer):
def
setup_dataloader
(
self
):
config
=
self
.
config
.
clone
()
config
.
defrost
()
config
.
data
.
keep_transcription_text
=
False
# train/valid dataset, return token ids
config
.
data
.
man
fi
est
=
config
.
data
.
train_manifest
config
.
data
.
man
if
est
=
config
.
data
.
train_manifest
train_dataset
=
ManifestDataset
.
from_config
(
config
)
config
.
data
.
man
fi
est
=
config
.
data
.
dev_manifest
config
.
data
.
man
if
est
=
config
.
data
.
dev_manifest
config
.
data
.
augmentation_config
=
""
dev_dataset
=
ManifestDataset
.
from_config
(
config
)
...
...
@@ -181,7 +186,7 @@ class U2Trainer(Trainer):
# test dataset, return raw text
config
.
data
.
keep_transcription_text
=
True
config
.
data
.
augmentation_config
=
""
config
.
data
.
man
fi
est
=
config
.
data
.
test_manifest
config
.
data
.
man
if
est
=
config
.
data
.
test_manifest
test_dataset
=
ManifestDataset
.
from_config
(
config
)
# return text ord id
self
.
test_loader
=
DataLoader
(
...
...
@@ -193,10 +198,12 @@ class U2Trainer(Trainer):
self
.
logger
.
info
(
"Setup train/valid/test Dataloader!"
)
def
setup_model
(
self
):
config
=
self
.
config
.
clone
()
config
=
self
.
config
model_conf
=
config
.
model
model_conf
.
defrost
()
model_conf
.
input_dim
=
self
.
train_loader
.
dataset
.
feature_size
model_conf
.
output_dim
=
self
.
train_loader
.
dataset
.
vocab_size
model_conf
.
freeze
()
model
=
U2Model
.
from_config
(
model_conf
)
if
self
.
parallel
:
...
...
@@ -206,12 +213,12 @@ class U2Trainer(Trainer):
train_config
=
config
.
training
optim_type
=
train_config
.
optim
optim_conf
=
train_config
.
train_config
optim_conf
=
train_config
.
optim_conf
scheduler_type
=
train_config
.
scheduler
scheduler_conf
=
train_config
.
scheduler_conf
grad_clip
=
ClipGradByGlobalNormWithLog
(
train_config
.
global_grad_clip
)
weight_decay
=
paddle
.
regularizer
.
L2Decay
(
train_config
.
weight_decay
)
weight_decay
=
paddle
.
regularizer
.
L2Decay
(
optim_conf
.
weight_decay
)
if
scheduler_type
==
'expdecaylr'
:
lr_scheduler
=
paddle
.
optimizer
.
lr
.
ExponentialDecay
(
...
...
deepspeech/frontend/normalizer.py
浏览文件 @
22fce191
...
...
@@ -93,5 +93,5 @@ class FeatureNormalizer(object):
features
.
append
(
featurize_func
(
AudioSegment
.
from_file
(
instance
[
"feat"
])))
features
=
np
.
hstack
(
features
)
#(D, T)
self
.
_mean
=
np
.
mean
(
features
,
axis
=
1
).
reshape
([
-
1
,
1
])
#(D, 1
)
self
.
_std
=
np
.
std
(
features
,
axis
=
1
).
reshape
([
-
1
,
1
])
#(D, 1
)
self
.
_mean
=
np
.
mean
(
features
,
axis
=
1
).
reshape
([
1
,
-
1
])
#(1, D
)
self
.
_std
=
np
.
std
(
features
,
axis
=
1
).
reshape
([
1
,
-
1
])
#(1, D
)
deepspeech/frontend/utility.py
浏览文件 @
22fce191
...
...
@@ -235,8 +235,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file):
def
_load_npz_cmvn
(
npz_cmvn_file
,
eps
=
1e-20
):
npzfile
=
np
.
load
(
npz_cmvn_file
)
means
=
npzfile
[
"mean"
]
#(
D, 1
)
std
=
npzfile
[
"std"
]
#(
D, 1
)
means
=
npzfile
[
"mean"
]
#(
1, D
)
std
=
npzfile
[
"std"
]
#(
1, D
)
std
=
np
.
clip
(
std
,
eps
,
None
)
variance
=
1.0
/
std
cmvn
=
np
.
array
([
means
,
variance
])
...
...
deepspeech/io/dataset.py
浏览文件 @
22fce191
...
...
@@ -16,8 +16,9 @@ import io
import
random
import
tarfile
import
logging
from
collections
import
namedtuple
from
typing
import
Optional
from
yacs.config
import
CfgNode
from
collections
import
namedtuple
from
paddle.io
import
Dataset
...
...
@@ -42,6 +43,7 @@ class ManifestDataset(Dataset):
train_manifest
=
""
,
dev_manifest
=
""
,
test_manifest
=
""
,
manifest
=
""
,
unit_type
=
"char"
,
vocab_filepath
=
""
,
spm_model_prefix
=
""
,
...
...
@@ -60,7 +62,7 @@ class ManifestDataset(Dataset):
raw_wav
=
True
,
# use raw_wav or kaldi feature
specgram_type
=
'linear'
,
# 'linear', 'mfcc', 'fbank'
feat_dim
=
0
,
# 'mfcc', 'fbank'
del
at
_delta
=
False
,
# 'mfcc', 'fbank'
del
ta
_delta
=
False
,
# 'mfcc', 'fbank'
target_sample_rate
=
16000
,
# target sample rate
use_dB_normalization
=
True
,
target_dB
=-
20
,
...
...
@@ -86,8 +88,9 @@ class ManifestDataset(Dataset):
Returns:
ManifestDataset: dataet object.
"""
assert
manifest
in
config
.
data
assert
keep_transcription_text
in
config
.
data
assert
'manifest'
in
config
.
data
assert
config
.
data
.
manifest
assert
'keep_transcription_text'
in
config
.
data
if
isinstance
(
config
.
data
.
augmentation_config
,
(
str
,
bytes
)):
if
config
.
data
.
augmentation_config
:
...
...
@@ -119,7 +122,7 @@ class ManifestDataset(Dataset):
target_sample_rate
=
config
.
data
.
target_sample_rate
,
specgram_type
=
config
.
data
.
specgram_type
,
feat_dim
=
config
.
data
.
feat_dim
,
delta_delta
=
config
.
data
.
del
at
_delta
,
delta_delta
=
config
.
data
.
del
ta
_delta
,
use_dB_normalization
=
config
.
data
.
use_dB_normalization
,
target_dB
=
config
.
data
.
target_dB
,
random_seed
=
config
.
data
.
random_seed
,
...
...
deepspeech/models/u2.py
浏览文件 @
22fce191
...
...
@@ -75,8 +75,8 @@ class U2BaseModel(nn.Module):
dropout_rate
=
0.1
,
positional_dropout_rate
=
0.1
,
attention_dropout_rate
=
0.0
,
input_layer
=
conv2d
,
# encoder input type, you can chose conv2d, conv2d6 and conv2d8
normalize_before
=
t
rue
,
input_layer
=
'conv2d'
,
# encoder input type, you can chose conv2d, conv2d6 and conv2d8
normalize_before
=
T
rue
,
cnn_module_kernel
=
15
,
use_cnn_module
=
True
,
activation_type
=
'swish'
,
...
...
@@ -98,7 +98,7 @@ class U2BaseModel(nn.Module):
dict
(
ctc_weight
=
0.3
,
lsm_weight
=
0.1
,
# label smoothing option
length_normalized_loss
=
f
alse
,
))
length_normalized_loss
=
F
alse
,
))
if
config
is
not
None
:
config
.
merge_from_other_cfg
(
default
)
...
...
@@ -744,15 +744,9 @@ class U2Model(U2BaseModel):
ValueError: raise when using not support encoder type.
Returns:
int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc
nn.Layer: U2Model
"""
vocab_size
,
encoder
,
decoder
,
ctc
=
U2Model
.
_init_from_config
(
configs
)
model
=
cls
(
vocab_size
=
vocab_size
,
encoder
=
encoder
,
decoder
=
decoder
,
ctc
=
ctc
,
**
configs
[
'model_conf'
])
model
=
cls
(
configs
)
return
model
@
classmethod
...
...
deepspeech/training/scheduler.py
浏览文件 @
22fce191
...
...
@@ -13,6 +13,8 @@
# limitations under the License.
import
logging
from
typing
import
Union
from
typeguard
import
check_argument_types
from
paddle.optimizer.lr
import
LRScheduler
...
...
examples/tiny/s1/conf/augmentation.config
0 → 100644
浏览文件 @
22fce191
[
{
"type"
:
"shift"
,
"params"
: {
"min_shift_ms"
: -
5
,
"max_shift_ms"
:
5
},
"prob"
:
1
.
0
}
]
examples/tiny/s1/conf/conformer.yaml
浏览文件 @
22fce191
...
...
@@ -5,7 +5,7 @@ data:
test_manifest
:
data/manifest.tiny
vocab_filepath
:
data/vocab.txt
unit_type
:
'
spm'
spm_model_prefix
:
'
bpe_unigram_200'
spm_model_prefix
:
'
data/
bpe_unigram_200'
mean_std_filepath
:
"
"
augmentation_config
:
conf/augmentation.config
batch_size
:
4
...
...
@@ -119,11 +119,11 @@ training:
optim
:
adam
optim_conf
:
lr
:
0.002
lr_decay
:
1.0
weight_decay
:
1e-06
scheduler
:
warmuplr
# pytorch v1.1.0+ required
scheduler_conf
:
warmup_steps
:
25000
lr_decay
:
1.0
log_interval
:
100
decoding
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录