Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
2cacbaf4
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
1 年多 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2cacbaf4
编写于
7月 28, 2021
作者:
H
huangyuxin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
修改了deepspeech2.py部分LSTM和GRU的代码,增加了LayerNorm
上级
ce1e8ab5
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
62 addition
and
14 deletion
+62
-14
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+4
-2
deepspeech/models/ds2/deepspeech2.py
deepspeech/models/ds2/deepspeech2.py
+54
-11
examples/aishell/s0/conf/deepspeech2.yaml
examples/aishell/s0/conf/deepspeech2.yaml
+2
-1
examples/librispeech/s0/conf/deepspeech2.yaml
examples/librispeech/s0/conf/deepspeech2.yaml
+1
-0
examples/tiny/s0/conf/deepspeech2.yaml
examples/tiny/s0/conf/deepspeech2.yaml
+1
-0
未找到文件。
deepspeech/exps/deepspeech2/model.py
浏览文件 @
2cacbaf4
...
...
@@ -127,7 +127,8 @@ class DeepSpeech2Trainer(Trainer):
num_rnn_layers
=
config
.
model
.
num_rnn_layers
,
rnn_size
=
config
.
model
.
rnn_layer_size
,
use_gru
=
config
.
model
.
use_gru
,
share_rnn_weights
=
config
.
model
.
share_rnn_weights
)
share_rnn_weights
=
config
.
model
.
share_rnn_weights
,
apply_online
=
config
.
model
.
apply_online
)
if
self
.
parallel
:
model
=
paddle
.
DataParallel
(
model
)
...
...
@@ -374,7 +375,8 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
num_rnn_layers
=
config
.
model
.
num_rnn_layers
,
rnn_size
=
config
.
model
.
rnn_layer_size
,
use_gru
=
config
.
model
.
use_gru
,
share_rnn_weights
=
config
.
model
.
share_rnn_weights
)
share_rnn_weights
=
config
.
model
.
share_rnn_weights
,
apply_online
=
config
.
model
.
apply_online
)
self
.
model
=
model
logger
.
info
(
"Setup model!"
)
...
...
deepspeech/models/ds2/deepspeech2.py
浏览文件 @
2cacbaf4
...
...
@@ -25,6 +25,11 @@ from deepspeech.utils import layer_tools
from
deepspeech.utils.checkpoint
import
Checkpoint
from
deepspeech.utils.log
import
Log
from
paddle.nn
import
LSTM
,
GRU
from
paddle.nn
import
LayerNorm
from
paddle.nn
import
LayerList
logger
=
Log
(
__name__
).
getlog
()
__all__
=
[
'DeepSpeech2Model'
,
'DeepSpeech2InferMode'
]
...
...
@@ -38,25 +43,50 @@ class CRNNEncoder(nn.Layer):
num_rnn_layers
=
3
,
rnn_size
=
1024
,
use_gru
=
False
,
share_rnn_weights
=
True
):
share_rnn_weights
=
True
,
apply_online
=
True
):
super
().
__init__
()
self
.
rnn_size
=
rnn_size
self
.
feat_size
=
feat_size
# 161 for linear
self
.
dict_size
=
dict_size
self
.
num_rnn_layers
=
num_rnn_layers
self
.
apply_online
=
apply_online
self
.
conv
=
ConvStack
(
feat_size
,
num_conv_layers
)
i_size
=
self
.
conv
.
output_height
# H after conv stack
self
.
rnn
=
LayerList
()
self
.
layernorm_list
=
LayerList
()
if
(
apply_online
==
True
):
rnn_direction
=
'forward'
else
:
rnn_direction
=
'bidirect'
if
use_gru
==
True
:
self
.
rnn
.
append
(
GRU
(
input_size
=
i_size
,
hidden_size
=
rnn_size
,
num_layers
=
1
,
direction
=
rnn_direction
))
self
.
layernorm_list
.
append
(
LayerNorm
(
rnn_size
))
for
i
in
range
(
1
,
num_rnn_layers
):
self
.
rnn
.
append
(
GRU
(
input_size
=
rnn_size
,
hidden_size
=
rnn_size
,
num_layers
=
1
,
direction
=
rnn_direction
))
self
.
layernorm_list
.
append
(
LayerNorm
(
rnn_size
))
else
:
self
.
rnn
.
append
(
LSTM
(
input_size
=
i_size
,
hidden_size
=
rnn_size
,
num_layers
=
1
,
direction
=
rnn_direction
))
self
.
layernorm_list
.
append
(
LayerNorm
(
rnn_size
))
for
i
in
range
(
1
,
num_rnn_layers
):
self
.
rnn
.
append
(
LSTM
(
input_size
=
rnn_size
,
hidden_size
=
rnn_size
,
num_layers
=
1
,
direction
=
rnn_direction
))
self
.
layernorm_list
.
append
(
LayerNorm
(
rnn_size
))
"""
self.rnn = RNNStack(
i_size=i_size,
h_size=rnn_size,
num_stacks=num_rnn_layers,
use_gru=use_gru,
share_rnn_weights=share_rnn_weights)
"""
@
property
def
output_size
(
self
):
return
self
.
rnn_size
*
2
return
self
.
rnn_size
def
forward
(
self
,
audio
,
audio_len
):
"""Compute Encoder outputs
...
...
@@ -86,7 +116,15 @@ class CRNNEncoder(nn.Layer):
x
=
x
.
reshape
([
0
,
0
,
-
1
])
#[B, T, C*D]
# remove padding part
x
,
x_lens
=
self
.
rnn
(
x
,
x_lens
)
#[B, T, D]
print
(
"x.shape:"
,
x
.
shape
)
x
,
output_state
=
self
.
rnn
[
0
](
x
,
None
,
x_lens
)
x
=
self
.
layernorm_list
[
0
](
x
)
for
i
in
range
(
1
,
self
.
num_rnn_layers
):
x
,
output_state
=
self
.
rnn
[
i
](
x
,
output_state
,
x_lens
)
#[B, T, D]
x
=
self
.
layernorm_list
[
i
](
x
)
"""
x, x_lens = self.rnn(x, x_lens)
"""
return
x
,
x_lens
...
...
@@ -141,7 +179,8 @@ class DeepSpeech2Model(nn.Layer):
num_rnn_layers
=
3
,
rnn_size
=
1024
,
use_gru
=
False
,
share_rnn_weights
=
True
):
share_rnn_weights
=
True
,
apply_online
=
True
):
super
().
__init__
()
self
.
encoder
=
CRNNEncoder
(
feat_size
=
feat_size
,
...
...
@@ -150,8 +189,9 @@ class DeepSpeech2Model(nn.Layer):
num_rnn_layers
=
num_rnn_layers
,
rnn_size
=
rnn_size
,
use_gru
=
use_gru
,
share_rnn_weights
=
share_rnn_weights
)
assert
(
self
.
encoder
.
output_size
==
rnn_size
*
2
)
share_rnn_weights
=
share_rnn_weights
,
apply_online
=
apply_online
)
assert
(
self
.
encoder
.
output_size
==
rnn_size
)
self
.
decoder
=
CTCDecoder
(
odim
=
dict_size
,
# <blank> is in vocab
...
...
@@ -221,7 +261,8 @@ class DeepSpeech2Model(nn.Layer):
num_rnn_layers
=
config
.
model
.
num_rnn_layers
,
rnn_size
=
config
.
model
.
rnn_layer_size
,
use_gru
=
config
.
model
.
use_gru
,
share_rnn_weights
=
config
.
model
.
share_rnn_weights
)
share_rnn_weights
=
config
.
model
.
share_rnn_weights
,
apply_online
=
config
.
model
.
apply_online
)
infos
=
Checkpoint
().
load_parameters
(
model
,
checkpoint_path
=
checkpoint_path
)
logger
.
info
(
f
"checkpoint info:
{
infos
}
"
)
...
...
@@ -237,7 +278,8 @@ class DeepSpeech2InferModel(DeepSpeech2Model):
num_rnn_layers
=
3
,
rnn_size
=
1024
,
use_gru
=
False
,
share_rnn_weights
=
True
):
share_rnn_weights
=
True
,
apply_online
=
True
):
super
().
__init__
(
feat_size
=
feat_size
,
dict_size
=
dict_size
,
...
...
@@ -245,7 +287,8 @@ class DeepSpeech2InferModel(DeepSpeech2Model):
num_rnn_layers
=
num_rnn_layers
,
rnn_size
=
rnn_size
,
use_gru
=
use_gru
,
share_rnn_weights
=
share_rnn_weights
)
share_rnn_weights
=
share_rnn_weights
,
apply_online
=
apply_online
)
def
forward
(
self
,
audio
,
audio_len
):
"""export model function
...
...
examples/aishell/s0/conf/deepspeech2.yaml
浏览文件 @
2cacbaf4
...
...
@@ -36,10 +36,11 @@ collator:
model
:
num_conv_layers
:
2
num_rnn_layers
:
3
num_rnn_layers
:
4
rnn_layer_size
:
1024
use_gru
:
True
share_rnn_weights
:
False
apply_online
:
False
training
:
n_epoch
:
50
...
...
examples/librispeech/s0/conf/deepspeech2.yaml
浏览文件 @
2cacbaf4
...
...
@@ -40,6 +40,7 @@ model:
rnn_layer_size
:
2048
use_gru
:
False
share_rnn_weights
:
True
apply_online
:
False
training
:
n_epoch
:
50
...
...
examples/tiny/s0/conf/deepspeech2.yaml
浏览文件 @
2cacbaf4
...
...
@@ -41,6 +41,7 @@ model:
rnn_layer_size
:
2048
use_gru
:
False
share_rnn_weights
:
True
apply_online
:
True
training
:
n_epoch
:
10
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录