Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
9ac6d65a
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
206
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
9ac6d65a
编写于
8月 23, 2021
作者:
J
Jackwaterveg
提交者:
GitHub
8月 23, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #780 from Jackwaterveg/ds2_online
修改pre_commit, 注释以及增加ds2的seed
上级
30a71de2
9068c0d4
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
35 addition
and
22 deletion
+35
-22
deepspeech/exps/deepspeech2/bin/train.py
deepspeech/exps/deepspeech2/bin/train.py
+4
-0
deepspeech/exps/deepspeech2/model.py
deepspeech/exps/deepspeech2/model.py
+9
-0
deepspeech/models/ds2_online/conv.py
deepspeech/models/ds2_online/conv.py
+0
-2
deepspeech/models/ds2_online/deepspeech2.py
deepspeech/models/ds2_online/deepspeech2.py
+20
-18
tests/deepspeech2_online_model_test.py
tests/deepspeech2_online_model_test.py
+2
-2
未找到文件。
deepspeech/exps/deepspeech2/bin/train.py
浏览文件 @
9ac6d65a
...
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for DeepSpeech2 model."""
import
os
from
paddle
import
distributed
as
dist
from
deepspeech.exps.deepspeech2.config
import
get_cfg_defaults
...
...
@@ -53,5 +55,7 @@ if __name__ == "__main__":
if
args
.
dump_config
:
with
open
(
args
.
dump_config
,
'w'
)
as
f
:
print
(
config
,
file
=
f
)
if
config
.
training
.
seed
is
not
None
:
os
.
environ
.
setdefault
(
'FLAGS_cudnn_deterministic'
,
'True'
)
main
(
config
,
args
)
deepspeech/exps/deepspeech2/model.py
浏览文件 @
9ac6d65a
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains DeepSpeech2 and DeepSpeech2Online model."""
import
random
import
time
from
collections
import
defaultdict
from
pathlib
import
Path
...
...
@@ -53,6 +54,7 @@ class DeepSpeech2Trainer(Trainer):
weight_decay
=
1e-6
,
# the coeff of weight decay
global_grad_clip
=
5.0
,
# the global norm clip
n_epoch
=
50
,
# train epochs
seed
=
1024
,
#train seed
))
if
config
is
not
None
:
...
...
@@ -61,6 +63,13 @@ class DeepSpeech2Trainer(Trainer):
def
__init__
(
self
,
config
,
args
):
super
().
__init__
(
config
,
args
)
if
config
.
training
.
seed
is
not
None
:
self
.
set_seed
(
config
.
training
.
seed
)
def
set_seed
(
self
,
seed
):
np
.
random
.
seed
(
seed
)
random
.
seed
(
seed
)
paddle
.
seed
(
seed
)
def
train_batch
(
self
,
batch_index
,
batch_data
,
msg
):
start
=
time
.
time
()
...
...
deepspeech/models/ds2_online/conv.py
浏览文件 @
9ac6d65a
...
...
@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
from
paddle
import
nn
from
deepspeech.modules.embedding
import
PositionalEncoding
from
deepspeech.modules.subsampling
import
Conv2dSubsampling4
...
...
deepspeech/models/ds2_online/deepspeech2.py
浏览文件 @
9ac6d65a
...
...
@@ -26,7 +26,7 @@ from deepspeech.utils.checkpoint import Checkpoint
from
deepspeech.utils.log
import
Log
logger
=
Log
(
__name__
).
getlog
()
__all__
=
[
'DeepSpeech2ModelOnline'
,
'DeepSpeech2InferModeOnline'
]
__all__
=
[
'DeepSpeech2ModelOnline'
,
'DeepSpeech2InferMode
l
Online'
]
class
CRNNEncoder
(
nn
.
Layer
):
...
...
@@ -68,7 +68,7 @@ class CRNNEncoder(nn.Layer):
rnn_input_size
=
i_size
else
:
rnn_input_size
=
layernorm_size
if
use_gru
==
True
:
if
use_gru
is
True
:
self
.
rnn
.
append
(
nn
.
GRU
(
input_size
=
rnn_input_size
,
...
...
@@ -102,18 +102,18 @@ class CRNNEncoder(nn.Layer):
Args:
x (Tensor): [B, feature_size, D]
x_lens (Tensor): [B]
init_state_h_box(Tensor): init_states h for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
init_state_c_box(Tensor): init_states c for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
Return
s
:
init_state_h_box(Tensor): init_states h for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
init_state_c_box(Tensor): init_states c for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
Return:
x (Tensor): encoder outputs, [B, size, D]
x_lens (Tensor): encoder length, [B]
final_state_h_box(Tensor): final_states h for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
final_state_c_box(Tensor): final_states c for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
final_state_h_box(Tensor): final_states h for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
final_state_c_box(Tensor): final_states c for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
"""
if
init_state_h_box
is
not
None
:
init_state_list
=
None
if
self
.
use_gru
==
True
:
if
self
.
use_gru
is
True
:
init_state_h_list
=
paddle
.
split
(
init_state_h_box
,
self
.
num_rnn_layers
,
axis
=
0
)
init_state_list
=
init_state_h_list
...
...
@@ -139,10 +139,10 @@ class CRNNEncoder(nn.Layer):
x
=
self
.
fc_layers_list
[
i
](
x
)
x
=
F
.
relu
(
x
)
if
self
.
use_gru
==
True
:
if
self
.
use_gru
is
True
:
final_chunk_state_h_box
=
paddle
.
concat
(
final_chunk_state_list
,
axis
=
0
)
final_chunk_state_c_box
=
init_state_c_box
#paddle.zeros_like(final_chunk_state_h_box)
final_chunk_state_c_box
=
init_state_c_box
else
:
final_chunk_state_h_list
=
[
final_chunk_state_list
[
i
][
0
]
for
i
in
range
(
self
.
num_rnn_layers
)
...
...
@@ -165,10 +165,10 @@ class CRNNEncoder(nn.Layer):
x_lens (Tensor): [B]
decoder_chunk_size: The chunk size of decoder
Returns:
eouts_list (List of Tensor): The list of encoder outputs in chunk_size
,
[B, chunk_size, D] * num_chunks
eouts_lens_list (List of Tensor): The list of encoder length in chunk_size
,
[B] * num_chunks
final_state_h_box(Tensor): final_states h for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
final_state_c_box(Tensor): final_states c for RNN layers
, num_rnn_layers * num_directions, batch_size, hidden_size
eouts_list (List of Tensor): The list of encoder outputs in chunk_size
:
[B, chunk_size, D] * num_chunks
eouts_lens_list (List of Tensor): The list of encoder length in chunk_size
:
[B] * num_chunks
final_state_h_box(Tensor): final_states h for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
final_state_c_box(Tensor): final_states c for RNN layers
: [num_rnn_layers * num_directions, batch_size, hidden_size]
"""
subsampling_rate
=
self
.
conv
.
subsampling_rate
receptive_field_length
=
self
.
conv
.
receptive_field_length
...
...
@@ -215,12 +215,14 @@ class CRNNEncoder(nn.Layer):
class
DeepSpeech2ModelOnline
(
nn
.
Layer
):
"""The DeepSpeech2 network structure for online.
:param audio
_data
: Audio spectrogram data layer.
:type audio
_data
: Variable
:param text
_data
: Transcription text data layer.
:type text
_data
: Variable
:param audio: Audio spectrogram data layer.
:type audio: Variable
:param text: Transcription text data layer.
:type text: Variable
:param audio_len: Valid sequence length data layer.
:type audio_len: Variable
:param feat_size: feature size for audio.
:type feat_size: int
:param dict_size: Dictionary size for tokenized transcription.
:type dict_size: int
:param num_conv_layers: Number of stacking convolution layers.
...
...
tests/deepspeech2_online_model_test.py
浏览文件 @
9ac6d65a
...
...
@@ -146,7 +146,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
self
.
assertEqual
(
paddle
.
allclose
(
eouts_by_chk
,
eouts
),
True
)
self
.
assertEqual
(
paddle
.
allclose
(
final_state_h_box
,
final_state_h_box_chk
),
True
)
if
use_gru
==
False
:
if
use_gru
is
False
:
self
.
assertEqual
(
paddle
.
allclose
(
final_state_c_box
,
final_state_c_box_chk
),
True
)
...
...
@@ -177,7 +177,7 @@ class TestDeepSpeech2ModelOnline(unittest.TestCase):
self
.
assertEqual
(
paddle
.
allclose
(
eouts_by_chk
,
eouts
),
True
)
self
.
assertEqual
(
paddle
.
allclose
(
final_state_h_box
,
final_state_h_box_chk
),
True
)
if
use_gru
==
False
:
if
use_gru
is
False
:
self
.
assertEqual
(
paddle
.
allclose
(
final_state_c_box
,
final_state_c_box_chk
),
True
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录