PaddlePaddle / DeepSpeech
Commit 913ea33f
Authored Oct 02, 2021 by Hui Zhang

remove conv and rnn uesless

Parent: f4f2d6f0
Showing 2 changed files with 0 additions and 484 deletions (+0 −484):

deepspeech/modules/conv.py  +0 −170
deepspeech/modules/rnn.py   +0 −314
deepspeech/modules/conv.py (deleted, file mode 100644 → 0)
```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle import nn
from paddle.nn import functional as F

from deepspeech.modules.activation import brelu
from deepspeech.modules.mask import make_non_pad_mask
from deepspeech.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ['ConvStack', "conv_output_size"]


def conv_output_size(I, F, P, S):
    # https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-convolutional-neural-networks#hyperparameters
    # Output size after Conv:
    #   By noting I the length of the input volume size,
    #   F the length of the filter,
    #   P the amount of zero padding,
    #   S the stride,
    #   then the output size O of the feature map along that dimension is given by:
    #     O = (I - F + Pstart + Pend) // S + 1
    #   When Pstart == Pend == P, we can replace Pstart + Pend by 2P.
    #   When Pstart == Pend == 0
    #     O = (I - F - S) // S
    # https://iq.opengenus.org/output-size-of-convolution/
    #   Output height = (Input height + padding height top + padding height bottom - kernel height) / (stride height) + 1
    #   Output width = (Output width + padding width right + padding width left - kernel width) / (stride width) + 1
    return (I - F + 2 * P - S) // S


# receptive field calculator
# https://fomoro.com/research/article/receptive-field-calculator
# https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-convolutional-neural-networks#hyperparameters
# https://distill.pub/2019/computing-receptive-fields/
# Rl-1 = Sl * Rl + (Kl - Sl)


class ConvBn(nn.Layer):
    """Convolution layer with batch normalization.

    :param kernel_size: The x dimension of a filter kernel. Or input a tuple for
                        two image dimension.
    :type kernel_size: int|tuple|list
    :param num_channels_in: Number of input channels.
    :type num_channels_in: int
    :param num_channels_out: Number of output channels.
    :type num_channels_out: int
    :param stride: The x dimension of the stride. Or input a tuple for two
                   image dimension.
    :type stride: int|tuple|list
    :param padding: The x dimension of the padding. Or input a tuple for two
                    image dimension.
    :type padding: int|tuple|list
    :param act: Activation type, relu|brelu
    :type act: string
    :return: Batch norm layer after convolution layer.
    :rtype: Variable
    """

    def __init__(self, num_channels_in, num_channels_out, kernel_size, stride,
                 padding, act):
        super().__init__()
        assert len(kernel_size) == 2
        assert len(stride) == 2
        assert len(padding) == 2
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        self.conv = nn.Conv2D(
            num_channels_in,
            num_channels_out,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            weight_attr=None,
            bias_attr=False,
            data_format='NCHW')

        self.bn = nn.BatchNorm2D(
            num_channels_out,
            weight_attr=None,
            bias_attr=None,
            data_format='NCHW')
        self.act = F.relu if act == 'relu' else brelu

    def forward(self, x, x_len):
        """
        x(Tensor): audio, shape [B, C, D, T]
        """
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)

        x_len = (x_len - self.kernel_size[1] + 2 * self.padding[1]
                 ) // self.stride[1] + 1

        # reset padding part to 0
        masks = make_non_pad_mask(x_len)  #[B, T]
        masks = masks.unsqueeze(1).unsqueeze(1)  # [B, 1, 1, T]
        # https://github.com/PaddlePaddle/Paddle/pull/29265
        # rhs will type promote to lhs
        x = x * masks
        return x, x_len


class ConvStack(nn.Layer):
    """Convolution group with stacked convolution layers.

    :param feat_size: audio feature dim.
    :type feat_size: int
    :param num_stacks: Number of stacked convolution layers.
    :type num_stacks: int
    """

    def __init__(self, feat_size, num_stacks):
        super().__init__()
        self.feat_size = feat_size  # D
        self.num_stacks = num_stacks

        self.conv_in = ConvBn(
            num_channels_in=1,
            num_channels_out=32,
            kernel_size=(41, 11),  #[D, T]
            stride=(2, 3),
            padding=(20, 5),
            act='brelu')

        out_channel = 32
        convs = [
            ConvBn(
                num_channels_in=32,
                num_channels_out=out_channel,
                kernel_size=(21, 11),
                stride=(2, 1),
                padding=(10, 5),
                act='brelu') for i in range(num_stacks - 1)
        ]
        self.conv_stack = nn.LayerList(convs)

        # conv output feat_dim
        output_height = (feat_size - 1) // 2 + 1
        for i in range(self.num_stacks - 1):
            output_height = (output_height - 1) // 2 + 1
        self.output_height = out_channel * output_height

    def forward(self, x, x_len):
        """
        x: shape [B, C, D, T]
        x_len : shape [B]
        """
        x, x_len = self.conv_in(x, x_len)
        for i, conv in enumerate(self.conv_stack):
            x, x_len = conv(x, x_len)
        return x, x_len
```
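The sequence-length bookkeeping in this file follows the standard convolution output-size formula O = (I - K + 2P) // S + 1, applied to the time axis (index 1 of `kernel_size`, `stride`, and `padding`) in `ConvBn.forward`, and to the feature axis when `ConvStack` precomputes `self.output_height`. A minimal arithmetic sketch, independent of Paddle, that reproduces both for the `conv_in` layer (kernel `(41, 11)`, stride `(2, 3)`, padding `(20, 5)`); the helper name and the sample sizes below are illustrative, not part of the original module:

```python
def conv_out_len(in_len: int, kernel: int, stride: int, padding: int) -> int:
    # Same formula ConvBn.forward applies to x_len along the time axis:
    # O = (I - K + 2P) // S + 1
    return (in_len - kernel + 2 * padding) // stride + 1

# Time axis of conv_in: kernel 11, stride 3, padding 5.
assert conv_out_len(100, 11, 3, 5) == 34          # 100 frames -> 34 frames

# Feature axis of conv_in: kernel 41, stride 2, padding 20, which reduces to
# (D - 1) // 2 + 1 -- the same expression ConvStack uses for output_height.
feat_size = 161                                   # example feature dim
assert conv_out_len(feat_size, 41, 2, 20) == (feat_size - 1) // 2 + 1
```

Note that the standalone `conv_output_size` helper in the deleted file returns `(I - F + 2P - S) // S`, without the trailing `+ 1`; the length update actually used by `ConvBn.forward` is the `+ 1` form shown above.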
deepspeech/modules/rnn.py (deleted, file mode 100644 → 0)
```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
from paddle import nn
from paddle.nn import functional as F
from paddle.nn import initializer as I

from deepspeech.modules.activation import brelu
from deepspeech.modules.mask import make_non_pad_mask
from deepspeech.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ['RNNStack']


class RNNCell(nn.RNNCellBase):
    r"""
    Elman RNN (SimpleRNN) cell. Given the inputs and previous states, it
    computes the outputs and updates states.

    The formula used is as follows:

    .. math::
        h_{t} & = act(x_{t} + b_{ih} + W_{hh}h_{t-1} + b_{hh})

        y_{t} & = h_{t}

    where :math:`act` is for :attr:`activation`.
    """

    def __init__(self,
                 hidden_size: int,
                 activation="tanh",
                 weight_ih_attr=None,
                 weight_hh_attr=None,
                 bias_ih_attr=None,
                 bias_hh_attr=None,
                 name=None):
        super().__init__()
        std = 1.0 / math.sqrt(hidden_size)
        self.weight_hh = self.create_parameter(
            (hidden_size, hidden_size),
            weight_hh_attr,
            default_initializer=I.Uniform(-std, std))
        self.bias_ih = None
        self.bias_hh = self.create_parameter(
            (hidden_size, ),
            bias_hh_attr,
            is_bias=True,
            default_initializer=I.Uniform(-std, std))
        self.hidden_size = hidden_size
        if activation not in ["tanh", "relu", "brelu"]:
            raise ValueError(
                "activation for SimpleRNNCell should be tanh or relu, "
                "but get {}".format(activation))
        self.activation = activation
        self._activation_fn = paddle.tanh \
            if activation == "tanh" \
            else F.relu
        if activation == 'brelu':
            self._activation_fn = brelu

    def forward(self, inputs, states=None):
        if states is None:
            states = self.get_initial_states(inputs, self.state_shape)
        pre_h = states
        i2h = inputs
        if self.bias_ih is not None:
            i2h += self.bias_ih
        h2h = paddle.matmul(pre_h, self.weight_hh, transpose_y=True)
        if self.bias_hh is not None:
            h2h += self.bias_hh
        h = self._activation_fn(i2h + h2h)
        return h, h

    @property
    def state_shape(self):
        return (self.hidden_size, )


class GRUCell(nn.RNNCellBase):
    r"""
    Gated Recurrent Unit (GRU) RNN cell. Given the inputs and previous states,
    it computes the outputs and updates states.

    The formula for GRU used is as follows:

    .. math::

        r_{t} & = \sigma(W_{ir}x_{t} + b_{ir} + W_{hr}h_{t-1} + b_{hr})

        z_{t} & = \sigma(W_{iz}x_{t} + b_{iz} + W_{hz}h_{t-1} + b_{hz})

        \widetilde{h}_{t} & = \tanh(W_{ic}x_{t} + b_{ic} + r_{t} * (W_{hc}h_{t-1} + b_{hc}))

        h_{t} & = z_{t} * h_{t-1} + (1 - z_{t}) * \widetilde{h}_{t}

        y_{t} & = h_{t}

    where :math:`\sigma` is the sigmoid fucntion, and * is the elemetwise
    multiplication operator.
    """

    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 weight_ih_attr=None,
                 weight_hh_attr=None,
                 bias_ih_attr=None,
                 bias_hh_attr=None,
                 name=None):
        super().__init__()
        std = 1.0 / math.sqrt(hidden_size)
        self.weight_hh = self.create_parameter(
            (3 * hidden_size, hidden_size),
            weight_hh_attr,
            default_initializer=I.Uniform(-std, std))
        self.bias_ih = None
        self.bias_hh = self.create_parameter(
            (3 * hidden_size, ),
            bias_hh_attr,
            is_bias=True,
            default_initializer=I.Uniform(-std, std))
        self.hidden_size = hidden_size
        self.input_size = input_size
        self._gate_activation = F.sigmoid
        self._activation = paddle.tanh

    def forward(self, inputs, states=None):
        if states is None:
            states = self.get_initial_states(inputs, self.state_shape)

        pre_hidden = states
        x_gates = inputs
        if self.bias_ih is not None:
            x_gates = x_gates + self.bias_ih
        h_gates = paddle.matmul(pre_hidden, self.weight_hh, transpose_y=True)
        if self.bias_hh is not None:
            h_gates = h_gates + self.bias_hh

        x_r, x_z, x_c = paddle.split(x_gates, num_or_sections=3, axis=1)
        h_r, h_z, h_c = paddle.split(h_gates, num_or_sections=3, axis=1)

        r = self._gate_activation(x_r + h_r)
        z = self._gate_activation(x_z + h_z)
        c = self._activation(x_c + r * h_c)  # apply reset gate after mm
        h = (pre_hidden - c) * z + c
        # https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/fluid/layers/dynamic_gru_cn.html#dynamic-gru

        return h, h

    @property
    def state_shape(self):
        r"""
        The `state_shape` of GRUCell is a shape `[hidden_size]` (-1 for batch
        size would be automatically inserted into shape). The shape corresponds
        to the shape of :math:`h_{t-1}`.
        """
        return (self.hidden_size, )


class BiRNNWithBN(nn.Layer):
    """Bidirectonal simple rnn layer with sequence-wise batch normalization.
    The batch normalization is only performed on input-state weights.

    :param size: Dimension of RNN cells.
    :type size: int
    :param share_weights: Whether to share input-hidden weights between
                          forward and backward directional RNNs.
    :type share_weights: bool
    :return: Bidirectional simple rnn layer.
    :rtype: Variable
    """

    def __init__(self, i_size: int, h_size: int, share_weights: bool):
        super().__init__()
        self.share_weights = share_weights
        if self.share_weights:
            #input-hidden weights shared between bi-directional rnn.
            self.fw_fc = nn.Linear(i_size, h_size, bias_attr=False)
            # batch norm is only performed on input-state projection
            self.fw_bn = nn.BatchNorm1D(
                h_size, bias_attr=None, data_format='NLC')
            self.bw_fc = self.fw_fc
            self.bw_bn = self.fw_bn
        else:
            self.fw_fc = nn.Linear(i_size, h_size, bias_attr=False)
            self.fw_bn = nn.BatchNorm1D(
                h_size, bias_attr=None, data_format='NLC')
            self.bw_fc = nn.Linear(i_size, h_size, bias_attr=False)
            self.bw_bn = nn.BatchNorm1D(
                h_size, bias_attr=None, data_format='NLC')

        self.fw_cell = RNNCell(hidden_size=h_size, activation='brelu')
        self.bw_cell = RNNCell(hidden_size=h_size, activation='brelu')
        self.fw_rnn = nn.RNN(
            self.fw_cell, is_reverse=False, time_major=False)  #[B, T, D]
        self.bw_rnn = nn.RNN(
            self.fw_cell, is_reverse=True, time_major=False)  #[B, T, D]

    def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
        # x, shape [B, T, D]
        fw_x = self.fw_bn(self.fw_fc(x))
        bw_x = self.bw_bn(self.bw_fc(x))
        fw_x, _ = self.fw_rnn(inputs=fw_x, sequence_length=x_len)
        bw_x, _ = self.bw_rnn(inputs=bw_x, sequence_length=x_len)
        x = paddle.concat([fw_x, bw_x], axis=-1)
        return x, x_len


class BiGRUWithBN(nn.Layer):
    """Bidirectonal gru layer with sequence-wise batch normalization.
    The batch normalization is only performed on input-state weights.

    :param name: Name of the layer.
    :type name: string
    :param input: Input layer.
    :type input: Variable
    :param size: Dimension of GRU cells.
    :type size: int
    :param act: Activation type.
    :type act: string
    :return: Bidirectional GRU layer.
    :rtype: Variable
    """

    def __init__(self, i_size: int, h_size: int):
        super().__init__()
        hidden_size = h_size * 3

        self.fw_fc = nn.Linear(i_size, hidden_size, bias_attr=False)
        self.fw_bn = nn.BatchNorm1D(
            hidden_size, bias_attr=None, data_format='NLC')
        self.bw_fc = nn.Linear(i_size, hidden_size, bias_attr=False)
        self.bw_bn = nn.BatchNorm1D(
            hidden_size, bias_attr=None, data_format='NLC')

        self.fw_cell = GRUCell(input_size=hidden_size, hidden_size=h_size)
        self.bw_cell = GRUCell(input_size=hidden_size, hidden_size=h_size)
        self.fw_rnn = nn.RNN(
            self.fw_cell, is_reverse=False, time_major=False)  #[B, T, D]
        self.bw_rnn = nn.RNN(
            self.fw_cell, is_reverse=True, time_major=False)  #[B, T, D]

    def forward(self, x, x_len):
        # x, shape [B, T, D]
        fw_x = self.fw_bn(self.fw_fc(x))
        bw_x = self.bw_bn(self.bw_fc(x))
        fw_x, _ = self.fw_rnn(inputs=fw_x, sequence_length=x_len)
        bw_x, _ = self.bw_rnn(inputs=bw_x, sequence_length=x_len)
        x = paddle.concat([fw_x, bw_x], axis=-1)
        return x, x_len


class RNNStack(nn.Layer):
    """RNN group with stacked bidirectional simple RNN or GRU layers.

    :param input: Input layer.
    :type input: Variable
    :param size: Dimension of RNN cells in each layer.
    :type size: int
    :param num_stacks: Number of stacked rnn layers.
    :type num_stacks: int
    :param use_gru: Use gru if set True. Use simple rnn if set False.
    :type use_gru: bool
    :param share_rnn_weights: Whether to share input-hidden weights between
                              forward and backward directional RNNs.
                              It is only available when use_gru=False.
    :type share_weights: bool
    :return: Output layer of the RNN group.
    :rtype: Variable
    """

    def __init__(self,
                 i_size: int,
                 h_size: int,
                 num_stacks: int,
                 use_gru: bool,
                 share_rnn_weights: bool):
        super().__init__()
        rnn_stacks = []
        for i in range(num_stacks):
            if use_gru:
                #default:GRU using tanh
                rnn_stacks.append(BiGRUWithBN(i_size=i_size, h_size=h_size))
            else:
                rnn_stacks.append(
                    BiRNNWithBN(
                        i_size=i_size,
                        h_size=h_size,
                        share_weights=share_rnn_weights))
            i_size = h_size * 2

        self.rnn_stacks = nn.LayerList(rnn_stacks)

    def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
        """
        x: shape [B, T, D]
        x_len: shpae [B]
        """
        for i, rnn in enumerate(self.rnn_stacks):
            x, x_len = rnn(x, x_len)
            masks = make_non_pad_mask(x_len)  #[B, T]
            masks = masks.unsqueeze(-1)  # [B, T, 1]
            # https://github.com/PaddlePaddle/Paddle/pull/29265
            # rhs will type promote to lhs
            x = x * masks
        return x, x_len
```
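Two reading notes on the cells above: the cells take `inputs` as an already-projected tensor (no `weight_ih` is created and `bias_ih` stays `None`), which matches `BiRNNWithBN`/`BiGRUWithBN` applying `fw_fc`/`bw_fc` plus batch norm before the recurrent loop; and the GRU update `h = (pre_hidden - c) * z + c` is algebraically identical to the docstring's `h_t = z_t * h_{t-1} + (1 - z_t) * \widetilde{h}_t`. Note also that both bidirectional wrappers pass `self.fw_cell` to the reverse `nn.RNN`, leaving `self.bw_cell` unused. A minimal usage sketch of the removed `RNNStack` follows; it assumes a working Paddle install and the `deepspeech` package as it existed before this commit, and the batch size, lengths, and layer sizes are illustrative only:

```python
import paddle
from deepspeech.modules.rnn import RNNStack  # module removed by this commit

# Illustrative sizes: batch 4, up to 50 time steps, 161-dim features.
x = paddle.randn([4, 50, 161])
x_len = paddle.to_tensor([50, 48, 30, 20], dtype='int64')

stack = RNNStack(
    i_size=161,              # input feature dim D
    h_size=1024,             # per-direction cell size
    num_stacks=3,            # three stacked bidirectional layers
    use_gru=False,           # brelu simple-RNN cells instead of GRU
    share_rnn_weights=True)  # share input-hidden weights across directions

y, y_len = stack(x, x_len)
# Forward and backward outputs are concatenated, so the last dim is 2 * h_size,
# and frames beyond each x_len are masked to zero after every layer.
print(y.shape)  # [4, 50, 2048]
```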