PaddlePaddle / DeepSpeech
Commit df1d44f5
Authored on Mar 17, 2021 by Hui Zhang
add torch compatiable hack, mask funcs
Parent: 59ed8901
Showing 7 changed files with 903 additions and 92 deletions (+903 -92)
.gitignore                                    +4    -0
.notebook/mask_and_masked_fill_test.ipynb     +449  -0
deepspeech/modules/__init__.py                +203  -0
deepspeech/modules/activation.py              +1    -58
deepspeech/modules/conformer_convolution.py   +0    -4
deepspeech/modules/loss.py                    +4    -30
deepspeech/modules/mask.py                    +242  -0
.gitignore
@@ -5,3 +5,7 @@ tools/venv
 *.log
 *.pdmodel
 *.pdiparams*
+*.zip
+*.tar
+*.tar.gz
+.ipynb_checkpoints
.notebook/mask_and_masked_fill_test.ipynb
new file (mode 100644)
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "primary-organic",
"metadata": {},
"outputs": [],
"source": [
"import torch"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "stopped-semester",
"metadata": {},
"outputs": [],
"source": [
"def mask_finished_scores(score: torch.Tensor,\n",
" flag: torch.Tensor) -> torch.Tensor:\n",
" \"\"\"\n",
" If a sequence is finished, we only allow one alive branch. This function\n",
" aims to give one branch a zero score and the rest -inf score.\n",
" Args:\n",
" score (torch.Tensor): A real value array with shape\n",
" (batch_size * beam_size, beam_size).\n",
" flag (torch.Tensor): A bool array with shape\n",
" (batch_size * beam_size, 1).\n",
" Returns:\n",
" torch.Tensor: (batch_size * beam_size, beam_size).\n",
" \"\"\"\n",
" beam_size = score.size(-1)\n",
" zero_mask = torch.zeros_like(flag, dtype=torch.bool)\n",
" if beam_size > 1:\n",
" unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])),\n",
" dim=1)\n",
" finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])),\n",
" dim=1)\n",
" else:\n",
" unfinished = zero_mask\n",
" finished = flag\n",
" print(unfinished)\n",
" print(finished)\n",
" score.masked_fill_(unfinished, -float('inf'))\n",
" score.masked_fill_(finished, 0)\n",
" return score"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "agreed-portuguese",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[ True],\n",
" [False]])\n",
"tensor([[-0.8841, 0.7381, -0.9986],\n",
" [ 0.2675, -0.7971, 0.3798]])\n",
"tensor([[ True, True],\n",
" [False, False]])\n"
]
}
],
"source": [
"score = torch.randn((2, 3))\n",
"flag = torch.ones((2, 1), dtype=torch.bool)\n",
"flag[1] = False\n",
"print(flag)\n",
"print(score)\n",
"print(flag.repeat([1, 2]))"
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "clean-aspect",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[False, True, True],\n",
" [False, False, False]])\n",
"tensor([[ True, False, False],\n",
" [False, False, False]])\n",
"tensor([[ 0.0000, -inf, -inf],\n",
" [ 0.2675, -0.7971, 0.3798]])\n",
"tensor([[ 0.0000, -inf, -inf],\n",
" [ 0.2675, -0.7971, 0.3798]])\n"
]
}
],
"source": [
"r = mask_finished_scores(score, flag)\n",
"print(r)\n",
"print(score)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "thrown-airline",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor(shape=[2, 1], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[True ],\n",
" [False]])\n",
"Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, 1.87704289, 0.01988174],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[True , True ],\n",
" [False, False]])\n"
]
}
],
"source": [
"import paddle\n",
"\n",
"score = paddle.randn((2, 3))\n",
"flag = paddle.ones((2, 1), dtype='bool')\n",
"flag[1] = False\n",
"print(flag)\n",
"print(score)\n",
"print(flag.tile([1, 2]))"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "internal-patent",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tensor(shape=[2, 3], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[False, True , True ],\n",
" [False, False, False]])\n",
"Tensor(shape=[2, 3], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[True , False, False],\n",
" [False, False, False]])\n",
"x Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, 1.87704289, 0.01988174],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"2 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, 1.87704289, 0.01988174],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"3 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"x Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"2 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 2.05994511, -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"3 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 0. , -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n",
"Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 0. , -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])\n"
]
}
],
"source": [
"paddle.bool = 'bool'\n",
"\n",
"def masked_fill(xs:paddle.Tensor, mask:paddle.Tensor, value:float):\n",
" print(xs)\n",
" trues = paddle.ones_like(xs) * value\n",
" assert xs.shape == mask.shape\n",
" xs = paddle.where(mask, trues, xs)\n",
" return xs\n",
"\n",
"def masked_fill_(xs:paddle.Tensor, mask:paddle.Tensor, value:float):\n",
" print('x', xs)\n",
" trues = paddle.ones_like(xs) * value\n",
" assert xs.shape == mask.shape\n",
" ret = paddle.where(mask, trues, xs)\n",
" print('2', xs)\n",
" paddle.assign(ret, output=xs)\n",
" print('3', xs)\n",
"\n",
"paddle.Tensor.masked_fill = masked_fill\n",
"paddle.Tensor.masked_fill_ = masked_fill_\n",
"\n",
"def mask_finished_scores_pd(score: paddle.Tensor,\n",
" flag: paddle.Tensor) -> paddle.Tensor:\n",
" \"\"\"\n",
" If a sequence is finished, we only allow one alive branch. This function\n",
" aims to give one branch a zero score and the rest -inf score.\n",
" Args:\n",
" score (torch.Tensor): A real value array with shape\n",
" (batch_size * beam_size, beam_size).\n",
" flag (torch.Tensor): A bool array with shape\n",
" (batch_size * beam_size, 1).\n",
" Returns:\n",
" torch.Tensor: (batch_size * beam_size, beam_size).\n",
" \"\"\"\n",
" beam_size = score.shape[-1]\n",
" zero_mask = paddle.zeros_like(flag, dtype=paddle.bool)\n",
" if beam_size > 1:\n",
" unfinished = paddle.concat((zero_mask, flag.tile([1, beam_size - 1])),\n",
" axis=1)\n",
" finished = paddle.concat((flag, zero_mask.tile([1, beam_size - 1])),\n",
" axis=1)\n",
" else:\n",
" unfinished = zero_mask\n",
" finished = flag\n",
" print(unfinished)\n",
" print(finished)\n",
" \n",
" #score.masked_fill_(unfinished, -float('inf'))\n",
" #score.masked_fill_(finished, 0)\n",
"# infs = paddle.ones_like(score) * -float('inf')\n",
"# score = paddle.where(unfinished, infs, score)\n",
"# score = paddle.where(finished, paddle.zeros_like(score), score)\n",
"\n",
"# score = score.masked_fill(unfinished, -float('inf'))\n",
"# score = score.masked_fill(finished, 0)\n",
" score.masked_fill_(unfinished, -float('inf'))\n",
" score.masked_fill_(finished, 0)\n",
" return score\n",
"\n",
"r = mask_finished_scores_pd(score, flag)\n",
"print(r)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "vocal-prime",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<bound method PyCapsule.value of Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
" [[ 0. , -inf. , -inf. ],\n",
" [-0.40165186, 0.77547729, -0.64469045]])>"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"score.value"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "bacterial-adolescent",
"metadata": {},
"outputs": [],
"source": [
"from typing import Union, Any"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "absent-fiber",
"metadata": {},
"outputs": [],
"source": [
"def repeat(xs : paddle.Tensor, *size: Any):\n",
" print(size)\n",
" return paddle.tile(xs, size)\n",
"paddle.Tensor.repeat = repeat"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "material-harbor",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(1, 2)\n",
"Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[True , True ],\n",
" [False, False]])\n"
]
}
],
"source": [
"flag = paddle.ones((2, 1), dtype='bool')\n",
"flag[1] = False\n",
"print(flag.repeat(1, 2))"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "acute-brighton",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n",
" [1]), 2)\n",
"Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
" [[True , True ],\n",
" [False, False]])\n"
]
}
],
"source": [
"flag = paddle.ones((2, 1), dtype='bool')\n",
"flag[1] = False\n",
"print(flag.repeat(paddle.to_tensor(1), 2))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "european-rugby",
"metadata": {},
"outputs": [],
"source": [
"def size(xs, *args: int):\n",
" nargs = len(args)\n",
" s = paddle.shape(xs)\n",
" assert(nargs <= 1)\n",
" if nargs == 1:\n",
" return s[args[0]]\n",
" else:\n",
" return s\n",
"paddle.Tensor.size = size"
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "moral-special",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tensor(shape=[2], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
" [2, 1])"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flag.size()"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "ahead-coach",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
" [1])"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flag.size(1)"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "incomplete-fitness",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
" [2])"
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flag.size(0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "upset-connectivity",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
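In short, the notebook checks that Paddle can reproduce torch's masked_fill/masked_fill_ semantics for beam-search score masking. The condensed sketch below (not part of the commit) restates what the cells above verify, assuming only a working Paddle install: the fill is built from paddle.where, and the in-place variant copies the result back with paddle.assign.

import paddle

def masked_fill(xs, mask, value):
    # emulate torch.Tensor.masked_fill with a select
    return paddle.where(mask, paddle.ones_like(xs) * value, xs)

score = paddle.randn([2, 3])
flag = paddle.to_tensor([[True], [False]])
unfinished = paddle.concat(
    [paddle.zeros_like(flag), flag.tile([1, 2])], axis=1)   # (2, 3)

out = masked_fill(score, unfinished, -float('inf'))
paddle.assign(out, output=score)    # emulate the in-place masked_fill_
print(score)                        # row 0 ends with -inf, -inf; row 1 untouched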
deepspeech/modules/__init__.py
@@ -11,3 +11,206 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
import logging
from typing import Union
from typing import Any

import paddle
from paddle import nn
from paddle.nn import functional as F
from paddle.nn import initializer as I

logger = logging.getLogger(__name__)

# TODO(Hui Zhang): remove this hack
paddle.bool = 'bool'
paddle.float16 = 'float16'
paddle.float32 = 'float32'
paddle.float64 = 'float64'
paddle.int8 = 'int8'
paddle.int16 = 'int16'
paddle.int32 = 'int32'
paddle.int64 = 'int64'
paddle.uint8 = 'uint8'
paddle.complex64 = 'complex64'
paddle.complex128 = 'complex128'

if not hasattr(paddle.Tensor, 'cat'):
    logger.warn(
        "override cat of paddle.Tensor if exists or register, remove this when fixed!"
    )
    paddle.Tensor.cat = paddle.Tensor.concat


def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
    nargs = len(args)
    assert (nargs <= 1)
    s = paddle.shape(xs)
    if nargs == 1:
        return s[args]
    else:
        return s


if not hasattr(paddle.Tensor, 'size'):
    logger.warn(
        "override size of paddle.Tensor if exists or register, remove this when fixed!"
    )
    paddle.Tensor.size = size


def masked_fill(xs: paddle.Tensor, mask: paddle.Tensor, value: Union[float, int]):
    assert xs.shape == mask.shape
    trues = paddle.ones_like(xs) * value
    xs = paddle.where(mask, trues, xs)
    return xs


if not hasattr(paddle.Tensor, 'masked_fill'):
    logger.warn(
        "register user masked_fill to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.masked_fill = masked_fill


def masked_fill_(xs: paddle.Tensor, mask: paddle.Tensor, value: Union[float, int]):
    assert xs.shape == mask.shape
    trues = paddle.ones_like(xs) * value
    ret = paddle.where(mask, trues, xs)
    paddle.assign(ret, output=xs)


if not hasattr(paddle.Tensor, 'masked_fill_'):
    logger.warn(
        "register user masked_fill_ to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.masked_fill_ = masked_fill_


def repeat(xs: paddle.Tensor, *size: Any) -> paddle.Tensor:
    return paddle.tile(xs, size)


if not hasattr(paddle.Tensor, 'repeat'):
    logger.warn(
        "register user repeat to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.repeat = repeat


# def softplus(x):
#     """Softplus function."""
#     if hasattr(paddle.nn.functional, 'softplus'):
#         #return paddle.nn.functional.softplus(x.float()).type_as(x)
#         return paddle.nn.functional.softplus(x)
#     else:
#         raise NotImplementedError

# def gelu_accurate(x):
#     """Gaussian Error Linear Units (GELU) activation."""
#     # [reference] https://github.com/pytorch/fairseq/blob/e75cff5f2c1d62f12dc911e0bf420025eb1a4e33/fairseq/modules/gelu.py
#     if not hasattr(gelu_accurate, "_a"):
#         gelu_accurate._a = math.sqrt(2 / math.pi)
#     return 0.5 * x * (1 + paddle.tanh(gelu_accurate._a *
#                                       (x + 0.044715 * paddle.pow(x, 3))))

# def gelu(x):
#     """Gaussian Error Linear Units (GELU) activation."""
#     if hasattr(nn.functional, 'gelu'):
#         #return nn.functional.gelu(x.float()).type_as(x)
#         return nn.functional.gelu(x)
#     else:
#         return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0)))


def glu(x: paddle.Tensor, dim=-1) -> paddle.Tensor:
    """The gated linear unit (GLU) activation."""
    a, b = x.split(2, axis=dim)
    act_b = F.sigmoid(b)
    return a * act_b


if not hasattr(paddle.nn.functional, 'glu'):
    logger.warn(
        "register user glu to paddle.nn.functional, remove this when fixed!")
    setattr(paddle.nn.functional, 'glu', glu)


# TODO(Hui Zhang): remove this activation
class GLU(nn.Layer):
    """Gated Linear Units (GLU) Layer"""

    def __init__(self, dim: int=-1):
        super().__init__()
        self.dim = dim

    def forward(self, xs):
        return glu(xs, dim=self.dim)


if not hasattr(paddle.nn, 'GLU'):
    logger.warn("register user GLU to paddle.nn, remove this when fixed!")
    setattr(paddle.nn, 'GLU', GLU)


# TODO(Hui Zhang): remove this Layer
class ConstantPad2d(nn.Layer):
    """Pads the input tensor boundaries with a constant value.
    For N-dimensional padding, use paddle.nn.functional.pad().
    """

    def __init__(self, padding: Union[tuple, list, int], value: float):
        """
        Args:
            padding ([tuple]): the size of the padding.
                If is int, uses the same padding in all boundaries.
                If a 4-tuple, uses (padding_left, padding_right, padding_top, padding_bottom)
            value ([float]): pad value
        """
        self.padding = padding if isinstance(padding,
                                             (tuple, list)) else [padding] * 4
        self.value = value

    def forward(self, xs: paddle.Tensor) -> paddle.Tensor:
        return nn.functional.pad(
            xs,
            self.padding,
            mode='constant',
            value=self.value,
            data_format='NCHW')


if not hasattr(paddle.nn, 'ConstantPad2d'):
    logger.warn(
        "register user ConstantPad2d to paddle.nn, remove this when fixed!")
    setattr(paddle.nn, 'ConstantPad2d', ConstantPad2d)


# hack loss
def ctc_loss(logits,
             labels,
             input_lengths,
             label_lengths,
             blank=0,
             reduction='mean',
             norm_by_times=True):
    #logger.info("my ctc loss with norm by times")
    ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403
    loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times,
                                           input_lengths, label_lengths)

    loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
    logger.info(f"warpctc loss: {loss_out}/{loss_out.shape}")
    assert reduction in ['mean', 'sum', 'none']
    if reduction == 'mean':
        loss_out = paddle.mean(loss_out / label_lengths)
    elif reduction == 'sum':
        loss_out = paddle.sum(loss_out)
    logger.info(f"ctc loss: {loss_out}")
    return loss_out


logger.warn(
    "override ctc_loss of paddle.nn.functional if exists, remove this when fixed!"
)
F.ctc_loss = ctc_loss
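The hacks above are registered as a side effect of importing the package. The sketch below (not from the commit) shows how the torch-style calls are then expected to read in downstream code; it assumes a Paddle build on which these attributes are missing, so the hasattr guards actually install the helpers.

import paddle

import deepspeech.modules  # noqa: F401 - importing registers the hacks above

x = paddle.randn([2, 3])
mask = paddle.ones([2, 3], dtype='bool')

print(x.size())        # torch-style size(): the full shape
print(x.size(0))       # torch-style size(0): length of dim 0
y = x.masked_fill(mask, -float('inf'))   # out-of-place fill
x.masked_fill_(mask, 0.0)                # in-place fill via paddle.assign
print(mask.repeat(1, 2).shape)           # tile-based repeat -> [2, 6]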
deepspeech/modules/activation.py
@@ -25,9 +25,7 @@ from paddle.nn import initializer as I
 logger = logging.getLogger(__name__)

-__all__ = [
-    "brelu", "glu", "GLU", "LinearGLUBlock", "ConstantPad2d", "ConvGLUBlock"
-]
+__all__ = ["brelu", "LinearGLUBlock", "ConstantPad2d", "ConvGLUBlock"]


 def brelu(x, t_min=0.0, t_max=24.0, name=None):
@@ -37,61 +35,6 @@ def brelu(x, t_min=0.0, t_max=24.0, name=None):
     return x.maximum(t_min).minimum(t_max)


-# def softplus(x):
-#     """Softplus function."""
-#     if hasattr(paddle.nn.functional, 'softplus'):
-#         #return paddle.nn.functional.softplus(x.float()).type_as(x)
-#         return paddle.nn.functional.softplus(x)
-#     else:
-#         raise NotImplementedError

-# def gelu_accurate(x):
-#     """Gaussian Error Linear Units (GELU) activation."""
-#     # [reference] https://github.com/pytorch/fairseq/blob/e75cff5f2c1d62f12dc911e0bf420025eb1a4e33/fairseq/modules/gelu.py
-#     if not hasattr(gelu_accurate, "_a"):
-#         gelu_accurate._a = math.sqrt(2 / math.pi)
-#     return 0.5 * x * (1 + paddle.tanh(gelu_accurate._a *
-#                                       (x + 0.044715 * paddle.pow(x, 3))))

-# def gelu(x):
-#     """Gaussian Error Linear Units (GELU) activation."""
-#     if hasattr(nn.functional, 'gelu'):
-#         #return nn.functional.gelu(x.float()).type_as(x)
-#         return nn.functional.gelu(x)
-#     else:
-#         return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0)))


-# TODO(Hui Zhang): remove this activation
-def glu(x, dim=-1):
-    """The gated linear unit (GLU) activation."""
-    if hasattr(nn.functional, 'glu'):
-        return nn.functional.glu(x)
-    else:
-        a, b = x.split(2, axis=dim)
-        act_b = F.sigmoid(b)
-        return a * act_b


-# TODO(Hui Zhang): remove this activation
-if not hasattr(nn.functional, 'glu'):
-    logger.warn(
-        "register user glu to paddle.nn.functional, remove this when fixed!")
-    setattr(nn.functional, 'glu', glu)


-# TODO(Hui Zhang): remove this activation
-class GLU(nn.Layer):
-    """Gated Linear Units (GLU) Layer"""
-
-    def __init__(self, dim: int=-1):
-        super().__init__()
-        self.dim = dim
-
-    def forward(self, xs):
-        return glu(xs, dim=self.dim)


 class LinearGLUBlock(nn.Layer):
     """A linear Gated Linear Units (GLU) block."""
 ...
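With glu/GLU gone from activation.py, callers are expected to pick up the versions registered on paddle.nn.functional and paddle.nn by deepspeech/modules/__init__.py. A rough usage sketch (not from the commit), assuming the running Paddle has no native glu/GLU so the registered fallbacks are the ones resolved:

import paddle

import deepspeech.modules  # registers glu / GLU when Paddle lacks them

x = paddle.randn([4, 8])                 # the gated dim must be even
y = paddle.nn.functional.glu(x)          # halves the last dim -> [4, 4]
layer = paddle.nn.GLU()                  # dim=-1 by default
z = layer(x)                             # same result as the functional form
print(y.shape, z.shape)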
deepspeech/modules/conformer_convolution.py
@@ -22,10 +22,6 @@ from paddle import nn
 from paddle.nn import functional as F
 from paddle.nn import initializer as I

-# init F.glu func
-# TODO(Hui Zhang): remove this line
-import deepspeech.modules.activation

 logger = logging.getLogger(__name__)

 __all__ = ['ConvolutionModule']
 ...
deepspeech/modules/loss.py
@@ -24,34 +24,6 @@ logger = logging.getLogger(__name__)
 __all__ = ['CTCLoss', "LabelSmoothingLoss"]


-# TODO(Hui Zhang): remove this hack, when `norm_by_times=True` is added
-def ctc_loss(logits,
-             labels,
-             input_lengths,
-             label_lengths,
-             blank=0,
-             reduction='mean',
-             norm_by_times=True):
-    #logger.info("my ctc loss with norm by times")
-    ## https://github.com/PaddlePaddle/Paddle/blob/f5ca2db2cc/paddle/fluid/operators/warpctc_op.h#L403
-    loss_out = paddle.fluid.layers.warpctc(logits, labels, blank, norm_by_times,
-                                           input_lengths, label_lengths)
-
-    loss_out = paddle.fluid.layers.squeeze(loss_out, [-1])
-    logger.info(f"warpctc loss: {loss_out}/{loss_out.shape}")
-    assert reduction in ['mean', 'sum', 'none']
-    if reduction == 'mean':
-        loss_out = paddle.mean(loss_out / label_lengths)
-    elif reduction == 'sum':
-        loss_out = paddle.sum(loss_out)
-    logger.info(f"ctc loss: {loss_out}")
-    return loss_out


-# TODO(Hui Zhang): remove this hack
-F.ctc_loss = ctc_loss


 class CTCLoss(nn.Layer):
     def __init__(self, blank=0, reduction='sum'):
         super().__init__()
 ...
@@ -149,12 +121,14 @@ class LabelSmoothingLoss(nn.Layer):
         ignore = target == self.padding_idx  # (B,)
         ignore = ignore.cast(target.dtype)

-        target = target * (1 - ignore)  # avoid -1 index
+        #target = target * (1 - ignore)  # avoid -1 index
+        target = target.masked_fill(ignore, 0)  # avoid -1 index
         true_dist += F.one_hot(target, self.size) * self.confidence

         kl = self.criterion(F.log_softmax(x, axis=1), true_dist)

         total = len(target) - int(ignore.sum())
         denom = total if self.normalize_length else B
-        numer = (kl * (1 - ignore)).sum()
+        #numer = (kl * (1 - ignore)).sum()
+        numer = kl.masked_fill(ignore.unsqueeze(1), 0).sum()
         return numer / denom
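The LabelSmoothingLoss change swaps the multiply-by-(1 - ignore) masking for masked_fill. The toy check below (not part of the commit) illustrates why the two forms agree on the ignored rows; it spells the fill out with paddle.where instead of relying on the registered masked_fill helper, and the numbers are made up for the example.

import paddle

kl = paddle.to_tensor([[0.5, 0.5], [1.0, 1.0], [2.0, 2.0]])   # (B=3, V=2)
ignore = paddle.to_tensor([[False], [True], [False]])          # (B, 1), True = padding

# the removed form: zero ignored rows by multiplying with (1 - ignore)
old = (kl * (1 - ignore.astype(kl.dtype))).sum()

# the new form, spelled out with paddle.where (what the registered masked_fill does)
mask = ignore.tile([1, kl.shape[1]])                           # (B, V)
new = paddle.where(mask, paddle.zeros_like(kl), kl).sum()

print(float(old), float(new))   # both 5.0: the padded row contributes nothing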
deepspeech/modules/mask.py
@@ -25,6 +25,21 @@ __all__ = ['sequence_mask']
def sequence_mask(x_len, max_len=None, dtype='float32'):
    """[summary]
    Args:
        x_len ([paddle.Tensor]): xs length, [B]
        max_len ([type], optional): max sequence length. Defaults to None.
        dtype (str, optional): mask data type. Defaults to 'float32'.
    Returns:
        paddle.Tensor: [B, Tmax]
    Examples:
        >>> sequence_mask([2, 4])
        [[1., 1., 0., 0.],
         [1., 1., 1., 1.]]
    """
    max_len = max_len or x_len.max()
    x_len = paddle.unsqueeze(x_len, -1)
    row_vector = paddle.arange(max_len)
@@ -33,3 +48,230 @@ def sequence_mask(x_len, max_len=None, dtype='float32'):
    mask = row_vector > x_len  # a bug: broadcasting goes wrong here
    mask = paddle.cast(mask, dtype)
    return mask


def subsequent_mask(size: int) -> paddle.Tensor:
    """Create mask for subsequent steps (size, size).
    This mask is used only in decoder which works in an auto-regressive mode.
    This means the current step could only do attention with its left steps.
    In encoder, fully attention is used when streaming is not necessary and
    the sequence is not long. In this case, no attention mask is needed.
    When streaming is needed, chunk-based attention is used in encoder. See
    subsequent_chunk_mask for the chunk-based attention mask.
    Args:
        size (int): size of mask
    Returns:
        paddle.Tensor: mask
    Examples:
        >>> subsequent_mask(3)
        [[1, 0, 0],
         [1, 1, 0],
         [1, 1, 1]]
    """
    ret = paddle.ones([size, size], dtype=paddle.bool)
    return paddle.tril(ret)


def subsequent_chunk_mask(size: int,
                          chunk_size: int,
                          num_left_chunks: int=-1) -> paddle.Tensor:
    """Create mask for subsequent steps (size, size) with chunk size,
    this is for streaming encoder
    Args:
        size (int): size of mask
        chunk_size (int): size of chunk
        num_left_chunks (int): number of left chunks
            <0: use full chunk
            >=0: use num_left_chunks
    Returns:
        paddle.Tensor: mask
    Examples:
        >>> subsequent_chunk_mask(4, 2)
        [[1, 1, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]
    """
    ret = paddle.zeros([size, size], dtype=paddle.bool)
    for i in range(size):
        if num_left_chunks < 0:
            start = 0
        else:
            start = max((i // chunk_size - num_left_chunks) * chunk_size, 0)
        ending = min((i // chunk_size + 1) * chunk_size, size)
        ret[i, start:ending] = True
    return ret


def add_optional_chunk_mask(xs: paddle.Tensor, masks: paddle.Tensor,
                            use_dynamic_chunk: bool,
                            use_dynamic_left_chunk: bool,
                            decoding_chunk_size: int, static_chunk_size: int,
                            num_decoding_left_chunks: int):
    """ Apply optional mask for encoder.
    Args:
        xs (paddle.Tensor): padded input, (B, L, D), L for max length
        mask (paddle.Tensor): mask for xs, (B, 1, L)
        use_dynamic_chunk (bool): whether to use dynamic chunk or not
        use_dynamic_left_chunk (bool): whether to use dynamic left chunk for
            training.
        decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's
            0: default for training, use random dynamic chunk.
            <0: for decoding, use full chunk.
            >0: for decoding, use fixed chunk size as set.
        static_chunk_size (int): chunk size for static chunk training/decoding
            if it's greater than 0, if use_dynamic_chunk is true,
            this parameter will be ignored
        num_decoding_left_chunks (int): number of left chunks, this is for decoding,
            the chunk size is decoding_chunk_size.
            >=0: use num_decoding_left_chunks
            <0: use all left chunks
    Returns:
        paddle.Tensor: chunk mask of the input xs.
    """
    # Whether to use chunk mask or not
    if use_dynamic_chunk:
        max_len = xs.shape[1]
        if decoding_chunk_size < 0:
            chunk_size = max_len
            num_left_chunks = -1
        elif decoding_chunk_size > 0:
            chunk_size = decoding_chunk_size
            num_left_chunks = num_decoding_left_chunks
        else:
            # chunk size is either [1, 25] or full context(max_len).
            # Since we use 4 times subsampling and allow up to 1s(100 frames)
            # delay, the maximum frame is 100 / 4 = 25.
            chunk_size = int(paddle.randint(1, max_len, (1, )))
            num_left_chunks = -1
            if chunk_size > max_len // 2:
                chunk_size = max_len
            else:
                chunk_size = chunk_size % 25 + 1
                if use_dynamic_left_chunk:
                    max_left_chunks = (max_len - 1) // chunk_size
                    num_left_chunks = int(
                        paddle.randint(0, max_left_chunks, (1, )))
        chunk_masks = subsequent_chunk_mask(xs.shape[1], chunk_size,
                                            num_left_chunks)  # (L, L)
        chunk_masks = chunk_masks.unsqueeze(0)  # (1, L, L)
        chunk_masks = masks & chunk_masks  # (B, L, L)
    elif static_chunk_size > 0:
        num_left_chunks = num_decoding_left_chunks
        chunk_masks = subsequent_chunk_mask(xs.shape[1], static_chunk_size,
                                            num_left_chunks)  # (L, L)
        chunk_masks = chunk_masks.unsqueeze(0)  # (1, L, L)
        chunk_masks = masks & chunk_masks  # (B, L, L)
    else:
        chunk_masks = masks
    return chunk_masks


def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    """Make mask tensor containing indices of padded part.
    See description of make_non_pad_mask.
    Args:
        lengths (paddle.Tensor): Batch of lengths (B,).
    Returns:
        paddle.Tensor: Mask tensor containing indices of padded part.
    Examples:
        >>> lengths = [5, 3, 2]
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0 ,0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]
    """
    batch_size = int(lengths.shape[0])
    max_len = int(lengths.max())
    seq_range = paddle.arange(0, max_len, dtype=paddle.int64)
    seq_range_expand = seq_range.unsqueeze(0).expand([batch_size, max_len])
    seq_length_expand = lengths.unsqueeze(-1)
    mask = seq_range_expand >= seq_length_expand
    return mask


def make_non_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    """Make mask tensor containing indices of non-padded part.
    The sequences in a batch may have different lengths. To enable
    batch computing, padding is needed to make all sequences the same
    size. To avoid the padding part passing values to context-dependent
    blocks such as attention or convolution, this padding part is
    masked.
    This pad_mask is used in both encoder and decoder.
    1 for non-padded part and 0 for padded part.
    Args:
        lengths (paddle.Tensor): Batch of lengths (B,).
    Returns:
        paddle.Tensor: mask tensor containing indices of padded part.
    Examples:
        >>> lengths = [5, 3, 2]
        >>> make_non_pad_mask(lengths)
        masks = [[1, 1, 1, 1 ,1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]]
    """
    return ~make_pad_mask(lengths)


def mask_finished_scores(score: paddle.Tensor,
                         flag: paddle.Tensor) -> paddle.Tensor:
    """
    If a sequence is finished, we only allow one alive branch. This function
    aims to give one branch a zero score and the rest -inf score.
    Args:
        score (paddle.Tensor): A real value array with shape
            (batch_size * beam_size, beam_size).
        flag (paddle.Tensor): A bool array with shape
            (batch_size * beam_size, 1).
    Returns:
        paddle.Tensor: (batch_size * beam_size, beam_size).
    Examples:
        flag: tensor([[ True],
                      [False]])
        score: tensor([[-0.3666, -0.6664, 0.6019],
                       [-1.1490, -0.2948, 0.7460]])
        unfinished: tensor([[False, True, True],
                            [False, False, False]])
        finished: tensor([[ True, False, False],
                          [False, False, False]])
        return: tensor([[ 0.0000, -inf, -inf],
                        [-1.1490, -0.2948, 0.7460]])
    """
    beam_size = score.shape[-1]
    zero_mask = paddle.zeros_like(flag, dtype=paddle.bool)
    if beam_size > 1:
        unfinished = paddle.concat(
            (zero_mask, flag.tile([1, beam_size - 1])), axis=1)
        finished = paddle.concat(
            (flag, zero_mask.tile([1, beam_size - 1])), axis=1)
    else:
        unfinished = zero_mask
        finished = flag

    # infs = paddle.ones_like(score) * -float('inf')
    # score = paddle.where(unfinished, infs, score)
    # score = paddle.where(finished, paddle.zeros_like(score), score)
    score.masked_fill_(unfinished, -float('inf'))
    score.masked_fill_(finished, 0)
    return score


def mask_finished_preds(pred: paddle.Tensor, flag: paddle.Tensor,
                        eos: int) -> paddle.Tensor:
    """
    If a sequence is finished, all of its branch should be <eos>
    Args:
        pred (paddle.Tensor): An int array with shape
            (batch_size * beam_size, beam_size).
        flag (paddle.Tensor): A bool array with shape
            (batch_size * beam_size, 1).
    Returns:
        paddle.Tensor: (batch_size * beam_size).
    """
    beam_size = pred.size(-1)
    finished = flag.repeat([1, beam_size])
    return pred.masked_fill_(finished, eos)
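Taken together, these helpers build the attention masks for a streaming encoder: pad masks come from the utterance lengths and are combined with a chunked causal mask. A small usage sketch (not part of the commit), assuming deepspeech.modules.mask imports cleanly:

import paddle
from deepspeech.modules.mask import (add_optional_chunk_mask, make_non_pad_mask,
                                     subsequent_chunk_mask)

lengths = paddle.to_tensor([5, 3, 2], dtype='int64')
pad_mask = make_non_pad_mask(lengths).unsqueeze(1)   # (B, 1, Tmax), True = valid frame

print(subsequent_chunk_mask(4, 2))                   # chunked lower-triangular pattern

xs = paddle.randn([3, 5, 8])                         # (B, Tmax, D) dummy features
chunk_mask = add_optional_chunk_mask(
    xs, pad_mask,
    use_dynamic_chunk=False, use_dynamic_left_chunk=False,
    decoding_chunk_size=0, static_chunk_size=2,
    num_decoding_left_chunks=-1)                     # (B, Tmax, Tmax)
print(chunk_mask.shape)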