Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
d9a38640
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
d9a38640
编写于
8月 20, 2021
作者:
H
Hui Zhang
提交者:
GitHub
8月 20, 2021
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #776 from PaddlePaddle/aug
support replace with mean by aug
上级
9fbcc66a
50f10f37
变更
20
隐藏空白更改
内联
并排
Showing
20 changed files
with
66 additions
and
72 deletions
+66
-72
deepspeech/__init__.py
deepspeech/__init__.py
+1
-40
deepspeech/frontend/augmentor/impulse_response.py
deepspeech/frontend/augmentor/impulse_response.py
+1
-1
deepspeech/frontend/augmentor/noise_perturb.py
deepspeech/frontend/augmentor/noise_perturb.py
+1
-1
deepspeech/frontend/augmentor/online_bayesian_normalization.py
...peech/frontend/augmentor/online_bayesian_normalization.py
+1
-1
deepspeech/frontend/augmentor/resample.py
deepspeech/frontend/augmentor/resample.py
+1
-1
deepspeech/frontend/augmentor/shift_perturb.py
deepspeech/frontend/augmentor/shift_perturb.py
+1
-1
deepspeech/frontend/augmentor/spec_augment.py
deepspeech/frontend/augmentor/spec_augment.py
+15
-6
deepspeech/frontend/augmentor/speed_perturb.py
deepspeech/frontend/augmentor/speed_perturb.py
+1
-1
deepspeech/frontend/augmentor/volume_perturb.py
deepspeech/frontend/augmentor/volume_perturb.py
+1
-1
examples/aishell/s0/conf/augmentation.json
examples/aishell/s0/conf/augmentation.json
+2
-1
examples/aishell/s1/conf/augmentation.json
examples/aishell/s1/conf/augmentation.json
+2
-1
examples/aug_conf/augmentation.json
examples/aug_conf/augmentation.json
+0
-10
examples/augmentation/augmentation.json
examples/augmentation/augmentation.json
+2
-1
examples/callcenter/s1/conf/augmentation.json
examples/callcenter/s1/conf/augmentation.json
+2
-1
examples/librispeech/s0/conf/augmentation.json
examples/librispeech/s0/conf/augmentation.json
+2
-1
examples/librispeech/s1/conf/augmentation.json
examples/librispeech/s1/conf/augmentation.json
+2
-1
examples/librispeech/s2/conf/augmentation.json
examples/librispeech/s2/conf/augmentation.json
+2
-1
examples/timit/s1/conf/augmentation.json
examples/timit/s1/conf/augmentation.json
+2
-1
examples/tiny/s0/conf/augmentation.json
examples/tiny/s0/conf/augmentation.json
+25
-0
examples/tiny/s1/conf/augmentation.json
examples/tiny/s1/conf/augmentation.json
+2
-1
未找到文件。
deepspeech/__init__.py
浏览文件 @
d9a38640
...
...
@@ -352,45 +352,6 @@ if not hasattr(paddle.Tensor, 'tolist'):
"register user tolist to paddle.Tensor, remove this when fixed!"
)
setattr
(
paddle
.
Tensor
,
'tolist'
,
tolist
)
########### hack paddle.nn.functional #############
def
glu
(
x
:
paddle
.
Tensor
,
axis
=-
1
)
->
paddle
.
Tensor
:
"""The gated linear unit (GLU) activation."""
a
,
b
=
x
.
split
(
2
,
axis
=
axis
)
act_b
=
F
.
sigmoid
(
b
)
return
a
*
act_b
if
not
hasattr
(
paddle
.
nn
.
functional
,
'glu'
):
logger
.
warn
(
"register user glu to paddle.nn.functional, remove this when fixed!"
)
setattr
(
paddle
.
nn
.
functional
,
'glu'
,
glu
)
# def softplus(x):
# """Softplus function."""
# if hasattr(paddle.nn.functional, 'softplus'):
# #return paddle.nn.functional.softplus(x.float()).type_as(x)
# return paddle.nn.functional.softplus(x)
# else:
# raise NotImplementedError
# def gelu_accurate(x):
# """Gaussian Error Linear Units (GELU) activation."""
# # [reference] https://github.com/pytorch/fairseq/blob/e75cff5f2c1d62f12dc911e0bf420025eb1a4e33/fairseq/modules/gelu.py
# if not hasattr(gelu_accurate, "_a"):
# gelu_accurate._a = math.sqrt(2 / math.pi)
# return 0.5 * x * (1 + paddle.tanh(gelu_accurate._a *
# (x + 0.044715 * paddle.pow(x, 3))))
# def gelu(x):
# """Gaussian Error Linear Units (GELU) activation."""
# if hasattr(nn.functional, 'gelu'):
# #return nn.functional.gelu(x.float()).type_as(x)
# return nn.functional.gelu(x)
# else:
# return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0)))
########### hack paddle.nn #############
class
GLU
(
nn
.
Layer
):
...
...
@@ -401,7 +362,7 @@ class GLU(nn.Layer):
self
.
dim
=
dim
def
forward
(
self
,
xs
):
return
glu
(
xs
,
dim
=
self
.
dim
)
return
F
.
glu
(
xs
,
dim
=
self
.
dim
)
if
not
hasattr
(
paddle
.
nn
,
'GLU'
):
...
...
deepspeech/frontend/augmentor/impulse_response.py
浏览文件 @
d9a38640
...
...
@@ -32,7 +32,7 @@ class ImpulseResponseAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/noise_perturb.py
浏览文件 @
d9a38640
...
...
@@ -38,7 +38,7 @@ class NoisePerturbAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/online_bayesian_normalization.py
浏览文件 @
d9a38640
...
...
@@ -46,7 +46,7 @@ class OnlineBayesianNormalizationAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/resample.py
浏览文件 @
d9a38640
...
...
@@ -33,7 +33,7 @@ class ResampleAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/shift_perturb.py
浏览文件 @
d9a38640
...
...
@@ -33,7 +33,7 @@ class ShiftPerturbAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/spec_augment.py
浏览文件 @
d9a38640
...
...
@@ -41,7 +41,8 @@ class SpecAugmentor(AugmentorBase):
W
=
40
,
adaptive_number_ratio
=
0
,
adaptive_size_ratio
=
0
,
max_n_time_masks
=
20
):
max_n_time_masks
=
20
,
replace_with_zero
=
True
):
"""SpecAugment class.
Args:
rng (random.Random): random generator object.
...
...
@@ -54,9 +55,11 @@ class SpecAugmentor(AugmentorBase):
adaptive_number_ratio (float): adaptive multiplicity ratio for time masking
adaptive_size_ratio (float): adaptive size ratio for time masking
max_n_time_masks (int): maximum number of time masking
replace_with_zero (bool): pad zero on mask if true else use mean
"""
super
().
__init__
()
self
.
_rng
=
rng
self
.
replace_with_zero
=
replace_with_zero
self
.
W
=
W
self
.
F
=
F
...
...
@@ -124,15 +127,18 @@ class SpecAugmentor(AugmentorBase):
return
f
"specaug: F-
{
F
}
, T-
{
T
}
, F-n-
{
n_freq_masks
}
, T-n-
{
n_time_masks
}
"
def
time_warp
(
xs
,
W
=
40
):
raise
NotImplementedError
return
xs
def
mask_freq
(
self
,
xs
,
replace_with_zero
=
False
):
n_bins
=
xs
.
shape
[
0
]
for
i
in
range
(
0
,
self
.
n_freq_masks
):
f
=
int
(
self
.
_rng
.
uniform
(
low
=
0
,
high
=
self
.
F
))
f_0
=
int
(
self
.
_rng
.
uniform
(
low
=
0
,
high
=
n_bins
-
f
))
xs
[
f_0
:
f_0
+
f
,
:]
=
0
assert
f_0
<=
f_0
+
f
if
self
.
replace_with_zero
:
xs
[
f_0
:
f_0
+
f
,
:]
=
0
else
:
xs
[
f_0
:
f_0
+
f
,
:]
=
xs
.
mean
()
self
.
_freq_mask
=
(
f_0
,
f_0
+
f
)
return
xs
...
...
@@ -154,14 +160,17 @@ class SpecAugmentor(AugmentorBase):
t
=
int
(
self
.
_rng
.
uniform
(
low
=
0
,
high
=
T
))
t
=
min
(
t
,
int
(
n_frames
*
self
.
p
))
t_0
=
int
(
self
.
_rng
.
uniform
(
low
=
0
,
high
=
n_frames
-
t
))
xs
[:,
t_0
:
t_0
+
t
]
=
0
assert
t_0
<=
t_0
+
t
if
self
.
replace_with_zero
:
xs
[:,
t_0
:
t_0
+
t
]
=
0
else
:
xs
[:,
t_0
:
t_0
+
t
]
=
xs
.
mean
()
self
.
_time_mask
=
(
t_0
,
t_0
+
t
)
return
xs
def
__call__
(
self
,
x
,
train
=
True
):
if
not
train
:
return
return
x
return
self
.
transform_feature
(
x
)
def
transform_feature
(
self
,
xs
:
np
.
ndarray
):
...
...
@@ -171,7 +180,7 @@ class SpecAugmentor(AugmentorBase):
Returns:
xs (FloatTensor): `[F, T]`
"""
#
xs = self.time_warp(xs)
xs
=
self
.
time_warp
(
xs
)
xs
=
self
.
mask_freq
(
xs
)
xs
=
self
.
mask_time
(
xs
)
return
xs
deepspeech/frontend/augmentor/speed_perturb.py
浏览文件 @
d9a38640
...
...
@@ -81,7 +81,7 @@ class SpeedPerturbAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
deepspeech/frontend/augmentor/volume_perturb.py
浏览文件 @
d9a38640
...
...
@@ -39,7 +39,7 @@ class VolumePerturbAugmentor(AugmentorBase):
def
__call__
(
self
,
x
,
uttid
=
None
,
train
=
True
):
if
not
train
:
return
return
x
self
.
transform_audio
(
x
)
return
x
...
...
examples/aishell/s0/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/aishell/s1/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/aug_conf/augmentation.json
已删除
100644 → 0
浏览文件 @
9fbcc66a
[
{
"type"
:
"shift"
,
"params"
:
{
"min_shift_ms"
:
-5
,
"max_shift_ms"
:
5
},
"prob"
:
1.0
}
]
examples/aug
_conf/augmentation.example
.json
→
examples/aug
mentation/augmentation
.json
浏览文件 @
d9a38640
...
...
@@ -60,7 +60,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
0.0
}
...
...
examples/callcenter/s1/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/librispeech/s0/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/librispeech/s1/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/librispeech/s2/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -10,7 +10,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/timit/s1/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
examples/tiny/s0/conf/augmentation.json
浏览文件 @
d9a38640
[
{
"type"
:
"speed"
,
"params"
:
{
"min_speed_rate"
:
0.9
,
"max_speed_rate"
:
1.1
,
"num_rates"
:
3
},
"prob"
:
1.0
},
{
"type"
:
"shift"
,
"params"
:
{
...
...
@@ -6,5 +15,21 @@
"max_shift_ms"
:
5
},
"prob"
:
1.0
},
{
"type"
:
"specaug"
,
"params"
:
{
"F"
:
10
,
"T"
:
50
,
"n_freq_masks"
:
2
,
"n_time_masks"
:
2
,
"p"
:
1.0
,
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
]
examples/tiny/s1/conf/augmentation.json
浏览文件 @
d9a38640
...
...
@@ -27,7 +27,8 @@
"W"
:
80
,
"adaptive_number_ratio"
:
0
,
"adaptive_size_ratio"
:
0
,
"max_n_time_masks"
:
20
"max_n_time_masks"
:
20
,
"replace_with_zero"
:
true
},
"prob"
:
1.0
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录