Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
043127b6
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
043127b6
编写于
6月 29, 2021
作者:
H
Haoxin Ma
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
revise collator
上级
1ec93dbd
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
29 additions
and
21 deletions
+29
-21
deepspeech/frontend/augmentor/augmentation.py
deepspeech/frontend/augmentor/augmentation.py
+3
-3
deepspeech/frontend/augmentor/shift_perturb.py
deepspeech/frontend/augmentor/shift_perturb.py
+9
-9
deepspeech/frontend/augmentor/spec_augment.py
deepspeech/frontend/augmentor/spec_augment.py
+1
-1
deepspeech/io/collator.py
deepspeech/io/collator.py
+14
-7
examples/aishell/s0/conf/deepspeech2.yaml
examples/aishell/s0/conf/deepspeech2.yaml
+2
-1
未找到文件。
deepspeech/frontend/augmentor/augmentation.py
浏览文件 @
043127b6
...
...
@@ -104,7 +104,7 @@ class AugmentationPipeline():
for
augmentor
,
rate
in
zip
(
self
.
_augmentors
,
self
.
_rates
):
augmentor
.
randomize_parameters
()
def
randomize_parameters_feature_transform
(
self
,
audio
):
def
randomize_parameters_feature_transform
(
self
,
n_frames
,
n_bins
):
"""Run the pre-processing pipeline for data augmentation.
Note that this is an in-place transformation.
...
...
@@ -112,8 +112,8 @@ class AugmentationPipeline():
:param audio_segment: Audio segment to process.
:type audio_segment: AudioSegmenet|SpeechSegment
"""
for
augmentor
,
rate
in
zip
(
self
.
_augmentors
,
self
.
_rates
):
augmentor
.
randomize_parameters
(
audio
)
for
augmentor
,
rate
in
zip
(
self
.
_
spec_
augmentors
,
self
.
_rates
):
augmentor
.
randomize_parameters
(
n_frames
,
n_bins
)
def
apply_audio_transform
(
self
,
audio_segment
):
"""Run the pre-processing pipeline for data augmentation.
...
...
deepspeech/frontend/augmentor/shift_perturb.py
浏览文件 @
043127b6
...
...
@@ -37,17 +37,17 @@ class ShiftPerturbAugmentor(AugmentorBase):
def
apply
(
self
,
audio_segment
):
audio_segment
.
shift
(
self
.
shift_ms
)
def
transform_audio
(
self
,
audio_segment
,
single
):
"""Shift audio.
#
def transform_audio(self, audio_segment, single):
#
"""Shift audio.
Note that this is an in-place transformation.
#
Note that this is an in-place transformation.
:param audio_segment: Audio segment to add effects to.
:type audio_segment: AudioSegmenet|SpeechSegment
"""
if
(
single
):
self
.
randomize_parameters
()
self
.
apply
(
audio_segment
)
#
:param audio_segment: Audio segment to add effects to.
#
:type audio_segment: AudioSegmenet|SpeechSegment
#
"""
#
if(single):
#
self.randomize_parameters()
#
self.apply(audio_segment)
# def transform_audio(self, audio_segment):
...
...
deepspeech/frontend/augmentor/spec_augment.py
浏览文件 @
043127b6
...
...
@@ -124,7 +124,7 @@ class SpecAugmentor(AugmentorBase):
def
time_warp
(
xs
,
W
=
40
):
raise
NotImplementedError
def
randomize_parameters
(
self
,
n_frame
,
n_bins
):
def
randomize_parameters
(
self
,
n_frame
s
,
n_bins
):
# n_bins = xs.shape[0]
# n_frames = xs.shape[1]
...
...
deepspeech/io/collator.py
浏览文件 @
043127b6
...
...
@@ -110,7 +110,8 @@ class SpeechCollator():
use_dB_normalization
=
config
.
collator
.
use_dB_normalization
,
target_dB
=
config
.
collator
.
target_dB
,
dither
=
config
.
collator
.
dither
,
keep_transcription_text
=
config
.
collator
.
keep_transcription_text
)
keep_transcription_text
=
config
.
collator
.
keep_transcription_text
,
randomize_each_batch
=
config
.
collator
.
randomize_each_batch
)
return
speech_collator
def
__init__
(
...
...
@@ -132,7 +133,8 @@ class SpeechCollator():
use_dB_normalization
=
True
,
target_dB
=-
20
,
dither
=
1.0
,
keep_transcription_text
=
True
):
keep_transcription_text
=
True
,
randomize_each_batch
=
False
):
"""SpeechCollator Collator
Args:
...
...
@@ -160,6 +162,7 @@ class SpeechCollator():
a user-defined shape) within one batch.
"""
self
.
_keep_transcription_text
=
keep_transcription_text
self
.
_randomize_each_batch
=
randomize_each_batch
self
.
_local_data
=
TarLocalData
(
tar2info
=
{},
tar2object
=
{})
self
.
_augmentation_pipeline
=
AugmentationPipeline
(
...
...
@@ -170,6 +173,7 @@ class SpeechCollator():
self
.
_stride_ms
=
stride_ms
self
.
_target_sample_rate
=
target_sample_rate
self
.
_speech_featurizer
=
SpeechFeaturizer
(
unit_type
=
unit_type
,
...
...
@@ -224,10 +228,10 @@ class SpeechCollator():
return
speech_segment
def
randomize_audio_parameters
(
self
):
self
.
_augmentation_pipeline
.
andomize_parameters_audio_transform
()
self
.
_augmentation_pipeline
.
r
andomize_parameters_audio_transform
()
def
randomize_feature_parameters
(
self
,
n_
bins
,
n_frame
s
):
self
.
_augmentation_pipeline
.
andomize_parameters_feature_transform
(
n_bins
,
n_frame
s
)
def
randomize_feature_parameters
(
self
,
n_
frames
,
n_bin
s
):
self
.
_augmentation_pipeline
.
randomize_parameters_feature_transform
(
n_frames
,
n_bin
s
)
def
process_feature_and_transform
(
self
,
audio_file
,
transcript
):
"""Load, augment, featurize and normalize for speech data.
...
...
@@ -317,12 +321,15 @@ class SpeechCollator():
# print(len(batch))
self
.
randomize_audio_parameters
()
for
utt
,
audio
,
text
in
batch
:
if
not
self
.
config
.
randomize_each_batch
:
if
not
self
.
_
randomize_each_batch
:
self
.
randomize_audio_parameters
()
audio
,
text
=
self
.
process_feature_and_transform
(
audio
,
text
)
#utt
utts
.
append
(
utt
)
# audio
# print("---debug---")
# print(audio.shape)
audio
=
audio
.
T
audios
.
append
(
audio
)
# [T, D]
audio_lens
.
append
(
audio
.
shape
[
0
])
# text
...
...
@@ -350,7 +357,7 @@ class SpeechCollator():
n_bins
=
padded_audios
.
shape
[
2
]
self
.
randomize_feature_parameters
(
min
(
audio_lens
),
n_bins
)
for
i
in
range
(
len
(
padded_audios
)):
if
not
self
.
config
.
randomize_each_batch
:
if
not
self
.
_
randomize_each_batch
:
self
.
randomize_feature_parameters
(
n_bins
,
audio_lens
[
i
])
padded_audios
[
i
]
=
self
.
_augmentation_pipeline
.
apply_feature_transform
(
padded_audios
[
i
])
...
...
examples/aishell/s0/conf/deepspeech2.yaml
浏览文件 @
043127b6
...
...
@@ -11,7 +11,8 @@ data:
max_output_input_ratio
:
.inf
collator
:
batch_size
:
64
# one gpu
batch_size
:
32
#64 # one gpu
randomize_each_batch
:
False
mean_std_filepath
:
data/mean_std.json
unit_type
:
char
vocab_filepath
:
data/vocab.txt
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录