Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
e64bd000
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e64bd000
编写于
6月 19, 2017
作者:
chrisxu2014
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add audio file
上级
1b7c7c61
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
49 additions
and
25 deletions
+49
-25
deep_speech_2/data_utils/audio.py
deep_speech_2/data_utils/audio.py
+44
-20
deep_speech_2/data_utils/speech.py
deep_speech_2/data_utils/speech.py
+5
-5
未找到文件。
deep_speech_2/data_utils/audio.py
浏览文件 @
e64bd000
...
...
@@ -9,6 +9,7 @@ import soundfile
import
scikits.samplerate
from
scipy
import
signal
import
random
import
copy
class
AudioSegment
(
object
):
...
...
@@ -87,9 +88,8 @@ class AudioSegment(object):
:return: Audio segment instance as concatenating results.
:rtype: AudioSegment
:raises ValueError: If the number of segments is zero, or if the
sample_rate of any two segment does not match.
:raises TypeError: If every item in segments is not AudioSegment
instance.
sample_rate of any segments does not match.
:raises TypeError: If any segment is not AudioSegment instance.
"""
# Perform basic sanity-checks.
if
len
(
segments
)
==
0
:
...
...
@@ -101,7 +101,7 @@ class AudioSegment(object):
"different sample rates"
)
if
type
(
seg
)
is
not
cls
:
raise
TypeError
(
"Only audio segments of the same type "
"
instance
can be concatenated."
)
"can be concatenated."
)
samples
=
np
.
concatenate
([
seg
.
samples
for
seg
in
segments
])
return
cls
(
samples
,
sample_rate
)
...
...
@@ -180,8 +180,7 @@ class AudioSegment(object):
@
classmethod
def
make_silence
(
cls
,
duration
,
sample_rate
):
"""Creates a silent audio segment of the given duration and
sample rate.
"""Creates a silent audio segment of the given duration and sample rate.
:param duration: Length of silence in seconds.
:type duration: float
...
...
@@ -193,15 +192,17 @@ class AudioSegment(object):
samples
=
np
.
zeros
(
int
(
duration
*
sample_rate
))
return
cls
(
samples
,
sample_rate
)
def
superimpose
d
(
self
,
other
):
def
superimpose
(
self
,
other
):
"""Add samples from another segment to those of this segment
(sample-wise addition, not segment concatenation).
Note that this is an in-place transformation.
:param other: Segment containing samples to be added in.
:type other: AudioSegment
:raise TypeError: If type of two segments don't match.
:raise ValueError: If the sample
_rate of two segments not equal, or if
the length
of segments don't match.
:raise ValueError: If the sample
rates of the two segments are not
equal, or if the lengths
of segments don't match.
"""
if
type
(
self
)
!=
type
(
other
):
raise
TypeError
(
"Cannot add segments of different types: %s "
...
...
@@ -215,7 +216,7 @@ class AudioSegment(object):
def
to_bytes
(
self
,
dtype
=
'float32'
):
"""Create a byte string containing the audio content.
:param dtype: Data type for export samples. Options: 'int16','int32',
:param dtype: Data type for export samples. Options: 'int16',
'int32',
'float32', 'float64'. Default is 'float32'.
:type dtype: str
:return: Byte string containing audio content.
...
...
@@ -362,16 +363,20 @@ class AudioSegment(object):
elif
sides
==
"both"
:
padded
=
cls
.
concatenate
(
silence
,
self
,
silence
)
else
:
raise
ValueError
(
"Unknown value for the
kwarg
%s"
%
sides
)
raise
ValueError
(
"Unknown value for the
sides
%s"
%
sides
)
self
.
_samples
=
padded
.
_samples
def
subsegment
(
self
,
start_sec
=
None
,
end_sec
=
None
):
"""Return new AudioSegment containing audio between given boundaries.
"""Cut the AudioSegment between given boundaries.
Note that this is an in-place transformation.
:param start_sec: Beginning of subsegment in seconds.
:type start_sec: float
:param end_sec: End of subsegment in seconds.
:type end_sec: float
:raise ValueError: If start_sec or end_sec is incorrectly set, e.g. out
of bounds in time.
"""
start_sec
=
0.0
if
start_sec
is
None
else
start_sec
end_sec
=
self
.
duration
if
end_sec
is
None
else
end_sec
...
...
@@ -379,19 +384,33 @@ class AudioSegment(object):
start_sec
=
self
.
duration
+
start_sec
if
end_sec
<
0.0
:
end_sec
=
self
.
duration
+
end_sec
if
start_sec
<
0.0
:
raise
ValueError
(
"The slice start position (%f s) is out of "
"bounds."
%
start_sec
)
if
end_sec
<
0.0
:
raise
ValueError
(
"The slice end position (%f s) is out of bounds."
%
end_sec
)
if
start_sec
>
end_sec
:
raise
ValueError
(
"The slice start position (%f s) is later than "
"the end position (%f s)."
%
(
start_sec
,
end_sec
))
if
end_sec
>
self
.
duration
:
raise
ValueError
(
"The slice end position (%f s) is out of bounds "
"(> %f s)"
%
(
end_sec
,
self
.
duration
))
start_sample
=
int
(
round
(
start_sec
*
self
.
_sample_rate
))
end_sample
=
int
(
round
(
end_sec
*
self
.
_sample_rate
))
self
.
_samples
=
self
.
_samples
[
start_sample
:
end_sample
]
def
random_subsegment
(
self
,
subsegment_length
,
rng
=
None
):
"""Return a random subsegment of a specified length in seconds.
"""Cut the specified length of the audiosegment randomly.
Note that this is an in-place transformation.
:param subsegment_length: Subsegment length in seconds.
:type subsegment_length: float
:param rng: Random number generator state.
:type rng: random.Random
:raises ValueError: If the length of subsegment greater than
original segment.
:raises ValueError: If the length of subsegment
is
greater than
the
original segment.
"""
rng
=
random
.
Random
()
if
rng
is
None
else
rng
if
subsegment_length
>
self
.
duration
:
...
...
@@ -401,7 +420,7 @@ class AudioSegment(object):
self
.
subsegment
(
start_time
,
start_time
+
subsegment_length
)
def
convolve
(
self
,
impulse_segment
,
allow_resample
=
False
):
"""Convolve this audio segment with the given impulse
_
segment.
"""Convolve this audio segment with the given impulse
segment.
Note that this is an in-place transformation.
...
...
@@ -428,6 +447,8 @@ class AudioSegment(object):
"""Convolve and normalize the resulting audio segment so that it
has the same average power as the input signal.
Note that this is an in-place transformation.
:param impulse_segment: Impulse response segments.
:type impulse_segment: AudioSegment
:param allow_resample: Indicates whether resampling is allowed when
...
...
@@ -445,10 +466,12 @@ class AudioSegment(object):
allow_downsampling
=
False
,
max_gain_db
=
300.0
,
rng
=
None
):
"""Add
s
the given noise segment at a specific signal-to-noise ratio.
"""Add the given noise segment at a specific signal-to-noise ratio.
If the noise segment is longer than this segment, a random subsegment
of matching length is sampled from it and used instead.
Note that this is an in-place transformation.
:param noise: Noise signal to add.
:type noise: AudioSegment
:param snr_dB: Signal-to-Noise Ratio, in decibels.
...
...
@@ -480,9 +503,10 @@ class AudioSegment(object):
" base signal (%f sec)."
%
(
noise
.
duration
,
self
.
duration
))
noise_gain_db
=
min
(
self
.
rms_db
-
noise
.
rms_db
-
snr_dB
,
max_gain_db
)
noise
.
random_subsegment
(
self
.
duration
,
rng
=
rng
)
noise
.
apply_gain
(
noise_gain_db
)
self
.
superimposed
(
noise
)
noise_new
=
copy
.
deepcopy
(
noise
)
noise_new
.
random_subsegment
(
self
.
duration
,
rng
=
rng
)
noise_new
.
apply_gain
(
noise_gain_db
)
self
.
superimpose
(
noise_new
)
@
property
def
samples
(
self
):
...
...
deep_speech_2/data_utils/speech.py
浏览文件 @
e64bd000
...
...
@@ -67,7 +67,8 @@ class SpeechSegment(AudioSegment):
@
classmethod
def
concatenate
(
cls
,
*
segments
):
"""Concatenate an arbitrary number of speech segments together.
"""Concatenate an arbitrary number of speech segments together, both
audio and transcript will be concatenated.
:param *segments: Input speech segments to be concatenated.
:type *segments: tuple of SpeechSegment
...
...
@@ -75,8 +76,7 @@ class SpeechSegment(AudioSegment):
:rtype: SpeechSegment
:raises ValueError: If the number of segments is zero, or if the
sample_rate of any two segments does not match.
:raises TypeError: If every item in segments is not SpeechSegment
instance.
:raises TypeError: If any segment is not SpeechSegment instance.
"""
if
len
(
segments
)
==
0
:
raise
ValueError
(
"No speech segments are given to concatenate."
)
...
...
@@ -94,7 +94,7 @@ class SpeechSegment(AudioSegment):
return
cls
(
samples
,
sample_rate
,
transcripts
)
@
classmethod
def
slice_from_file
(
cls
,
filepath
,
start
=
None
,
end
=
None
,
transcript
=
""
):
def
slice_from_file
(
cls
,
filepath
,
start
=
None
,
end
=
None
,
transcript
):
"""Loads a small section of an speech without having to load
the entire file into the memory which can be incredibly wasteful.
...
...
@@ -121,7 +121,7 @@ class SpeechSegment(AudioSegment):
@
classmethod
def
make_silence
(
cls
,
duration
,
sample_rate
):
"""Creates a silent speech segment of the given duration and
sample rate.
sample rate
, transcript will be an empty string
.
:param duration: Length of silence in seconds.
:type duration: float
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录