提交 ddb2bdc1 编写于 作者: chrisxu2014's avatar chrisxu2014

add audio file

上级 25ce7ebe
...@@ -9,6 +9,7 @@ import soundfile ...@@ -9,6 +9,7 @@ import soundfile
import scikits.samplerate import scikits.samplerate
from scipy import signal from scipy import signal
import random import random
import copy
class AudioSegment(object): class AudioSegment(object):
...@@ -87,9 +88,8 @@ class AudioSegment(object): ...@@ -87,9 +88,8 @@ class AudioSegment(object):
:return: Audio segment instance as concatenating results. :return: Audio segment instance as concatenating results.
:rtype: AudioSegment :rtype: AudioSegment
:raises ValueError: If the number of segments is zero, or if the :raises ValueError: If the number of segments is zero, or if the
sample_rate of any two segment does not match. sample_rate of any segments does not match.
:raises TypeError: If every item in segments is not AudioSegment :raises TypeError: If any segment is not AudioSegment instance.
instance.
""" """
# Perform basic sanity-checks. # Perform basic sanity-checks.
if len(segments) == 0: if len(segments) == 0:
...@@ -101,7 +101,7 @@ class AudioSegment(object): ...@@ -101,7 +101,7 @@ class AudioSegment(object):
"different sample rates") "different sample rates")
if type(seg) is not cls: if type(seg) is not cls:
raise TypeError("Only audio segments of the same type " raise TypeError("Only audio segments of the same type "
"instance can be concatenated.") "can be concatenated.")
samples = np.concatenate([seg.samples for seg in segments]) samples = np.concatenate([seg.samples for seg in segments])
return cls(samples, sample_rate) return cls(samples, sample_rate)
...@@ -180,8 +180,7 @@ class AudioSegment(object): ...@@ -180,8 +180,7 @@ class AudioSegment(object):
@classmethod @classmethod
def make_silence(cls, duration, sample_rate): def make_silence(cls, duration, sample_rate):
"""Creates a silent audio segment of the given duration and """Creates a silent audio segment of the given duration and sample rate.
sample rate.
:param duration: Length of silence in seconds. :param duration: Length of silence in seconds.
:type duration: float :type duration: float
...@@ -193,15 +192,17 @@ class AudioSegment(object): ...@@ -193,15 +192,17 @@ class AudioSegment(object):
samples = np.zeros(int(duration * sample_rate)) samples = np.zeros(int(duration * sample_rate))
return cls(samples, sample_rate) return cls(samples, sample_rate)
def superimposed(self, other): def superimpose(self, other):
"""Add samples from another segment to those of this segment """Add samples from another segment to those of this segment
(sample-wise addition, not segment concatenation). (sample-wise addition, not segment concatenation).
Note that this is an in-place transformation.
:param other: Segment containing samples to be added in. :param other: Segment containing samples to be added in.
:type other: AudioSegment :type other: AudioSegment
:raise TypeError: If type of two segments don't match. :raise TypeError: If type of two segments don't match.
:raise ValueError: If the sample_rate of two segments not equal, or if :raise ValueError: If the sample rates of the two segments are not
the length of segments don't match. equal, or if the lengths of segments don't match.
""" """
if type(self) != type(other): if type(self) != type(other):
raise TypeError("Cannot add segments of different types: %s " raise TypeError("Cannot add segments of different types: %s "
...@@ -215,7 +216,7 @@ class AudioSegment(object): ...@@ -215,7 +216,7 @@ class AudioSegment(object):
def to_bytes(self, dtype='float32'): def to_bytes(self, dtype='float32'):
"""Create a byte string containing the audio content. """Create a byte string containing the audio content.
:param dtype: Data type for export samples. Options: 'int16','int32', :param dtype: Data type for export samples. Options: 'int16', 'int32',
'float32', 'float64'. Default is 'float32'. 'float32', 'float64'. Default is 'float32'.
:type dtype: str :type dtype: str
:return: Byte string containing audio content. :return: Byte string containing audio content.
...@@ -362,16 +363,20 @@ class AudioSegment(object): ...@@ -362,16 +363,20 @@ class AudioSegment(object):
elif sides == "both": elif sides == "both":
padded = cls.concatenate(silence, self, silence) padded = cls.concatenate(silence, self, silence)
else: else:
raise ValueError("Unknown value for the kwarg %s" % sides) raise ValueError("Unknown value for the sides %s" % sides)
self._samples = padded._samples self._samples = padded._samples
def subsegment(self, start_sec=None, end_sec=None): def subsegment(self, start_sec=None, end_sec=None):
"""Return new AudioSegment containing audio between given boundaries. """Cut the AudioSegment between given boundaries.
Note that this is an in-place transformation.
:param start_sec: Beginning of subsegment in seconds. :param start_sec: Beginning of subsegment in seconds.
:type start_sec: float :type start_sec: float
:param end_sec: End of subsegment in seconds. :param end_sec: End of subsegment in seconds.
:type end_sec: float :type end_sec: float
:raise ValueError: If start_sec or end_sec is incorrectly set, e.g. out
of bounds in time.
""" """
start_sec = 0.0 if start_sec is None else start_sec start_sec = 0.0 if start_sec is None else start_sec
end_sec = self.duration if end_sec is None else end_sec end_sec = self.duration if end_sec is None else end_sec
...@@ -379,19 +384,33 @@ class AudioSegment(object): ...@@ -379,19 +384,33 @@ class AudioSegment(object):
start_sec = self.duration + start_sec start_sec = self.duration + start_sec
if end_sec < 0.0: if end_sec < 0.0:
end_sec = self.duration + end_sec end_sec = self.duration + end_sec
if start_sec < 0.0:
raise ValueError("The slice start position (%f s) is out of "
"bounds." % start_sec)
if end_sec < 0.0:
raise ValueError("The slice end position (%f s) is out of bounds." %
end_sec)
if start_sec > end_sec:
raise ValueError("The slice start position (%f s) is later than "
"the end position (%f s)." % (start_sec, end_sec))
if end_sec > self.duration:
raise ValueError("The slice end position (%f s) is out of bounds "
"(> %f s)" % (end_sec, self.duration))
start_sample = int(round(start_sec * self._sample_rate)) start_sample = int(round(start_sec * self._sample_rate))
end_sample = int(round(end_sec * self._sample_rate)) end_sample = int(round(end_sec * self._sample_rate))
self._samples = self._samples[start_sample:end_sample] self._samples = self._samples[start_sample:end_sample]
def random_subsegment(self, subsegment_length, rng=None): def random_subsegment(self, subsegment_length, rng=None):
"""Return a random subsegment of a specified length in seconds. """Cut the specified length of the audiosegment randomly.
Note that this is an in-place transformation.
:param subsegment_length: Subsegment length in seconds. :param subsegment_length: Subsegment length in seconds.
:type subsegment_length: float :type subsegment_length: float
:param rng: Random number generator state. :param rng: Random number generator state.
:type rng: random.Random :type rng: random.Random
:raises ValueError: If the length of subsegment greater than :raises ValueError: If the length of subsegment is greater than
original segment. the original segment.
""" """
rng = random.Random() if rng is None else rng rng = random.Random() if rng is None else rng
if subsegment_length > self.duration: if subsegment_length > self.duration:
...@@ -401,7 +420,7 @@ class AudioSegment(object): ...@@ -401,7 +420,7 @@ class AudioSegment(object):
self.subsegment(start_time, start_time + subsegment_length) self.subsegment(start_time, start_time + subsegment_length)
def convolve(self, impulse_segment, allow_resample=False): def convolve(self, impulse_segment, allow_resample=False):
"""Convolve this audio segment with the given impulse_segment. """Convolve this audio segment with the given impulse segment.
Note that this is an in-place transformation. Note that this is an in-place transformation.
...@@ -428,6 +447,8 @@ class AudioSegment(object): ...@@ -428,6 +447,8 @@ class AudioSegment(object):
"""Convolve and normalize the resulting audio segment so that it """Convolve and normalize the resulting audio segment so that it
has the same average power as the input signal. has the same average power as the input signal.
Note that this is an in-place transformation.
:param impulse_segment: Impulse response segments. :param impulse_segment: Impulse response segments.
:type impulse_segment: AudioSegment :type impulse_segment: AudioSegment
:param allow_resample: Indicates whether resampling is allowed when :param allow_resample: Indicates whether resampling is allowed when
...@@ -445,10 +466,12 @@ class AudioSegment(object): ...@@ -445,10 +466,12 @@ class AudioSegment(object):
allow_downsampling=False, allow_downsampling=False,
max_gain_db=300.0, max_gain_db=300.0,
rng=None): rng=None):
"""Adds the given noise segment at a specific signal-to-noise ratio. """Add the given noise segment at a specific signal-to-noise ratio.
If the noise segment is longer than this segment, a random subsegment If the noise segment is longer than this segment, a random subsegment
of matching length is sampled from it and used instead. of matching length is sampled from it and used instead.
Note that this is an in-place transformation.
:param noise: Noise signal to add. :param noise: Noise signal to add.
:type noise: AudioSegment :type noise: AudioSegment
:param snr_dB: Signal-to-Noise Ratio, in decibels. :param snr_dB: Signal-to-Noise Ratio, in decibels.
...@@ -480,9 +503,10 @@ class AudioSegment(object): ...@@ -480,9 +503,10 @@ class AudioSegment(object):
" base signal (%f sec)." % " base signal (%f sec)." %
(noise.duration, self.duration)) (noise.duration, self.duration))
noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db) noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db)
noise.random_subsegment(self.duration, rng=rng) noise_new = copy.deepcopy(noise)
noise.apply_gain(noise_gain_db) noise_new.random_subsegment(self.duration, rng=rng)
self.superimposed(noise) noise_new.apply_gain(noise_gain_db)
self.superimpose(noise_new)
@property @property
def samples(self): def samples(self):
......
...@@ -67,7 +67,8 @@ class SpeechSegment(AudioSegment): ...@@ -67,7 +67,8 @@ class SpeechSegment(AudioSegment):
@classmethod @classmethod
def concatenate(cls, *segments): def concatenate(cls, *segments):
"""Concatenate an arbitrary number of speech segments together. """Concatenate an arbitrary number of speech segments together, both
audio and transcript will be concatenated.
:param *segments: Input speech segments to be concatenated. :param *segments: Input speech segments to be concatenated.
:type *segments: tuple of SpeechSegment :type *segments: tuple of SpeechSegment
...@@ -75,8 +76,7 @@ class SpeechSegment(AudioSegment): ...@@ -75,8 +76,7 @@ class SpeechSegment(AudioSegment):
:rtype: SpeechSegment :rtype: SpeechSegment
:raises ValueError: If the number of segments is zero, or if the :raises ValueError: If the number of segments is zero, or if the
sample_rate of any two segments does not match. sample_rate of any two segments does not match.
:raises TypeError: If every item in segments is not SpeechSegment :raises TypeError: If any segment is not SpeechSegment instance.
instance.
""" """
if len(segments) == 0: if len(segments) == 0:
raise ValueError("No speech segments are given to concatenate.") raise ValueError("No speech segments are given to concatenate.")
...@@ -94,7 +94,7 @@ class SpeechSegment(AudioSegment): ...@@ -94,7 +94,7 @@ class SpeechSegment(AudioSegment):
return cls(samples, sample_rate, transcripts) return cls(samples, sample_rate, transcripts)
@classmethod @classmethod
def slice_from_file(cls, filepath, start=None, end=None, transcript=""): def slice_from_file(cls, filepath, start=None, end=None, transcript):
"""Loads a small section of a speech without having to load """Loads a small section of a speech without having to load
the entire file into the memory which can be incredibly wasteful. the entire file into the memory which can be incredibly wasteful.
...@@ -121,7 +121,7 @@ class SpeechSegment(AudioSegment): ...@@ -121,7 +121,7 @@ class SpeechSegment(AudioSegment):
@classmethod @classmethod
def make_silence(cls, duration, sample_rate): def make_silence(cls, duration, sample_rate):
"""Creates a silent speech segment of the given duration and """Creates a silent speech segment of the given duration and
sample rate. sample rate, transcript will be an empty string.
:param duration: Length of silence in seconds. :param duration: Length of silence in seconds.
:type duration: float :type duration: float
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册