Add NoisePerturbAugmentor and CHiME3 data preparation.

98205955 · Xinghai Sun · d504e426 · 98205955 · 98205955 · 98205955
6 changed files
--- a/deep_speech_2/data_utils/augmentor/augmentation.py
+++ b/deep_speech_2/data_utils/augmentor/augmentation.py
@@ -8,6 +8,7 @@ import random
 from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
 from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
 from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
+from data_utils.augmentor.noise_perturb import NoisePerturbAugmentor
 from data_utils.augmentor.resample import ResampleAugmentor
 from data_utils.augmentor.online_bayesian_normalization import \
     OnlineBayesianNormalizationAugmentor
@@ -89,5 +90,7 @@ class AugmentationPipeline(object):
            return ResampleAugmentor(self._rng, **params)
        elif augmentor_type == "bayesian_normal":
            return OnlineBayesianNormalizationAugmentor(self._rng, **params)
+        elif augmentor_type == "noise":
+            return NoisePerturbAugmentor(self._rng, **params)
        else:
            raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
--- a/deep_speech_2/data_utils/augmentor/noise_perturb.py
+++ b/deep_speech_2/data_utils/augmentor/noise_perturb.py
+"""Contains the noise perturb augmentation model."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from data_utils.augmentor.base import AugmentorBase
+from data_utils import utils
+from data_utils.speech import SpeechSegment
+
+
+class NoisePerturbAugmentor(AugmentorBase):
+    """Augmentation model for adding background noise.
+    
+    :param rng: Random generator object.
+    :type rng: random.Random
+    :param min_snr_dB: Minimal signal noise ratio, in decibels.
+    :type min_snr_dB: float
+    :param max_snr_dB: Maximal signal noise ratio, in decibels.
+    :type max_snr_dB: float
+    """
+
+    def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest):
+        self._min_snr_dB = min_snr_dB
+        self._max_snr_dB = max_snr_dB
+        self._rng = rng
+        self._manifest = utils.read_manifest(manifest_path=noise_manifest)
+
+    def transform_audio(self, audio_segment):
+        """Add background noise audio.
+
+        Note that this is an in-place transformation.
+
+        :param audio_segment: Audio segment to add effects to.
+        :type audio_segment: AudioSegmenet|SpeechSegment
+        """
+        noise_json = self._rng.sample(self._manifest, 1)[0]
+        if noise_json['duration'] < audio_segment.duration:
+            raise RuntimeError("The duration of sampled noise audio is smaller "
+                               "than the audio segment to add effects to.")
+        diff_duration = noise_json['duration'] - audio_segment.duration
+        start = self._rng.uniform(0, diff_duration)
+        end = start + audio_segment.duration
+        noise_segment = SpeechSegment.slice_from_file(
+            noise_json['audio_filepath'], transcript="", start=start, end=end)
+        snr_dB = self._rng.uniform(self._min_snr_dB, self._max_snr_dB)
+        audio_segment.add_noise(
+            noise_segment, snr_dB, allow_downsampling=True, rng=self._rng)
--- a/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py
+++ b/deep_speech_2/data_utils/augmentor/online_bayesian_normalization.py
--- a/deep_speech_2/data_utils/augmentor/resample.py
+++ b/deep_speech_2/data_utils/augmentor/resample.py
--- a/deep_speech_2/datasets/noise/chime3_background.py
+++ b/deep_speech_2/datasets/noise/chime3_background.py
+"""Prepare CHiME3 background data.
+
+Download, unpack and create manifest files.
+Manifest file is a json-format file with each line containing the
+meta data (i.e. audio filepath, transcript and audio duration)
+of each audio file in the data set.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import distutils.util
+import os
+import wget
+import zipfile
+import argparse
+import soundfile
+import json
+from paddle.v2.dataset.common import md5file
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+
+URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
+MD5 = "c3ff512618d7a67d4f85566ea1bc39ec"
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--target_dir",
+    default=DATA_HOME + "/chime3_background",
+    type=str,
+    help="Directory to save the dataset. (default: %(default)s)")
+parser.add_argument(
+    "--manifest_filepath",
+    default="manifest.chime3.background",
+    type=str,
+    help="Filepath for output manifests. (default: %(default)s)")
+args = parser.parse_args()
+
+
+def download(url, md5sum, target_dir, filename=None):
+    """Download file from url to target_dir, and check md5sum."""
+    if filename == None:
+        filename = url.split("/")[-1]
+    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    filepath = os.path.join(target_dir, filename)
+    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
+        print("Downloading %s ..." % url)
+        wget.download(url, target_dir)
+        print("\nMD5 Chesksum %s ..." % filepath)
+        if not md5file(filepath) == md5sum:
+            raise RuntimeError("MD5 checksum failed.")
+    else:
+        print("File exists, skip downloading. (%s)" % filepath)
+    return filepath
+
+
+def unpack(filepath, target_dir):
+    """Unpack the file to the target_dir."""
+    print("Unpacking %s ..." % filepath)
+    if filepath.endswith('.zip'):
+        zip = zipfile.ZipFile(filepath, 'r')
+        zip.extractall(target_dir)
+        zip.close()
+    elif filepath.endswith('.tar') or filepath.endswith('.tar.gz'):
+        tar = zipfile.open(filepath)
+        tar.extractall(target_dir)
+        tar.close()
+    else:
+        raise ValueError("File format is not supported for unpacking.")
+
+
+def create_manifest(data_dir, manifest_path):
+    """Create a manifest json file summarizing the data set, with each line
+    containing the meta data (i.e. audio filepath, transcription text, audio
+    duration) of each audio file within the data set.
+    """
+    print("Creating manifest %s ..." % manifest_path)
+    json_lines = []
+    for subfolder, _, filelist in sorted(os.walk(data_dir)):
+        for filename in filelist:
+            if filename.endswith('.wav'):
+                filepath = os.path.join(data_dir, subfolder, filename)
+                audio_data, samplerate = soundfile.read(filepath)
+                duration = float(len(audio_data)) / samplerate
+                json_lines.append(
+                    json.dumps({
+                        'audio_filepath': filepath,
+                        'duration': duration,
+                        'text': ''
+                    }))
+    with open(manifest_path, 'w') as out_file:
+        for line in json_lines:
+            out_file.write(line + '\n')
+
+
+def prepare_chime3(url, md5sum, target_dir, manifest_path):
+    """Download, unpack and create summmary manifest file."""
+    if not os.path.exists(os.path.join(target_dir, "CHiME3")):
+        # download
+        filepath = download(url, md5sum, target_dir,
+                            "myairbridge-AG0Y3DNBE5IWRRTV.zip")
+        # unpack
+        unpack(filepath, target_dir)
+        unpack(
+            os.path.join(target_dir, 'CHiME3_background_bus.zip'), target_dir)
+        unpack(
+            os.path.join(target_dir, 'CHiME3_background_caf.zip'), target_dir)
+        unpack(
+            os.path.join(target_dir, 'CHiME3_background_ped.zip'), target_dir)
+        unpack(
+            os.path.join(target_dir, 'CHiME3_background_str.zip'), target_dir)
+    else:
+        print("Skip downloading and unpacking. Data already exists in %s." %
+              target_dir)
+    # create manifest json file
+    create_manifest(target_dir, manifest_path)
+
+
+def main():
+    prepare_chime3(
+        url=URL,
+        md5sum=MD5,
+        target_dir=args.target_dir,
+        manifest_path=args.manifest_filepath)
+
+
+if __name__ == '__main__':
+    main()
--- a/deep_speech_2/datasets/run_all.sh
+++ b/deep_speech_2/datasets/run_all.sh
@@ -6,8 +6,17 @@ if [ $? -ne 0 ]; then
 fi
 cd -

+cd noise 
+python chime3_background.py
+if [ $? -ne 0 ]; then
+    echo "Prepare CHiME3 background noise failed. Terminated."
+    exit 1
+fi
+cd -
+
 cat librispeech/manifest.train* | shuf > manifest.train
 cat librispeech/manifest.dev-clean > manifest.dev
 cat librispeech/manifest.test-clean > manifest.test
+cat noise/manifest.* > manifest.noise

 echo "All done."