Update noise and impulse augmentor according to code review.

a52e52cf · Xinghai Sun · 93c1aaba · a52e52cf · a52e52cf · a52e52cf
8 changed files
--- a/deep_speech_2/conf/augmentation.config
+++ b/deep_speech_2/conf/augmentation.config
+[
+    {
+        "type": "shift",
+        "params": {"min_shift_ms": -5,
+                   "max_shift_ms": 5},
+        "prob": 1.0
+    }
+]
--- a/deep_speech_2/augmentation.config
+++ b/deep_speech_2/augmentation.config
@@ -3,14 +3,19 @@
        "type": "noise",
        "params": {"min_snr_dB": 40,
                   "max_snr_dB": 50,
-                   "noise_manifest": "datasets/manifest.noise"},
-        "prob": 0.0
+                   "noise_manifest_path": "datasets/manifest.noise"},
+        "prob": 0.6
+    },
+    {
+        "type": "impulse",
+        "params": {"impulse_manifest_path": "datasets/manifest.impulse"},
+        "prob": 0.5
    },
    {
        "type": "speed",
        "params": {"min_speed_rate": 0.95,
                   "max_speed_rate": 1.05},
-        "prob": 0.0
+        "prob": 0.5
    },
    {
        "type": "shift",
@@ -24,4 +29,11 @@
                   "max_gain_dBFS": 10},
        "prob": 0.0
    },
+    {
+        "type": "bayesian_normal",
+        "params": {"target_db": -20,
+                   "prior_db": -20,
+                   "prior_samples": 100},
+        "prob": 0.0
+    }
 ]
--- a/deep_speech_2/data_utils/augmentor/augmentation.py
+++ b/deep_speech_2/data_utils/augmentor/augmentation.py
@@ -30,7 +30,7 @@ class AugmentationPipeline(object):
                "type": "noise",
                "params": {"min_snr_dB": 10,
                           "max_snr_dB": 20,
-                           "noise_manifest": "datasets/manifest.noise"},
+                           "noise_manifest_path": "datasets/manifest.noise"},
                "prob": 0.0
            },
            {

--- a/deep_speech_2/data_utils/augmentor/impulse_response.py
+++ b/deep_speech_2/data_utils/augmentor/impulse_response.py
@@ -13,13 +13,14 @@ class ImpulseResponseAugmentor(AugmentorBase):
    
    :param rng: Random generator object.
    :type rng: random.Random
-    :param impulse_manifest: Manifest path for impulse audio data.
-    :type impulse_manifest: basestring 
+    :param impulse_manifest_path: Manifest path for impulse audio data.
+    :type impulse_manifest_path: basestring 
    """

-    def __init__(self, rng, impulse_manifest):
+    def __init__(self, rng, impulse_manifest_path):
        self._rng = rng
-        self._manifest = utils.read_manifest(manifest_path=impulse_manifest)
+        self._impulse_manifest = utils.read_manifest(
+            manifest_path=impulse_manifest_path)

    def transform_audio(self, audio_segment):
        """Add impulse response effect.
@@ -29,6 +30,6 @@ class ImpulseResponseAugmentor(AugmentorBase):
        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegmenet|SpeechSegment
        """
-        noise_json = self._rng.sample(self._manifest, 1)[0]
-        noise_segment = AudioSegment.from_file(noise_json['audio_filepath'])
-        audio_segment.convolve(noise_segment, allow_resample=True)
+        impulse_json = self._rng.sample(self._impulse_manifest, 1)[0]
+        impulse_segment = AudioSegment.from_file(impulse_json['audio_filepath'])
+        audio_segment.convolve(impulse_segment, allow_resample=True)
--- a/deep_speech_2/data_utils/augmentor/noise_perturb.py
+++ b/deep_speech_2/data_utils/augmentor/noise_perturb.py
@@ -17,15 +17,16 @@ class NoisePerturbAugmentor(AugmentorBase):
    :type min_snr_dB: float
    :param max_snr_dB: Maximal signal noise ratio, in decibels.
    :type max_snr_dB: float
-    :param noise_manifest: Manifest path for noise audio data.
-    :type noise_manifest: basestring 
+    :param noise_manifest_path: Manifest path for noise audio data.
+    :type noise_manifest_path: basestring 
    """

-    def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest):
+    def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
        self._min_snr_dB = min_snr_dB
        self._max_snr_dB = max_snr_dB
        self._rng = rng
-        self._manifest = utils.read_manifest(manifest_path=noise_manifest)
+        self._noise_manifest = utils.read_manifest(
+            manifest_path=noise_manifest_path)

    def transform_audio(self, audio_segment):
        """Add background noise audio.
@@ -35,7 +36,7 @@ class NoisePerturbAugmentor(AugmentorBase):
        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegmenet|SpeechSegment
        """
-        noise_json = self._rng.sample(self._manifest, 1)[0]
+        noise_json = self._rng.sample(self._noise_manifest, 1)[0]
        if noise_json['duration'] < audio_segment.duration:
            raise RuntimeError("The duration of sampled noise audio is smaller "
                               "than the audio segment to add effects to.")

--- a/deep_speech_2/datasets/run_all.sh
+++ b/deep_speech_2/datasets/run_all.sh
@@ -6,17 +6,8 @@ if [ $? -ne 0 ]; then
 fi
 cd -

-cd noise 
-python chime3_background.py
-if [ $? -ne 0 ]; then
-    echo "Prepare CHiME3 background noise failed. Terminated."
-    exit 1
-fi
-cd -
-
 cat librispeech/manifest.train* | shuf > manifest.train
 cat librispeech/manifest.dev-clean > manifest.dev
 cat librispeech/manifest.test-clean > manifest.test
-cat noise/manifest.* > manifest.noise

 echo "All done."
--- a/deep_speech_2/datasets/run_noise.sh
+++ b/deep_speech_2/datasets/run_noise.sh
+cd noise 
+python chime3_background.py
+if [ $? -ne 0 ]; then
+    echo "Prepare CHiME3 background noise failed. Terminated."
+    exit 1
+fi
+cd -
+
+cat noise/manifest.* > manifest.noise
+echo "All done."
--- a/deep_speech_2/train.py
+++ b/deep_speech_2/train.py
@@ -123,7 +123,7 @@ parser.add_argument(
    help="Directory for saving models. (default: %(default)s)")
 parser.add_argument(
    "--augmentation_config",
-    default=open('augmentation.config', 'r').read(),
+    default=open('conf/augmentation.config', 'r').read(),
    type=str,
    help="Augmentation configuration in json-format. "
    "(default: %(default)s)")