diff --git a/conf/augmentation.config b/conf/augmentation.config new file mode 100644 index 0000000000000000000000000000000000000000..6c24da5497460d4bae9c9c4fecdbe96ab8da7532 --- /dev/null +++ b/conf/augmentation.config @@ -0,0 +1,8 @@ +[ + { + "type": "shift", + "params": {"min_shift_ms": -5, + "max_shift_ms": 5}, + "prob": 1.0 + } +] diff --git a/augmentation.config b/conf/augmentation.config.example similarity index 56% rename from augmentation.config rename to conf/augmentation.config.example index 263af58649942fcb542de94fb1409ca296d2aa79..21ed6ee10375a749f4c072389509db2020d9e9c9 100644 --- a/augmentation.config +++ b/conf/augmentation.config.example @@ -3,14 +3,19 @@ "type": "noise", "params": {"min_snr_dB": 40, "max_snr_dB": 50, - "noise_manifest": "datasets/manifest.noise"}, - "prob": 0.0 + "noise_manifest_path": "datasets/manifest.noise"}, + "prob": 0.6 + }, + { + "type": "impulse", + "params": {"impulse_manifest_path": "datasets/manifest.impulse"}, + "prob": 0.5 }, { "type": "speed", "params": {"min_speed_rate": 0.95, "max_speed_rate": 1.05}, - "prob": 0.0 + "prob": 0.5 }, { "type": "shift", @@ -24,4 +29,11 @@ "max_gain_dBFS": 10}, "prob": 0.0 }, + { + "type": "bayesian_normal", + "params": {"target_db": -20, + "prior_db": -20, + "prior_samples": 100}, + "prob": 0.0 + } ] diff --git a/data_utils/augmentor/augmentation.py b/data_utils/augmentor/augmentation.py index c9e360313c7434491d20d531a942a988c69961ee..5c30b627ef9a23ff41d1f64f270934f149a793a2 100644 --- a/data_utils/augmentor/augmentation.py +++ b/data_utils/augmentor/augmentation.py @@ -30,7 +30,7 @@ class AugmentationPipeline(object): "type": "noise", "params": {"min_snr_dB": 10, "max_snr_dB": 20, - "noise_manifest": "datasets/manifest.noise"}, + "noise_manifest_path": "datasets/manifest.noise"}, "prob": 0.0 }, { diff --git a/data_utils/augmentor/impulse_response.py b/data_utils/augmentor/impulse_response.py index d868c3a1ca8cfd9d682a28858c6622b2d50984b4..c3de0fdbb2a40150f8cffdef3487ceb4400e52ed 100644 --- a/data_utils/augmentor/impulse_response.py +++ b/data_utils/augmentor/impulse_response.py @@ -13,13 +13,14 @@ class ImpulseResponseAugmentor(AugmentorBase): :param rng: Random generator object. :type rng: random.Random - :param impulse_manifest: Manifest path for impulse audio data. - :type impulse_manifest: basestring + :param impulse_manifest_path: Manifest path for impulse audio data. + :type impulse_manifest_path: basestring """ - def __init__(self, rng, impulse_manifest): + def __init__(self, rng, impulse_manifest_path): self._rng = rng - self._manifest = utils.read_manifest(manifest_path=impulse_manifest) + self._impulse_manifest = utils.read_manifest( + manifest_path=impulse_manifest_path) def transform_audio(self, audio_segment): """Add impulse response effect. @@ -29,6 +30,6 @@ class ImpulseResponseAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] - noise_segment = AudioSegment.from_file(noise_json['audio_filepath']) - audio_segment.convolve(noise_segment, allow_resample=True) + impulse_json = self._rng.sample(self._impulse_manifest, 1)[0] + impulse_segment = AudioSegment.from_file(impulse_json['audio_filepath']) + audio_segment.convolve(impulse_segment, allow_resample=True) diff --git a/data_utils/augmentor/noise_perturb.py b/data_utils/augmentor/noise_perturb.py index b4fa18e18df4423d39b4765ac7d10b72554857eb..281174af42c2f6d673ead94bd532941769c79c25 100644 --- a/data_utils/augmentor/noise_perturb.py +++ b/data_utils/augmentor/noise_perturb.py @@ -17,15 +17,16 @@ class NoisePerturbAugmentor(AugmentorBase): :type min_snr_dB: float :param max_snr_dB: Maximal signal noise ratio, in decibels. :type max_snr_dB: float - :param noise_manifest: Manifest path for noise audio data. - :type noise_manifest: basestring + :param noise_manifest_path: Manifest path for noise audio data. + :type noise_manifest_path: basestring """ - def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest): + def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path): self._min_snr_dB = min_snr_dB self._max_snr_dB = max_snr_dB self._rng = rng - self._manifest = utils.read_manifest(manifest_path=noise_manifest) + self._noise_manifest = utils.read_manifest( + manifest_path=noise_manifest_path) def transform_audio(self, audio_segment): """Add background noise audio. @@ -35,7 +36,7 @@ class NoisePerturbAugmentor(AugmentorBase): :param audio_segment: Audio segment to add effects to. :type audio_segment: AudioSegmenet|SpeechSegment """ - noise_json = self._rng.sample(self._manifest, 1)[0] + noise_json = self._rng.sample(self._noise_manifest, 1)[0] if noise_json['duration'] < audio_segment.duration: raise RuntimeError("The duration of sampled noise audio is smaller " "than the audio segment to add effects to.") diff --git a/datasets/run_all.sh b/datasets/run_all.sh index 61747a50bfa481adf980da34843b46017eb23078..ef2b721fbdc2a18fcbc208730189604e88d7ef2c 100644 --- a/datasets/run_all.sh +++ b/datasets/run_all.sh @@ -6,17 +6,8 @@ if [ $? -ne 0 ]; then fi cd - -cd noise -python chime3_background.py -if [ $? -ne 0 ]; then - echo "Prepare CHiME3 background noise failed. Terminated." - exit 1 -fi -cd - - cat librispeech/manifest.train* | shuf > manifest.train cat librispeech/manifest.dev-clean > manifest.dev cat librispeech/manifest.test-clean > manifest.test -cat noise/manifest.* > manifest.noise echo "All done." diff --git a/datasets/run_noise.sh b/datasets/run_noise.sh new file mode 100644 index 0000000000000000000000000000000000000000..7b27abde47a97b671609f0cd15e81565b3a00d02 --- /dev/null +++ b/datasets/run_noise.sh @@ -0,0 +1,10 @@ +cd noise +python chime3_background.py +if [ $? -ne 0 ]; then + echo "Prepare CHiME3 background noise failed. Terminated." + exit 1 +fi +cd - + +cat noise/manifest.* > manifest.noise +echo "All done." diff --git a/train.py b/train.py index 34c406015a876ad7c3442644532f467506e9cd15..0d4e2508dddf5cc6834b4f61f0c2cc8deee405af 100644 --- a/train.py +++ b/train.py @@ -123,7 +123,7 @@ parser.add_argument( help="Directory for saving models. (default: %(default)s)") parser.add_argument( "--augmentation_config", - default=open('augmentation.config', 'r').read(), + default=open('conf/augmentation.config', 'r').read(), type=str, help="Augmentation configuration in json-format. " "(default: %(default)s)")