diff --git a/deepspeech/frontend/audio.py b/deepspeech/frontend/audio.py index 4488f5f2e1f961ddb47b6d4669b1924c81b570df..ffdcd4b3a5f3b3e7bd8c3725fa8199c3e89e40c4 100644 --- a/deepspeech/frontend/audio.py +++ b/deepspeech/frontend/audio.py @@ -351,7 +351,9 @@ class AudioSegment(object): tfm.set_globals(multithread=False) tfm.speed(speed_rate) self._samples = tfm.build_array( - input_array=self._samples, sample_rate_in=self._sample_rate).copy() + input_array=self._samples, + sample_rate_in=self._sample_rate).squeeze(-1).astype( + np.float32).copy() def normalize(self, target_db=-20, max_gain_db=300.0): """Normalize audio to be of the desired RMS value in decibels. diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json index a1a759e67f3a118b6754a60aead069742ede6ecc..5635d9c84c48938f746dcdffd1a16eff9ac8b98c 100644 --- a/examples/librispeech/s0/conf/augmentation.json +++ b/examples/librispeech/s0/conf/augmentation.json @@ -1,4 +1,13 @@ [ + { + "type": "speed", + "params": { + "min_speed_rate": 0.9, + "max_speed_rate": 1.1, + "num_rates": 3 + }, + "prob": 0.0 + }, { "type": "shift", "params": { diff --git a/examples/tiny/s1/conf/augmentation.json b/examples/tiny/s1/conf/augmentation.json index 1987ad4245dcf5542f1e22a545c36899659acef9..f26c282e7ddb3329923d4d10b0d392a254a216fe 100644 --- a/examples/tiny/s1/conf/augmentation.json +++ b/examples/tiny/s1/conf/augmentation.json @@ -6,7 +6,7 @@ "max_speed_rate": 1.1, "num_rates": 3 }, - "prob": 0.0 + "prob": 1.0 }, { "type": "shift",