diff --git a/deepspeech/frontend/augmentor/spec_augment.py b/deepspeech/frontend/augmentor/spec_augment.py index a3f4e268ab2e03abf37adc05214ef1ae02970f27..7c23b628e11b5b47d59452aafc677e6aee2097ba 100644 --- a/deepspeech/frontend/augmentor/spec_augment.py +++ b/deepspeech/frontend/augmentor/spec_augment.py @@ -245,6 +245,8 @@ class SpecAugmentor(AugmentorBase): Returns: x (np.ndarray): `[T, F]` """ + assert isinstance(x, np.ndarray) + assert x.ndim == 2 x = self.time_warp(x, self.mode) x = self.mask_freq(x, self.replace_with_zero) x = self.mask_time(x, self.replace_with_zero) diff --git a/examples/aishell/s0/conf/augmentation.json b/examples/aishell/s0/conf/augmentation.json index 81d110b0b670636270fcdea56ada1e7e9510455d..39afe4e6a584761f9a07f68ade91cd77207aaea7 100644 --- a/examples/aishell/s0/conf/augmentation.json +++ b/examples/aishell/s0/conf/augmentation.json @@ -19,17 +19,18 @@ { "type": "specaug", "params": { - "F": 10, - "T": 50, + "W": 5, + "warp_mode": "PIL", + "F": 30, "n_freq_masks": 2, + "T": 40, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": false }, "prob": 1.0 } -] +] \ No newline at end of file diff --git a/examples/aishell/s1/conf/augmentation.json b/examples/aishell/s1/conf/augmentation.json index 81d110b0b670636270fcdea56ada1e7e9510455d..d0409b142906bca406ba97ff63913ada5562984e 100644 --- a/examples/aishell/s1/conf/augmentation.json +++ b/examples/aishell/s1/conf/augmentation.json @@ -28,7 +28,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/augmentation/augmentation.json b/examples/augmentation/augmentation.json index baf2cac3498f7dbef03b10e80614a8e059c46ac4..c99299d6cc849f132a3db80b017c444bb0b3cc85 100644 --- a/examples/augmentation/augmentation.json +++ b/examples/augmentation/augmentation.json @@ -52,17 +52,18 @@ { "type": "specaug", "params": { + "W": 80, + "warp_mode": "PIL", "F": 10, - "T": 50, "n_freq_masks": 2, + "T": 50, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": false }, - "prob": 0.0 + "prob": 1.0 } ] diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json index 81d110b0b670636270fcdea56ada1e7e9510455d..d0409b142906bca406ba97ff63913ada5562984e 100644 --- a/examples/librispeech/s0/conf/augmentation.json +++ b/examples/librispeech/s0/conf/augmentation.json @@ -28,7 +28,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/librispeech/s1/conf/augmentation.json b/examples/librispeech/s1/conf/augmentation.json index 7dd158eb53406e8fad0439a2db0e654060f67523..8e6e97040190455c9bf0ed5d858e0651715413be 100644 --- a/examples/librispeech/s1/conf/augmentation.json +++ b/examples/librispeech/s1/conf/augmentation.json @@ -28,7 +28,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/librispeech/s2/conf/augmentation.json b/examples/librispeech/s2/conf/augmentation.json index cc8c7e00f667f5e9d850e46052f76faf1a019a53..e20fc19970ddf77c20aac43db84f340280a40364 100644 --- a/examples/librispeech/s2/conf/augmentation.json +++ b/examples/librispeech/s2/conf/augmentation.json @@ -11,7 +11,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/timit/s1/conf/augmentation.json b/examples/timit/s1/conf/augmentation.json index 7dd158eb53406e8fad0439a2db0e654060f67523..8e6e97040190455c9bf0ed5d858e0651715413be 100644 --- a/examples/timit/s1/conf/augmentation.json +++ b/examples/timit/s1/conf/augmentation.json @@ -28,7 +28,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/tiny/s0/conf/augmentation.json b/examples/tiny/s0/conf/augmentation.json index 8f9ff7fd48a2b836b5d1e41e0577984acf2ec18d..8370551667c9afbad96791bb4d357c524961313b 100644 --- a/examples/tiny/s0/conf/augmentation.json +++ b/examples/tiny/s0/conf/augmentation.json @@ -6,7 +6,7 @@ "max_speed_rate": 1.1, "num_rates": 3 }, - "prob": 1.0 + "prob": 0.0 }, { "type": "shift", @@ -19,16 +19,18 @@ { "type": "specaug", "params": { - "F": 10, - "T": 50, + "W": 5, + "warp_mode": "PIL", + "F": 30, "n_freq_masks": 2, + "T": 40, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 } diff --git a/examples/tiny/s1/conf/augmentation.json b/examples/tiny/s1/conf/augmentation.json index 8f9ff7fd48a2b836b5d1e41e0577984acf2ec18d..6010c2e47ec9adf3131ee834bb7d6a17741ad9c5 100644 --- a/examples/tiny/s1/conf/augmentation.json +++ b/examples/tiny/s1/conf/augmentation.json @@ -28,7 +28,8 @@ "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true + "replace_with_zero": true, + "warp_mode": "PIL" }, "prob": 1.0 }