replace fbank, test=asr

fcdaef6c · huangyuxin · f256bb9c · fcdaef6c
隐藏空白更改
内联并排

Showing 1 changed file with 12 additions and 12 deletions

paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +12 -12

未找到文件。
--- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
+++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 """Contains the audio featurizer class."""
 import numpy as np
+import paddle
+import paddleaudio.compliance.kaldi as kaldi
 from python_speech_features import delta
 from python_speech_features import logfbank
 from python_speech_features import mfcc
@@ -345,19 +347,17 @@ class AudioFeaturizer():
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # (T, D)
-        fbank_feat = logfbank(
+        waveform = paddle.to_tensor(
-            signal=samples,
+            np.expand_dims(samples, 0), dtype=paddle.float32)
-            samplerate=sample_rate,
+        mat = kaldi.fbank(
-            winlen=0.001 * window_ms,
+            waveform,
-            winstep=0.001 * stride_ms,
+            n_mels=feat_dim,
-            nfilt=feat_dim,
+            frame_length=window_ms,  # default : 25
-            nfft=512,
+            frame_shift=stride_ms,  # default : 10
-            lowfreq=20,
-            highfreq=max_freq,
            dither=dither,
-            remove_dc_offset=True,
+            energy_floor=0.0,
-            preemph=0.97,
+            sr=sample_rate)
-            wintype='povey')
+        fbank_feat = np.squeeze(mat.numpy())
        if delta_delta:
            fbank_feat = self._concat_delta_delta(fbank_feat)
        return fbank_feat