未验证 提交 5201c59f 编写于 作者: H Hui Zhang 提交者: GitHub

Merge pull request #1528 from KPatr1ck/audio

[audio][feature] Refactor and add docstrings.
...@@ -11,5 +11,12 @@ ...@@ -11,5 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import compliance
from . import datasets
from . import features
from . import functional
from . import io
from . import metric
from . import sox_effects
from .backends import load from .backends import load
from .backends import save from .backends import save
...@@ -71,15 +71,17 @@ class Spectrogram(nn.Layer): ...@@ -71,15 +71,17 @@ class Spectrogram(nn.Layer):
if win_length is None: if win_length is None:
win_length = n_fft win_length = n_fft
fft_window = get_window(window, win_length, fftbins=True, dtype=dtype) self.fft_window = get_window(
window, win_length, fftbins=True, dtype=dtype)
self._stft = partial( self._stft = partial(
paddle.signal.stft, paddle.signal.stft,
n_fft=n_fft, n_fft=n_fft,
hop_length=hop_length, hop_length=hop_length,
win_length=win_length, win_length=win_length,
window=fft_window, window=self.fft_window,
center=center, center=center,
pad_mode=pad_mode) pad_mode=pad_mode)
self.register_buffer('fft_window', self.fft_window)
def forward(self, x): def forward(self, x):
stft = self._stft(x) stft = self._stft(x)
...@@ -259,12 +261,18 @@ class MFCC(nn.Layer): ...@@ -259,12 +261,18 @@ class MFCC(nn.Layer):
sr: int=22050, sr: int=22050,
n_mfcc: int=40, n_mfcc: int=40,
norm: str='ortho', norm: str='ortho',
dtype: str=paddle.float32,
**kwargs): **kwargs):
"""[summary] """Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms.
Parameters: Parameters:
sr (int, optional): [description]. Defaults to 22050. sr(int): the audio sample rate.
n_mfcc (int, optional): [description]. Defaults to 40. The default value is 22050.
norm (str, optional): [description]. Defaults to 'ortho'. n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 40.
norm(str|float): the normalization type in computing fbank matrix. Defaults to 'ortho'.
You can specify norm=1.0/2.0 to use customized p-norm normalization.
dtype(str): the datatype of fbank matrix used in the transform. Use float64 to increase numerical
accuracy. Note that the final transform will be conducted in float32 regardless of dtype of fbank matrix.
""" """
super(MFCC, self).__init__() super(MFCC, self).__init__()
self._log_melspectrogram = LogMelSpectrogram(sr=sr, **kwargs) self._log_melspectrogram = LogMelSpectrogram(sr=sr, **kwargs)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册