Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
4ad885f8
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
接近 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4ad885f8
编写于
6月 09, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add feature notebook
上级
5fb06f42
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
224 addition
and
0 deletion
+224
-0
.notebook/audio_feature.ipynb
.notebook/audio_feature.ipynb
+224
-0
未找到文件。
.notebook/audio_feature.ipynb
0 → 100644
浏览文件 @
4ad885f8
{
"cells": [
{
"cell_type": "code",
"execution_count": 13,
"id": "matched-camera",
"metadata": {},
"outputs": [],
"source": [
"from nnAudio import Spectrogram\n",
"from scipy.io import wavfile\n",
"import torch\n",
"import soundfile as sf\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "middle-salem",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"16000\n",
"[43 75 69 ... 7 6 3]\n",
"(83792,)\n",
"int16\n",
"sampling rate = 16000. Please make sure the sampling rate is correct in order toget a valid freq range\n",
"STFT kernels created, time used = 0.2142 seconds\n",
"tensor([[[[-4.0940e+03, 1.2600e+04],\n",
" [ 8.5108e+03, -5.4930e+03],\n",
" [-3.3631e+03, -1.7904e+03],\n",
" ...,\n",
" [ 8.2279e+03, -9.3340e+03],\n",
" [-3.1990e+03, 2.0969e+03],\n",
" [-1.2669e+03, 4.4488e+03]],\n",
"\n",
" [[ 3.4886e+03, -9.9620e+03],\n",
" [-4.5364e+03, 4.1907e+02],\n",
" [ 2.5074e+03, 7.1339e+03],\n",
" ...,\n",
" [-5.4819e+03, 3.9258e+01],\n",
" [ 4.7221e+03, 6.5887e+01],\n",
" [ 9.6492e+02, -3.4386e+03]],\n",
"\n",
" [[-3.4947e+03, 9.2981e+03],\n",
" [-7.5164e+03, 8.1856e+02],\n",
" [-5.3766e+03, -9.0889e+03],\n",
" ...,\n",
" [ 1.4317e+03, 5.7447e+03],\n",
" [-3.1178e+03, 3.0740e+03],\n",
" [-3.4351e+03, 5.6900e+02]],\n",
"\n",
" ...,\n",
"\n",
" [[ 6.7112e+01, -4.5737e+00],\n",
" [-9.6295e+00, 3.5554e+01],\n",
" [ 1.8527e+00, -1.0491e+01],\n",
" ...,\n",
" [-1.1157e+01, 3.4423e+00],\n",
" [ 3.1193e+00, -4.4388e+00],\n",
" [-8.8242e+00, 8.0324e+00]],\n",
"\n",
" [[-6.5080e+01, 2.9543e+00],\n",
" [ 3.9992e+01, -1.3836e+01],\n",
" [-9.2803e+00, 1.0318e+01],\n",
" ...,\n",
" [ 4.2928e+00, 9.2397e+00],\n",
" [ 3.6642e+00, 9.4680e+00],\n",
" [ 4.8932e+00, -2.5199e+01]],\n",
"\n",
" [[ 4.7264e+01, -1.0721e+00],\n",
" [-6.0516e+00, -1.4589e+01],\n",
" [ 1.3127e+01, 1.4995e+00],\n",
" ...,\n",
" [ 1.7333e+01, -1.4380e+01],\n",
" [-3.6046e+00, -6.1019e+00],\n",
" [ 1.3321e+01, 2.3184e+01]]]])\n"
]
}
],
"source": [
"sr, song = wavfile.read('./BAC009S0764W0124.wav') # Loading your audio\n",
"print(sr)\n",
"print(song)\n",
"print(song.shape)\n",
"print(song.dtype)\n",
"x = song\n",
"x = torch.tensor(x).float() # casting the array into a PyTorch Tensor\n",
"\n",
"spec_layer = Spectrogram.STFT(n_fft=2048, freq_bins=None, hop_length=512,\n",
" window='hann', freq_scale='linear', center=True, pad_mode='reflect',\n",
" fmin=50,fmax=8000, sr=sr) # Initializing the model\n",
"\n",
"spec = spec_layer(x) # Feed-forward your waveform to get the spectrogram\n",
"print(spec)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "finished-sterling",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"16000\n",
"[43 75 69 ... 7 6 3]\n",
"(83792,)\n",
"int16\n",
"True\n",
"sampling rate = 16000. Please make sure the sampling rate is correct in order toget a valid freq range\n",
"STFT kernels created, time used = 0.2495 seconds\n",
"torch.Size([1, 1025, 164, 2])\n",
"tensor([[[[-4.0940e+03, 1.2600e+04],\n",
" [ 8.5108e+03, -5.4930e+03],\n",
" [-3.3631e+03, -1.7904e+03],\n",
" ...,\n",
" [ 8.2279e+03, -9.3340e+03],\n",
" [-3.1990e+03, 2.0969e+03],\n",
" [-1.2669e+03, 4.4488e+03]],\n",
"\n",
" [[ 3.4886e+03, -9.9620e+03],\n",
" [-4.5364e+03, 4.1907e+02],\n",
" [ 2.5074e+03, 7.1339e+03],\n",
" ...,\n",
" [-5.4819e+03, 3.9258e+01],\n",
" [ 4.7221e+03, 6.5887e+01],\n",
" [ 9.6492e+02, -3.4386e+03]],\n",
"\n",
" [[-3.4947e+03, 9.2981e+03],\n",
" [-7.5164e+03, 8.1856e+02],\n",
" [-5.3766e+03, -9.0889e+03],\n",
" ...,\n",
" [ 1.4317e+03, 5.7447e+03],\n",
" [-3.1178e+03, 3.0740e+03],\n",
" [-3.4351e+03, 5.6900e+02]],\n",
"\n",
" ...,\n",
"\n",
" [[ 6.7112e+01, -4.5737e+00],\n",
" [-9.6295e+00, 3.5554e+01],\n",
" [ 1.8527e+00, -1.0491e+01],\n",
" ...,\n",
" [-1.1157e+01, 3.4423e+00],\n",
" [ 3.1193e+00, -4.4388e+00],\n",
" [-8.8242e+00, 8.0324e+00]],\n",
"\n",
" [[-6.5080e+01, 2.9543e+00],\n",
" [ 3.9992e+01, -1.3836e+01],\n",
" [-9.2803e+00, 1.0318e+01],\n",
" ...,\n",
" [ 4.2928e+00, 9.2397e+00],\n",
" [ 3.6642e+00, 9.4680e+00],\n",
" [ 4.8932e+00, -2.5199e+01]],\n",
"\n",
" [[ 4.7264e+01, -1.0721e+00],\n",
" [-6.0516e+00, -1.4589e+01],\n",
" [ 1.3127e+01, 1.4995e+00],\n",
" ...,\n",
" [ 1.7333e+01, -1.4380e+01],\n",
" [-3.6046e+00, -6.1019e+00],\n",
" [ 1.3321e+01, 2.3184e+01]]]])\n",
"True\n"
]
}
],
"source": [
"wav, sr = sf.read('./BAC009S0764W0124.wav', dtype='int16')\n",
"print(sr)\n",
"print(wav)\n",
"print(wav.shape)\n",
"print(wav.dtype)\n",
"print(np.allclose(wav, song))\n",
"\n",
"x = wav\n",
"x = torch.tensor(x).float() # casting the array into a PyTorch Tensor\n",
"\n",
"spec_layer = Spectrogram.STFT(n_fft=2048, freq_bins=None, hop_length=512,\n",
" window='hann', freq_scale='linear', center=True, pad_mode='reflect',\n",
" fmin=50,fmax=8000, sr=sr) # Initializing the model\n",
"\n",
"wav_spec = spec_layer(x) # Feed-forward your waveform to get the spectrogram\n",
"print(wav_spec.shape)\n",
"print(wav_spec)\n",
"print(np.allclose(wav_spec, spec))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "running-technology",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录