Commit 63c5dd05 authored by xuchaoxin1375

Update the make_window function in the ccser_client main file with clearer modular extraction

Parent 74e45e75
 ##
 import os
+from pathlib import Path
 import librosa
 import numpy as np
 import soundfile
 from joblib import load
+from sklearn.preprocessing import StandardScaler
 from audio.converter import convert_audio
-from config.EF import MCM, ava_features
+from config.EF import MCM, ava_features, f_config_def
-from config.MetaPath import bclf, brgr,project_dir
+from config.MetaPath import bclf, brgr, project_dir,speech_dbs_dir
 def get_used_keys(config_dict):
     """Collect the keys whose value is True in the given dict into a list and return it
@@ -60,7 +63,7 @@ def get_dropout_str(dropout, n_layers=3):
     return "_".join([str(dropout) for _ in range(n_layers)])
-def extract_feature(audio_file_name, f_config):
+def extract_feature_of_audio(audio_file_name, f_config):
     """
     Extracts audio features from an audio file. The function supports several different features,
     including MFCC, Chroma, MEL Spectrogram Frequency, Contrast, and Tonnetz.
@@ -70,6 +73,16 @@ def extract_feature(audio_file_name, f_config):
     In its implementation, the function first checks whether the audio file is in the correct format; if not, it converts it to a 16000 Hz sample rate and a mono channel.
     It then uses the Librosa library to extract the selected features, concatenates them into a numpy array, and returns that array.
+    This code uses Python's with statement together with the SoundFile class from the soundfile library.
+    It opens the audio file named file_name and passes it to the block as the sound_file object,
+    so that the file can be operated on inside the block.
+    The advantage of the with statement is that it automatically closes the file handle when the block ends, with no manual cleanup needed.
+    The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
+    for reading and manipulating audio files. In this function we use it to read the audio file and obtain information such as its sample rate and data type.
+    At the end of the block, the with statement automatically closes the sound_file object and releases all resources associated with the file.
+    Note that when opening audio files with the soundfile library, a with statement ensures the file handle is closed properly after use.
+    This avoids resource leaks and file-handle exhaustion when processing large numbers of audio files.
     params:
     -
     Extract feature from audio file `file_name`
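Since the new docstring leans on soundfile's context-manager behavior, here is a minimal standalone sketch of that pattern (the file path is hypothetical):

```python
import soundfile

# Opening via `with` guarantees the OS file handle is released at block exit,
# even if reading raises; sound_file exposes samplerate, channels, etc.
with soundfile.SoundFile("some_audio.wav") as sound_file:  # hypothetical path
    data = sound_file.read(dtype="float32")  # samples as a float32 ndarray
    rate = sound_file.samplerate             # e.g. 16000
print(data.shape, rate)
```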
@@ -98,20 +111,13 @@ def extract_feature(audio_file_name, f_config):
     try:
         print(audio_file_name,"@{audio_file_name}")
         # Consider switching the working directory to the project root so files can be accessed via relative paths
-        os.chdir(project_dir)
-        # sys.exist()
+        # os.chdir(project_dir)
+        p = Path(audio_file_name)
+        if p.is_file()==False:
+            raise FileNotFoundError(f"{p.absolute().resolve()} does not exist")
         with soundfile.SoundFile(audio_file_name) as sound_file:
             # opened successfully
             pass
-        # This code uses Python's with statement together with the SoundFile class from the soundfile library.
-        # It opens the audio file named file_name and passes it to the block as the sound_file object,
-        # so that the file can be operated on inside the block.
-        # The advantage of the with statement is that it automatically closes the file handle when the block ends, with no manual cleanup needed.
-        # The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
-        # for reading and manipulating audio files. In this function we use it to read the audio file and obtain information such as its sample rate and data type.
-        # At the end of the block, the with statement automatically closes the sound_file object and releases all resources associated with the file.
-        # Note that when opening audio files with the soundfile library, a with statement ensures the file handle is closed properly after use.
-        # This avoids resource leaks and file-handle exhaustion when processing large numbers of audio files.
     except RuntimeError:
         # not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
         # get the basename
@@ -186,21 +192,8 @@ def extract_features_handler(new_filename, f_config):
         extraction result (shape=(n,))
     """
     with soundfile.SoundFile(new_filename) as sound_file:
-        X = sound_file.read(dtype="float32")
-        sample_rate = sound_file.samplerate
-        # print(f'{sample_rate=}')
-        # Extract the required emotion features according to the parameters
-        # For the chroma and contrast features, compute the magnitude matrix of the STFT (modulus of the complex values, made real)
-        stft = []
-        from config.EF import chroma, contrast, mel, mfcc, tonnetz
-        global extractors_debug
-        extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
-        extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
-        extractors_debug=extractors1,extractors2
-        if chroma in f_config or contrast in f_config:
-            stft = stft_prepare(X)
+        X, sample_rate, extractors1, extractors2, stft = pre_calculate(f_config, sound_file)
         # Create an empty array to store the features to be extracted
         result = np.array([])
         f_res=None
@@ -213,8 +206,25 @@ def extract_features_handler(new_filename, f_config):
             f_res=extractors2[f](sample_rate, stft)
         # print(f_res.shape,f,"@{f_res.shape}")#type:ignore
         result = np.hstack((result, f_res))
-    # print(result.shape)
     return result
+def pre_calculate(f_config, sound_file):
+    X = sound_file.read(dtype="float32")
+    sample_rate = sound_file.samplerate
+    # print(f'{sample_rate=}')
+    # Extract the required emotion features according to the parameters
+    # For the chroma and contrast features, compute the magnitude matrix of the STFT (modulus of the complex values, made real)
+    from config.EF import chroma, contrast, mel, mfcc, tonnetz
+    extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
+    extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
+    stft = []
+    if chroma in f_config or contrast in f_config:
+        stft = stft_prepare(X)
+    return X,sample_rate,extractors1,extractors2,stft
 def stft_prepare(X):
     # mfcc=True if mfcc in f_config else False
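stft_prepare supplies the magnitude spectrogram that the chroma and contrast extractors share; a minimal sketch of that preparation with librosa (the audio path is hypothetical):

```python
import numpy as np
import librosa

# Compute the STFT magnitude once, then feed it to both extractors via S=.
X, sr = librosa.load("some_audio.wav", sr=16000, mono=True)  # hypothetical file
stft = np.abs(librosa.stft(X))
chroma = librosa.feature.chroma_stft(S=stft, sr=sr)          # shape (12, n_frames)
contrast = librosa.feature.spectral_contrast(S=stft, sr=sr)  # shape (7, n_frames)
print(chroma.shape, contrast.shape)
```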
@@ -332,13 +342,23 @@ def best_estimators(classification_task=True,fast=True):
     return res
 def test1():
-    from config.EF import f_config_def
-    audio_path= "../data/emodb/wav/03a01Fa.wav"
-    features = extract_feature(audio_path, f_config_def)
+    audio_path= speech_dbs_dir/"emodb/wav/03a01Fa.wav"
+    print(os.path.exists(audio_path))
+    features = extract_feature_of_audio(audio_path, f_config_def)
     return features
 if __name__ == "__main__":
-    audio_config = MCM
+    pass
     # res = get_audio_config(audio_config)
     # print(res)
-    res=best_estimators()
\ No newline at end of file
+    audio_path= speech_dbs_dir/"emodb/wav/03a01Fa.wav"
+    print(os.path.exists(audio_path))
+    features = extract_feature_of_audio(audio_path, f_config_def)
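For orientation, the 180/193 dimensions mentioned in the extractor below follow from mean-pooling each librosa feature over time: MFCC (40) + chroma (12) + mel (128) = 180, plus contrast (7) and tonnetz (6) = 193. A sketch of the full five-feature stack under that assumption (n_mfcc=40 and mean pooling are inferred from the arithmetic, not confirmed internals; the file is hypothetical):

```python
import numpy as np
import librosa

X, sr = librosa.load("some_audio.wav", sr=16000, mono=True)  # hypothetical file
stft = np.abs(librosa.stft(X))

parts = [
    np.mean(librosa.feature.mfcc(y=X, sr=sr, n_mfcc=40).T, axis=0),       # 40 dims
    np.mean(librosa.feature.chroma_stft(S=stft, sr=sr).T, axis=0),        # 12 dims
    np.mean(librosa.feature.melspectrogram(y=X, sr=sr).T, axis=0),        # 128 dims
    np.mean(librosa.feature.spectral_contrast(S=stft, sr=sr).T, axis=0),  # 7 dims
    np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sr).T, axis=0),  # 6 dims
]
print(np.hstack(parts).shape)  # (193,)
```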
@@ -16,7 +16,7 @@ from config.MetaPath import (
     validate_partition,
     ava_dbs,
 )
-from audio.core import extract_feature
+from audio.core import extract_feature_of_audio
 # from pathlib import Path
 Series = pd.Series
@@ -294,11 +294,12 @@ class AudioExtractor:
         print([id(attr) for attr in attributes])
         return attributes
-    def _extract_feature_in_meta(self, partition="", meta_path=""):
+    def _extract_feature_in_meta(self, partition="", meta_path="",verbose=1):
         """Extract the features of the speech files listed in the meta files
         Only a single extraction pass is done here
-        The e_config field in the matrix file name is tentatively self.e_config; if so, it may be inconsistent with the emotion field in the meta_path file.
+        The e_config field in the matrix file name is tentatively self.e_config;
+        if so, it may be inconsistent with the emotion field in the meta_path file.
         Parameters
         ----------
@@ -308,31 +309,15 @@
         Marks whether the extracted files come from the training set or the test (validation) set
         """
         # Check whether the dataset has been filtered and partitioned by the configured emotions:
-        # if(not os.path.exists(meta_path)):
-        #     create_csv_by_metaname(meta_file=meta_path)
-        # self.load_metadata(meta_path)
         audio_paths, emotions = self.load_metadata(meta_path)
         # Save the computed results as object attributes
         self.audio_paths = audio_paths
         self.emotions = emotions
-        # Try to compute the corpus name (field)
-        meta_name = os.path.basename(meta_path)
-        meta_name,ext=os.path.splitext(meta_name)
-        meta_fields = meta_name.split("_")
-        db = meta_fields[1]
-        # print(f"{meta_path=}@")
-        # print(f"{db=}@")
-        db = db if db in ava_dbs else ""
-        # Compute the emotion field
-        emotions_first_letters=meta_fields[-1]
-        origin_efls = get_first_letters(self.e_config)
-        if emotions_first_letters != origin_efls:
-            raise ValueError(
-                f"{emotions_first_letters} is not inconsistant with {self.e_config}"
-            )
+        # Try to compute the corpus name and the emotion-configuration name
+        db = self.fields_parse(meta_path)
         if not os.path.isdir(self.features_dir):
             os.mkdir(self.features_dir)
@@ -352,12 +337,9 @@
             self.features_dir,
             features_file_name,
         )
-        print(f"Checking whether the feature file {features_file_path} exists...")
-        print(f"{self.e_config=}")
-        # if self.e_config == HNS:
-        #     raise ValueError(f"{self.e_config=}")
+        if verbose:
+            print(f"Checking whether the feature file {features_file_path} exists...")
+            print(f"{self.e_config=}")
         ffp = os.path.isfile(features_file_path)
         if ffp:
@@ -370,11 +352,41 @@
             if self.verbose:
                 print("The npy file does not exist; trying to create it...")
             # If features have not been extracted yet, extract them here and save the result for direct reuse next time
-            features = self.features_save(partition, audio_paths, features_file_path)
+            features = self.features_extract_save(partition, audio_paths, features_file_path)
             return features, audio_paths, emotions
-    def features_save(self, partition, audio_paths, features_file_path):
+    def fields_parse(self, meta_path):
+        # Compute the corpus field name
+        meta_fields, db = self.db_field_parse(meta_path)
+        # Compute the emotion field and check it
+        self.validate_emotion_config_consistence(meta_fields)
+        return db
+    def db_field_parse(self, meta_path):
+        meta_name = os.path.basename(meta_path)
+        meta_name,ext=os.path.splitext(meta_name)
+        meta_fields = meta_name.split("_")
+        db = meta_fields[1]
+        # print(f"{meta_path=}@")
+        # print(f"{db=}@")
+        db = db if db in ava_dbs else ""
+        return meta_fields,db
+    def validate_emotion_config_consistence(self, meta_fields):
+        emotions_first_letters=meta_fields[-1]
+        origin_efls = get_first_letters(self.e_config)
+        # Check that the emotion configuration is consistent
+        if emotions_first_letters != origin_efls:
+            raise ValueError(
+                f"{emotions_first_letters} is inconsistent with {self.e_config}"
+            )
+    def features_extract_save(self, partition, audio_paths, features_file_path):
         """Persist the extracted features (ndarray) to disk (as an npy file)
         Use tqdm to visualize the feature-extraction progress
@@ -406,7 +418,7 @@
print(f"正在抽取第{cnt}个文件的特征..") print(f"正在抽取第{cnt}个文件的特征..")
# 调用utils模块中的extract_featrue进行特征提取 # 调用utils模块中的extract_featrue进行特征提取
f_config = self.f_config f_config = self.f_config
feature = extract_feature(audio_file, f_config=f_config) feature = extract_feature_of_audio(audio_file, f_config=f_config)
if self.feature_dimension is None: if self.feature_dimension is None:
# MCM特征组合下(3特征),有180维的单轴数组,5特征下,有193维 # MCM特征组合下(3特征),有180维的单轴数组,5特征下,有193维
self.feature_dimension = feature.shape[0] self.feature_dimension = feature.shape[0]
......
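The exists-then-load-else-extract-and-save flow above is a plain .npy cache; a minimal standalone sketch of the same pattern (the paths and the per-file extractor are placeholders, not the class's actual helpers):

```python
import os
import numpy as np

def load_or_extract(features_file_path, audio_paths, extract_one):
    # Reuse the cached feature matrix if a previous run already saved it.
    if os.path.isfile(features_file_path):
        return np.load(features_file_path)
    # Otherwise extract per file, stack into one matrix, and persist for next time.
    features = np.vstack([extract_one(p) for p in audio_paths])
    np.save(features_file_path, features)
    return features
```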
@@ -20,7 +20,7 @@ from audio.extractor import load_data_from_meta
 from config.EF import (e_config_def, f_config_def, validate_emotions)
 from config.MetaPath import (emodb, meta_paths_of_db, ravdess, savee,validate_partition,project_dir)
 import config.MetaPath as meta
-from audio.core import best_estimators, extract_feature
+from audio.core import best_estimators, extract_feature_of_audio
 ##
 class EmotionRecognizer:
@@ -219,7 +219,7 @@ class EmotionRecognizer:
         given an `audio_path`, this method extracts the features
         and predicts the emotion
         """
-        feature1 = extract_feature(audio_path, self.f_config)
+        feature1 = extract_feature_of_audio(audio_path, self.f_config)
         # print(feature1.shape)
         # print(feature1,"@{feature1}",feature1.shape)
         # feature2=feature1.T
@@ -248,7 +248,7 @@
         Predicts the probability of each emotion.
         """
         if self.classification_task:
-            feature = extract_feature(audio_path, self.f_config).reshape(1, -1)
+            feature = extract_feature_of_audio(audio_path, self.f_config).reshape(1, -1)
             proba = self.model.predict_proba(feature)[0]
             result = {}
             for emotion, prob in zip(self.model.classes_, proba):
......
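The reshape(1, -1) turns the single 1-D feature vector into the one-row 2-D matrix sklearn expects, and zipping model.classes_ with the probability row labels each entry; a minimal sketch with a stand-in classifier (the tiny random dataset is fabricated purely for illustration):

```python
import numpy as np
from sklearn.svm import SVC

rng = np.random.default_rng(0)
X = rng.normal(size=(60, 180))                 # fake 180-dim feature vectors
y = rng.choice(["happy", "neutral", "sad"], size=60)
model = SVC(probability=True).fit(X, y)

feature = rng.normal(size=180).reshape(1, -1)  # one sample -> shape (1, 180)
proba = model.predict_proba(feature)[0]
result = {emotion: prob for emotion, prob in zip(model.classes_, proba)}
print(result)
```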
@@ -22,7 +22,7 @@ from config.EF import validate_emotions
 from recognizer.basic import EmotionRecognizer
 # from ER import EmotionRecognizer
 from config.MetaPath import get_first_letters
-from audio.core import extract_feature, get_dropout_str
+from audio.core import extract_feature_of_audio, get_dropout_str
 class DeepEmotionRecognizer(EmotionRecognizer):
@@ -337,7 +337,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
print("[+] Model trained") print("[+] Model trained")
def predict(self, audio_path): def predict(self, audio_path):
feature = extract_feature(audio_path, **self._f_config_dict).reshape( feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape(
(1, 1, self.input_length) (1, 1, self.input_length)
) )
if self.classification_task: if self.classification_task:
@@ -349,7 +349,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
     def predict_proba(self, audio_path):
         if self.classification_task:
-            feature = extract_feature(audio_path, **self._f_config_dict).reshape(
+            feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape(
                 (1, 1, self.input_length)
            )
             proba = self.model.predict(feature)[0][0]
......
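Unlike the sklearn path above, the deep recognizer reshapes the same 1-D vector to (batch=1, timesteps=1, features=input_length), since recurrent Keras layers consume 3-D input; a minimal shape sketch (input_length=180 is an assumed value matching the MCM combination):

```python
import numpy as np

input_length = 180                      # assumed feature dimension (MCM combination)
feature = np.zeros(input_length)        # stand-in for an extracted feature vector

sk_input = feature.reshape(1, -1)                # sklearn: (n_samples, n_features)
rnn_input = feature.reshape(1, 1, input_length)  # Keras RNNs: (batch, timesteps, features)
print(sk_input.shape, rnn_input.shape)           # (1, 180) (1, 1, 180)
```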