Commit 63c5dd05 authored by xuchaoxin1375

Update the make_window function in the ccser_client main file with clearer modular extraction

Parent 74e45e75
##
import os
from pathlib import Path
import librosa
import numpy as np
import soundfile
from joblib import load
from sklearn.preprocessing import StandardScaler
from audio.converter import convert_audio
from config.EF import MCM, ava_features
from config.MetaPath import bclf, brgr,project_dir
from config.EF import MCM, ava_features, f_config_def
from config.MetaPath import bclf, brgr, project_dir,speech_dbs_dir
def get_used_keys(config_dict):
"""将传入的字典中值为True的key添加到列表中并返回
@@ -60,7 +63,7 @@ def get_dropout_str(dropout, n_layers=3):
return "_".join([str(dropout) for _ in range(n_layers)])
def extract_feature(audio_file_name, f_config):
def extract_feature_of_audio(audio_file_name, f_config):
"""
Extract audio features from an audio file. The function supports several different feature types,
including MFCC, Chroma, MEL Spectrogram Frequency, Contrast, and Tonnetz.
@@ -70,6 +73,16 @@ def extract_feature(audio_file_name, f_config):
In its implementation, the function first checks whether the audio file is in the correct format; if not, it converts the file to a 16000 Hz sample rate and a mono channel.
It then uses the librosa library to extract the selected features, concatenates them into a single numpy array, and returns that array.
This code uses Python's with statement together with the SoundFile class from the soundfile library.
It opens the audio file named file_name and passes it to the code block as the sound_file object,
so that the file can be operated on inside the block.
The benefit of the with statement is that it automatically closes the file handle when the block ends; no manual close is needed.
The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
for reading and manipulating audio files. In this function, the sound_file object is used to read the audio file and obtain information such as its sample rate and data type.
At the end of the block, the with statement automatically closes the sound_file object and releases all resources associated with the file.
Note that when opening audio files with the soundfile library, a with statement can be used to ensure the file handle is closed correctly after use.
This avoids problems such as resource leaks and file-handle exhaustion when processing large numbers of audio files.
params:
-
Extract feature from audio file `file_name`
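A minimal sketch of the pattern described above (illustrative file name, not the full implementation):
>>> import soundfile
>>> with soundfile.SoundFile("speech.wav") as sound_file:  # doctest: +SKIP
...     X = sound_file.read(dtype="float32")
...     sample_rate = sound_file.samplerate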
@@ -98,20 +111,13 @@ def extract_feature(audio_file_name, f_config):
try:
print(audio_file_name,"@{audio_file_name}")
# Consider switching the working directory to the project root here so files can be accessed via relative paths
os.chdir(project_dir)
# sys.exist()
# os.chdir(project_dir)
p = Path(audio_file_name)
if not p.is_file():
raise FileNotFoundError(f"{p.absolute().resolve()} does not exist")
with soundfile.SoundFile(audio_file_name) as sound_file:
# opened successfully
pass
# This line uses Python's with statement together with the SoundFile class from the soundfile library.
# It opens the audio file named file_name and passes it to the code block as the sound_file object,
# so that the file can be operated on inside the block.
# The benefit of the with statement is that it automatically closes the file handle when the block ends; no manual close is needed.
# The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
# for reading and manipulating audio files. In this function, the sound_file object is used to read the audio file and obtain information such as its sample rate and data type.
# At the end of the block, the with statement automatically closes the sound_file object and releases all resources associated with the file.
# Note that when opening audio files with the soundfile library, a with statement can be used to ensure the file handle is closed correctly after use.
# This avoids problems such as resource leaks and file-handle exhaustion when processing large numbers of audio files.
except RuntimeError:
# not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
# get the basename
@@ -186,21 +192,8 @@ def extract_features_handler(new_filename, f_config):
The extraction result (shape=(n,))
"""
with soundfile.SoundFile(new_filename) as sound_file:
X = sound_file.read(dtype="float32")
sample_rate = sound_file.samplerate
# print(f'{sample_rate=}')
# Extract the requested emotion features according to the parameters
# For the chroma and contrast features, compute the magnitude matrix of the STFT (take the modulus of the complex values to make them real)
stft = []
from config.EF import chroma, contrast, mel, mfcc, tonnetz
global extractors_debug
extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
X, sample_rate, extractors1, extractors2, stft = pre_calculate(f_config, sound_file)
extractors_debug=extractors1,extractors2
if chroma in f_config or contrast in f_config:
stft = stft_prepare(X)
# Create an empty array to hold the features to be extracted
result = np.array([])
f_res=None
@@ -213,8 +206,25 @@ def extract_features_handler(new_filename, f_config):
f_res=extractors2[f](sample_rate, stft)
# print(f_res.shape,f,"@{f_res.shape}")#type:ignore
result = np.hstack((result, f_res))
# print(result.shape)
return result
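# Usage sketch (hypothetical file name; the feature names come from config.EF):
# result = extract_features_handler("speech.wav", f_config=[mfcc, chroma])
# result is then a 1-D np.ndarray whose length depends on the chosen features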
def pre_calculate(f_config, sound_file):
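"""Read the audio samples and build the feature-extractor lookup tables.
Returns (X, sample_rate, extractors1, extractors2, stft); stft stays an empty list unless chroma or contrast features are requested.
"""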
X = sound_file.read(dtype="float32")
sample_rate = sound_file.samplerate
# print(f'{sample_rate=}')
# Extract the requested emotion features according to the parameters
# For the chroma and contrast features, compute the magnitude matrix of the STFT (take the modulus of the complex values to make them real)
from config.EF import chroma, contrast, mel, mfcc, tonnetz
extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
stft = []
if chroma in f_config or contrast in f_config:
stft = stft_prepare(X)
return X,sample_rate,extractors1,extractors2,stft
def stft_prepare(X):
# mfcc=True if mfcc in f_config else False
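# The folded body presumably computes the real-valued STFT magnitude,
# roughly (an assumption about the hidden lines, using the librosa API):
# return np.abs(librosa.stft(X))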
@@ -332,13 +342,23 @@ def best_estimators(classification_task=True,fast=True):
return res
def test1():
from config.EF import f_config_def
audio_path= "../data/emodb/wav/03a01Fa.wav"
features = extract_feature(audio_path, f_config_def)
audio_path= speech_dbs_dir/"emodb/wav/03a01Fa.wav"
print(os.path.exists(audio_path))
features = extract_feature_of_audio(audio_path, f_config_def)
return features
if __name__ == "__main__":
audio_config = MCM
pass
# res = get_audio_config(audio_config)
# print(res)
res=best_estimators()
\ No newline at end of file
# res=best_estimators()
audio_path= speech_dbs_dir/"emodb/wav/03a01Fa.wav"
print(os.path.exists(audio_path))
features = extract_feature_of_audio(audio_path, f_config_def)
@@ -16,7 +16,7 @@ from config.MetaPath import (
validate_partition,
ava_dbs,
)
from audio.core import extract_feature
from audio.core import extract_feature_of_audio
# from pathlib import Path
Series = pd.Series
@@ -294,11 +294,12 @@ class AudioExtractor:
print([id(attr) for attr in attributes])
return attributes
def _extract_feature_in_meta(self, partition="", meta_path=""):
def _extract_feature_in_meta(self, partition="", meta_path="",verbose=1):
"""根据meta_files提取相应语音文件的特征
这里仅完成单次提取
矩阵文件名中的e_config字段暂定为self.e_config,如果是这样,可能会和meta_path文件中的情感字段出现不一致的情况.
矩阵文件名中的e_config字段暂定为self.e_config
如果是这样,可能会和meta_path文件中的情感字段出现不一致的情况.
Parameters
----------
@@ -308,31 +309,15 @@
Marks whether the extracted files come from the training set or the test (validation) set
"""
# Check whether the dataset has been filtered and split according to the configured emotions:
# if(not os.path.exists(meta_path)):
# create_csv_by_metaname(meta_file=meta_path)
# self.load_metadata(meta_path)
audio_paths, emotions = self.load_metadata(meta_path)
# Store the results as object attributes
self.audio_paths = audio_paths
self.emotions = emotions
# Try to determine the corpus name (field)
meta_name = os.path.basename(meta_path)
meta_name,ext=os.path.splitext(meta_name)
meta_fields = meta_name.split("_")
db = meta_fields[1]
# print(f"{meta_path=}@")
# print(f"{db=}@")
db = db if db in ava_dbs else ""
# Compute the emotion field
emotions_first_letters=meta_fields[-1]
origin_efls = get_first_letters(self.e_config)
if emotions_first_letters != origin_efls:
raise ValueError(
f"{emotions_first_letters} is not inconsistant with {self.e_config}"
)
# Try to determine the corpus name and the emotion-config name
db = self.fields_parse(meta_path)
if not os.path.isdir(self.features_dir):
os.mkdir(self.features_dir)
@@ -352,12 +337,9 @@
self.features_dir,
features_file_name,
)
print(f"检查特征文件{features_file_path}是否存在...")
print(f"{self.e_config=}")
# if self.e_config == HNS:
# raise ValueError(f"{self.e_config=}")
if verbose:
print(f"检查特征文件{features_file_path}是否存在...")
print(f"{self.e_config=}")
ffp = os.path.isfile(features_file_path)
if ffp:
@@ -370,11 +352,41 @@
if self.verbose:
print("npy文件不存在,尝试创建...")
# 如果尚未提取过特征,则在此处进行提取,同时保存提取结果,以便下次直接使用
features = self.features_save(partition, audio_paths, features_file_path)
features = self.features_extract_save(partition, audio_paths, features_file_path)
return features, audio_paths, emotions
def features_save(self, partition, audio_paths, features_file_path):
def fields_parse(self, meta_path):
# Parse the corpus field name
meta_fields, db = self.db_field_parse(meta_path)
# Parse the emotion field and validate it
self.validate_emotion_config_consistence(meta_fields)
return db
def db_field_parse(self, meta_path):
meta_name = os.path.basename(meta_path)
meta_name,ext=os.path.splitext(meta_name)
meta_fields = meta_name.split("_")
db = meta_fields[1]
# print(f"{meta_path=}@")
# print(f"{db=}@")
db = db if db in ava_dbs else ""
return meta_fields,db
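# e.g. a meta file named "train_emodb_AHNS.csv" (hypothetical) gives
# meta_fields = ["train", "emodb", "AHNS"] and db = "emodb"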
def validate_emotion_config_consistence(self, meta_fields):
emotions_first_letters=meta_fields[-1]
origin_efls = get_first_letters(self.e_config)
# Check that the emotion configuration is consistent
if emotions_first_letters != origin_efls:
raise ValueError(
f"{emotions_first_letters} is not inconsistant with {self.e_config}"
)
def features_extract_save(self, partition, audio_paths, features_file_path):
"""将提取的特征(ndarray)保存持久化保存(为npy文件)
利用qtmd提供可视化特征抽取进度
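A rough sketch of the loop described here (the actual body is folded in this diff; tqdm is assumed to wrap the iteration):
>>> from tqdm import tqdm  # doctest: +SKIP
>>> for audio_file in tqdm(audio_paths):
...     feature = extract_feature_of_audio(audio_file, f_config=f_config)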
@@ -406,7 +418,7 @@
print(f"正在抽取第{cnt}个文件的特征..")
# 调用utils模块中的extract_featrue进行特征提取
f_config = self.f_config
feature = extract_feature(audio_file, f_config=f_config)
feature = extract_feature_of_audio(audio_file, f_config=f_config)
if self.feature_dimension is None:
# Under the MCM feature combination (3 features) this is a 180-dimensional 1-D array; with 5 features it is 193-dimensional
self.feature_dimension = feature.shape[0]
@@ -20,7 +20,7 @@ from audio.extractor import load_data_from_meta
from config.EF import (e_config_def, f_config_def, validate_emotions)
from config.MetaPath import (emodb, meta_paths_of_db, ravdess, savee,validate_partition,project_dir)
import config.MetaPath as meta
from audio.core import best_estimators, extract_feature
from audio.core import best_estimators, extract_feature_of_audio
##
class EmotionRecognizer:
@@ -219,7 +219,7 @@ class EmotionRecognizer:
given an `audio_path`, this method extracts the features
and predicts the emotion
"""
feature1 = extract_feature(audio_path, self.f_config)
feature1 = extract_feature_of_audio(audio_path, self.f_config)
# print(feature1.shape)
# print(feature1,"@{feature1}",feature1.shape)
# feature2=feature1.T
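# Usage sketch (hypothetical setup; rec is a trained EmotionRecognizer):
# rec = EmotionRecognizer(...)
# rec.predict("data/emodb/wav/03a01Fa.wav")  # -> e.g. "happy"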
@@ -248,7 +248,7 @@
Predicts the probability of each emotion.
"""
if self.classification_task:
feature = extract_feature(audio_path, self.f_config).reshape(1, -1)
feature = extract_feature_of_audio(audio_path, self.f_config).reshape(1, -1)
proba = self.model.predict_proba(feature)[0]
result = {}
for emotion, prob in zip(self.model.classes_, proba):
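# result ends up like {"happy": 0.71, "neutral": 0.20, "sad": 0.09}
# (illustrative values; the keys are self.model.classes_)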
@@ -22,7 +22,7 @@ from config.EF import validate_emotions
from recognizer.basic import EmotionRecognizer
# from ER import EmotionRecognizer
from config.MetaPath import get_first_letters
from audio.core import extract_feature, get_dropout_str
from audio.core import extract_feature_of_audio, get_dropout_str
class DeepEmotionRecognizer(EmotionRecognizer):
@@ -337,7 +337,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
print("[+] Model trained")
def predict(self, audio_path):
feature = extract_feature(audio_path, **self._f_config_dict).reshape(
feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape(
(1, 1, self.input_length)
)
if self.classification_task:
@@ -349,7 +349,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
def predict_proba(self, audio_path):
if self.classification_task:
feature = extract_feature(audio_path, **self._f_config_dict).reshape(
feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape(
(1, 1, self.input_length)
)
proba = self.model.predict(feature)[0][0]
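# shape (1, 1, input_length): a batch of one sample with a single timestep,
# presumably the layout the underlying Keras model expects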