diff --git a/SG/ccser_gui.py b/SG/ccser_gui.py index 4dece47a2540dbe02e90d2c8fa31ca0feb01b143..550de824d4abd3df86c3f5657e8fec13d66c9dbf 100644 --- a/SG/ccser_gui.py +++ b/SG/ccser_gui.py @@ -10,11 +10,16 @@ import ipdb import numpy as np import PySimpleGUI as sg import query as q -from constants.beauty import (ccser_theme, db_introduction, h2, logo, - option_frame, result_frame) +from constants.beauty import ( + ccser_theme, + db_introduction, + h2, + logo, + option_frame, + result_frame, +) from constants.uiconfig import ML_KEY, __version__ -from demo_programs.Demo_Nice_Buttons import (image_file_to_bytes, red_pill64, - wcolor) +from demo_programs.Demo_Nice_Buttons import image_file_to_bytes, red_pill64, wcolor from fviewer import audio_viewer_layout, fviewer_events, selected_files from joblib import load from multilanguage import get_your_language_translator @@ -30,9 +35,16 @@ import sys from audio.core import get_used_keys from audio.graph import showFreqGraph, showMelFreqGraph, showWaveForm from config.EF import ava_algorithms, ava_emotions, ava_features -from config.MetaPath import (ava_dbs, bclf, brgr, emodb, - get_example_audio_file, ravdess, savee, - speech_dbs_dir) +from config.MetaPath import ( + ava_dbs, + bclf, + brgr, + emodb, + get_example_audio_file, + ravdess, + savee, + speech_dbs_dir, +) def import_config_bookmark(): @@ -101,6 +113,25 @@ def get_algos_elements_list(ava_algorithms=ava_algorithms): ) return algos_radios +def get_train_fit_start_layout(): + train_fit_start_layout = [ + [ + # sg.Button('start train'), + sg.RButton( + "start train", + image_data=image_file_to_bytes(red_pill64, (100, 50)), + button_color=("white", "white"), + # button_color=wcolor, + font="Any 15", + pad=(0, 0), + key="start train", + ), + sg.pin(sg.T("current model:", key=current_model_tip_key, visible=False)), + sg.T("", key=current_model_key), + ] + ] + + return train_fit_start_layout ## # ---create the window--- @@ -109,99 +140,193 @@ def make_window(theme=None, size=None): # print(theme) sg.theme(theme) menu_def = [["&Application", ["E&xit"]], ["Help", ["Introduction"]]] + # ---user register and login--- + user_layout = get_user_layout() # ---choose theme--- - theme_layout = [ - [ - sg.Text( - "See how elements look under different themes by choosing a different theme here!" 
- ) - ], - [ - sg.Listbox( - values=sg.theme_list(), - size=bt.lb_size, - key="-THEME LISTBOX-", - enable_events=True, - ) - ], - [sg.Button("Set Theme")], - ] + theme_layout = get_theme_layout() # ---file viewer-- # file_viewer_layout = file_view_layout() # ---create 2 column layout--- # ---column left--- + db_choose_layout = get_db_choose_layout() + e_config_layout = get_e_config_layout() + f_config_layout = get_f_config_layout() + algos_layout = get_algo_layout() + other_settings_frame_layout = get_other_settings_layout() + train_fit_start_layout = get_train_fit_start_layout() + train_result_tables_layout = get_train_res_tables_layout() + train_result_frame_layout = train_res_frame_layout(train_result_tables_layout) + + confution_matrix_button_layout = [ + [sg.B("show confusion matrix", key=show_confusion_matrix_key)], + ] + + file_choose_layout = get_file_choose_layout() + predict_res_frames_layout = get_predict_res_layout() + draw_layout = get_draw_layout() + + # ---column left--- + + left_col_layout = ( + db_choose_layout + + e_config_layout + + f_config_layout + + algos_layout + + other_settings_frame_layout + + train_fit_start_layout + + train_result_frame_layout + + confution_matrix_button_layout + + file_choose_layout + + predict_res_frames_layout + + draw_layout + # + file_viewer_layout + ) + + left_column = sg.Column( + left_col_layout, + expand_x=True, + expand_y=True, + element_justification="l", + scrollable=True, + vertical_scroll_only=True, + ) + # ---column right--- + + info_layout = get_info_layout() + + # output tab + analyzer_layout = get_analyzer_layout() + + settings_layout = [ + [sg.Text("Settings")], + ] + theme_layout + about_layout = info_layout + # ---column right--- + right_column_layout = audio_viewer_layout + get_logging_viewer_layout() + + right_column = sg.Column( + right_column_layout, + expand_x=True, + expand_y=True, + element_justification="l", + scrollable=True, + vertical_scroll_only=True, + ) + + main_pane = sg.Pane( + [ + left_column, + # [sg.VerticalSeparator(pad=None)], + # column_middle_separator, + right_column, + ], + orientation="h", + expand_x=True, + expand_y=True, + k="-PANE-", + ) + main_pane_layout = [[left_column, right_column]] + + main_tab_layout = get_title_layout() + main_pane_layout + + # --top Menu bar--- + Menubar_layout = [ + [sg.MenubarCustom(menu_def, key="-MENU-", font="Courier 15", tearoff=True)] + ] + + # ---tabs--- + tabs_layout = [ + [ + sg.TabGroup( + [ + [ + sg.Tab("WelcomeUser", user_layout), + sg.Tab("MainPage", main_tab_layout), + sg.Tab("Analyzer", analyzer_layout), + sg.Tab("Settings", settings_layout), + sg.Tab("about", about_layout), + ] + ], + key="-TAB GROUP-", + expand_x=True, + expand_y=True, + ), + ] + ] + # --full layout-- + layout = Menubar_layout + tabs_layout + + # ---create window--- + window = sg.Window( + title="ccser_client", + layout=layout, + # alpha_channel=0.9, + resizable=True, + size=size, + ) + return window + +def get_user_layout(): + global userUI + userUI = UserAuthenticatorGUI() + user_layout = [ + # [sg.Text("Welcome:"),sg.Text("User",key=current_user_key)], + # [sg.Input(default_text="user name or ID",key="-USER-")], + # [sg.Input(default_text="password",key="-PASSWORD-")], + ] + userUI.create_user_layout() + + return user_layout + + +def train_res_frame_layout(train_result_tables_layout): + train_result_frame_layout = [ + [ + bt.result_frame( + title=lang["train_result_title"], + layout=train_result_tables_layout, + frame_key="train_result_frame", + ), + ] + ] + + return 
train_result_frame_layout + + + + +def get_db_choose_layout(): db_choose_layout = [ [bt.h2("Select the training database")], [sg.Combo(ava_dbs, key="train_db", default_value=emodb, enable_events=True)], [bt.h2("Select the testing database")], [sg.Combo(ava_dbs, key="test_db", default_value=emodb, enable_events=True)], - ] # shape=(-1,1) - - # [sg.Checkbox(emo) for emo in ava_emotions] - emotion_config_checboxes_layout = [ - [ - sg.Checkbox("angry", key="angry", default=True, enable_events=True), - sg.Checkbox("happy", key="happy", enable_events=True), - sg.Checkbox("neutral", key="neutral", default=True, enable_events=True), - sg.Checkbox("ps", key="ps", enable_events=True), - sg.Checkbox("sad", key="sad", default=True, enable_events=True), - sg.Checkbox("others", key="others", default=True, enable_events=True), - ] ] - e_config_layout = [ + return db_choose_layout + + +def get_theme_layout(): + theme_layout = [ [ - bt.h2( - text="choose the emotion config", - # relief=sg.RELIEF_SOLID, - # style_add='underline', - style_add="italic", - tooltip=lang["choose_emotion_config"], - ), + sg.Text( + "See how elements look under different themes by choosing a different theme here!" + ) ], [ - bt.option_frame( - title="Emotion Config chooser", layout=emotion_config_checboxes_layout + sg.Listbox( + values=sg.theme_list(), + size=bt.lb_size, + key="-THEME LISTBOX-", + enable_events=True, ) ], + [sg.Button("Set Theme")], ] - f_config_option_frame = option_frame( - title="Feature Config chooser", - layout=[ - [ - sg.Checkbox("MFCC", key="mfcc", default=True, enable_events=True), - sg.Checkbox("Mel", key="mel", enable_events=True), - sg.Checkbox("Contrast", key="contrast", enable_events=True), - # 可以考虑在这里换行 - # ], - # [ - sg.Checkbox("Chromagram", key="chroma", enable_events=True), - sg.Checkbox("Tonnetz", key="tonnetz", enable_events=True), - ], - ], - frame_key="f_config_layout", - ) - f_config_layout = [ - [bt.h2(lang["choose_feature_config"])], - [f_config_option_frame], - ] - # ---column right--- - algos = get_algos_elements_list() - len_of_algos = len(algos) - algo_frame = option_frame( - title="Algorithms chooser", - layout=[ - algos[: len_of_algos // 2], - algos[len_of_algos // 2 :], - ], - frame_key="algo_border_frame", - ) - algos_layout = [ - [bt.h2(lang["choose_algorithm"])], - [algo_frame], - ] + return theme_layout + +def get_file_choose_layout(): file_choose_layout = [ [bt.h2(lang["choose_audio"])], [ @@ -226,7 +351,12 @@ def make_window(theme=None, size=None): ), ], ] - train_result_table_layout = [ + + return file_choose_layout + + +def get_train_res_tables_layout(): + train_result_tables_layout = [ [ sg.Table( values=[["pending"] * 2], @@ -252,102 +382,122 @@ def make_window(theme=None, size=None): visible=False, ) ], - # [ - # sg.Table( - # values=[["pending"] * 2], - # headings=["confusion_matrix", "accu_score"], - # justification="center", - # font="Arial 16", - # expand_x=True, - # key=train_cv_result_table_key, - # num_rows=1, # 默认表格会有一定的高度,这里设置为1,避免出现空白 - # hide_vertical_scroll=True, - # visible=False, - # ) - # ] - ] - confution_matrix_button_layout=[ - [sg.B("show confusion matrix", key=show_confusion_matrix_key)], ] - cv_mode_layout = [ + return train_result_tables_layout + + +def get_title_layout(): + return [ [ - sg.T("cv mode:"), - sg.Radio( - "k-fold", - group_id="cv_mode", - key=kfold_radio_key, - default=False, - enable_events=True, - ), - sg.Radio( - "shuffle-split", - group_id="cv_mode", - key=shuffle_split_radio_key, - default=False, - enable_events=True, - ), - 
sg.Radio( - "stratified-shuffle-split", - group_id="cv_mode", - key=stratified_shuffle_split_radio_key, - default=True, + sg.Text( + # "Welcome to experience CCSER Client!", + lang["welcome_title"], + size=bt.welcom_title_size, + justification="center", + font=("Comic", 50), + relief=sg.RELIEF_RIDGE, + k="-TEXT HEADING-", enable_events=True, - ), - ] - ] - cv_param_settings_layout = [ - [ - sg.T("cv splits:"), - sg.Slider( - range=(1, 10), - key=cv_splits_slider_key, - orientation="h", expand_x=True, - default_value=5, - enable_events=True, - ), + ) ], - *cv_mode_layout, ] - other_settings_frame_layout = [ + + +def get_draw_layout(): + draw_layout = [ + [bt.h2(lang["draw_diagram"], tooltip=lang["draw_diagram_detail"])], + # [sg.Input(), sg.FileBrowse()], [ - bt.option_frame( - title="Other Parameter Settings", layout=cv_param_settings_layout - ), + sg.Checkbox("waveForm", key="wave_form"), + sg.Checkbox("FreqGraph", key="freq_graph"), + sg.Checkbox("MelFreqGraph", key="mel_freq_graph"), ], + # todo reset + [sg.Button("draw_graph"), sg.Button("Reset", key="reset graph Checkbox")], ] - train_fit_button_layout = [ + + return draw_layout + + +def get_logging_viewer_layout(): + return [ + [sg.Text("dev logging tool:")], + [sg.HorizontalSeparator(color=bt.seperator_color)], [ - # sg.Button('start train'), - sg.RButton( - "start train", - image_data=image_file_to_bytes(red_pill64, (100, 50)), - button_color=("white", "white"), - # button_color=wcolor, - font="Any 15", - pad=(0, 0), - key="start train", - ), - sg.pin(sg.T("current model:", key=current_model_tip_key, visible=False)), - sg.T("", key=current_model_key), - ] + sg.Multiline( + size=bt.ml_size, + write_only=True, + # expand_x=True, + expand_y=True, + key=ML_KEY, + reroute_stdout=True, + echo_stdout_stderr=True, + reroute_cprint=True, + auto_refresh=True, + autoscroll=True, + ) + ], ] - train_result_frame_layout = [ + +def get_analyzer_layout(): + analyzer_layout = ( [ - bt.result_frame( - title=lang["train_result_title"], - layout=train_result_table_layout, - frame_key="train_result_frame", - ), + [bt.h2("Anything printed will display here!")], + [ + sg.Multiline( + size=bt.ml_size, + # expand_x=True, + # expand_y=True, + write_only=True, + reroute_stdout=True, + reroute_stderr=True, + echo_stdout_stderr=True, + autoscroll=True, + auto_refresh=True, + ) + ], ] + + dv.layout + + q.query_layout + ) + + return analyzer_layout + + +def get_info_layout(): + info_layout = [ + [sg.T("CCSER Client By Cxxu_zjgsu " + __version__)], + [ + sg.T( + "PySimpleGUI ver " + + sg.version.split(" ")[0] + + " tkinter ver " + + sg.tclversion_detailed, + font="Default 8", + pad=(0, 0), + ) + ], + [sg.T("Python ver " + sys.version, font="Default 8", pad=(0, 0))], + [ + sg.T( + "Interpreter " + sg.execute_py_get_interpreter(), + font="Default 8", + pad=(0, 0), + ) + ], ] - # bt.res_content_layout(no_result_yet,res_key='train_result') + + return info_layout + + +def get_predict_res_layout(): predict_res_layout = bt.res_content_layout( text=no_result_yet, justification="c", key=predict_res_key ) - # predict_proba_tips_layout = [[sg.Text("pending", key=predict_proba_tips_key)]] + # 默认不显示predict_proba的不可用说明 predict_proba_tips_layout = bt.normal_content_layout( text="pending", key=predict_proba_tips_key, visible=False @@ -388,209 +538,137 @@ def make_window(theme=None, size=None): # ), # ], ] - draw_layout = [ - [bt.h2(lang["draw_diagram"], tooltip=lang["draw_diagram_detail"])], - # [sg.Input(), sg.FileBrowse()], + + return predict_res_frames_layout + + +def 
get_other_settings_layout(): + cv_mode_layout = [ [ - sg.Checkbox("waveForm", key="wave_form"), - sg.Checkbox("FreqGraph", key="freq_graph"), - sg.Checkbox("MelFreqGraph", key="mel_freq_graph"), - ], - # todo reset - [sg.Button("draw_graph"), sg.Button("Reset", key="reset graph Checkbox")], + sg.T("cv mode:"), + sg.Radio( + "k-fold", + group_id="cv_mode", + key=kfold_radio_key, + default=False, + enable_events=True, + ), + sg.Radio( + "shuffle-split", + group_id="cv_mode", + key=shuffle_split_radio_key, + default=False, + enable_events=True, + ), + sg.Radio( + "stratified-shuffle-split", + group_id="cv_mode", + key=stratified_shuffle_split_radio_key, + default=True, + enable_events=True, + ), + ] ] - - info_layout = [ - [sg.T("CCSER Client By Cxxu_zjgsu " + __version__)], + cv_param_settings_layout = [ [ - sg.T( - "PySimpleGUI ver " - + sg.version.split(" ")[0] - + " tkinter ver " - + sg.tclversion_detailed, - font="Default 8", - pad=(0, 0), - ) + sg.T("cv splits:"), + sg.Slider( + range=(1, 10), + key=cv_splits_slider_key, + orientation="h", + expand_x=True, + default_value=5, + enable_events=True, + ), ], - [sg.T("Python ver " + sys.version, font="Default 8", pad=(0, 0))], + *cv_mode_layout, + ] + other_settings_frame_layout = [ [ - sg.T( - "Interpreter " + sg.execute_py_get_interpreter(), - font="Default 8", - pad=(0, 0), - ) + bt.option_frame( + title="Other Parameter Settings", layout=cv_param_settings_layout + ), ], ] - # dbs_introduce_layout=[ - # [sg.Text("数据库选择")], - # ] - # output tab - analyzer_layout = ( - [ - [bt.h2("Anything printed will display here!")], - [ - sg.Multiline( - size=bt.ml_size, - # expand_x=True, - # expand_y=True, - write_only=True, - reroute_stdout=True, - reroute_stderr=True, - echo_stdout_stderr=True, - autoscroll=True, - auto_refresh=True, - ) - ], - ] - + dv.layout - + q.query_layout - ) - settings_layout = [ - [sg.Text("Settings")], - ] + theme_layout - about_layout = info_layout - # ---column left--- + return other_settings_frame_layout - left_col_layout = ( - db_choose_layout - + e_config_layout - + f_config_layout - + algos_layout - + other_settings_frame_layout - + train_fit_button_layout - + train_result_frame_layout - + confution_matrix_button_layout - + file_choose_layout - + predict_res_frames_layout - + draw_layout - # + file_viewer_layout - ) - right_column_layout = ( - # [ - # [ - # sg.Button("open folder"), - # sg.Text("", key="speech_folder_path"), - # ], - # ] - # + - audio_viewer_layout - + [ - [sg.Text("dev logging tool:")], - [sg.HorizontalSeparator(color=bt.seperator_color)], - [ - sg.Multiline( - size=bt.ml_size, - write_only=True, - # expand_x=True, - expand_y=True, - key=ML_KEY, - reroute_stdout=True, - echo_stdout_stderr=True, - reroute_cprint=True, - auto_refresh=True, - autoscroll=True, - ) - ], - ] - ) - left_column = sg.Column( - left_col_layout, - expand_x=True, - expand_y=True, - element_justification="l", - scrollable=True, - vertical_scroll_only=True, - ) - # column_middle_separator = sg.Column([[sg.VerticalSeparator()]], background_color='yellow') - right_column = sg.Column( - right_column_layout, - expand_x=True, - expand_y=True, - element_justification="l", - scrollable=True, - vertical_scroll_only=True, - ) +def get_algo_layout(): + algos = get_algos_elements_list() + len_of_algos = len(algos) - main_pane = sg.Pane( - [ - left_column, - # [sg.VerticalSeparator(pad=None)], - # column_middle_separator, - right_column, + algo_frame = option_frame( + title="Algorithms chooser", + layout=[ + algos[: len_of_algos // 2], + 
algos[len_of_algos // 2 :],
         ],
-        orientation="h",
-        expand_x=True,
-        expand_y=True,
-        k="-PANE-",
+        frame_key="algo_border_frame",
     )
-    main_pane_layout = [[left_column, right_column]]
-    global userUI
-    userUI = UserAuthenticatorGUI()
-    user_layout = [
-        # [sg.Text("Welcome:"),sg.Text("User",key=current_user_key)],
-        # [sg.Input(default_text="user name or ID",key="-USER-")],
-        # [sg.Input(default_text="password",key="-PASSWORD-")],
-    ] + userUI.create_user_layout()
+    algos_layout = [
+        [bt.h2(lang["choose_algorithm"])],
+        [algo_frame],
+    ]
 
-    main_tab_layout = [
-        [
-            sg.Text(
-                # "Welcome to experience CCSER Client!",
-                lang["welcome_title"],
-                size=bt.welcom_title_size,
-                justification="center",
-                font=("Comic", 50),
-                relief=sg.RELIEF_RIDGE,
-                k="-TEXT HEADING-",
-                enable_events=True,
-                expand_x=True,
-            )
-        ],
-        # [main_pane],
-        # [sg.B(ufg.close)],
-    ] + main_pane_layout
-    # main_page_layout = main_tab_layout
+    return algos_layout
 
-    # ----full layout----
-    # --top Menu bar---
-    Menubar_layout = [
-        [sg.MenubarCustom(menu_def, key="-MENU-", font="Courier 15", tearoff=True)]
+
+def get_e_config_layout():
+    emotion_config_checboxes_layout = [
+        [
+            sg.Checkbox("angry", key="angry", default=True, enable_events=True),
+            sg.Checkbox("happy", key="happy", enable_events=True),
+            sg.Checkbox("neutral", key="neutral", default=True, enable_events=True),
+            sg.Checkbox("ps", key="ps", enable_events=True),
+            sg.Checkbox("sad", key="sad", default=True, enable_events=True),
+            sg.Checkbox("others", key="others", default=True, enable_events=True),
+        ]
     ]
-    # ---tabs---
-    tabs_layout = [
+    e_config_layout = [
         [
-            sg.TabGroup(
-                [
-                    [
-                        sg.Tab("WelcomeUser", user_layout),
-                        sg.Tab("MainPage", main_tab_layout),
-                        sg.Tab("Analyzer", analyzer_layout),
-                        sg.Tab("Settings", settings_layout),
-                        sg.Tab("about", about_layout),
-                    ]
-                ],
-                key="-TAB GROUP-",
-                expand_x=True,
-                expand_y=True,
+            bt.h2(
+                text="choose the emotion config",
+                # relief=sg.RELIEF_SOLID,
+                # style_add='underline',
+                style_add="italic",
+                tooltip=lang["choose_emotion_config"],
             ),
-        ]
+        ],
+        [
+            bt.option_frame(
+                title="Emotion Config chooser", layout=emotion_config_checboxes_layout
+            )
+        ],
     ]
-    layout = Menubar_layout + tabs_layout
-    # layout=theme_layout
 
+    return e_config_layout
 
-    # ---create window---
-    window = sg.Window(
-        title="ccser_client",
-        layout=layout,
-        # alpha_channel=0.9,
-        resizable=True,
-        size=size,
+
+def get_f_config_layout():
+    f_config_option_frame = option_frame(
+        title="Feature Config chooser",
+        layout=[
+            [
+                sg.Checkbox("MFCC", key="mfcc", default=True, enable_events=True),
+                sg.Checkbox("Mel", key="mel", enable_events=True),
+                sg.Checkbox("Contrast", key="contrast", enable_events=True),
+                # a line break could be inserted here
+                # ],
+                # [
+                sg.Checkbox("Chromagram", key="chroma", enable_events=True),
+                sg.Checkbox("Tonnetz", key="tonnetz", enable_events=True),
+            ],
+        ],
+        frame_key="f_config_layout",
     )
-    return window
+    f_config_layout = [
+        [bt.h2(lang["choose_feature_config"])],
+        [f_config_option_frame],
+    ]
+
+    return f_config_layout
 
 
 def initial(values=None, verbose=1):
@@ -630,7 +708,7 @@ def selected_features(values):
     return f_config
 
 
-def selected_radio_in(values,ava_list=ava_algorithms):
+def selected_radio_in(values, ava_list=ava_algorithms):
     # global algorithm
     # res=""
     for algo_name in ava_list:
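Reviewer note: selected_radio_in() scans a radio group by looking each candidate name up in the PySimpleGUI values mapping and returning the first checked one; refresh_trained_view() later reuses it with ava_cv_modes. A minimal self-contained sketch of the pattern, with a plain dict standing in for the window's values (the function name here is hypothetical):

def selected_radio_in_sketch(values, ava_list):
    # radios sharing one group_id are mutually exclusive, so the first hit wins
    for name in ava_list:
        if values.get(name):
            return name
    return None

assert selected_radio_in_sketch(
    {"k-fold": False, "shuffle-split": True}, ["k-fold", "shuffle-split"]
) == "shuffle-split"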
@@ -722,7 +800,7 @@ def recognize_auido(
 
     data = list(predict_proba.items())
     # print(data,"@{data}")
-    data = [[emo, round(proba,bt.score_ndigits)] for emo, proba in data]
+    data = [[emo, round(proba, bt.score_ndigits)] for emo, proba in data]
     # hide the proba tip
     window[predict_proba_tips_key].update(visible=False)
     # update the contents of the proba table
@@ -732,7 +810,7 @@
     ppt.update(
         values=data,
         num_rows=4,
-        #display_row_numbers=True
+        # display_row_numbers=True
         visible=True,
     )
     # window[]
@@ -793,6 +871,7 @@ def start_train_model(
 
     if algorithm == "RNN":
         from recognizer.deep import DeepEmotionRecognizer
+
         der = DeepEmotionRecognizer(
             train_dbs=train_db, test_dbs=test_db, e_config=e_config, f_config=f_config
         )
@@ -961,11 +1040,12 @@ def main(verbose=1):
             content = [logo, db_introduction]
             res = "\n".join(content)
             sg.popup_scrolled(res, size=(150, 100), title="Introduction")
-        elif event==show_confusion_matrix_key:
-            from SG.demo_pandas_table import TablePandas
-            cm=er.confusion_matrix()
-            tp=TablePandas(df=cm)
-            tp.show_confution_matrix_window()
+        elif event == show_confusion_matrix_key:
+            from SG.demo_pandas_table import TablePandas
+
+            cm = er.confusion_matrix()
+            tp = TablePandas(df=cm)
+            tp.show_confution_matrix_window()
         else:
             # runs its own event loop, so it can be called directly
             userUI.run_module(event, values, window=window, verbose=1)
@@ -1006,10 +1086,10 @@ def refresh_trained_view(verbose, window, er, values):
     window[current_model_key].update(value=er.model)
     n_splits = values[cv_splits_slider_key]
    # cv_mode=values[kfold_radio_key]
-    cv_mode=selected_radio_in(values,ava_list=ava_cv_modes)
+    cv_mode = selected_radio_in(values, ava_list=ava_cv_modes)
     # print(cv_mode,"@{cv_mode}🎈")
 
-    fold_scores = er.model_cv_score(mean_only=False, n_splits=n_splits,cv_mode=cv_mode)
+    fold_scores = er.model_cv_score(mean_only=False, n_splits=n_splits, cv_mode=cv_mode)
     folds = len(fold_scores)
     mean_score = np.mean(fold_scores)
     fold_scores_rows = [
@@ -1023,7 +1103,6 @@
     tcrt(values=fold_scores_rows, num_rows=folds + 1, visible=True)
 
 
-
 def open_folder_event(window):
     print("[LOG] Clicked Open Folder!")
     folder_or_file = sg.popup_get_folder(
diff --git a/SG/constants/__pycache__/beauty.cpython-310.pyc b/SG/constants/__pycache__/beauty.cpython-310.pyc
index 90338821ec6071bc2ca7c4d0ceb8e83d2769e20b..a330cc235ff02f8bb5b217f30a13d779803bf2dc 100644
Binary files a/SG/constants/__pycache__/beauty.cpython-310.pyc and b/SG/constants/__pycache__/beauty.cpython-310.pyc differ
diff --git a/audio/__pycache__/core.cpython-39.pyc b/audio/__pycache__/core.cpython-39.pyc
index 7421362b8f0051ddb20a819df8f127a0e1ca1f71..eecedba13aa9babc844724f39bea087c82a17e83 100644
Binary files a/audio/__pycache__/core.cpython-39.pyc and b/audio/__pycache__/core.cpython-39.pyc differ
diff --git a/audio/__pycache__/extractor.cpython-39.pyc b/audio/__pycache__/extractor.cpython-39.pyc
index 004d0726b3cc7dc7f591229894c269f4c97308e4..0a1ec128f4f5efd1e66d5235972eb7b55f75318e 100644
Binary files a/audio/__pycache__/extractor.cpython-39.pyc and b/audio/__pycache__/extractor.cpython-39.pyc differ
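Reviewer note: refresh_trained_view() above forwards cv_mode and n_splits into er.model_cv_score(), whose body is not part of this diff. A plausible minimal sketch of the dispatch that call implies, built on scikit-learn splitters; the mode keys and the signature are assumptions, not the project's actual API:

import numpy as np
from sklearn.model_selection import (KFold, ShuffleSplit,
                                     StratifiedShuffleSplit, cross_val_score)
from sklearn.svm import SVC

def model_cv_score_sketch(model, X, y, n_splits=5, cv_mode="kfold"):
    splitters = {
        "kfold": KFold(n_splits=n_splits, shuffle=True, random_state=0),
        "shuffle_split": ShuffleSplit(n_splits=n_splits, test_size=0.2, random_state=0),
        "stratified_shuffle_split": StratifiedShuffleSplit(
            n_splits=n_splits, test_size=0.2, random_state=0
        ),
    }
    # one accuracy per fold, mirroring the fold_scores rows shown in the GUI table
    return cross_val_score(model, X, y, cv=splitters[cv_mode])

X, y = np.random.rand(60, 8), np.repeat([0, 1, 2], 20)
print(model_cv_score_sketch(SVC(), X, y, cv_mode="stratified_shuffle_split"))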
diff --git a/audio/core.py b/audio/core.py
index e39745695b214280f3cdb0d6696311d34c0d1dba..9ff1515e703d9471fe66b07d5f64e377ba347f96 100644
--- a/audio/core.py
+++ b/audio/core.py
@@ -1,15 +1,18 @@
 ##
 import os
+from pathlib import Path
 
 import librosa
 import numpy as np
 import soundfile
 from joblib import load
+from sklearn.preprocessing import StandardScaler
 
 from audio.converter import convert_audio
-from config.EF import MCM, ava_features
-from config.MetaPath import bclf, brgr,project_dir
+from config.EF import MCM, ava_features, f_config_def
+from config.MetaPath import bclf, brgr, project_dir, speech_dbs_dir
+
 
 def get_used_keys(config_dict):
     """Collect the keys whose values are True in the given dict and return them as a list
@@ -60,7 +63,7 @@
     return "_".join([str(dropout) for _ in range(n_layers)])
 
 
-def extract_feature(audio_file_name, f_config):
+def extract_feature_of_audio(audio_file_name, f_config):
     """
     Extracts audio features from an audio file. The function supports several different features,
     including MFCC, Chroma, MEL Spectrogram Frequency, Contrast and Tonnetz.
@@ -70,6 +73,16 @@ def extract_feature(audio_file_name, f_config):
     In its implementation, the function first checks whether the audio file is properly formatted; if not, it converts it to a 16000 Hz sample rate and a mono channel.
     It then uses the Librosa library to extract the selected features, concatenates them into one numpy array, and returns that array.
 
+    This code uses Python's with statement together with the SoundFile class from the soundfile library.
+    It opens the audio file named file_name and hands it to the code block as the sound_file object,
+    so that the file can be operated on inside the block.
+    The advantage of the with statement is that the file handle is closed automatically when the block ends; no manual close is needed.
+    The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
+    for reading and manipulating the audio file. In this function we use it to read the file and obtain its sample rate, data type and so on.
+    At the end of the block, the with statement closes the sound_file object automatically, releasing all resources tied to the file.
+    Note that when opening audio files with the soundfile library, the with statement ensures the handle is closed properly after use.
+    This avoids resource leaks and file-handle exhaustion when processing large numbers of audio files.
+
     params:
 
     - Extract feature from audio file `file_name`
@@ -98,20 +111,13 @@
     try:
         print(audio_file_name,"@{audio_file_name}")
        # consider switching the working directory to the project root so relative paths resolve
-        os.chdir(project_dir)
-        # sys.exist()
+        # os.chdir(project_dir)
+        p = Path(audio_file_name)
+        if not p.is_file():
+            raise FileNotFoundError(f"{p.absolute().resolve()} does not exist")
         with soundfile.SoundFile(audio_file_name) as sound_file:
             # opened successfully
             pass
-            # This line of code uses Python's with statement together with the SoundFile class from the soundfile library.
-            # It opens the audio file named file_name and hands it to the code block as the sound_file object,
-            # so that the file can be operated on inside the block.
-            # The advantage of the with statement is that the file handle is closed automatically when the block ends; no manual close is needed.
-            # The sound_file object created by soundfile.SoundFile() is a context manager that provides methods and attributes
-            # for reading and manipulating the audio file. In this function we use it to read the file and obtain its sample rate, data type and so on.
-            # At the end of the block, the with statement closes the sound_file object automatically, releasing all resources tied to the file.
-            # Note that when opening audio files with the soundfile library, the with statement ensures the handle is closed properly after use.
-            # This avoids resource leaks and file-handle exhaustion when processing large numbers of audio files.
     except RuntimeError:
         # not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
         # get the basename
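Reviewer note: the new guard above raises an explicit FileNotFoundError instead of letting soundfile fail later with a less informative RuntimeError, and the with block both probes the file and guarantees the handle is closed. A minimal sketch of that probe (the helper name is hypothetical):

from pathlib import Path

import soundfile

def probe_audio(audio_file_name):
    p = Path(audio_file_name)
    if not p.is_file():
        raise FileNotFoundError(f"{p.resolve()} does not exist")
    with soundfile.SoundFile(p) as sound_file:  # closed automatically on exit
        return sound_file.samplerate, sound_file.channels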
@@ -186,21 +192,8 @@ def extract_features_handler(new_filename, f_config):
         the extraction result (shape=(n,))
     """
     with soundfile.SoundFile(new_filename) as sound_file:
-        X = sound_file.read(dtype="float32")
-        sample_rate = sound_file.samplerate
-        # print(f'{sample_rate=}')
-        # extract the requested emotion features according to the config
-        # for the chroma and contrast features, compute the STFT magnitude matrix (modulus of the complex values)
-        stft = []
-        from config.EF import chroma, contrast, mel, mfcc, tonnetz
-        global extractors_debug
-        extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
-        extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
+        X, sample_rate, extractors1, extractors2, stft = pre_calculate(f_config, sound_file)
 
-        extractors_debug=extractors1,extractors2
-
-        if chroma in f_config or contrast in f_config:
-            stft = stft_prepare(X)
     # create an empty array to store the features to be extracted
     result = np.array([])
     f_res=None
@@ -213,8 +206,25 @@
             f_res=extractors2[f](sample_rate, stft)
         # print(f_res.shape,f,"@{f_res.shape}")#type:ignore
         result = np.hstack((result, f_res))
+
+    # print(result.shape)
     return result
 
+def pre_calculate(f_config, sound_file):
+    X = sound_file.read(dtype="float32")
+    sample_rate = sound_file.samplerate
+    # print(f'{sample_rate=}')
+    # extract the requested emotion features according to the config
+    # for the chroma and contrast features, compute the STFT magnitude matrix (modulus of the complex values)
+    from config.EF import chroma, contrast, mel, mfcc, tonnetz
+    extractors1 = {mfcc: mfcc_extract, mel: mel_extract, tonnetz: tonnetz_extract}
+    extractors2 = {chroma: chroma_extract, contrast: contrast_extract}
+
+    stft = []
+    if chroma in f_config or contrast in f_config:
+        stft = stft_prepare(X)
+    return X, sample_rate, extractors1, extractors2, stft
+
 
 def stft_prepare(X):
     # mfcc=True if mfcc in f_config else False
@@ -332,13 +342,23 @@ def best_estimators(classification_task=True,fast=True):
     return res
 
 def test1():
-    from config.EF import f_config_def
-    audio_path= "../data/emodb/wav/03a01Fa.wav"
-    features = extract_feature(audio_path, f_config_def)
+
+
+    audio_path = speech_dbs_dir / "emodb/wav/03a01Fa.wav"
+    print(os.path.exists(audio_path))
+
+    features = extract_feature_of_audio(audio_path, f_config_def)
     return features
 
 if __name__ == "__main__":
-    audio_config = MCM
+    pass
     # res = get_audio_config(audio_config)
     # print(res)
-    res=best_estimators()
\ No newline at end of file
+    # res=best_estimators()
+
+
+    audio_path = speech_dbs_dir / "emodb/wav/03a01Fa.wav"
+    print(os.path.exists(audio_path))
+
+    features = extract_feature_of_audio(audio_path, f_config_def)
+
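Reviewer note: pre_calculate() separates the extractors into two families, one fed (X, sample_rate) and one fed (sample_rate, stft), and extract_features_handler() hstacks their outputs into one 1-D vector. A condensed sketch of that flow using real librosa calls; the two inline extractors are assumptions modeled on the mfcc_extract/chroma_extract names:

import librosa
import numpy as np
import soundfile

def extract_sketch(path, f_config=("mfcc", "chroma")):
    with soundfile.SoundFile(path) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate
    # the STFT magnitude matrix is only needed by the spectral family
    stft = np.abs(librosa.stft(X)) if "chroma" in f_config else None
    result = np.array([])
    if "mfcc" in f_config:  # time-domain family: takes (X, sample_rate)
        mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfcc))
    if "chroma" in f_config:  # spectral family: takes (sample_rate, stft)
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    return result  # shape (52,) for mfcc+chroma: 40 MFCC means + 12 chroma bins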
diff --git a/audio/extractor.py b/audio/extractor.py
index 3e94de1c44baa76f6016e95f06e199ddfe2ffcd6..bd52364888d3945089677f0668f52e7f83fdbb5b 100644
--- a/audio/extractor.py
+++ b/audio/extractor.py
@@ -16,7 +16,7 @@ from config.MetaPath import (
     validate_partition,
     ava_dbs,
 )
-from audio.core import extract_feature
+from audio.core import extract_feature_of_audio
 
 # from pathlib import Path
 Series = pd.Series
@@ -294,11 +294,12 @@ class AudioExtractor:
             print([id(attr) for attr in attributes])
         return attributes
 
-    def _extract_feature_in_meta(self, partition="", meta_path=""):
+    def _extract_feature_in_meta(self, partition="", meta_path="", verbose=1):
         """Extract the features of the audio files listed in the meta files
         Only a single extraction pass is performed here
 
-        The e_config field in the feature-matrix file name tentatively comes from self.e_config; if so, it may be inconsistent with the emotion field in the meta_path file.
+        The e_config field in the feature-matrix file name tentatively comes from self.e_config.
+        If so, it may be inconsistent with the emotion field in the meta_path file.
 
         Parameters
         ----------
@@ -308,31 +309,15 @@ class AudioExtractor:
             marks whether the extracted files come from the training set or the test (validation) set
         """
         # check whether the dataset has been filtered and split by the configured emotions:
-        # if(not os.path.exists(meta_path)):
-        #     create_csv_by_metaname(meta_file=meta_path)
-        # self.load_metadata(meta_path)
-
+
         audio_paths, emotions = self.load_metadata(meta_path)
         # save the computed results as object attributes
-
         self.audio_paths = audio_paths
         self.emotions = emotions
-        # try to compute the corpus name (field)
-        meta_name = os.path.basename(meta_path)
-        meta_name,ext=os.path.splitext(meta_name)
-        meta_fields = meta_name.split("_")
-        db = meta_fields[1]
-        # print(f"{meta_path=}@")
-        # print(f"{db=}@")
-        db = db if db in ava_dbs else ""
-        # compute the emotion field
-        emotions_first_letters=meta_fields[-1]
-        origin_efls = get_first_letters(self.e_config)
-        if emotions_first_letters != origin_efls:
-            raise ValueError(
-                f"{emotions_first_letters} is not inconsistant with {self.e_config}"
-            )
+        # try to compute the corpus name and the emotion-config name
+        db = self.fields_parse(meta_path)
+
         if not os.path.isdir(self.features_dir):
             os.mkdir(self.features_dir)
@@ -352,12 +337,9 @@ class AudioExtractor:
             self.features_dir,
             features_file_name,
         )
-
-        print(f"checking whether the feature file {features_file_path} exists...")
-        print(f"{self.e_config=}")
-
-        # if self.e_config == HNS:
-        #     raise ValueError(f"{self.e_config=}")
+        if verbose:
+            print(f"checking whether the feature file {features_file_path} exists...")
+            print(f"{self.e_config=}")
 
         ffp = os.path.isfile(features_file_path)
         if ffp:
@@ -370,11 +352,41 @@ class AudioExtractor:
             if self.verbose:
                 print("the npy file does not exist; trying to create it...")
             # if the features have not been extracted yet, extract them here and save the result so it can be reused directly next time
-            features = self.features_save(partition, audio_paths, features_file_path)
+            features = self.features_extract_save(partition, audio_paths, features_file_path)
 
         return features, audio_paths, emotions
 
-    def features_save(self, partition, audio_paths, features_file_path):
+    def fields_parse(self, meta_path):
+
+        # compute the corpus field name
+        meta_fields, db = self.db_field_parse(meta_path)
+
+        # compute the emotion field and validate it
+        self.validate_emotion_config_consistence(meta_fields)
+
+        return db
+
+    def db_field_parse(self, meta_path):
+        meta_name = os.path.basename(meta_path)
+        meta_name, ext = os.path.splitext(meta_name)
+        meta_fields = meta_name.split("_")
+        db = meta_fields[1]
+        # print(f"{meta_path=}@")
+        # print(f"{db=}@")
+
+        db = db if db in ava_dbs else ""
+        return meta_fields, db
+
+    def validate_emotion_config_consistence(self, meta_fields):
+        emotions_first_letters = meta_fields[-1]
+        origin_efls = get_first_letters(self.e_config)
+        # check that the emotion config is consistent
+        if emotions_first_letters != origin_efls:
+            raise ValueError(
+                f"{emotions_first_letters} is inconsistent with {self.e_config}"
+            )
+
+    def features_extract_save(self, partition, audio_paths, features_file_path):
         """Persist the extracted features (an ndarray) to an npy file
 
         tqdm is used to show a visible progress bar during feature extraction
@@ -406,7 +418,7 @@ class AudioExtractor:
                 print(f"extracting the features of file {cnt}..")
             # call extract_feature_of_audio from audio.core to perform the feature extraction
             f_config = self.f_config
-            feature = extract_feature(audio_file, f_config=f_config)
+            feature = extract_feature_of_audio(audio_file, f_config=f_config)
             if self.feature_dimension is None:
                 # with the MCM combination (3 features) this is a 180-dim 1-D array; with 5 features it is 193-dim
                 self.feature_dimension = feature.shape[0]
diff --git a/recognizer/__pycache__/basic.cpython-39.pyc b/recognizer/__pycache__/basic.cpython-39.pyc
index a492cf571493819dbaaa604f06ee4c743142066c..09cbbe97ee4b93cdced63dd8bdc49d155cebac6b 100644
Binary files a/recognizer/__pycache__/basic.cpython-39.pyc and b/recognizer/__pycache__/basic.cpython-39.pyc differ
diff --git a/recognizer/basic.py b/recognizer/basic.py
index b38980a0e9ae0e5538b4417a8bd820c54c18ec91..4ec75ecd0f7776255aedd06cf0493e98d903e5f3 100644
--- a/recognizer/basic.py
+++ b/recognizer/basic.py
@@ -20,7 +20,7 @@ from audio.extractor import load_data_from_meta
 from config.EF import (e_config_def, f_config_def, validate_emotions)
 from config.MetaPath import (emodb, meta_paths_of_db, ravdess, savee,validate_partition,project_dir)
 import config.MetaPath as meta
-from audio.core import best_estimators, extract_feature
+from audio.core import best_estimators, extract_feature_of_audio
 
 ##
 class EmotionRecognizer:
@@ -219,7 +219,7 @@ class EmotionRecognizer:
         given an `audio_path`, this method extracts the features
         and predicts the emotion
         """
-        feature1 = extract_feature(audio_path, self.f_config)
+        feature1 = extract_feature_of_audio(audio_path, self.f_config)
         # print(feature1.shape)
         # print(feature1,"@{feature1}",feature1.shape)
         # feature2=feature1.T
@@ -248,7 +248,7 @@
         Predicts the probability of each emotion.
""" if self.classification_task: - feature = extract_feature(audio_path, self.f_config).reshape(1, -1) + feature = extract_feature_of_audio(audio_path, self.f_config).reshape(1, -1) proba = self.model.predict_proba(feature)[0] result = {} for emotion, prob in zip(self.model.classes_, proba): diff --git a/recognizer/deep.py b/recognizer/deep.py index 913a4b9f59c972671e1cd0023b52a6ac2f8f37d7..5ff7fa9947829db75e5cf67362b25a2b2cd3f541 100644 --- a/recognizer/deep.py +++ b/recognizer/deep.py @@ -22,7 +22,7 @@ from config.EF import validate_emotions from recognizer.basic import EmotionRecognizer # from ER import EmotionRecognizer from config.MetaPath import get_first_letters -from audio.core import extract_feature, get_dropout_str +from audio.core import extract_feature_of_audio, get_dropout_str class DeepEmotionRecognizer(EmotionRecognizer): @@ -337,7 +337,7 @@ class DeepEmotionRecognizer(EmotionRecognizer): print("[+] Model trained") def predict(self, audio_path): - feature = extract_feature(audio_path, **self._f_config_dict).reshape( + feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape( (1, 1, self.input_length) ) if self.classification_task: @@ -349,7 +349,7 @@ class DeepEmotionRecognizer(EmotionRecognizer): def predict_proba(self, audio_path): if self.classification_task: - feature = extract_feature(audio_path, **self._f_config_dict).reshape( + feature = extract_feature_of_audio(audio_path, **self._f_config_dict).reshape( (1, 1, self.input_length) ) proba = self.model.predict(feature)[0][0]