Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
xuchaoxin1375
SER
提交
63c5dd05
S
SER
项目概览
xuchaoxin1375
/
SER
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
S
SER
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
63c5dd05
编写于
4月 28, 2023
作者:
xuchaoxin1375
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update the make_window function in the ccser_client main file with more clear modular extraction
上级
74e45e75
变更
9
展开全部
隐藏空白更改
内联
并排
Showing
9 changed file
with
531 addition
and
420 deletion
+531
-420
SG/ccser_gui.py
SG/ccser_gui.py
+428
-349
SG/constants/__pycache__/beauty.cpython-310.pyc
SG/constants/__pycache__/beauty.cpython-310.pyc
+0
-0
audio/__pycache__/core.cpython-39.pyc
audio/__pycache__/core.cpython-39.pyc
+0
-0
audio/__pycache__/extractor.cpython-39.pyc
audio/__pycache__/extractor.cpython-39.pyc
+0
-0
audio/core.py
audio/core.py
+53
-33
audio/extractor.py
audio/extractor.py
+44
-32
recognizer/__pycache__/basic.cpython-39.pyc
recognizer/__pycache__/basic.cpython-39.pyc
+0
-0
recognizer/basic.py
recognizer/basic.py
+3
-3
recognizer/deep.py
recognizer/deep.py
+3
-3
未找到文件。
SG/ccser_gui.py
浏览文件 @
63c5dd05
此差异已折叠。
点击以展开。
SG/constants/__pycache__/beauty.cpython-310.pyc
浏览文件 @
63c5dd05
无法预览此类型文件
audio/__pycache__/core.cpython-39.pyc
浏览文件 @
63c5dd05
无法预览此类型文件
audio/__pycache__/extractor.cpython-39.pyc
浏览文件 @
63c5dd05
无法预览此类型文件
audio/core.py
浏览文件 @
63c5dd05
##
import
os
from
pathlib
import
Path
import
librosa
import
numpy
as
np
import
soundfile
from
joblib
import
load
from
sklearn.preprocessing
import
StandardScaler
from
audio.converter
import
convert_audio
from
config.EF
import
MCM
,
ava_features
from
config.MetaPath
import
bclf
,
brgr
,
project_dir
from
config.EF
import
MCM
,
ava_features
,
f_config_def
from
config.MetaPath
import
bclf
,
brgr
,
project_dir
,
speech_dbs_dir
def
get_used_keys
(
config_dict
):
"""将传入的字典中值为True的key添加到列表中并返回
...
...
@@ -60,7 +63,7 @@ def get_dropout_str(dropout, n_layers=3):
return
"_"
.
join
([
str
(
dropout
)
for
_
in
range
(
n_layers
)])
def
extract_feature
(
audio_file_name
,
f_config
):
def
extract_feature
_of_audio
(
audio_file_name
,
f_config
):
"""
用于从音频文件中提取音频特征。该函数支持提取多种不同的特征,
包括MFCC、Chroma、MEL Spectrogram Frequency、Contrast和Tonnetz。
...
...
@@ -70,6 +73,16 @@ def extract_feature(audio_file_name, f_config):
在函数的实现中,它首先检查音频文件的格式是否正确,如果不正确,则将其转换为16000采样率和单声道通道。
然后,它使用Librosa库提取所选的特征,并将它们连接成一个numpy数组,并返回该数组。
这段代码使用了Python中的with语句和soundfile库中的SoundFile类。
它的作用是打开名为file_name的音频文件,并将其作为sound_file对象传递给代码块,
以便在代码块中对该文件进行操作。
with语句的好处是,在代码块结束时,它会自动关闭文件句柄,无需手动关闭。
使用soundfile.SoundFile()函数创建的sound_file对象是一个上下文管理器,它提供了一些方法和属性,
可以用于读取和操作音频文件。在该函数中,我们使用sound_file对象读取音频文件,获取其采样率和数据类型等信息。
在代码块的最后,with语句自动关闭了sound_file对象,释放了与该文件的所有资源。
需要注意的是,在使用soundfile库打开音频文件时,我们可以使用with语句来确保文件句柄在使用完毕后被正确关闭。
这可以避免在操作大量音频文件时出现资源泄漏和文件句柄耗尽等问题。
params:
-
Extract feature from audio file `file_name`
...
...
@@ -98,20 +111,13 @@ def extract_feature(audio_file_name, f_config):
try
:
print
(
audio_file_name
,
"@{audio_file_name}"
)
#考虑将此时的工作路径切换为项目根目录,以便利用相对路径访问文件
os
.
chdir
(
project_dir
)
# sys.exist()
# os.chdir(project_dir)
p
=
Path
(
audio_file_name
)
if
p
.
is_file
()
==
False
:
raise
FileNotFoundError
(
f
"
{
p
.
absolute
().
resolve
()
}
does not exist"
)
with
soundfile
.
SoundFile
(
audio_file_name
)
as
sound_file
:
# 成功打开
pass
# 这行代码使用了Python中的with语句和soundfile库中的SoundFile类。
# 它的作用是打开名为file_name的音频文件,并将其作为sound_file对象传递给代码块,
# 以便在代码块中对该文件进行操作。
# with语句的好处是,在代码块结束时,它会自动关闭文件句柄,无需手动关闭。
# 使用soundfile.SoundFile()函数创建的sound_file对象是一个上下文管理器,它提供了一些方法和属性,
# 可以用于读取和操作音频文件。在该函数中,我们使用sound_file对象读取音频文件,获取其采样率和数据类型等信息。
# 在代码块的最后,with语句自动关闭了sound_file对象,释放了与该文件的所有资源。
# 需要注意的是,在使用soundfile库打开音频文件时,我们可以使用with语句来确保文件句柄在使用完毕后被正确关闭。
# 这可以避免在操作大量音频文件时出现资源泄漏和文件句柄耗尽等问题。
except
RuntimeError
:
# not properly formated, convert to 16000 sample rate & mono channel using ffmpeg
# get the basename
...
...
@@ -186,21 +192,8 @@ def extract_features_handler(new_filename, f_config):
提取结果(shape=(n,))
"""
with
soundfile
.
SoundFile
(
new_filename
)
as
sound_file
:
X
=
sound_file
.
read
(
dtype
=
"float32"
)
sample_rate
=
sound_file
.
samplerate
# print(f'{sample_rate=}')
# 根据参数情况,提取需要的情感特征
# 对于chroma和constrast两种特征,计算stft的幅值矩阵(复数取模,实数化)
stft
=
[]
from
config.EF
import
chroma
,
contrast
,
mel
,
mfcc
,
tonnetz
global
extractors_debug
extractors1
=
{
mfcc
:
mfcc_extract
,
mel
:
mel_extract
,
tonnetz
:
tonnetz_extract
}
extractors2
=
{
chroma
:
chroma_extract
,
contrast
:
contrast_extract
}
X
,
sample_rate
,
extractors1
,
extractors2
,
stft
=
pre_calculate
(
f_config
,
sound_file
)
extractors_debug
=
extractors1
,
extractors2
if
chroma
in
f_config
or
contrast
in
f_config
:
stft
=
stft_prepare
(
X
)
# 建立一个空数组来存储需要提取的特征
result
=
np
.
array
([])
f_res
=
None
...
...
@@ -213,8 +206,25 @@ def extract_features_handler(new_filename, f_config):
f_res
=
extractors2
[
f
](
sample_rate
,
stft
)
# print(f_res.shape,f,"@{f_res.shape}")#type:ignore
result
=
np
.
hstack
((
result
,
f_res
))
# print(result.shape)
return
result
def pre_calculate(f_config, sound_file):
    """Read the audio data and prepare everything the feature loop needs.

    Parameters
    ----------
    f_config :
        Container of the selected feature names; only membership tests
        (``chroma in f_config`` / ``contrast in f_config``) are performed on it.
    sound_file :
        An already opened audio file object exposing ``read(dtype=...)`` and
        ``samplerate`` (presumably a ``soundfile.SoundFile`` -- confirm against
        the caller).

    Returns
    -------
    tuple
        ``(X, sample_rate, extractors1, extractors2, stft)`` where

        * ``X`` is the data returned by ``sound_file.read(dtype="float32")``,
        * ``sample_rate`` is ``sound_file.samplerate``,
        * ``extractors1`` maps the mfcc / mel / tonnetz feature names to their
          extractor functions,
        * ``extractors2`` maps the chroma / contrast feature names to their
          extractor functions,
        * ``stft`` is ``stft_prepare(X)`` when chroma or contrast is requested,
          otherwise an empty list.
    """
    samples = sound_file.read(dtype="float32")
    rate = sound_file.samplerate

    # The feature-name constants are imported lazily, exactly as before.
    from config.EF import chroma, contrast, mel, mfcc, tonnetz

    # Dispatch tables: feature name -> extractor function.
    extractors1 = {
        mfcc: mfcc_extract,
        mel: mel_extract,
        tonnetz: tonnetz_extract,
    }
    extractors2 = {
        chroma: chroma_extract,
        contrast: contrast_extract,
    }

    # Only chroma and contrast need the STFT (per the original note: the
    # magnitude matrix of the STFT), so it is computed on demand; an empty
    # list is the placeholder otherwise.
    needs_stft = chroma in f_config or contrast in f_config
    stft = stft_prepare(samples) if needs_stft else []

    return samples, rate, extractors1, extractors2, stft
def
stft_prepare
(
X
):
# mfcc=True if mfcc in f_config else False
...
...
@@ -332,13 +342,23 @@ def best_estimators(classification_task=True,fast=True):
return
res
def
test1
():
from
config.EF
import
f_config_def
audio_path
=
"../data/emodb/wav/03a01Fa.wav"
features
=
extract_feature
(
audio_path
,
f_config_def
)
audio_path
=
speech_dbs_dir
/
"emodb/wav/03a01Fa.wav"
print
(
os
.
path
.
exists
(
audio_path
))
features
=
extract_feature_of_audio
(
audio_path
,
f_config_def
)
return
features
if
__name__
==
"__main__"
:
audio_config
=
MCM
pass
# res = get_audio_config(audio_config)
# print(res)
res
=
best_estimators
()
\ No newline at end of file
# res=best_estimators()
audio_path
=
speech_dbs_dir
/
"emodb/wav/03a01Fa.wav"
print
(
os
.
path
.
exists
(
audio_path
))
features
=
extract_feature_of_audio
(
audio_path
,
f_config_def
)
audio/extractor.py
浏览文件 @
63c5dd05
...
...
@@ -16,7 +16,7 @@ from config.MetaPath import (
validate_partition
,
ava_dbs
,
)
from
audio.core
import
extract_feature
from
audio.core
import
extract_feature
_of_audio
# from pathlib import Path
Series
=
pd
.
Series
...
...
@@ -294,11 +294,12 @@ class AudioExtractor:
print
([
id
(
attr
)
for
attr
in
attributes
])
return
attributes
def
_extract_feature_in_meta
(
self
,
partition
=
""
,
meta_path
=
""
):
def
_extract_feature_in_meta
(
self
,
partition
=
""
,
meta_path
=
""
,
verbose
=
1
):
"""根据meta_files提取相应语音文件的特征
这里仅完成单次提取
矩阵文件名中的e_config字段暂定为self.e_config,如果是这样,可能会和meta_path文件中的情感字段出现不一致的情况.
矩阵文件名中的e_config字段暂定为self.e_config
如果是这样,可能会和meta_path文件中的情感字段出现不一致的情况.
Parameters
----------
...
...
@@ -308,31 +309,15 @@ class AudioExtractor:
标记被提取文件是来自训练集还是测试集(验证集)
"""
# 检查数据集是否按照配置的情感进行筛选和划分:
# if(not os.path.exists(meta_path)):
# create_csv_by_metaname(meta_file=meta_path)
# self.load_metadata(meta_path)
audio_paths
,
emotions
=
self
.
load_metadata
(
meta_path
)
# 将计算结果保存为对象属性
self
.
audio_paths
=
audio_paths
self
.
emotions
=
emotions
# 尝试计算语料库的名字(字段)
meta_name
=
os
.
path
.
basename
(
meta_path
)
meta_name
,
ext
=
os
.
path
.
splitext
(
meta_name
)
meta_fields
=
meta_name
.
split
(
"_"
)
db
=
meta_fields
[
1
]
# print(f"{meta_path=}@")
# print(f"{db=}@")
db
=
db
if
db
in
ava_dbs
else
""
#计算情感字段
emotions_first_letters
=
meta_fields
[
-
1
]
origin_efls
=
get_first_letters
(
self
.
e_config
)
if
emotions_first_letters
!=
origin_efls
:
raise
ValueError
(
f
"
{
emotions_first_letters
}
is not inconsistant with
{
self
.
e_config
}
"
)
# 尝试计算语料库的名字和情感配置名字
db
=
self
.
fields_parse
(
meta_path
)
if
not
os
.
path
.
isdir
(
self
.
features_dir
):
os
.
mkdir
(
self
.
features_dir
)
...
...
@@ -352,12 +337,9 @@ class AudioExtractor:
self
.
features_dir
,
features_file_name
,
)
print
(
f
"检查特征文件
{
features_file_path
}
是否存在..."
)
print
(
f
"
{
self
.
e_config
=
}
"
)
# if self.e_config == HNS:
# raise ValueError(f"{self.e_config=}")
if
verbose
:
print
(
f
"检查特征文件
{
features_file_path
}
是否存在..."
)
print
(
f
"
{
self
.
e_config
=
}
"
)
ffp
=
os
.
path
.
isfile
(
features_file_path
)
if
ffp
:
...
...
@@ -370,11 +352,41 @@ class AudioExtractor:
if
self
.
verbose
:
print
(
"npy文件不存在,尝试创建..."
)
# 如果尚未提取过特征,则在此处进行提取,同时保存提取结果,以便下次直接使用
features
=
self
.
features_save
(
partition
,
audio_paths
,
features_file_path
)
features
=
self
.
features_
extract_
save
(
partition
,
audio_paths
,
features_file_path
)
return
features
,
audio_paths
,
emotions
def
features_save
(
self
,
partition
,
audio_paths
,
features_file_path
):
def fields_parse(self, meta_path):
    """Parse the metadata file name and return its corpus (db) field.

    The name is split into fields by ``db_field_parse``; the fields are then
    passed to ``validate_emotion_config_consistence``, which raises if the
    emotion field does not agree with this object's emotion configuration.

    Parameters
    ----------
    meta_path :
        Path of the metadata file whose name encodes the fields.

    Returns
    -------
    The ``db`` value produced by ``db_field_parse``.
    """
    # Corpus field (and the full list of name fields).
    name_fields, corpus = self.db_field_parse(meta_path)
    # Emotion field check; raises on mismatch.
    self.validate_emotion_config_consistence(name_fields)
    return corpus
def db_field_parse(self, meta_path):
    """Split a metadata file name into fields and pick out the corpus name.

    The directory part and the extension of ``meta_path`` are dropped and the
    remaining name is split on ``"_"``. The second field is taken as the
    corpus (db) name; it is replaced by ``""`` when it is not listed in
    ``ava_dbs``.

    Parameters
    ----------
    meta_path :
        Path of the metadata file.

    Returns
    -------
    tuple
        ``(meta_fields, db)`` -- the list of name fields and the corpus name
        (or ``""``).

    Raises
    ------
    IndexError
        If the file name contains fewer than two ``"_"``-separated fields.
    """
    stem = os.path.splitext(os.path.basename(meta_path))[0]
    meta_fields = stem.split("_")

    corpus = meta_fields[1]
    if corpus not in ava_dbs:
        # Unknown corpus names are reported as an empty string.
        corpus = ""
    return meta_fields, corpus
def validate_emotion_config_consistence(self, meta_fields):
    """Check that the metadata file name matches the emotion configuration.

    The last field of the metadata file name is compared with
    ``get_first_letters(self.e_config)``.

    Parameters
    ----------
    meta_fields :
        Fields of the metadata file name (as produced by splitting it on
        ``"_"``); only the last element is inspected.

    Raises
    ------
    ValueError
        If the last field differs from the first letters derived from
        ``self.e_config``.
    """
    emotions_first_letters = meta_fields[-1]
    origin_efls = get_first_letters(self.e_config)
    # Check that the two emotion descriptions agree.
    if emotions_first_letters != origin_efls:
        # The previous message said "is not inconsistant with": misspelled and
        # a double negative that stated the opposite of the actual failure.
        raise ValueError(
            f"{emotions_first_letters} is not consistent with {self.e_config}"
        )
def
features_extract_save
(
self
,
partition
,
audio_paths
,
features_file_path
):
"""将提取的特征(ndarray)保存持久化保存(为npy文件)
利用qtmd提供可视化特征抽取进度
...
...
@@ -406,7 +418,7 @@ class AudioExtractor:
print
(
f
"正在抽取第
{
cnt
}
个文件的特征.."
)
# 调用utils模块中的extract_featrue进行特征提取
f_config
=
self
.
f_config
feature
=
extract_feature
(
audio_file
,
f_config
=
f_config
)
feature
=
extract_feature
_of_audio
(
audio_file
,
f_config
=
f_config
)
if
self
.
feature_dimension
is
None
:
# MCM特征组合下(3特征),有180维的单轴数组,5特征下,有193维
self
.
feature_dimension
=
feature
.
shape
[
0
]
...
...
recognizer/__pycache__/basic.cpython-39.pyc
浏览文件 @
63c5dd05
无法预览此类型文件
recognizer/basic.py
浏览文件 @
63c5dd05
...
...
@@ -20,7 +20,7 @@ from audio.extractor import load_data_from_meta
from
config.EF
import
(
e_config_def
,
f_config_def
,
validate_emotions
)
from
config.MetaPath
import
(
emodb
,
meta_paths_of_db
,
ravdess
,
savee
,
validate_partition
,
project_dir
)
import
config.MetaPath
as
meta
from
audio.core
import
best_estimators
,
extract_feature
from
audio.core
import
best_estimators
,
extract_feature
_of_audio
##
class
EmotionRecognizer
:
...
...
@@ -219,7 +219,7 @@ class EmotionRecognizer:
given an `audio_path`, this method extracts the features
and predicts the emotion
"""
feature1
=
extract_feature
(
audio_path
,
self
.
f_config
)
feature1
=
extract_feature
_of_audio
(
audio_path
,
self
.
f_config
)
# print(feature1.shape)
# print(feature1,"@{feature1}",feature1.shape)
# feature2=feature1.T
...
...
@@ -248,7 +248,7 @@ class EmotionRecognizer:
Predicts the probability of each emotion.
"""
if
self
.
classification_task
:
feature
=
extract_feature
(
audio_path
,
self
.
f_config
).
reshape
(
1
,
-
1
)
feature
=
extract_feature
_of_audio
(
audio_path
,
self
.
f_config
).
reshape
(
1
,
-
1
)
proba
=
self
.
model
.
predict_proba
(
feature
)[
0
]
result
=
{}
for
emotion
,
prob
in
zip
(
self
.
model
.
classes_
,
proba
):
...
...
recognizer/deep.py
浏览文件 @
63c5dd05
...
...
@@ -22,7 +22,7 @@ from config.EF import validate_emotions
from
recognizer.basic
import
EmotionRecognizer
# from ER import EmotionRecognizer
from
config.MetaPath
import
get_first_letters
from
audio.core
import
extract_feature
,
get_dropout_str
from
audio.core
import
extract_feature
_of_audio
,
get_dropout_str
class
DeepEmotionRecognizer
(
EmotionRecognizer
):
...
...
@@ -337,7 +337,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
print
(
"[+] Model trained"
)
def
predict
(
self
,
audio_path
):
feature
=
extract_feature
(
audio_path
,
**
self
.
_f_config_dict
).
reshape
(
feature
=
extract_feature
_of_audio
(
audio_path
,
**
self
.
_f_config_dict
).
reshape
(
(
1
,
1
,
self
.
input_length
)
)
if
self
.
classification_task
:
...
...
@@ -349,7 +349,7 @@ class DeepEmotionRecognizer(EmotionRecognizer):
def
predict_proba
(
self
,
audio_path
):
if
self
.
classification_task
:
feature
=
extract_feature
(
audio_path
,
**
self
.
_f_config_dict
).
reshape
(
feature
=
extract_feature
_of_audio
(
audio_path
,
**
self
.
_f_config_dict
).
reshape
(
(
1
,
1
,
self
.
input_length
)
)
proba
=
self
.
model
.
predict
(
feature
)[
0
][
0
]
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录