Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
22a5344b
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 1 年 前同步成功
通知
207
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
22a5344b
编写于
8月 12, 2022
作者:
Y
YangZhou
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix save && effect test
上级
d2641184
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
670 addition
and
30 deletion
+670
-30
paddlespeech/audio/backends/sox_io_backend.py
paddlespeech/audio/backends/sox_io_backend.py
+2
-2
paddlespeech/audio/sox_effects/sox_effects.py
paddlespeech/audio/sox_effects/sox_effects.py
+17
-12
paddlespeech/audio/src/pybind/pybind.cpp
paddlespeech/audio/src/pybind/pybind.cpp
+3
-3
paddlespeech/audio/src/pybind/sox/effects_chain.cpp
paddlespeech/audio/src/pybind/sox/effects_chain.cpp
+0
-1
paddlespeech/audio/utils/sox_utils.py
paddlespeech/audio/utils/sox_utils.py
+1
-1
tests/unit/assets/sox_effect_test_args.jsonl
tests/unit/assets/sox_effect_test_args.jsonl
+78
-0
tests/unit/audio/backends/sox_io/save_test.py
tests/unit/audio/backends/sox_io/save_test.py
+0
-2
tests/unit/audio/backends/sox_io/smoke_test.py
tests/unit/audio/backends/sox_io/smoke_test.py
+183
-0
tests/unit/audio/backends/sox_io/sox_effect_test.py
tests/unit/audio/backends/sox_io/sox_effect_test.py
+346
-0
tests/unit/common_utils/__init__.py
tests/unit/common_utils/__init__.py
+7
-2
tests/unit/common_utils/case_utils.py
tests/unit/common_utils/case_utils.py
+3
-0
tests/unit/common_utils/parameterized_utils.py
tests/unit/common_utils/parameterized_utils.py
+20
-7
tests/unit/common_utils/wav_utils.py
tests/unit/common_utils/wav_utils.py
+10
-0
未找到文件。
paddlespeech/audio/backends/sox_io_backend.py
浏览文件 @
22a5344b
...
...
@@ -88,9 +88,9 @@ def save(filepath: str,
)
@
_mod_utils
.
requires_sox
()
def
info
(
filepath
:
str
,
format
:
Optional
[
str
])
->
None
:
def
info
(
filepath
:
str
,
format
:
Optional
[
str
]
=
""
)
->
None
:
if
hasattr
(
filepath
,
"read"
):
sinfo
=
paddleaudio
.
get_info_fileo
jb
(
filepath
,
format
)
sinfo
=
paddleaudio
.
get_info_fileo
bj
(
filepath
,
format
)
if
sinfo
is
not
None
:
return
AudioMetaData
(
*
sinfo
)
return
_fallback_info_fileobj
(
filepath
,
format
)
...
...
paddlespeech/audio/sox_effects/sox_effects.py
浏览文件 @
22a5344b
import
os
from
typing
import
List
,
Optional
,
Tuple
import
paddle
import
numpy
from
paddlespeech.audio._internal
import
module_utils
as
_mod_utils
from
paddlespeech.audio.utils.sox_utils
import
list_effects
...
...
@@ -52,11 +54,11 @@ def effect_names() -> List[str]:
@
_mod_utils
.
requires_sox
()
def
apply_effects_tensor
(
tensor
:
torch
.
Tensor
,
tensor
:
paddle
.
Tensor
,
sample_rate
:
int
,
effects
:
List
[
List
[
str
]],
channels_first
:
bool
=
True
,
)
->
Tuple
[
torch
.
Tensor
,
int
]:
)
->
Tuple
[
paddle
.
Tensor
,
int
]:
"""Apply sox effects to given Tensor
.. devices:: CPU
...
...
@@ -152,7 +154,11 @@ def apply_effects_tensor(
>>> waveform, sample_rate = transform(waveform, input_sample_rate)
>>> assert sample_rate == 8000
"""
return
paddleaudio
.
sox_effects_apply_effects_tensor
(
tensor
,
sample_rate
,
effects
,
channels_first
)
tensor_np
=
tensor
.
numpy
()
ret
=
paddleaudio
.
sox_effects_apply_effects_tensor
(
tensor_np
,
sample_rate
,
effects
,
channels_first
)
if
ret
is
not
None
:
return
(
paddle
.
to_tensor
(
ret
[
0
]),
ret
[
1
])
raise
RuntimeError
(
"Failed to apply sox effect"
)
@
_mod_utils
.
requires_sox
()
...
...
@@ -162,7 +168,7 @@ def apply_effects_file(
normalize
:
bool
=
True
,
channels_first
:
bool
=
True
,
format
:
Optional
[
str
]
=
None
,
)
->
Tuple
[
torch
.
Tensor
,
int
]:
)
->
Tuple
[
paddle
.
Tensor
,
int
]:
"""Apply sox effects to the audio file and load the resulting data as Tensor
.. devices:: CPU
...
...
@@ -270,14 +276,13 @@ def apply_effects_file(
>>> for batch in loader:
>>> pass
"""
if
not
torch
.
jit
.
is_scripting
():
if
hasattr
(
path
,
"read"
):
ret
=
paddleaudio
.
_paddleaudio
.
apply_effects_fileobj
(
path
,
effects
,
normalize
,
channels_first
,
format
)
if
ret
is
None
:
raise
RuntimeError
(
"Failed to load audio from {}"
.
format
(
path
))
return
ret
path
=
os
.
fspath
(
path
)
if
hasattr
(
path
,
"read"
):
ret
=
paddleaudio
.
apply_effects_fileobj
(
path
,
effects
,
normalize
,
channels_first
,
format
)
if
ret
is
None
:
raise
RuntimeError
(
"Failed to load audio from {}"
.
format
(
path
))
return
(
paddle
.
to_tensor
(
ret
[
0
]),
ret
[
1
])
path
=
os
.
fspath
(
path
)
ret
=
paddleaudio
.
sox_effects_apply_effects_file
(
path
,
effects
,
normalize
,
channels_first
,
format
)
if
ret
is
not
None
:
return
ret
return
(
paddle
.
to_tensor
(
ret
[
0
]),
ret
[
1
])
raise
RuntimeError
(
"Failed to load audio from {}"
.
format
(
path
))
\ No newline at end of file
paddlespeech/audio/src/pybind/pybind.cpp
浏览文件 @
22a5344b
...
...
@@ -65,9 +65,9 @@ PYBIND11_MODULE(_paddleaudio, m) {
&
paddleaudio
::
sox_utils
::
get_buffer_size
);
// effect
//
m.def("apply_effects_fileobj",
//
&paddleaudio::sox_effects::apply_effects_fileobj,
//
"Decode audio data from file-like obj and apply effects.");
m
.
def
(
"apply_effects_fileobj"
,
&
paddleaudio
::
sox_effects
::
apply_effects_fileobj
,
"Decode audio data from file-like obj and apply effects."
);
m
.
def
(
"sox_effects_initialize_sox_effects"
,
&
paddleaudio
::
sox_effects
::
initialize_sox_effects
);
m
.
def
(
...
...
paddlespeech/audio/src/pybind/sox/effects_chain.cpp
浏览文件 @
22a5344b
...
...
@@ -59,7 +59,6 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
switch
(
tensor
.
dtype
().
num
())
{
//case c10::ScalarType::Float: {
case
11
:
{
break
;
// Need to convert to 64-bit precision so that
// values around INT32_MIN/MAX are handled correctly.
for
(
int
idx
=
0
;
idx
<
chunk
.
size
();
++
idx
)
{
...
...
paddlespeech/audio/utils/sox_utils.py
浏览文件 @
22a5344b
...
...
@@ -31,7 +31,7 @@ def set_verbosity(verbosity: int):
See Also:
http://sox.sourceforge.net/sox.html
"""
_paddleaudio
.
sox_utils_set_verbosity
(
verbosity
)
_paddleaudio
.
sox_utils_set_verbosity
(
verbosity
)
@
_mod_utils
.
requires_sox
()
...
...
tests/unit/assets/sox_effect_test_args.jsonl
0 → 100644
浏览文件 @
22a5344b
{"effects": [["allpass", "300", "10"]]}
{"effects": [["band", "300", "10"]]}
{"effects": [["bandpass", "300", "10"]]}
{"effects": [["bandreject", "300", "10"]]}
{"effects": [["bass", "-10"]]}
{"effects": [["biquad", "0.4", "0.2", "0.9", "0.7", "0.2", "0.6"]]}
{"effects": [["chorus", "0.7", "0.9", "55", "0.4", "0.25", "2", "-t"]]}
{"effects": [["chorus", "0.6", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "1.3", "-s"]]}
{"effects": [["chorus", "0.5", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "2.3", "-t", "40", "0.3", "0.3", "1.3", "-s"]]}
{"effects": [["channels", "1"]]}
{"effects": [["channels", "2"]]}
{"effects": [["channels", "3"]]}
{"effects": [["compand", "0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]]}
{"effects": [["compand", ".1,.2", "-inf,-50.1,-inf,-50,-50", "0", "-90", ".1"]]}
{"effects": [["compand", ".1,.1", "-45.1,-45,-inf,0,-inf", "45", "-90", ".1"]]}
{"effects": [["contrast", "0"]]}
{"effects": [["contrast", "25"]]}
{"effects": [["contrast", "50"]]}
{"effects": [["contrast", "75"]]}
{"effects": [["contrast", "100"]]}
{"effects": [["dcshift", "1.0"]]}
{"effects": [["dcshift", "-1.0"]]}
{"effects": [["deemph"]], "input_sample_rate": 44100}
{"effects": [["dither", "-s"]]}
{"effects": [["dither", "-S"]]}
{"effects": [["divide"]]}
{"effects": [["downsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 4000}
{"effects": [["earwax"]], "input_sample_rate": 44100}
{"effects": [["echo", "0.8", "0.88", "60", "0.4"]]}
{"effects": [["echo", "0.8", "0.88", "6", "0.4"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3", "1800", "0.25"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "700", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "900", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "40", "0.25", "63", "0.3"]]}
{"effects": [["equalizer", "300", "10", "5"]]}
{"effects": [["fade", "q", "3"]]}
{"effects": [["fade", "h", "3"]]}
{"effects": [["fade", "t", "3"]]}
{"effects": [["fade", "l", "3"]]}
{"effects": [["fade", "p", "3"]]}
{"effects": [["fir", "0.0195", "-0.082", "0.234", "0.891", "-0.145", "0.043"]]}
{"effects": [["fir", "<ASSET_DIR>/sox_effect_test_fir_coeffs.txt"]]}
{"effects": [["flanger"]]}
{"effects": [["gain", "-l", "-6"]]}
{"effects": [["highpass", "-1", "300"]]}
{"effects": [["highpass", "-2", "300"]]}
{"effects": [["hilbert"]]}
{"effects": [["loudness"]]}
{"effects": [["lowpass", "-1", "300"]]}
{"effects": [["lowpass", "-2", "300"]]}
{"effects": [["mcompand", "0.005,0.1 -47,-40,-34,-34,-17,-33", "100", "0.003,0.05 -47,-40,-34,-34,-17,-33", "400", "0.000625,0.0125 -47,-40,-34,-34,-15,-33", "1600", "0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30", "6400", "0,0.025 -38,-31,-28,-28,-0,-25"]], "input_sample_rate": 44100}
{"effects": [["oops"]]}
{"effects": [["overdrive"]]}
{"effects": [["pad"]]}
{"effects": [["phaser"]]}
{"effects": [["remix", "6", "7", "8", "0"]], "num_channels": 8}
{"effects": [["remix", "1-3,7", "3"]], "num_channels": 8}
{"effects": [["repeat"]]}
{"effects": [["reverb"]]}
{"effects": [["reverse"]]}
{"effects": [["riaa"]], "input_sample_rate": 44100}
{"effects": [["silence", "0"]]}
{"effects": [["speed", "1.3"]], "input_sample_rate": 4000, "output_sample_rate": 5200}
{"effects": [["speed", "0.7"]], "input_sample_rate": 4000, "output_sample_rate": 2800}
{"effects": [["stat"]]}
{"effects": [["stats"]]}
{"effects": [["stretch"]]}
{"effects": [["swap"]]}
{"effects": [["synth"]]}
{"effects": [["tempo", "0.9"]]}
{"effects": [["tempo", "1.1"]]}
{"effects": [["treble", "3"]]}
{"effects": [["tremolo", "300", "40"]]}
{"effects": [["tremolo", "300", "50"]]}
{"effects": [["trim", "0", "0.1"]]}
{"effects": [["upsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 16000}
{"effects": [["vol", "3"]]}
tests/unit/audio/backends/sox_io/save_test.py
浏览文件 @
22a5344b
...
...
@@ -164,8 +164,6 @@ class TestSave(TestSaveBase, unittest.TestCase):
[
(
"float32"
,),
(
"int32"
,),
(
"int16"
,),
(
"uint8"
,),
],
)
def
test_save_wav_dtype
(
self
,
test_mode
,
params
):
...
...
tests/unit/audio/backends/sox_io/smoke_test.py
0 → 100644
浏览文件 @
22a5344b
import
io
import
itertools
import
unittest
from
parameterized
import
parameterized
from
paddlespeech.audio.backends
import
sox_io_backend
from
tests.unit.common_utils
import
(
get_wav_data
,
TempDirMixin
,
name_func
)
class
SmokeTest
(
TempDirMixin
,
unittest
.
TestCase
):
"""Run smoke test on various audio format
The purpose of this test suite is to verify that sox_io_backend functionalities do not exhibit
abnormal behaviors.
This test suite should be able to run without any additional tools (such as sox command),
however without such tools, the correctness of each function cannot be verified.
"""
def
run_smoke_test
(
self
,
ext
,
sample_rate
,
num_channels
,
*
,
compression
=
None
,
dtype
=
"float32"
):
duration
=
1
num_frames
=
sample_rate
*
duration
#path = self.get_temp_path(f"test.{ext}")
path
=
self
.
get_temp_path
(
f
"test.
{
ext
}
"
)
original
=
get_wav_data
(
dtype
,
num_channels
,
normalize
=
False
,
num_frames
=
num_frames
)
# 1. run save
sox_io_backend
.
save
(
path
,
original
,
sample_rate
,
compression
=
compression
)
# 2. run info
info
=
sox_io_backend
.
info
(
path
)
assert
info
.
sample_rate
==
sample_rate
assert
info
.
num_channels
==
num_channels
# 3. run load
loaded
,
sr
=
sox_io_backend
.
load
(
path
,
normalize
=
False
)
assert
sr
==
sample_rate
assert
loaded
.
shape
[
0
]
==
num_channels
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
"float32"
,
"int32"
],
#["float32", "int32", "int16", "uint8"],
[
8000
,
16000
],
[
1
,
2
],
)
),
name_func
=
name_func
,
)
def
test_wav
(
self
,
dtype
,
sample_rate
,
num_channels
):
"""Run smoke test on wav format"""
self
.
run_smoke_test
(
"wav"
,
sample_rate
,
num_channels
,
dtype
=
dtype
)
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320],
#)
#)
#)
#def test_mp3(self, sample_rate, num_channels, bit_rate):
#"""Run smoke test on mp3 format"""
#self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate)
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#[-1, 0, 1, 2, 3, 3.6, 5, 10],
#)
#)
#)
#def test_vorbis(self, sample_rate, num_channels, quality_level):
#"""Run smoke test on vorbis format"""
#self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
8000
,
16000
],
[
1
,
2
],
list
(
range
(
9
)),
)
),
name_func
=
name_func
,
)
def
test_flac
(
self
,
sample_rate
,
num_channels
,
compression_level
):
"""Run smoke test on flac format"""
self
.
run_smoke_test
(
"flac"
,
sample_rate
,
num_channels
,
compression
=
compression_level
)
class
SmokeTestFileObj
(
unittest
.
TestCase
):
"""Run smoke test on various audio format
The purpose of this test suite is to verify that sox_io_backend functionalities do not exhibit
abnormal behaviors.
This test suite should be able to run without any additional tools (such as sox command),
however without such tools, the correctness of each function cannot be verified.
"""
def
run_smoke_test
(
self
,
ext
,
sample_rate
,
num_channels
,
*
,
compression
=
None
,
dtype
=
"float32"
):
duration
=
1
num_frames
=
sample_rate
*
duration
original
=
get_wav_data
(
dtype
,
num_channels
,
normalize
=
False
,
num_frames
=
num_frames
)
fileobj
=
io
.
BytesIO
()
# 1. run save
sox_io_backend
.
save
(
fileobj
,
original
,
sample_rate
,
compression
=
compression
,
format
=
ext
)
# 2. run info
fileobj
.
seek
(
0
)
info
=
sox_io_backend
.
info
(
fileobj
,
format
=
ext
)
assert
info
.
sample_rate
==
sample_rate
assert
info
.
num_channels
==
num_channels
# 3. run load
fileobj
.
seek
(
0
)
loaded
,
sr
=
sox_io_backend
.
load
(
fileobj
,
normalize
=
False
,
format
=
ext
)
assert
sr
==
sample_rate
assert
loaded
.
shape
[
0
]
==
num_channels
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
"float32"
,
"int32"
],
[
8000
,
16000
],
[
1
,
2
],
)
),
name_func
=
name_func
,
)
def
test_wav
(
self
,
dtype
,
sample_rate
,
num_channels
):
"""Run smoke test on wav format"""
self
.
run_smoke_test
(
"wav"
,
sample_rate
,
num_channels
,
dtype
=
dtype
)
# not support yet
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320],
#)
#)
#)
#def test_mp3(self, sample_rate, num_channels, bit_rate):
#"""Run smoke test on mp3 format"""
#self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate)
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#[-1, 0, 1, 2, 3, 3.6, 5, 10],
#)
#)
#)
#def test_vorbis(self, sample_rate, num_channels, quality_level):
#"""Run smoke test on vorbis format"""
#self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
8000
,
16000
],
[
1
,
2
],
list
(
range
(
9
)),
)
),
name_func
=
name_func
,
)
def
test_flac
(
self
,
sample_rate
,
num_channels
,
compression_level
):
#"""Run smoke test on flac format"""
self
.
run_smoke_test
(
"flac"
,
sample_rate
,
num_channels
,
compression
=
compression_level
)
if
__name__
==
'__main__'
:
#test_func()
unittest
.
main
()
tests/unit/audio/backends/sox_io/sox_effect_test.py
0 → 100644
浏览文件 @
22a5344b
import
io
import
itertools
import
tarfile
import
unittest
from
pathlib
import
Path
import
numpy
as
np
from
parameterized
import
parameterized
from
paddlespeech.audio
import
sox_effects
from
paddlespeech.audio._internal
import
module_utils
as
_mod_utils
from
tests.unit.common_utils
import
(
get_sinusoid
,
get_wav_data
,
load_wav
,
save_wav
,
sox_utils
,
TempDirMixin
,
name_func
,
load_effects_params
)
if
_mod_utils
.
is_module_available
(
"requests"
):
import
requests
class
TestSoxEffects
(
unittest
.
TestCase
):
def
test_init
(
self
):
"""Calling init_sox_effects multiple times does not crush"""
for
_
in
range
(
3
):
sox_effects
.
init_sox_effects
()
class
TestSoxEffectsTensor
(
TempDirMixin
,
unittest
.
TestCase
):
"""Test suite for `apply_effects_tensor` function"""
@
parameterized
.
expand
(
list
(
itertools
.
product
([
"float32"
,
"int32"
],
[
8000
,
16000
],
[
1
,
2
,
4
,
8
],
[
True
,
False
])),
)
def
test_apply_no_effect
(
self
,
dtype
,
sample_rate
,
num_channels
,
channels_first
):
"""`apply_effects_tensor` without effects should return identical data as input"""
original
=
get_wav_data
(
dtype
,
num_channels
,
channels_first
=
channels_first
)
expected
=
original
.
clone
()
found
,
output_sample_rate
=
sox_effects
.
apply_effects_tensor
(
expected
,
sample_rate
,
[],
channels_first
)
assert
(
output_sample_rate
==
sample_rate
)
# SoxEffect should not alter the input Tensor object
#self.assertEqual(original, expected)
np
.
testing
.
assert_array_almost_equal
(
original
.
numpy
(),
expected
.
numpy
())
# SoxEffect should not return the same Tensor object
assert
expected
is
not
found
# Returned Tensor should equal to the input Tensor
#self.assertEqual(expected, found)
np
.
testing
.
assert_array_almost_equal
(
expected
.
numpy
(),
found
.
numpy
())
@
parameterized
.
expand
(
load_effects_params
(
"sox_effect_test_args.jsonl"
),
name_func
=
lambda
f
,
i
,
p
:
f
'
{
f
.
__name__
}
_
{
i
}
_
{
p
.
args
[
0
][
"effects"
][
0
][
0
]
}
'
,
)
def
test_apply_effects
(
self
,
args
):
"""`apply_effects_tensor` should return identical data as sox command"""
effects
=
args
[
"effects"
]
num_channels
=
args
.
get
(
"num_channels"
,
2
)
input_sr
=
args
.
get
(
"input_sample_rate"
,
8000
)
output_sr
=
args
.
get
(
"output_sample_rate"
)
input_path
=
self
.
get_temp_path
(
"input.wav"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
original
=
get_sinusoid
(
frequency
=
800
,
sample_rate
=
input_sr
,
n_channels
=
num_channels
,
dtype
=
"float32"
)
save_wav
(
input_path
,
original
,
input_sr
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_sample_rate
=
output_sr
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
found
,
sr
=
sox_effects
.
apply_effects_tensor
(
original
,
input_sr
,
effects
)
assert
sr
==
expected_sr
#self.assertEqual(expected, found)
np
.
testing
.
assert_array_almost_equal
(
expected
.
numpy
(),
found
.
numpy
())
class
TestSoxEffectsFile
(
TempDirMixin
,
unittest
.
TestCase
):
"""Test suite for `apply_effects_file` function"""
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
"float32"
,
"int32"
],
[
8000
,
16000
],
[
1
,
2
,
4
,
8
],
[
False
,
True
],
)
),
#name_func=name_func,
)
def
test_apply_no_effect
(
self
,
dtype
,
sample_rate
,
num_channels
,
channels_first
):
"""`apply_effects_file` without effects should return identical data as input"""
path
=
self
.
get_temp_path
(
"input.wav"
)
expected
=
get_wav_data
(
dtype
,
num_channels
,
channels_first
=
channels_first
)
save_wav
(
path
,
expected
,
sample_rate
,
channels_first
=
channels_first
)
found
,
output_sample_rate
=
sox_effects
.
apply_effects_file
(
path
,
[],
normalize
=
False
,
channels_first
=
channels_first
)
assert
output_sample_rate
==
sample_rate
#self.assertEqual(expected, found)
np
.
testing
.
assert_array_almost_equal
(
expected
.
numpy
(),
found
.
numpy
())
@
parameterized
.
expand
(
load_effects_params
(
"sox_effect_test_args.jsonl"
),
#name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
)
def
test_apply_effects_str
(
self
,
args
):
"""`apply_effects_file` should return identical data as sox command"""
dtype
=
"int32"
channels_first
=
True
effects
=
args
[
"effects"
]
num_channels
=
args
.
get
(
"num_channels"
,
2
)
input_sr
=
args
.
get
(
"input_sample_rate"
,
8000
)
output_sr
=
args
.
get
(
"output_sample_rate"
)
input_path
=
self
.
get_temp_path
(
"input.wav"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
data
=
get_wav_data
(
dtype
,
num_channels
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
input_sr
,
channels_first
=
channels_first
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_sample_rate
=
output_sr
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
found
,
sr
=
sox_effects
.
apply_effects_file
(
input_path
,
effects
,
normalize
=
False
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
np
.
testing
.
assert_array_almost_equal
(
expected
.
numpy
(),
found
.
numpy
())
def
test_apply_effects_path
(
self
):
"""`apply_effects_file` should return identical data as sox command when file path is given as a Path Object"""
dtype
=
"int32"
channels_first
=
True
effects
=
[[
"hilbert"
]]
num_channels
=
2
input_sr
=
8000
output_sr
=
8000
input_path
=
self
.
get_temp_path
(
"input.wav"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
data
=
get_wav_data
(
dtype
,
num_channels
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
input_sr
,
channels_first
=
channels_first
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_sample_rate
=
output_sr
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
found
,
sr
=
sox_effects
.
apply_effects_file
(
Path
(
input_path
),
effects
,
normalize
=
False
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
np
.
testing
.
assert_array_almost_equal
(
expected
.
numpy
(),
found
.
numpy
())
class
TestFileFormats
(
TempDirMixin
,
unittest
.
TestCase
):
"""`apply_effects_file` gives the same result as sox on various file formats"""
@
parameterized
.
expand
(
list
(
itertools
.
product
(
[
"float32"
,
"int32"
],
[
8000
,
16000
],
[
1
,
2
],
)
),
#name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
)
def
test_wav
(
self
,
dtype
,
sample_rate
,
num_channels
):
"""`apply_effects_file` works on various wav format"""
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
input_path
=
self
.
get_temp_path
(
"input.wav"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
data
=
get_wav_data
(
dtype
,
num_channels
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
sample_rate
,
channels_first
=
channels_first
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
found
,
sr
=
sox_effects
.
apply_effects_file
(
input_path
,
effects
,
normalize
=
False
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
np
.
testing
.
assert_array_almost_equal
(
found
.
numpy
(),
expected
.
numpy
())
#not support now
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#)
#),
##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
#)
#def test_flac(self, sample_rate, num_channels):
#"""`apply_effects_file` works on various flac format"""
#channels_first = True
#effects = [["band", "300", "10"]]
#input_path = self.get_temp_path("input.flac")
#reference_path = self.get_temp_path("reference.wav")
#sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
#sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
#expected, expected_sr = load_wav(reference_path)
#found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first)
#save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
#assert sr == expected_sr
##self.assertEqual(found, expected)
#np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
#@parameterized.expand(
#list(
#itertools.product(
#[8000, 16000],
#[1, 2],
#)
#),
##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
#)
#def test_vorbis(self, sample_rate, num_channels):
#"""`apply_effects_file` works on various vorbis format"""
#channels_first = True
#effects = [["band", "300", "10"]]
#input_path = self.get_temp_path("input.vorbis")
#reference_path = self.get_temp_path("reference.wav")
#sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
#sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)
#expected, expected_sr = load_wav(reference_path)
#found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first)
#save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
#assert sr == expected_sr
##self.assertEqual(found, expected)
#np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())
#@skipIfNoExec("sox")
#@skipIfNoSox
class
TestFileObject
(
TempDirMixin
,
unittest
.
TestCase
):
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
]
)
def
test_fileobj
(
self
,
ext
,
compression
):
"""Applying effects via file object works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
input_path
=
self
.
get_temp_path
(
f
"input.
{
ext
}
"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
#sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
data
=
get_wav_data
(
"int32"
,
2
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
sample_rate
,
channels_first
=
channels_first
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
open
(
input_path
,
"rb"
)
as
fileobj
:
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
np
.
testing
.
assert_array_almost_equal
(
found
.
numpy
(),
expected
.
numpy
())
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
]
)
def
test_bytesio
(
self
,
ext
,
compression
):
"""Applying effects via BytesIO object works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
input_path
=
self
.
get_temp_path
(
f
"input.
{
ext
}
"
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
#sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
data
=
get_wav_data
(
"int32"
,
2
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
sample_rate
,
channels_first
=
channels_first
)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
open
(
input_path
,
"rb"
)
as
file_
:
fileobj
=
io
.
BytesIO
(
file_
.
read
())
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
print
(
"found"
)
print
(
found
)
print
(
"expected"
)
print
(
expected
)
np
.
testing
.
assert_array_almost_equal
(
found
.
numpy
(),
expected
.
numpy
())
@
parameterized
.
expand
(
[
(
"wav"
,
None
),
]
)
def
test_tarfile
(
self
,
ext
,
compression
):
"""Applying effects to compressed audio via file-like file works"""
sample_rate
=
16000
channels_first
=
True
effects
=
[[
"band"
,
"300"
,
"10"
]]
audio_file
=
f
"input.
{
ext
}
"
input_path
=
self
.
get_temp_path
(
audio_file
)
reference_path
=
self
.
get_temp_path
(
"reference.wav"
)
archive_path
=
self
.
get_temp_path
(
"archive.tar.gz"
)
data
=
get_wav_data
(
"int32"
,
2
,
channels_first
=
channels_first
)
save_wav
(
input_path
,
data
,
sample_rate
,
channels_first
=
channels_first
)
# sox_utils.gen_audio_file(input_path, sample_rate, num_channels=2, compression=compression)
sox_utils
.
run_sox_effect
(
input_path
,
reference_path
,
effects
,
output_bitdepth
=
32
)
expected
,
expected_sr
=
load_wav
(
reference_path
)
with
tarfile
.
TarFile
(
archive_path
,
"w"
)
as
tarobj
:
tarobj
.
add
(
input_path
,
arcname
=
audio_file
)
with
tarfile
.
TarFile
(
archive_path
,
"r"
)
as
tarobj
:
fileobj
=
tarobj
.
extractfile
(
audio_file
)
found
,
sr
=
sox_effects
.
apply_effects_file
(
fileobj
,
effects
,
channels_first
=
channels_first
)
save_wav
(
self
.
get_temp_path
(
"result.wav"
),
found
,
sr
,
channels_first
=
channels_first
)
assert
sr
==
expected_sr
#self.assertEqual(found, expected)
np
.
testing
.
assert_array_almost_equal
(
found
.
numpy
(),
expected
.
numpy
())
if
__name__
==
'__main__'
:
unittest
.
main
()
\ No newline at end of file
tests/unit/common_utils/__init__.py
浏览文件 @
22a5344b
from
.wav_utils
import
get_wav_data
,
load_wav
,
save_wav
,
normalize_wav
from
.parameterized_utils
import
load_params
,
nested_params
from
.parameterized_utils
import
nested_params
from
.data_utils
import
get_sinusoid
,
load_params
,
load_effects_params
from
.case_utils
import
(
TempDirMixin
TempDirMixin
,
name_func
)
__all__
=
[
...
...
@@ -11,4 +13,7 @@ __all__ = [
"normalize_wav"
,
"load_params"
,
"nested_params"
,
"get_sinusoid"
,
"name_func"
,
"load_effects_params"
]
tests/unit/common_utils/case_utils.py
浏览文件 @
22a5344b
...
...
@@ -14,6 +14,9 @@ from paddlespeech.audio._internal.module_utils import (
is_sox_available
,
)
def
name_func
(
func
,
_
,
params
):
return
f
'
{
func
.
__name__
}
_
{
"_"
.
join
(
str
(
arg
)
for
arg
in
params
.
args
)
}
'
class
TempDirMixin
:
"""Mixin to provide easy access to temp dir"""
...
...
tests/unit/common_utils/parameterized_utils.py
浏览文件 @
22a5344b
import
json
from
itertools
import
product
import
os
from
parameterized
import
param
,
parameterized
def
get_asset_path
(
*
paths
):
"""Return full path of a test asset"""
return
os
.
path
.
join
(
_TEST_DIR_PATH
,
"assets"
,
*
paths
)
def
load_params
(
*
paths
):
with
open
(
get_asset_path
(
*
paths
),
"r"
)
as
file
:
return
[
param
(
json
.
loads
(
line
))
for
line
in
file
]
#def get_asset_path(*paths):
#"""Return full path of a test asset"""
#return os.path.join(_TEST_DIR_PATH, "assets", *paths)
#def load_params(*paths):
#with open(get_asset_path(*paths), "r") as file:
#return [param(json.loads(line)) for line in file]
#def load_effects_params(*paths):
#params = []
#with open(get_asset_path(*paths), "r") as file:
#for line in file:
#data = json.loads(line)
#for effect in data["effects"]:
#for i, arg in enumerate(effect):
#if arg.startswith("<ASSET_DIR>"):
#effect[i] = arg.replace("<ASSET_DIR>", get_asset_path())
#params.append(param(data))
#return params
def
_name_func
(
func
,
_
,
params
):
strs
=
[]
...
...
tests/unit/common_utils/wav_utils.py
浏览文件 @
22a5344b
...
...
@@ -2,6 +2,7 @@ from typing import Optional
import
scipy.io.wavfile
import
paddle
import
numpy
as
np
def
normalize_wav
(
tensor
:
paddle
.
Tensor
)
->
paddle
.
Tensor
:
if
tensor
.
dtype
==
paddle
.
float32
:
...
...
@@ -52,8 +53,14 @@ def get_wav_data(
# paddle linspace not support uint8, int8, int16
#if dtype == "uint8":
# base = paddle.linspace(0, 255, num_frames, dtype=dtype_)
#dtype_np = getattr(np, dtype)
#base_np = np.linspace(0, 255, num_frames, dtype_np)
#base = paddle.to_tensor(base_np, dtype=dtype_)
#elif dtype == "int8":
# base = paddle.linspace(-128, 127, num_frames, dtype=dtype_)
#dtype_np = getattr(np, dtype)
#base_np = np.linspace(-128, 127, num_frames, dtype_np)
#base = paddle.to_tensor(base_np, dtype=dtype_)
if
dtype
==
"float32"
:
base
=
paddle
.
linspace
(
-
1.0
,
1.0
,
num_frames
,
dtype
=
dtype_
)
elif
dtype
==
"float64"
:
...
...
@@ -62,6 +69,9 @@ def get_wav_data(
base
=
paddle
.
linspace
(
-
2147483648
,
2147483647
,
num_frames
,
dtype
=
dtype_
)
#elif dtype == "int16":
# base = paddle.linspace(-32768, 32767, num_frames, dtype=dtype_)
#dtype_np = getattr(np, dtype)
#base_np = np.linspace(-32768, 32767, num_frames, dtype_np)
#base = paddle.to_tensor(base_np, dtype=dtype_)
else
:
raise
NotImplementedError
(
f
"Unsupported dtype
{
dtype
}
"
)
data
=
base
.
tile
([
num_channels
,
1
])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录