Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
d0bca198
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
11 个月 前同步成功
通知
204
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
d0bca198
编写于
3月 04, 2022
作者:
H
Hui Zhang
提交者:
GitHub
3月 04, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1494 from PaddlePaddle/audio
[audio] refactor audio arch
上级
2886ab93
d70bcb8f
变更
34
展开全部
隐藏空白更改
内联
并排
Showing
34 changed file
with
1673 addition
and
254 deletion
+1673
-254
.gitignore
.gitignore
+2
-3
paddleaudio/CHANGELOG.md
paddleaudio/CHANGELOG.md
+4
-0
paddleaudio/features/augment.py
paddleaudio/features/augment.py
+0
-170
paddleaudio/paddleaudio/__init__.py
paddleaudio/paddleaudio/__init__.py
+22
-0
paddleaudio/paddleaudio/backends/__init__.py
paddleaudio/paddleaudio/backends/__init__.py
+19
-0
paddleaudio/paddleaudio/backends/soundfile_backend.py
paddleaudio/paddleaudio/backends/soundfile_backend.py
+3
-41
paddleaudio/paddleaudio/backends/sox_backend.py
paddleaudio/paddleaudio/backends/sox_backend.py
+13
-0
paddleaudio/paddleaudio/compliance/__init__.py
paddleaudio/paddleaudio/compliance/__init__.py
+1
-3
paddleaudio/paddleaudio/compliance/kaldi.py
paddleaudio/paddleaudio/compliance/kaldi.py
+638
-0
paddleaudio/paddleaudio/compliance/librosa.py
paddleaudio/paddleaudio/compliance/librosa.py
+152
-2
paddleaudio/paddleaudio/datasets/__init__.py
paddleaudio/paddleaudio/datasets/__init__.py
+0
-7
paddleaudio/paddleaudio/datasets/dataset.py
paddleaudio/paddleaudio/datasets/dataset.py
+2
-2
paddleaudio/paddleaudio/datasets/esc50.py
paddleaudio/paddleaudio/datasets/esc50.py
+0
-0
paddleaudio/paddleaudio/datasets/gtzan.py
paddleaudio/paddleaudio/datasets/gtzan.py
+0
-0
paddleaudio/paddleaudio/datasets/tess.py
paddleaudio/paddleaudio/datasets/tess.py
+0
-0
paddleaudio/paddleaudio/datasets/urban_sound.py
paddleaudio/paddleaudio/datasets/urban_sound.py
+0
-0
paddleaudio/paddleaudio/features/__init__.py
paddleaudio/paddleaudio/features/__init__.py
+4
-3
paddleaudio/paddleaudio/features/layers.py
paddleaudio/paddleaudio/features/layers.py
+344
-0
paddleaudio/paddleaudio/functional/__init__.py
paddleaudio/paddleaudio/functional/__init__.py
+20
-0
paddleaudio/paddleaudio/functional/functional.py
paddleaudio/paddleaudio/functional/functional.py
+265
-0
paddleaudio/paddleaudio/functional/window.py
paddleaudio/paddleaudio/functional/window.py
+29
-15
paddleaudio/paddleaudio/io/__init__.py
paddleaudio/paddleaudio/io/__init__.py
+0
-1
paddleaudio/paddleaudio/metric/__init__.py
paddleaudio/paddleaudio/metric/__init__.py
+15
-0
paddleaudio/paddleaudio/metric/dtw.py
paddleaudio/paddleaudio/metric/dtw.py
+42
-0
paddleaudio/paddleaudio/metric/mcd.py
paddleaudio/paddleaudio/metric/mcd.py
+48
-0
paddleaudio/paddleaudio/sox_effects/__init__.py
paddleaudio/paddleaudio/sox_effects/__init__.py
+13
-0
paddleaudio/paddleaudio/utils/__init__.py
paddleaudio/paddleaudio/utils/__init__.py
+12
-5
paddleaudio/paddleaudio/utils/download.py
paddleaudio/paddleaudio/utils/download.py
+6
-0
paddleaudio/paddleaudio/utils/env.py
paddleaudio/paddleaudio/utils/env.py
+7
-0
paddleaudio/paddleaudio/utils/error.py
paddleaudio/paddleaudio/utils/error.py
+0
-0
paddleaudio/paddleaudio/utils/log.py
paddleaudio/paddleaudio/utils/log.py
+4
-1
paddleaudio/paddleaudio/utils/time.py
paddleaudio/paddleaudio/utils/time.py
+5
-0
paddleaudio/setup.py
paddleaudio/setup.py
+3
-1
paddleaudio/tests/.gitkeep
paddleaudio/tests/.gitkeep
+0
-0
未找到文件。
.gitignore
浏览文件 @
d0bca198
...
...
@@ -14,6 +14,7 @@
*.whl
*.egg-info
build
*output/
docs/build/
docs/topic/ctc/warp-ctc/
...
...
@@ -33,6 +34,4 @@ tools/activate_python.sh
tools/miniconda.sh
tools/CRF++-0.58/
speechx/fc_patch/
*output/
speechx/fc_patch/
\ No newline at end of file
paddleaudio/CHANGELOG.md
浏览文件 @
d0bca198
# Changelog
Date: 2022-2-25, Author: Hui Zhang.
-
Refactor architecture.
-
dtw distance and mcd style dtw
paddleaudio/features/augment.py
已删除
100644 → 0
浏览文件 @
2886ab93
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
typing
import
List
import
numpy
as
np
from
numpy
import
ndarray
as
array
from
..backends
import
depth_convert
from
..utils
import
ParameterError
__all__
=
[
'depth_augment'
,
'spect_augment'
,
'random_crop1d'
,
'random_crop2d'
,
'adaptive_spect_augment'
,
]
def randint(high: int) -> int:
    """Draw one random integer from the half-open range [0, high).

    Small convenience wrapper used by the random data augmentation routines.

    Parameters:
        high(int): exclusive upper bound of the draw.

    Returns:
        The sampled value as a built-in ``int``.
    """
    sample = np.random.randint(0, high=high)
    return int(sample)
def rand() -> float:
    """Draw one random floating-point number from the range [0, 1).

    Helper for the random data augmentation routines.

    Returns:
        The sampled value as a built-in ``float``.
    """
    # Calling ``np.random.rand()`` without a shape yields a scalar draw.  The
    # previous ``np.random.rand(1)`` built a 1-element array, and converting
    # such an array with ``float()`` is deprecated in recent NumPy releases.
    return float(np.random.rand())
def depth_augment(y: array,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> array:
    """Audio depth augmentation.

    Simulates the distortion brought by quantization: one entry of ``choices``
    is picked at random (weighted by ``probs``), the signal is converted to
    that depth with ``depth_convert`` and then converted back to its original
    dtype.

    Parameters:
        y(array): the input audio array.
        choices(List): candidate intermediate dtypes.
        probs(List[float]): selection probability of each entry of ``choices``;
            must have the same length as ``choices``.

    Returns:
        The audio array after the round trip, in the dtype of ``y``.
    """
    assert len(probs) == len(
        choices
    ), 'number of choices {} must be equal to size of probs {}'.format(
        len(choices), len(probs))
    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype
    degraded = depth_convert(y, target_depth)
    return depth_convert(degraded, original_depth)
def adaptive_spect_augment(spect: array, tempo_axis: int=0,
                           level: float=0.1) -> array:
    """Do adaptive spectrogram augmentation.

    The strength of the augmentation is governed by ``level``: the mask widths
    are ``int(size * level * 0.5)`` of the corresponding axis and the number of
    masks per axis is ``int(10 * level)``, so ``level=0`` applies no masking.
    The masked regions are set to zero directly in ``spect``.

    Parameters:
        spect(array): 2-d spectrogram; it is modified in place.
        tempo_axis(int): 0 if the first axis is time, any other value if the
            second axis is time.
        level(float): augmentation level.

    Returns:
        The same ``spect`` object with the masks applied.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    dim0, dim1 = spect.shape
    if tempo_axis == 0:
        time_axis, nt, nf = 0, dim0, dim1
    else:
        time_axis, nt, nf = 1, dim1, dim0
    freq_axis = 1 - time_axis

    time_mask_width = int(nt * level * 0.5)
    freq_mask_width = int(nf * level * 0.5)
    mask_count = int(10 * level)

    # Time masks are always drawn before frequency masks so the sequence of
    # random draws is the same whichever axis holds time.
    schedule = ((time_axis, nt, time_mask_width),
                (freq_axis, nf, freq_mask_width))
    for axis, size, width in schedule:
        for _ in range(mask_count):
            start = randint(size - width)
            region = [slice(None), slice(None)]
            region[axis] = slice(start, start + width)
            spect[tuple(region)] = 0
    return spect
def spect_augment(spect: array,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> array:
    """Do spectrogram augmentation in both time and freq axis.

    The number of masks and the mask width are drawn at random for each axis
    (each strictly below its ``max_*`` bound), then every mask zeroes a
    randomly placed band of ``spect``.

    A mask that does not fit strictly inside its axis (width >= axis length)
    is skipped, and a ``max_*`` bound of 0 or less disables the corresponding
    draw.  Both situations used to raise ``ValueError`` from the random draw.
    Inputs that worked before consume the same random draws in the same order.

    Parameters:
        spect(array): 2-d spectrogram; it is modified in place.
        tempo_axis(int): 0 if the first axis is time, any other value if the
            second axis is time.
        max_time_mask(int): exclusive upper bound on the number of time masks.
        max_freq_mask(int): exclusive upper bound on the number of freq masks.
        max_time_mask_width(int): exclusive upper bound on the time mask width.
        max_freq_mask_width(int): exclusive upper bound on the freq mask width.

    Returns:
        The same ``spect`` object with the masks applied.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    def _draw_below(high: int) -> int:
        # numpy raises ValueError for an empty range; a bound <= 0 simply
        # means "nothing to draw", so answer 0 without touching the RNG.
        return int(np.random.randint(0, high=high)) if high > 0 else 0

    dim0, dim1 = spect.shape
    time_axis = 0 if tempo_axis == 0 else 1
    nt, nf = (dim0, dim1) if time_axis == 0 else (dim1, dim0)

    # Draw order (counts first, then widths) matches the previous code.
    num_time_mask = _draw_below(max_time_mask)
    num_freq_mask = _draw_below(max_freq_mask)
    time_mask_width = _draw_below(max_time_mask_width)
    freq_mask_width = _draw_below(max_freq_mask_width)

    # Time masks are placed before frequency masks, whichever axis is time.
    plan = ((time_axis, nt, num_time_mask, time_mask_width),
            (1 - time_axis, nf, num_freq_mask, freq_mask_width))
    for axis, size, count, width in plan:
        room = size - width
        if room <= 0:
            # The mask is at least as wide as the axis: no valid start exists.
            continue
        for _ in range(count):
            start = int(np.random.randint(0, high=room))
            region = [slice(None), slice(None)]
            region[axis] = slice(start, start + width)
            spect[tuple(region)] = 0
    return spect
def random_crop1d(y: array, crop_len: int) -> array:
    """Do random cropping on 1d input signal.

    The input is a 1d signal, typically a sound waveform.

    Parameters:
        y(array): the 1-d input signal.
        crop_len(int): number of samples to keep.

    Returns:
        A contiguous slice of ``y`` of length ``crop_len``.

    Raises:
        ParameterError: if ``y`` is not 1-dimensional.
        ValueError: (from numpy) if ``crop_len`` is larger than ``len(y)``.
    """
    if y.ndim != 1:
        # The original check evaluated this message as a bare string and
        # silently accepted any rank; raise as the message intends.
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    if crop_len == n:
        # Only one window fits; the random draw over an empty range used to
        # raise ValueError for this valid request.
        return y[0:crop_len]
    idx = int(np.random.randint(0, high=n - crop_len))
    return y[idx:idx + crop_len]
def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array:
    """Do random cropping for 2D array, typically a spectrogram.

    The cropping is done along ``tempo_axis``; every other axis is kept whole.

    Parameters:
        s(array): the input array.
        crop_len(int): number of entries to keep along ``tempo_axis``.
        tempo_axis(int): index of the axis to crop.

    Returns:
        The cropped array, with ``crop_len`` entries along ``tempo_axis``.

    Raises:
        ParameterError: if ``tempo_axis >= s.ndim``.
        ValueError: (from numpy) if ``crop_len`` exceeds the axis length.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    if crop_len == n:
        # Only one window fits; the random draw over an empty range used to
        # raise ValueError for this valid request.
        idx = 0
    else:
        idx = int(np.random.randint(0, high=n - crop_len))

    sli = [slice(None)] * s.ndim
    sli[tempo_axis] = slice(idx, idx + crop_len)
    return s[tuple(sli)]
paddleaudio/paddleaudio/__init__.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.
import
compliance
from
.
import
datasets
from
.
import
features
from
.
import
functional
from
.
import
io
from
.
import
metric
from
.
import
sox_effects
from
.backends
import
load
from
.backends
import
save
paddleaudio/paddleaudio/backends/__init__.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.soundfile_backend
import
depth_convert
from
.soundfile_backend
import
load
from
.soundfile_backend
import
normalize
from
.soundfile_backend
import
resample
from
.soundfile_backend
import
save
from
.soundfile_backend
import
to_mono
paddleaudio/
backends/audio
.py
→
paddleaudio/
paddleaudio/backends/soundfile_backend
.py
浏览文件 @
d0bca198
# Copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -29,7 +29,7 @@ __all__ = [
'to_mono'
,
'depth_convert'
,
'normalize'
,
'save
_wav
'
,
'save'
,
'load'
,
]
NORMALMIZE_TYPES
=
[
'linear'
,
'gaussian'
]
...
...
@@ -41,12 +41,9 @@ EPS = 1e-8
def
resample
(
y
:
array
,
src_sr
:
int
,
target_sr
:
int
,
mode
:
str
=
'kaiser_fast'
)
->
array
:
""" Audio resampling
This function is the same as using resampy.resample().
Notes:
The default mode is kaiser_fast. For better audio quality, use mode = 'kaiser_best'
"""
if
mode
==
'kaiser_best'
:
...
...
@@ -106,7 +103,6 @@ def to_mono(y: array, merge_type: str='average') -> array:
def
_safe_cast
(
y
:
array
,
dtype
:
Union
[
type
,
str
])
->
array
:
""" data type casting in a safe way, i.e., prevent overflow or underflow
This function is used internally.
"""
return
np
.
clip
(
y
,
np
.
iinfo
(
dtype
).
min
,
np
.
iinfo
(
dtype
).
max
).
astype
(
dtype
)
...
...
@@ -115,10 +111,8 @@ def _safe_cast(y: array, dtype: Union[type, str]) -> array:
def
depth_convert
(
y
:
array
,
dtype
:
Union
[
type
,
str
],
dithering
:
bool
=
True
)
->
array
:
"""Convert audio array to target dtype safely
This function convert audio waveform to a target dtype, with addition steps of
preventing overflow/underflow and preserving audio range.
"""
SUPPORT_DTYPE
=
[
'int16'
,
'int8'
,
'float32'
,
'float64'
]
...
...
@@ -168,12 +162,9 @@ def sound_file_load(file: str,
dtype
:
str
=
'int16'
,
duration
:
Optional
[
int
]
=
None
)
->
Tuple
[
array
,
int
]:
"""Load audio using soundfile library
This function load audio file using libsndfile.
Reference:
http://www.mega-nerd.com/libsndfile/#Features
"""
with
sf
.
SoundFile
(
file
)
as
sf_desc
:
sr_native
=
sf_desc
.
samplerate
...
...
@@ -188,33 +179,9 @@ def sound_file_load(file: str,
return
y
,
sf_desc
.
samplerate
def audio_file_load():
    """Placeholder for loading audio with the audiofile library.

    Not implemented yet: every call raises.

    Reference:
        https://audiofile.68k.org/

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
def sox_file_load():
    """Placeholder for loading audio with the sox library.

    Not implemented yet: every call raises.

    Reference:
        http://sox.sourceforge.net/

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
def
normalize
(
y
:
array
,
norm_type
:
str
=
'linear'
,
mul_factor
:
float
=
1.0
)
->
array
:
""" normalize an input audio with additional multiplier.
"""
if
norm_type
==
'linear'
:
...
...
@@ -232,14 +199,12 @@ def normalize(y: array, norm_type: str='linear',
return
y
def
save
_wav
(
y
:
array
,
sr
:
int
,
file
:
str
)
->
None
:
def
save
(
y
:
array
,
sr
:
int
,
file
:
str
)
->
None
:
"""Save audio file to disk.
This function saves audio to disk using scipy.io.wavfile, with additional step
to convert input waveform to int16 unless it already is int16
Notes:
It only support raw wav format.
"""
if
not
file
.
endswith
(
'.wav'
):
raise
ParameterError
(
...
...
@@ -274,11 +239,8 @@ def load(
resample_mode
:
str
=
'kaiser_fast'
)
->
Tuple
[
array
,
int
]:
"""Load audio file from disk.
This function loads audio from disk using the audio backend.
Parameters:
Notes:
"""
y
,
r
=
sound_file_load
(
file
,
offset
=
offset
,
dtype
=
dtype
,
duration
=
duration
)
...
...
paddleaudio/paddleaudio/backends/sox_backend.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddleaudio/__init__.py
→
paddleaudio/
paddleaudio/compliance/
__init__.py
浏览文件 @
d0bca198
# Copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
2
PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.backends
import
*
from
.features
import
*
paddleaudio/paddleaudio/compliance/kaldi.py
0 → 100644
浏览文件 @
d0bca198
此差异已折叠。
点击以展开。
paddleaudio/
features/core
.py
→
paddleaudio/
paddleaudio/compliance/librosa
.py
浏览文件 @
d0bca198
...
...
@@ -21,11 +21,13 @@ import numpy as np
import
scipy
from
numpy
import
ndarray
as
array
from
numpy.lib.stride_tricks
import
as_strided
from
scipy
.signal
import
get_window
from
scipy
import
signal
from
..backends
import
depth_convert
from
..utils
import
ParameterError
__all__
=
[
# dsp
'stft'
,
'mfcc'
,
'hz_to_mel'
,
...
...
@@ -38,6 +40,12 @@ __all__ = [
'spectrogram'
,
'mu_encode'
,
'mu_decode'
,
# augmentation
'depth_augment'
,
'spect_augment'
,
'random_crop1d'
,
'random_crop2d'
,
'adaptive_spect_augment'
,
]
...
...
@@ -303,7 +311,7 @@ def stft(x: array,
if
hop_length
is
None
:
hop_length
=
int
(
win_length
//
4
)
fft_window
=
get_window
(
window
,
win_length
,
fftbins
=
True
)
fft_window
=
signal
.
get_window
(
window
,
win_length
,
fftbins
=
True
)
# Pad the window out to n_fft size
fft_window
=
pad_center
(
fft_window
,
n_fft
)
...
...
@@ -576,3 +584,145 @@ def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array:
y
=
y
*
2
/
mu
-
1
x
=
np
.
sign
(
y
)
/
mu
*
((
1
+
mu
)
**
np
.
abs
(
y
)
-
1
)
return
x
def randint(high: int) -> int:
    """Draw one random integer from the half-open range [0, high).

    Small convenience wrapper used by the random data augmentation routines.

    Parameters:
        high(int): exclusive upper bound of the draw.

    Returns:
        The sampled value as a built-in ``int``.
    """
    sample = np.random.randint(0, high=high)
    return int(sample)
def rand() -> float:
    """Draw one random floating-point number from the range [0, 1).

    Helper for the random data augmentation routines.

    Returns:
        The sampled value as a built-in ``float``.
    """
    # Calling ``np.random.rand()`` without a shape yields a scalar draw.  The
    # previous ``np.random.rand(1)`` built a 1-element array, and converting
    # such an array with ``float()`` is deprecated in recent NumPy releases.
    return float(np.random.rand())
def depth_augment(y: array,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> array:
    """Audio depth augmentation.

    Simulates the distortion brought by quantization: one entry of ``choices``
    is picked at random (weighted by ``probs``), the signal is converted to
    that depth with ``depth_convert`` and then converted back to its original
    dtype.

    Parameters:
        y(array): the input audio array.
        choices(List): candidate intermediate dtypes.
        probs(List[float]): selection probability of each entry of ``choices``;
            must have the same length as ``choices``.

    Returns:
        The audio array after the round trip, in the dtype of ``y``.
    """
    assert len(probs) == len(
        choices
    ), 'number of choices {} must be equal to size of probs {}'.format(
        len(choices), len(probs))
    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype
    degraded = depth_convert(y, target_depth)
    return depth_convert(degraded, original_depth)
def adaptive_spect_augment(spect: array, tempo_axis: int=0,
                           level: float=0.1) -> array:
    """Do adaptive spectrogram augmentation.

    The strength of the augmentation is governed by ``level``: the mask widths
    are ``int(size * level * 0.5)`` of the corresponding axis and the number of
    masks per axis is ``int(10 * level)``, so ``level=0`` applies no masking.
    The masked regions are set to zero directly in ``spect``.

    Parameters:
        spect(array): 2-d spectrogram; it is modified in place.
        tempo_axis(int): 0 if the first axis is time, any other value if the
            second axis is time.
        level(float): augmentation level.

    Returns:
        The same ``spect`` object with the masks applied.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    dim0, dim1 = spect.shape
    if tempo_axis == 0:
        time_axis, nt, nf = 0, dim0, dim1
    else:
        time_axis, nt, nf = 1, dim1, dim0
    freq_axis = 1 - time_axis

    time_mask_width = int(nt * level * 0.5)
    freq_mask_width = int(nf * level * 0.5)
    mask_count = int(10 * level)

    # Time masks are always drawn before frequency masks so the sequence of
    # random draws is the same whichever axis holds time.
    schedule = ((time_axis, nt, time_mask_width),
                (freq_axis, nf, freq_mask_width))
    for axis, size, width in schedule:
        for _ in range(mask_count):
            start = randint(size - width)
            region = [slice(None), slice(None)]
            region[axis] = slice(start, start + width)
            spect[tuple(region)] = 0
    return spect
def spect_augment(spect: array,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> array:
    """Do spectrogram augmentation in both time and freq axis.

    The number of masks and the mask width are drawn at random for each axis
    (each strictly below its ``max_*`` bound), then every mask zeroes a
    randomly placed band of ``spect``.

    A mask that does not fit strictly inside its axis (width >= axis length)
    is skipped, and a ``max_*`` bound of 0 or less disables the corresponding
    draw.  Both situations used to raise ``ValueError`` from the random draw.
    Inputs that worked before consume the same random draws in the same order.

    Parameters:
        spect(array): 2-d spectrogram; it is modified in place.
        tempo_axis(int): 0 if the first axis is time, any other value if the
            second axis is time.
        max_time_mask(int): exclusive upper bound on the number of time masks.
        max_freq_mask(int): exclusive upper bound on the number of freq masks.
        max_time_mask_width(int): exclusive upper bound on the time mask width.
        max_freq_mask_width(int): exclusive upper bound on the freq mask width.

    Returns:
        The same ``spect`` object with the masks applied.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    def _draw_below(high: int) -> int:
        # numpy raises ValueError for an empty range; a bound <= 0 simply
        # means "nothing to draw", so answer 0 without touching the RNG.
        return int(np.random.randint(0, high=high)) if high > 0 else 0

    dim0, dim1 = spect.shape
    time_axis = 0 if tempo_axis == 0 else 1
    nt, nf = (dim0, dim1) if time_axis == 0 else (dim1, dim0)

    # Draw order (counts first, then widths) matches the previous code.
    num_time_mask = _draw_below(max_time_mask)
    num_freq_mask = _draw_below(max_freq_mask)
    time_mask_width = _draw_below(max_time_mask_width)
    freq_mask_width = _draw_below(max_freq_mask_width)

    # Time masks are placed before frequency masks, whichever axis is time.
    plan = ((time_axis, nt, num_time_mask, time_mask_width),
            (1 - time_axis, nf, num_freq_mask, freq_mask_width))
    for axis, size, count, width in plan:
        room = size - width
        if room <= 0:
            # The mask is at least as wide as the axis: no valid start exists.
            continue
        for _ in range(count):
            start = int(np.random.randint(0, high=room))
            region = [slice(None), slice(None)]
            region[axis] = slice(start, start + width)
            spect[tuple(region)] = 0
    return spect
def random_crop1d(y: array, crop_len: int) -> array:
    """Do random cropping on 1d input signal.

    The input is a 1d signal, typically a sound waveform.

    Parameters:
        y(array): the 1-d input signal.
        crop_len(int): number of samples to keep.

    Returns:
        A contiguous slice of ``y`` of length ``crop_len``.

    Raises:
        ParameterError: if ``y`` is not 1-dimensional.
        ValueError: (from numpy) if ``crop_len`` is larger than ``len(y)``.
    """
    if y.ndim != 1:
        # The original check evaluated this message as a bare string and
        # silently accepted any rank; raise as the message intends.
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    if crop_len == n:
        # Only one window fits; the random draw over an empty range used to
        # raise ValueError for this valid request.
        return y[0:crop_len]
    idx = int(np.random.randint(0, high=n - crop_len))
    return y[idx:idx + crop_len]
def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array:
    """Do random cropping for 2D array, typically a spectrogram.

    The cropping is done along ``tempo_axis``; every other axis is kept whole.

    Parameters:
        s(array): the input array.
        crop_len(int): number of entries to keep along ``tempo_axis``.
        tempo_axis(int): index of the axis to crop.

    Returns:
        The cropped array, with ``crop_len`` entries along ``tempo_axis``.

    Raises:
        ParameterError: if ``tempo_axis >= s.ndim``.
        ValueError: (from numpy) if ``crop_len`` exceeds the axis length.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    if crop_len == n:
        # Only one window fits; the random draw over an empty range used to
        # raise ValueError for this valid request.
        idx = 0
    else:
        idx = int(np.random.randint(0, high=n - crop_len))

    sli = [slice(None)] * s.ndim
    sli[tempo_axis] = slice(idx, idx + crop_len)
    return s[tuple(sli)]
paddleaudio/datasets/__init__.py
→
paddleaudio/
paddleaudio/
datasets/__init__.py
浏览文件 @
d0bca198
...
...
@@ -15,10 +15,3 @@ from .esc50 import ESC50
from
.gtzan
import
GTZAN
from
.tess
import
TESS
from
.urban_sound
import
UrbanSound8K
__all__
=
[
'ESC50'
,
'UrbanSound8K'
,
'GTZAN'
,
'TESS'
,
]
paddleaudio/datasets/dataset.py
→
paddleaudio/
paddleaudio/
datasets/dataset.py
浏览文件 @
d0bca198
...
...
@@ -17,8 +17,8 @@ import numpy as np
import
paddle
from
..backends
import
load
as
load_audio
from
..
features
import
melspectrogram
from
..
features
import
mfcc
from
..
compliance.librosa
import
melspectrogram
from
..
compliance.librosa
import
mfcc
feat_funcs
=
{
'raw'
:
None
,
...
...
paddleaudio/datasets/esc50.py
→
paddleaudio/
paddleaudio/
datasets/esc50.py
浏览文件 @
d0bca198
文件已移动
paddleaudio/datasets/gtzan.py
→
paddleaudio/
paddleaudio/
datasets/gtzan.py
浏览文件 @
d0bca198
文件已移动
paddleaudio/datasets/tess.py
→
paddleaudio/
paddleaudio/
datasets/tess.py
浏览文件 @
d0bca198
文件已移动
paddleaudio/datasets/urban_sound.py
→
paddleaudio/
paddleaudio/
datasets/urban_sound.py
浏览文件 @
d0bca198
文件已移动
paddleaudio/features/__init__.py
→
paddleaudio/
paddleaudio/
features/__init__.py
浏览文件 @
d0bca198
...
...
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.augment
import
*
from
.core
import
*
from
.spectrum
import
*
from
.layers
import
LogMelSpectrogram
from
.layers
import
MelSpectrogram
from
.layers
import
MFCC
from
.layers
import
Spectrogram
paddleaudio/
features/spectrum
.py
→
paddleaudio/
paddleaudio/features/layers
.py
浏览文件 @
d0bca198
此差异已折叠。
点击以展开。
paddleaudio/paddleaudio/functional/__init__.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.functional
import
compute_fbank_matrix
from
.functional
import
create_dct
from
.functional
import
fft_frequencies
from
.functional
import
hz_to_mel
from
.functional
import
mel_frequencies
from
.functional
import
mel_to_hz
from
.functional
import
power_to_db
paddleaudio/paddleaudio/functional/functional.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from librosa(https://github.com/librosa/librosa)
import
math
from
typing
import
Optional
from
typing
import
Union
import
paddle
__all__
=
[
'hz_to_mel'
,
'mel_to_hz'
,
'mel_frequencies'
,
'fft_frequencies'
,
'compute_fbank_matrix'
,
'power_to_db'
,
'create_dct'
,
]
def hz_to_mel(freq: Union[paddle.Tensor, float],
              htk: bool=False) -> Union[paddle.Tensor, float]:
    """Convert Hz to Mels.

    With ``htk=True`` the formula ``2595 * log10(1 + f / 700)`` is used.
    Otherwise the scale is linear (``f / (200 / 3)``) below 1000 Hz and
    logarithmic above it.

    Parameters:
        freq: the input tensor of arbitrary shape, or a single floating point number.
        htk: use HTK formula to do the conversion.
            The default value is False.

    Returns:
        The frequencies represented in Mel-scale.
    """
    is_tensor = isinstance(freq, paddle.Tensor)

    if htk:
        log10 = paddle.log10 if is_tensor else math.log10
        return 2595.0 * log10(1.0 + freq / 700.0)

    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # beginning of log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
    logstep = math.log(6.4) / 27.0  # step size for log region

    # Linear part, valid below the log region
    linear = (freq - f_min) / f_sp

    if is_tensor:
        # 1e-10 keeps the log finite for zero frequencies
        log_part = min_log_mel + paddle.log(
            freq / min_log_hz + 1e-10) / logstep
        # Blend the two regions with a 0/1 mask (stand-in for masked_fill)
        in_log_region = (freq > min_log_hz).astype(freq.dtype)
        return log_part * in_log_region + linear * (1 - in_log_region)

    if freq >= min_log_hz:
        return min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep
    return linear
def mel_to_hz(mel: Union[float, paddle.Tensor],
              htk: bool=False) -> Union[float, paddle.Tensor]:
    """Convert mel bin numbers to frequencies.

    With ``htk=True`` the formula ``700 * (10 ** (mel / 2595) - 1)`` is used.
    Otherwise the mapping is linear below the mel value that corresponds to
    1000 Hz and exponential above it.

    Parameters:
        mel: the mel frequency represented as a tensor of arbitrary shape, or a floating point number.
        htk: use HTK formula to do the conversion.

    Returns:
        The frequencies represented in hz.
    """
    if htk:
        return 700.0 * (10.0**(mel / 2595.0) - 1.0)

    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # beginning of log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
    logstep = math.log(6.4) / 27.0  # step size for log region

    # Linear part, valid below the log region
    linear = f_min + f_sp * mel

    if isinstance(mel, paddle.Tensor):
        exp_part = min_log_hz * paddle.exp(logstep * (mel - min_log_mel))
        # Blend the two regions with a 0/1 mask (stand-in for masked_fill)
        in_log_region = (mel > min_log_mel).astype(mel.dtype)
        return exp_part * in_log_region + linear * (1 - in_log_region)

    if mel >= min_log_mel:
        return min_log_hz * math.exp(logstep * (mel - min_log_mel))
    return linear
def mel_frequencies(n_mels: int=64,
                    f_min: float=0.0,
                    f_max: float=11025.0,
                    htk: bool=False,
                    dtype: str=paddle.float32):
    """Compute mel frequencies.

    ``n_mels`` points are spaced uniformly on the mel scale between the mel
    values of ``f_min`` and ``f_max`` and then mapped back to Hz.

    Parameters:
        n_mels(int): number of Mel bins.
        f_min(float): the lower cut-off frequency, below which the filter response is zero.
        f_max(float): the upper cut-off frequency, above which the filter response is zero.
        htk(bool): whether to use htk formula.
        dtype(str): the datatype of the return frequencies.

    Returns:
        The mel-spaced frequencies, expressed in Hz.
    """
    mel_lo = hz_to_mel(f_min, htk=htk)
    mel_hi = hz_to_mel(f_max, htk=htk)
    # 'Center freqs' of mel bands - uniformly spaced between limits
    mel_grid = paddle.linspace(mel_lo, mel_hi, n_mels, dtype=dtype)
    return mel_to_hz(mel_grid, htk=htk)
def fft_frequencies(sr: int, n_fft: int, dtype: str=paddle.float32):
    """Compute fourier frequencies.

    Parameters:
        sr(int): the audio sample rate.
        n_fft(int): the number of fft bins.
        dtype(str): the datatype of the return frequencies.

    Returns:
        ``1 + n_fft // 2`` frequencies in hz, evenly spaced from 0 to ``sr / 2``.
    """
    nyquist = float(sr) / 2
    n_bins = int(1 + n_fft // 2)
    return paddle.linspace(0, nyquist, n_bins, dtype=dtype)
def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=64,
                         f_min: float=0.0,
                         f_max: Optional[float]=None,
                         htk: bool=False,
                         norm: Union[str, float]='slaney',
                         dtype: str=paddle.float32):
    """Compute fbank matrix.

    Each row is a triangular filter built from the rising and falling slopes
    between three consecutive mel-spaced frequencies.

    Parameters:
        sr(int): the audio sample rate.
        n_fft(int): the number of fft bins.
        n_mels(int): the number of Mel bins.
        f_min(float): the lower cut-off frequency, below which the filter response is zero.
        f_max(float): the upper cut-off frequency, above which the filter response is zero.
            Defaults to ``sr / 2`` when None.
        htk: whether to use htk formula.
        norm(str or float): 'slaney' scales every filter by ``2 / bandwidth``;
            an int or float applies ``paddle.nn.functional.normalize`` with
            ``p=norm`` along the last axis; any other value leaves the
            weights unscaled.
        dtype(str): the datatype of the returned fbank matrix.

    Returns:
        The fbank matrix of shape (n_mels, int(1+n_fft//2)).

    Shape:
        output: (n_mels, int(1+n_fft//2))
    """
    if f_max is None:
        f_max = float(sr) / 2

    n_bins = int(1 + n_fft // 2)
    weights = paddle.zeros((n_mels, n_bins), dtype=dtype)

    # Center freqs of each FFT bin
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)

    # n_mels + 2 band edges, uniformly spaced on the mel scale
    mel_f = mel_frequencies(
        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)

    fdiff = mel_f[1:] - mel_f[:-1]  # np.diff(mel_f)
    # ramps = np.subtract.outer(mel_f, fftfreqs)
    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)

    for band in range(n_mels):
        # rising and falling slopes of this band over all bins
        rising = -ramps[band] / fdiff[band]
        falling = ramps[band + 2] / fdiff[band + 1]
        # the triangle is the smaller of the two slopes, floored at zero
        weights[band] = paddle.maximum(
            paddle.zeros_like(rising), paddle.minimum(rising, falling))

    if norm == 'slaney':
        # Slaney-style mel is scaled to be approx constant energy per channel
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights *= enorm.unsqueeze(1)
    elif isinstance(norm, (int, float)):
        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)

    return weights
def power_to_db(magnitude: paddle.Tensor,
                ref_value: float=1.0,
                amin: float=1e-10,
                top_db: Optional[float]=None) -> paddle.Tensor:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units.

    The function computes the scaling ``10 * log10(x / ref)`` in a numerically
    stable way.

    Parameters:
        magnitude(Tensor): the input magnitude tensor of any shape.
        ref_value(float): the reference value. If smaller than 1.0, the db level
            of the signal will be pulled up accordingly. Otherwise, the db level
            is pushed down.
        amin(float): the minimum value of input magnitude, below which the input
            magnitude is clipped(to amin).
        top_db(float): if given, output values lower than
            ``max(output) - top_db`` are raised to that floor.

    Returns:
        The spectrogram in log-scale.

    Raises:
        Exception: if ``amin`` or ``ref_value`` is not strictly positive, or
            if ``top_db`` is negative.

    shape:
        input: any shape
        output: same as input
    """
    if amin <= 0:
        raise Exception("amin must be strictly positive")
    if ref_value <= 0:
        raise Exception("ref_value must be strictly positive")

    ones = paddle.ones_like(magnitude)
    floored = paddle.maximum(ones * amin, magnitude)
    log_spec = 10.0 * paddle.log10(floored)
    log_spec -= 10.0 * math.log10(max(ref_value, amin))

    if top_db is None:
        return log_spec

    if top_db < 0:
        raise Exception("top_db must be non-negative")
    floor_db = log_spec.max() - top_db
    return paddle.maximum(log_spec, ones * floor_db)
def create_dct(n_mfcc: int,
               n_mels: int,
               norm: Optional[str]='ortho',
               dtype: Optional[str]=paddle.float32) -> paddle.Tensor:
    """Create a discrete cosine transform(DCT) matrix.

    Parameters:
        n_mfcc (int): Number of mel frequency cepstral coefficients.
        n_mels (int): Number of mel filterbanks.
        norm (str, optional): Normalization type, either 'ortho' or None.
            Defaults to 'ortho'.
        dtype: the datatype of the returned matrix.

    Returns:
        Tensor: The DCT matrix with shape (n_mels, n_mfcc).
    """
    mel_index = paddle.arange(n_mels, dtype=dtype)
    coef_index = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)
    # size (n_mfcc, n_mels)
    basis = paddle.cos(
        math.pi / float(n_mels) * (mel_index + 0.5) * coef_index)

    if norm is not None:
        assert norm == "ortho"
        # the first row gets an extra 1/sqrt(2) on top of the common scale
        basis[0] *= 1.0 / math.sqrt(2.0)
        basis *= math.sqrt(2.0 / float(n_mels))
    else:
        basis *= 2.0

    return basis.T
paddleaudio/
features
/window.py
→
paddleaudio/
paddleaudio/functional
/window.py
浏览文件 @
d0bca198
...
...
@@ -20,6 +20,19 @@ from paddle import Tensor
__all__
=
[
'get_window'
,
# windows
'taylor'
,
'hamming'
,
'hann'
,
'tukey'
,
'kaiser'
,
'gaussian'
,
'exponential'
,
'triang'
,
'bohman'
,
'blackman'
,
'cosine'
,
]
...
...
@@ -73,6 +86,21 @@ def general_gaussian(M: int, p, sig, sym: bool=True,
return
_truncate
(
w
,
needs_trunc
)
def general_cosine(M: int, a: float, sym: bool=True,
                   dtype: str='float64') -> Tensor:
    """Build a window as a weighted sum of cosine terms.

    This function is consistent with scipy.signal.windows.general_cosine().

    ``a`` is iterated as a sequence of weights: its k-th entry scales
    ``cos(k * x)``, where ``x`` is sampled evenly on [-pi, pi]. The length is
    first passed through ``_extend`` and the summed window is post-processed
    by ``_truncate`` according to the returned ``needs_trunc`` flag.
    """
    # Degenerate lengths short-circuit to an all-ones window of length M.
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    angles = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    window = paddle.zeros((M, ), dtype=dtype)
    for order, weight in enumerate(a):
        window += weight * paddle.cos(order * angles)

    return _truncate(window, needs_trunc)
def
general_hamming
(
M
:
int
,
alpha
:
float
,
sym
:
bool
=
True
,
dtype
:
str
=
'float64'
)
->
Tensor
:
"""Compute a generalized Hamming window.
...
...
@@ -143,21 +171,6 @@ def taylor(M: int,
return
_truncate
(
w
,
needs_trunc
)
def general_cosine(M: int, a: float, sym: bool=True,
                   dtype: str='float64') -> Tensor:
    """Build a window as a weighted sum of cosine terms.

    This function is consistent with scipy.signal.windows.general_cosine().

    ``a`` is iterated as a sequence of weights: its k-th entry scales
    ``cos(k * x)``, where ``x`` is sampled evenly on [-pi, pi]. The length is
    first passed through ``_extend`` and the summed window is post-processed
    by ``_truncate`` according to the returned ``needs_trunc`` flag.
    """
    # Degenerate lengths short-circuit to an all-ones window of length M.
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    angles = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    window = paddle.zeros((M, ), dtype=dtype)
    for order, weight in enumerate(a):
        window += weight * paddle.cos(order * angles)

    return _truncate(window, needs_trunc)
def
hamming
(
M
:
int
,
sym
:
bool
=
True
,
dtype
:
str
=
'float64'
)
->
Tensor
:
"""Compute a Hamming window.
The Hamming window is a taper formed by using a raised cosine with
...
...
@@ -375,6 +388,7 @@ def cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return
_truncate
(
w
,
needs_trunc
)
## factory function
def
get_window
(
window
:
Union
[
str
,
Tuple
[
str
,
float
]],
win_length
:
int
,
fftbins
:
bool
=
True
,
...
...
paddleaudio/
backends
/__init__.py
→
paddleaudio/
paddleaudio/io
/__init__.py
浏览文件 @
d0bca198
...
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.audio
import
*
paddleaudio/paddleaudio/metric/__init__.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.dtw
import
dtw_distance
from
.mcd
import
mcd_distance
paddleaudio/paddleaudio/metric/dtw.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
from
dtaidistance
import
dtw_ndim
__all__
=
[
'dtw_distance'
,
]
def dtw_distance(xs: np.ndarray, ys: np.ndarray) -> float:
    """Dynamic Time Warping (DTW) distance between two sequences.

    Thin wrapper that forwards both sequences to
    ``dtaidistance.dtw_ndim.distance`` and returns its result unchanged.

    This function keeps a compact matrix, not the full warping paths matrix.
    Uses dynamic programming to compute:

        wps[i, j] = (s1[i]-s2[j])**2 + min(
                        wps[i-1, j  ] + penalty,  // vertical   / insertion / expansion
                        wps[i  , j-1] + penalty,  // horizontal / deletion  / compression
                        wps[i-1, j-1])            // diagonal   / match
        dtw = sqrt(wps[-1, -1])

    Args:
        xs (np.ndarray): ref sequence, [T,D]
        ys (np.ndarray): hyp sequence, [T,D]

    Returns:
        float: dtw distance
    """
    distance = dtw_ndim.distance(xs, ys)
    return distance
paddleaudio/paddleaudio/metric/mcd.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
mcd.metrics_fast
as
mt
import
numpy
as
np
from
mcd
import
dtw
__all__
=
[
'mcd_distance'
,
]
def mcd_distance(xs: np.ndarray, ys: np.ndarray, cost_fn=mt.logSpecDbDist):
    """Mel cepstral distortion (MCD) computed as a DTW alignment cost.

    Aligns the two sequences with ``mcd.dtw.dtw`` using ``cost_fn`` as the
    per-frame cost and returns the minimum cost it reports; the alignment
    path returned alongside it is discarded.

    Dynamic programming recursion:

        wps[i, j] = cost_fn(xs[i], ys[j]) + min(
                        wps[i-1, j  ],  // vertical   / insertion / expansion
                        wps[i  , j-1],  // horizontal / deletion  / compression
                        wps[i-1, j-1])  // diagonal   / match

    Default cost function:

        logSpecDbConst = 10.0 / math.log(10.0) * math.sqrt(2.0)

        def logSpecDbDist(x, y):
            diff = x - y
            return logSpecDbConst * math.sqrt(np.inner(diff, diff))

    Args:
        xs (np.ndarray): ref sequence, [T,D]
        ys (np.ndarray): hyp sequence, [T,D]
        cost_fn: callable giving the cost between one frame of ``xs`` and one
            frame of ``ys``. Defaults to ``mcd.metrics_fast.logSpecDbDist``.

    Returns:
        float: dtw distance
    """
    total_cost, _alignment = dtw.dtw(xs, ys, cost_fn)
    return total_cost
paddleaudio/paddleaudio/sox_effects/__init__.py
0 → 100644
浏览文件 @
d0bca198
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddleaudio/utils/__init__.py
→
paddleaudio/
paddleaudio/
utils/__init__.py
浏览文件 @
d0bca198
...
...
@@ -11,8 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.download
import
*
from
.env
import
*
from
.error
import
*
from
.log
import
*
from
.time
import
*
from
.download
import
decompress
from
.download
import
download_and_decompress
from
.download
import
load_state_dict_from_url
from
.env
import
DATA_HOME
from
.env
import
MODEL_HOME
from
.env
import
PPAUDIO_HOME
from
.env
import
USER_HOME
from
.error
import
ParameterError
from
.log
import
Logger
from
.log
import
logger
from
.time
import
seconds_to_hms
from
.time
import
Timer
paddleaudio/utils/download.py
→
paddleaudio/
paddleaudio/
utils/download.py
浏览文件 @
d0bca198
...
...
@@ -22,6 +22,12 @@ from .log import logger
download
.
logger
=
logger
__all__
=
[
'decompress'
,
'download_and_decompress'
,
'load_state_dict_from_url'
,
]
def
decompress
(
file
:
str
):
"""
...
...
paddleaudio/utils/env.py
→
paddleaudio/
paddleaudio/
utils/env.py
浏览文件 @
d0bca198
...
...
@@ -20,6 +20,13 @@ PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. D
'''
import
os
__all__
=
[
'USER_HOME'
,
'PPAUDIO_HOME'
,
'MODEL_HOME'
,
'DATA_HOME'
,
]
def
_get_user_home
():
return
os
.
path
.
expanduser
(
'~'
)
...
...
paddleaudio/utils/error.py
→
paddleaudio/
paddleaudio/
utils/error.py
浏览文件 @
d0bca198
文件已移动
paddleaudio/utils/log.py
→
paddleaudio/
paddleaudio/
utils/log.py
浏览文件 @
d0bca198
...
...
@@ -19,7 +19,10 @@ import time
import
colorlog
loggers
=
{}
__all__
=
[
'Logger'
,
'logger'
,
]
log_config
=
{
'DEBUG'
:
{
...
...
paddleaudio/utils/time.py
→
paddleaudio/
paddleaudio/
utils/time.py
浏览文件 @
d0bca198
...
...
@@ -14,6 +14,11 @@
import
math
import
time
__all__
=
[
'Timer'
,
'seconds_to_hms'
,
]
class
Timer
(
object
):
'''Calculate running speed and estimated time of arrival (ETA)'''
...
...
setup_audio
.py
→
paddleaudio/setup
.py
浏览文件 @
d0bca198
...
...
@@ -14,7 +14,7 @@
import
setuptools
# set the version here
VERSION
=
'0.
1
.0'
VERSION
=
'0.
2
.0'
def
write_version_py
(
filename
=
'paddleaudio/__init__.py'
):
...
...
@@ -59,6 +59,8 @@ setuptools.setup(
'resampy >= 0.2.2'
,
'soundfile >= 0.9.0'
,
'colorlog'
,
'dtaidistance >= 2.3.6'
,
'mcd >= 0.4'
,
],
)
remove_version_py
()
paddleaudio/tests/.gitkeep
0 → 100644
浏览文件 @
d0bca198
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录