Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
c437a7c5
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c437a7c5
编写于
2月 25, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor paddleaudio, test=doc
上级
a9422260
变更
30
隐藏空白更改
内联
并排
Showing
30 changed file
with
234 addition
and
249 deletion
+234
-249
.gitignore
.gitignore
+1
-0
paddleaudio/CHANGELOG.md
paddleaudio/CHANGELOG.md
+3
-0
paddleaudio/features/augment.py
paddleaudio/features/augment.py
+0
-170
paddleaudio/paddleaudio/__init__.py
paddleaudio/paddleaudio/__init__.py
+0
-1
paddleaudio/paddleaudio/backends/__init__.py
paddleaudio/paddleaudio/backends/__init__.py
+0
-2
paddleaudio/paddleaudio/backends/soundfile_backend.py
paddleaudio/paddleaudio/backends/soundfile_backend.py
+0
-0
paddleaudio/paddleaudio/backends/sox_backend.py
paddleaudio/paddleaudio/backends/sox_backend.py
+0
-0
paddleaudio/paddleaudio/datasets/__init__.py
paddleaudio/paddleaudio/datasets/__init__.py
+0
-0
paddleaudio/paddleaudio/datasets/dataset.py
paddleaudio/paddleaudio/datasets/dataset.py
+0
-0
paddleaudio/paddleaudio/datasets/esc50.py
paddleaudio/paddleaudio/datasets/esc50.py
+0
-0
paddleaudio/paddleaudio/datasets/gtzan.py
paddleaudio/paddleaudio/datasets/gtzan.py
+0
-0
paddleaudio/paddleaudio/datasets/tess.py
paddleaudio/paddleaudio/datasets/tess.py
+0
-0
paddleaudio/paddleaudio/datasets/urban_sound.py
paddleaudio/paddleaudio/datasets/urban_sound.py
+0
-0
paddleaudio/paddleaudio/features/__init__.py
paddleaudio/paddleaudio/features/__init__.py
+4
-3
paddleaudio/paddleaudio/features/librosa.py
paddleaudio/paddleaudio/features/librosa.py
+1
-1
paddleaudio/paddleaudio/functional/__init__.py
paddleaudio/paddleaudio/functional/__init__.py
+0
-0
paddleaudio/paddleaudio/functional/functional.py
paddleaudio/paddleaudio/functional/functional.py
+153
-2
paddleaudio/paddleaudio/functional/window.py
paddleaudio/paddleaudio/functional/window.py
+29
-15
paddleaudio/paddleaudio/io/__init__.py
paddleaudio/paddleaudio/io/__init__.py
+6
-0
paddleaudio/paddleaudio/io/audio.py
paddleaudio/paddleaudio/io/audio.py
+0
-0
paddleaudio/paddleaudio/kaldi/__init__.py
paddleaudio/paddleaudio/kaldi/__init__.py
+0
-0
paddleaudio/paddleaudio/sox_effects/__init__.py
paddleaudio/paddleaudio/sox_effects/__init__.py
+0
-0
paddleaudio/paddleaudio/utils/__init__.py
paddleaudio/paddleaudio/utils/__init__.py
+17
-5
paddleaudio/paddleaudio/utils/download.py
paddleaudio/paddleaudio/utils/download.py
+5
-0
paddleaudio/paddleaudio/utils/env.py
paddleaudio/paddleaudio/utils/env.py
+6
-0
paddleaudio/paddleaudio/utils/error.py
paddleaudio/paddleaudio/utils/error.py
+0
-0
paddleaudio/paddleaudio/utils/log.py
paddleaudio/paddleaudio/utils/log.py
+4
-1
paddleaudio/paddleaudio/utils/time.py
paddleaudio/paddleaudio/utils/time.py
+4
-0
paddleaudio/setup.py
paddleaudio/setup.py
+1
-1
requirements.txt
requirements.txt
+0
-48
未找到文件。
.gitignore
浏览文件 @
c437a7c5
...
@@ -30,5 +30,6 @@ tools/OpenBLAS/
...
@@ -30,5 +30,6 @@ tools/OpenBLAS/
tools/Miniconda3-latest-Linux-x86_64.sh
tools/Miniconda3-latest-Linux-x86_64.sh
tools/activate_python.sh
tools/activate_python.sh
tools/miniconda.sh
tools/miniconda.sh
tools/CRF++-0.58/
*output/
*output/
paddleaudio/CHANGELOG.md
浏览文件 @
c437a7c5
# Changelog
# Changelog
Date: 2022-2-25, Author: Hui Zhang.
-
Refactor architecture.
\ No newline at end of file
paddleaudio/features/augment.py
已删除
100644 → 0
浏览文件 @
a9422260
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
typing
import
List
import
numpy
as
np
from
numpy
import
ndarray
as
array
from
..backends
import
depth_convert
from
..utils
import
ParameterError
# Names exported by `from <this module> import *`.
# The helpers `randint` and `rand` defined below are not listed here.
__all__ = [
    'depth_augment',
    'spect_augment',
    'random_crop1d',
    'random_crop2d',
    'adaptive_spect_augment',
]
def randint(high: int) -> int:
    """Draw one random integer from the half-open range [0, high).

    Helper for the random data augmentation routines in this module.

    Args:
        high: Exclusive upper bound of the range.

    Returns:
        A built-in ``int`` drawn with ``np.random.randint``, so the result
        follows NumPy's global random state.
    """
    sample = np.random.randint(low=0, high=high)
    return int(sample)
def rand() -> float:
    """Draw one random floating-point number from the range [0, 1).

    Helper for the random data augmentation routines in this module.

    Returns:
        A built-in ``float`` drawn with ``np.random.rand``, so the result
        follows NumPy's global random state.
    """
    # Ask NumPy for a scalar instead of a 1-element array: converting an
    # array with ndim > 0 to a Python float is deprecated in recent NumPy.
    return float(np.random.rand())
def depth_augment(y: array,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> array:
    """Simulate quantization distortion by a bit-depth round trip.

    One entry of ``choices`` is picked at random according to ``probs``.
    The signal is converted to that depth with ``depth_convert`` and then
    converted back to its original dtype.

    Args:
        y: Input signal; its ``dtype`` is the depth that is restored.
        choices: Candidate target depths (the defaults are never mutated).
        probs: Selection probability for each entry of ``choices``.

    Returns:
        The signal after the round trip, in the original dtype of ``y``.

    Raises:
        AssertionError: If ``choices`` and ``probs`` differ in length.
    """
    n_choices, n_probs = len(choices), len(probs)
    assert n_probs == n_choices, \
        'number of choices {} must be equal to size of probs {}'.format(
            n_choices, n_probs)

    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype
    # Down-convert, then restore the source depth in one expression.
    return depth_convert(depth_convert(y, target_depth), original_depth)
def adaptive_spect_augment(spect: array, tempo_axis: int=0,
                           level: float=0.1) -> array:
    """Zero out random time and frequency bands, scaled by ``level``.

    Band widths are ``int(size * level * 0.5)`` of the respective axis and
    ``int(10 * level)`` bands are drawn per axis, so ``level=0`` leaves the
    input untouched. All time bands are drawn before any frequency band.

    Args:
        spect: 2-D spectrogram. It is modified in place.
        tempo_axis: ``0`` if time runs along the first axis; any other value
            means time runs along the second axis.
        level: Augmentation strength; the original docstring gives the
            range as 0 to 1.

    Returns:
        The same ``spect`` object, with the selected bands set to 0.

    Raises:
        AssertionError: If ``spect`` is not 2-dimensional.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    rows, cols = spect.shape
    time_on_rows = tempo_axis == 0
    if time_on_rows:
        n_time, n_freq = rows, cols
    else:
        n_time, n_freq = cols, rows

    time_width = int(n_time * level * 0.5)
    freq_width = int(n_freq * level * 0.5)
    time_bands = int(10 * level)
    freq_bands = int(10 * level)

    # Time bands first, so random numbers are consumed in a fixed order.
    for _ in range(time_bands):
        begin = int(np.random.randint(0, high=n_time - time_width))
        if time_on_rows:
            spect[begin:begin + time_width, :] = 0
        else:
            spect[:, begin:begin + time_width] = 0

    for _ in range(freq_bands):
        begin = int(np.random.randint(0, high=n_freq - freq_width))
        if time_on_rows:
            spect[:, begin:begin + freq_width] = 0
        else:
            spect[begin:begin + freq_width, :] = 0

    return spect
def spect_augment(spect: array,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> array:
    """Zero out random bands along both the time and frequency axes.

    The number of bands per axis is drawn from ``[0, max_*_mask)`` and one
    width per axis from ``[0, max_*_mask_width)``. All time bands are drawn
    before any frequency band.

    Args:
        spect: 2-D spectrogram. It is modified in place.
        tempo_axis: ``0`` if time runs along the first axis; any other value
            means time runs along the second axis.
        max_time_mask: Exclusive upper bound on the number of time bands.
        max_freq_mask: Exclusive upper bound on the number of frequency bands.
        max_time_mask_width: Exclusive upper bound on the time band width.
        max_freq_mask_width: Exclusive upper bound on the frequency band width.

    Returns:
        The same ``spect`` object, with the selected bands set to 0.

    Raises:
        AssertionError: If ``spect`` is not 2-dimensional.
        ValueError: From NumPy if one of the ``max_*`` bounds is not positive.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
    else:
        nf, nt = spect.shape

    num_time_mask = int(np.random.randint(0, high=max_time_mask))
    num_freq_mask = int(np.random.randint(0, high=max_freq_mask))
    time_mask_width = int(np.random.randint(0, high=max_time_mask_width))
    freq_mask_width = int(np.random.randint(0, high=max_freq_mask_width))

    # A drawn width can reach or exceed the axis length on short inputs,
    # which made the start-index draw fail with "high <= 0". Clamp so at
    # least one start position exists. Widths already smaller than the axis
    # are unchanged, so results on such inputs are the same as before.
    time_mask_width = min(time_mask_width, max(nt - 1, 0))
    freq_mask_width = min(freq_mask_width, max(nf - 1, 0))
    if nt == 0:
        num_time_mask = 0  # empty axis: nothing to mask
    if nf == 0:
        num_freq_mask = 0

    time_on_rows = tempo_axis == 0
    for _ in range(num_time_mask):
        start = int(np.random.randint(0, high=nt - time_mask_width))
        if time_on_rows:
            spect[start:start + time_mask_width, :] = 0
        else:
            spect[:, start:start + time_mask_width] = 0

    for _ in range(num_freq_mask):
        start = int(np.random.randint(0, high=nf - freq_mask_width))
        if time_on_rows:
            spect[:, start:start + freq_mask_width] = 0
        else:
            spect[start:start + freq_mask_width, :] = 0

    return spect
def random_crop1d(y: array, crop_len: int) -> array:
    """Randomly crop a contiguous segment from a 1-D signal.

    Args:
        y: 1-D input signal, typically a sound waveform.
        crop_len: Length of the segment to return.

    Returns:
        ``y[idx:idx + crop_len]`` for a start index ``idx`` drawn uniformly
        from every valid position, ``0`` through ``len(y) - crop_len``.

    Raises:
        ParameterError: If ``y`` is not 1-dimensional.
        ValueError: From NumPy if ``crop_len`` is larger than ``len(y)``.
    """
    if y.ndim != 1:
        # The original check held only a bare string and never rejected input.
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    # "+ 1" makes the last start position reachable and lets
    # crop_len == len(y) return the whole signal instead of failing.
    idx = int(np.random.randint(0, high=n - crop_len + 1))
    return y[idx:idx + crop_len]
def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array:
    """Randomly crop an array along its temporal axis.

    Intended for a 2-D time-frequency input such as a spectrogram. Only the
    axis given by ``tempo_axis`` is cropped; all other axes are kept whole.

    Args:
        s: Input array.
        crop_len: Length of the crop along ``tempo_axis``.
        tempo_axis: Index of the axis to crop.

    Returns:
        The slice of ``s`` covering ``crop_len`` consecutive positions along
        ``tempo_axis``, with the start drawn uniformly from every valid
        position.

    Raises:
        ParameterError: If ``tempo_axis >= s.ndim``.
        ValueError: From NumPy if ``crop_len`` exceeds the axis length.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    # "+ 1" makes the last start position reachable and lets
    # crop_len == n return the whole axis instead of failing.
    idx = int(np.random.randint(0, high=n - crop_len + 1))
    sli = [slice(None)] * s.ndim
    sli[tempo_axis] = slice(idx, idx + crop_len)
    return s[tuple(sli)]
paddleaudio/
backends
/__init__.py
→
paddleaudio/
paddleaudio
/__init__.py
浏览文件 @
c437a7c5
...
@@ -11,4 +11,3 @@
...
@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.audio
import
*
paddleaudio/__init__.py
→
paddleaudio/
paddleaudio/backends/
__init__.py
浏览文件 @
c437a7c5
...
@@ -11,5 +11,3 @@
...
@@ -11,5 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.backends
import
*
from
.features
import
*
paddleaudio/paddleaudio/backends/soundfile_backend.py
0 → 100644
浏览文件 @
c437a7c5
paddleaudio/paddleaudio/backends/sox_backend.py
0 → 100644
浏览文件 @
c437a7c5
paddleaudio/datasets/__init__.py
→
paddleaudio/
paddleaudio/
datasets/__init__.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/datasets/dataset.py
→
paddleaudio/
paddleaudio/
datasets/dataset.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/datasets/esc50.py
→
paddleaudio/
paddleaudio/
datasets/esc50.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/datasets/gtzan.py
→
paddleaudio/
paddleaudio/
datasets/gtzan.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/datasets/tess.py
→
paddleaudio/
paddleaudio/
datasets/tess.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/datasets/urban_sound.py
→
paddleaudio/
paddleaudio/
datasets/urban_sound.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/features/__init__.py
→
paddleaudio/
paddleaudio/
features/__init__.py
浏览文件 @
c437a7c5
...
@@ -11,6 +11,7 @@
...
@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.augment
import
*
from
.core
import
*
from
.librosa
import
Spectrogram
from
.spectrum
import
*
from
.librosa
import
MelSpectrogram
from
.librosa
import
LogMelSpectrogram
\ No newline at end of file
paddleaudio/
features/spectrum
.py
→
paddleaudio/
paddleaudio/features/librosa
.py
浏览文件 @
c437a7c5
...
@@ -19,7 +19,7 @@ from typing import Union
...
@@ -19,7 +19,7 @@ from typing import Union
import
paddle
import
paddle
import
paddle.nn
as
nn
import
paddle.nn
as
nn
from
.window
import
get_window
from
.
.functional.
window
import
get_window
__all__
=
[
__all__
=
[
'Spectrogram'
,
'Spectrogram'
,
...
...
paddleaudio/paddleaudio/functional/__init__.py
0 → 100644
浏览文件 @
c437a7c5
paddleaudio/
features/core
.py
→
paddleaudio/
paddleaudio/functional/functional
.py
浏览文件 @
c437a7c5
...
@@ -21,11 +21,14 @@ import numpy as np
...
@@ -21,11 +21,14 @@ import numpy as np
import
scipy
import
scipy
from
numpy
import
ndarray
as
array
from
numpy
import
ndarray
as
array
from
numpy.lib.stride_tricks
import
as_strided
from
numpy.lib.stride_tricks
import
as_strided
from
scipy
.signal
import
get_window
from
scipy
import
signal
from
..utils
import
ParameterError
from
..utils
import
ParameterError
from
..backends
import
depth_convert
__all__
=
[
__all__
=
[
# dsp
'stft'
,
'stft'
,
'mfcc'
,
'mfcc'
,
'hz_to_mel'
,
'hz_to_mel'
,
...
@@ -38,6 +41,12 @@ __all__ = [
...
@@ -38,6 +41,12 @@ __all__ = [
'spectrogram'
,
'spectrogram'
,
'mu_encode'
,
'mu_encode'
,
'mu_decode'
,
'mu_decode'
,
# augmentation
'depth_augment'
,
'spect_augment'
,
'random_crop1d'
,
'random_crop2d'
,
'adaptive_spect_augment'
,
]
]
...
@@ -303,7 +312,7 @@ def stft(x: array,
...
@@ -303,7 +312,7 @@ def stft(x: array,
if
hop_length
is
None
:
if
hop_length
is
None
:
hop_length
=
int
(
win_length
//
4
)
hop_length
=
int
(
win_length
//
4
)
fft_window
=
get_window
(
window
,
win_length
,
fftbins
=
True
)
fft_window
=
signal
.
get_window
(
window
,
win_length
,
fftbins
=
True
)
# Pad the window out to n_fft size
# Pad the window out to n_fft size
fft_window
=
pad_center
(
fft_window
,
n_fft
)
fft_window
=
pad_center
(
fft_window
,
n_fft
)
...
@@ -576,3 +585,145 @@ def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array:
...
@@ -576,3 +585,145 @@ def mu_decode(y: array, mu: int=255, quantized: bool=True) -> array:
y
=
y
*
2
/
mu
-
1
y
=
y
*
2
/
mu
-
1
x
=
np
.
sign
(
y
)
/
mu
*
((
1
+
mu
)
**
np
.
abs
(
y
)
-
1
)
x
=
np
.
sign
(
y
)
/
mu
*
((
1
+
mu
)
**
np
.
abs
(
y
)
-
1
)
return
x
return
x
def randint(high: int) -> int:
    """Draw one random integer from the half-open range [0, high).

    Helper for the random data augmentation routines in this module.

    Args:
        high: Exclusive upper bound of the range.

    Returns:
        A built-in ``int`` drawn with ``np.random.randint``, so the result
        follows NumPy's global random state.
    """
    sample = np.random.randint(low=0, high=high)
    return int(sample)
def rand() -> float:
    """Draw one random floating-point number from the range [0, 1).

    Helper for the random data augmentation routines in this module.

    Returns:
        A built-in ``float`` drawn with ``np.random.rand``, so the result
        follows NumPy's global random state.
    """
    # Ask NumPy for a scalar instead of a 1-element array: converting an
    # array with ndim > 0 to a Python float is deprecated in recent NumPy.
    return float(np.random.rand())
def depth_augment(y: array,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> array:
    """Simulate quantization distortion by a bit-depth round trip.

    One entry of ``choices`` is picked at random according to ``probs``.
    The signal is converted to that depth with ``depth_convert`` and then
    converted back to its original dtype.

    Args:
        y: Input signal; its ``dtype`` is the depth that is restored.
        choices: Candidate target depths (the defaults are never mutated).
        probs: Selection probability for each entry of ``choices``.

    Returns:
        The signal after the round trip, in the original dtype of ``y``.

    Raises:
        AssertionError: If ``choices`` and ``probs`` differ in length.
    """
    n_choices, n_probs = len(choices), len(probs)
    assert n_probs == n_choices, \
        'number of choices {} must be equal to size of probs {}'.format(
            n_choices, n_probs)

    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype
    # Down-convert, then restore the source depth in one expression.
    return depth_convert(depth_convert(y, target_depth), original_depth)
def adaptive_spect_augment(spect: array, tempo_axis: int=0,
                           level: float=0.1) -> array:
    """Zero out random time and frequency bands, scaled by ``level``.

    Band widths are ``int(size * level * 0.5)`` of the respective axis and
    ``int(10 * level)`` bands are drawn per axis, so ``level=0`` leaves the
    input untouched. All time bands are drawn before any frequency band.

    Args:
        spect: 2-D spectrogram. It is modified in place.
        tempo_axis: ``0`` if time runs along the first axis; any other value
            means time runs along the second axis.
        level: Augmentation strength; the original docstring gives the
            range as 0 to 1.

    Returns:
        The same ``spect`` object, with the selected bands set to 0.

    Raises:
        AssertionError: If ``spect`` is not 2-dimensional.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    rows, cols = spect.shape
    time_on_rows = tempo_axis == 0
    if time_on_rows:
        n_time, n_freq = rows, cols
    else:
        n_time, n_freq = cols, rows

    time_width = int(n_time * level * 0.5)
    freq_width = int(n_freq * level * 0.5)
    time_bands = int(10 * level)
    freq_bands = int(10 * level)

    # Time bands first, so random numbers are consumed in a fixed order.
    for _ in range(time_bands):
        begin = int(np.random.randint(0, high=n_time - time_width))
        if time_on_rows:
            spect[begin:begin + time_width, :] = 0
        else:
            spect[:, begin:begin + time_width] = 0

    for _ in range(freq_bands):
        begin = int(np.random.randint(0, high=n_freq - freq_width))
        if time_on_rows:
            spect[:, begin:begin + freq_width] = 0
        else:
            spect[begin:begin + freq_width, :] = 0

    return spect
def spect_augment(spect: array,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> array:
    """Zero out random bands along both the time and frequency axes.

    The number of bands per axis is drawn from ``[0, max_*_mask)`` and one
    width per axis from ``[0, max_*_mask_width)``. All time bands are drawn
    before any frequency band.

    Args:
        spect: 2-D spectrogram. It is modified in place.
        tempo_axis: ``0`` if time runs along the first axis; any other value
            means time runs along the second axis.
        max_time_mask: Exclusive upper bound on the number of time bands.
        max_freq_mask: Exclusive upper bound on the number of frequency bands.
        max_time_mask_width: Exclusive upper bound on the time band width.
        max_freq_mask_width: Exclusive upper bound on the frequency band width.

    Returns:
        The same ``spect`` object, with the selected bands set to 0.

    Raises:
        AssertionError: If ``spect`` is not 2-dimensional.
        ValueError: From NumPy if one of the ``max_*`` bounds is not positive.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
    else:
        nf, nt = spect.shape

    num_time_mask = int(np.random.randint(0, high=max_time_mask))
    num_freq_mask = int(np.random.randint(0, high=max_freq_mask))
    time_mask_width = int(np.random.randint(0, high=max_time_mask_width))
    freq_mask_width = int(np.random.randint(0, high=max_freq_mask_width))

    # A drawn width can reach or exceed the axis length on short inputs,
    # which made the start-index draw fail with "high <= 0". Clamp so at
    # least one start position exists. Widths already smaller than the axis
    # are unchanged, so results on such inputs are the same as before.
    time_mask_width = min(time_mask_width, max(nt - 1, 0))
    freq_mask_width = min(freq_mask_width, max(nf - 1, 0))
    if nt == 0:
        num_time_mask = 0  # empty axis: nothing to mask
    if nf == 0:
        num_freq_mask = 0

    time_on_rows = tempo_axis == 0
    for _ in range(num_time_mask):
        start = int(np.random.randint(0, high=nt - time_mask_width))
        if time_on_rows:
            spect[start:start + time_mask_width, :] = 0
        else:
            spect[:, start:start + time_mask_width] = 0

    for _ in range(num_freq_mask):
        start = int(np.random.randint(0, high=nf - freq_mask_width))
        if time_on_rows:
            spect[:, start:start + freq_mask_width] = 0
        else:
            spect[start:start + freq_mask_width, :] = 0

    return spect
def random_crop1d(y: array, crop_len: int) -> array:
    """Randomly crop a contiguous segment from a 1-D signal.

    Args:
        y: 1-D input signal, typically a sound waveform.
        crop_len: Length of the segment to return.

    Returns:
        ``y[idx:idx + crop_len]`` for a start index ``idx`` drawn uniformly
        from every valid position, ``0`` through ``len(y) - crop_len``.

    Raises:
        ParameterError: If ``y`` is not 1-dimensional.
        ValueError: From NumPy if ``crop_len`` is larger than ``len(y)``.
    """
    if y.ndim != 1:
        # The original check held only a bare string and never rejected input.
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    # "+ 1" makes the last start position reachable and lets
    # crop_len == len(y) return the whole signal instead of failing.
    idx = int(np.random.randint(0, high=n - crop_len + 1))
    return y[idx:idx + crop_len]
def random_crop2d(s: array, crop_len: int, tempo_axis: int=0) -> array:
    """Randomly crop an array along its temporal axis.

    Intended for a 2-D time-frequency input such as a spectrogram. Only the
    axis given by ``tempo_axis`` is cropped; all other axes are kept whole.

    Args:
        s: Input array.
        crop_len: Length of the crop along ``tempo_axis``.
        tempo_axis: Index of the axis to crop.

    Returns:
        The slice of ``s`` covering ``crop_len`` consecutive positions along
        ``tempo_axis``, with the start drawn uniformly from every valid
        position.

    Raises:
        ParameterError: If ``tempo_axis >= s.ndim``.
        ValueError: From NumPy if ``crop_len`` exceeds the axis length.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    # "+ 1" makes the last start position reachable and lets
    # crop_len == n return the whole axis instead of failing.
    idx = int(np.random.randint(0, high=n - crop_len + 1))
    sli = [slice(None)] * s.ndim
    sli[tempo_axis] = slice(idx, idx + crop_len)
    return s[tuple(sli)]
\ No newline at end of file
paddleaudio/
features
/window.py
→
paddleaudio/
paddleaudio/functional
/window.py
浏览文件 @
c437a7c5
...
@@ -20,6 +20,19 @@ from paddle import Tensor
...
@@ -20,6 +20,19 @@ from paddle import Tensor
__all__
=
[
__all__
=
[
'get_window'
,
'get_window'
,
# windows
'taylor'
,
'hamming'
,
'hann'
,
'tukey'
,
'kaiser'
,
'gaussian'
,
'exponential'
,
'triang'
,
'bohman'
,
'blackman'
,
'cosine'
,
]
]
...
@@ -73,6 +86,21 @@ def general_gaussian(M: int, p, sig, sym: bool=True,
...
@@ -73,6 +86,21 @@ def general_gaussian(M: int, p, sig, sym: bool=True,
return
_truncate
(
w
,
needs_trunc
)
return
_truncate
(
w
,
needs_trunc
)
def general_cosine(M: int, a: float, sym: bool=True,
                   dtype: str='float64') -> Tensor:
    """Build a window as a weighted sum of cosine terms.

    Intended to be consistent with scipy.signal.windows.general_cosine().

    Args:
        M: Window length.
        a: Cosine weights. Annotated as ``float`` but used as an indexable
            sequence: ``a[k]`` weights the term ``cos(k * x)``.
        sym: Forwarded to ``_extend``.
        dtype: dtype of the tensors created here.

    Returns:
        The window tensor, passed through ``_truncate``. When
        ``_len_guards(M)`` is true, a tensor of ones of shape ``(M,)``.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)

    M, needs_trunc = _extend(M, sym)
    grid = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    window = paddle.zeros((M, ), dtype=dtype)
    for order in range(len(a)):
        weight = a[order]
        window += weight * paddle.cos(order * grid)

    return _truncate(window, needs_trunc)
def
general_hamming
(
M
:
int
,
alpha
:
float
,
sym
:
bool
=
True
,
def
general_hamming
(
M
:
int
,
alpha
:
float
,
sym
:
bool
=
True
,
dtype
:
str
=
'float64'
)
->
Tensor
:
dtype
:
str
=
'float64'
)
->
Tensor
:
"""Compute a generalized Hamming window.
"""Compute a generalized Hamming window.
...
@@ -143,21 +171,6 @@ def taylor(M: int,
...
@@ -143,21 +171,6 @@ def taylor(M: int,
return
_truncate
(
w
,
needs_trunc
)
return
_truncate
(
w
,
needs_trunc
)
def general_cosine(M: int, a: float, sym: bool=True,
                   dtype: str='float64') -> Tensor:
    """Build a window as a weighted sum of cosine terms.

    Intended to be consistent with scipy.signal.windows.general_cosine().

    Args:
        M: Window length.
        a: Cosine weights. Annotated as ``float`` but used as an indexable
            sequence: ``a[k]`` weights the term ``cos(k * x)``.
        sym: Forwarded to ``_extend``.
        dtype: dtype of the tensors created here.

    Returns:
        The window tensor, passed through ``_truncate``. When
        ``_len_guards(M)`` is true, a tensor of ones of shape ``(M,)``.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)

    M, needs_trunc = _extend(M, sym)
    grid = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    window = paddle.zeros((M, ), dtype=dtype)
    for order in range(len(a)):
        weight = a[order]
        window += weight * paddle.cos(order * grid)

    return _truncate(window, needs_trunc)
def
hamming
(
M
:
int
,
sym
:
bool
=
True
,
dtype
:
str
=
'float64'
)
->
Tensor
:
def
hamming
(
M
:
int
,
sym
:
bool
=
True
,
dtype
:
str
=
'float64'
)
->
Tensor
:
"""Compute a Hamming window.
"""Compute a Hamming window.
The Hamming window is a taper formed by using a raised cosine with
The Hamming window is a taper formed by using a raised cosine with
...
@@ -375,6 +388,7 @@ def cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
...
@@ -375,6 +388,7 @@ def cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
return
_truncate
(
w
,
needs_trunc
)
return
_truncate
(
w
,
needs_trunc
)
## factory function
def
get_window
(
window
:
Union
[
str
,
Tuple
[
str
,
float
]],
def
get_window
(
window
:
Union
[
str
,
Tuple
[
str
,
float
]],
win_length
:
int
,
win_length
:
int
,
fftbins
:
bool
=
True
,
fftbins
:
bool
=
True
,
...
...
paddleaudio/paddleaudio/io/__init__.py
0 → 100644
浏览文件 @
c437a7c5
from
.audio
import
save_wav
from
.audio
import
load
from
.audio
import
normalize
from
.audio
import
to_mono
from
.audio
import
resample
from
.audio
import
depth_convert
\ No newline at end of file
paddleaudio/
backends
/audio.py
→
paddleaudio/
paddleaudio/io
/audio.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/paddleaudio/kaldi/__init__.py
0 → 100644
浏览文件 @
c437a7c5
paddleaudio/paddleaudio/sox_effects/__init__.py
0 → 100644
浏览文件 @
c437a7c5
paddleaudio/utils/__init__.py
→
paddleaudio/
paddleaudio/
utils/__init__.py
浏览文件 @
c437a7c5
...
@@ -11,8 +11,20 @@
...
@@ -11,8 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.download
import
*
from
.env
import
*
from
.env
import
USER_HOME
from
.error
import
*
from
.env
import
PPAUDIO_HOME
from
.log
import
*
from
.env
import
MODEL_HOME
from
.time
import
*
from
.env
import
DATA_HOME
from
.download
import
decompress
from
.download
import
download_and_decompress
from
.download
import
load_state_dict_from_url
from
.error
import
ParameterError
from
.log
import
logger
from
.log
import
Logger
from
.time
import
Timer
from
.time
import
seconds_to_hms
paddleaudio/utils/download.py
→
paddleaudio/
paddleaudio/
utils/download.py
浏览文件 @
c437a7c5
...
@@ -22,6 +22,11 @@ from .log import logger
...
@@ -22,6 +22,11 @@ from .log import logger
download
.
logger
=
logger
download
.
logger
=
logger
__all__
=
[
'decompress'
,
'download_and_decompress'
,
'load_state_dict_from_url'
,
]
def
decompress
(
file
:
str
):
def
decompress
(
file
:
str
):
"""
"""
...
...
paddleaudio/utils/env.py
→
paddleaudio/
paddleaudio/
utils/env.py
浏览文件 @
c437a7c5
...
@@ -20,6 +20,12 @@ PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. D
...
@@ -20,6 +20,12 @@ PPAUDIO_HOME --> the root directory for storing PaddleAudio related data. D
'''
'''
import
os
import
os
__all__
=
[
'USER_HOME'
,
'PPAUDIO_HOME'
,
'MODEL_HOME'
,
'DATA_HOME'
,
]
def
_get_user_home
():
def
_get_user_home
():
return
os
.
path
.
expanduser
(
'~'
)
return
os
.
path
.
expanduser
(
'~'
)
...
...
paddleaudio/utils/error.py
→
paddleaudio/
paddleaudio/
utils/error.py
浏览文件 @
c437a7c5
文件已移动
paddleaudio/utils/log.py
→
paddleaudio/
paddleaudio/
utils/log.py
浏览文件 @
c437a7c5
...
@@ -19,7 +19,10 @@ import time
...
@@ -19,7 +19,10 @@ import time
import
colorlog
import
colorlog
loggers
=
{}
__all__
=
[
'Logger'
,
'logger'
,
]
log_config
=
{
log_config
=
{
'DEBUG'
:
{
'DEBUG'
:
{
...
...
paddleaudio/utils/time.py
→
paddleaudio/
paddleaudio/
utils/time.py
浏览文件 @
c437a7c5
...
@@ -14,6 +14,10 @@
...
@@ -14,6 +14,10 @@
import
math
import
math
import
time
import
time
__all__
=
[
'Timer'
,
'seconds_to_hms'
,
]
class
Timer
(
object
):
class
Timer
(
object
):
'''Calculate runing speed and estimated time of arrival(ETA)'''
'''Calculate runing speed and estimated time of arrival(ETA)'''
...
...
setup_audio
.py
→
paddleaudio/setup
.py
浏览文件 @
c437a7c5
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
import
setuptools
import
setuptools
# set the version here
# set the version here
VERSION
=
'0.
1
.0'
VERSION
=
'0.
2
.0'
def
write_version_py
(
filename
=
'paddleaudio/__init__.py'
):
def
write_version_py
(
filename
=
'paddleaudio/__init__.py'
):
...
...
requirements.txt
已删除
100644 → 0
浏览文件 @
a9422260
ConfigArgParse
coverage
editdistance
g2p_en
g2pM
gpustat
h5py
inflect
jieba
jsonlines
kaldiio
librosa
loguru
matplotlib
nara_wpe
nltk
paddleaudio
paddlenlp
paddlespeech_ctcdecoders
paddlespeech_feat
pandas
phkit
Pillow
praatio
==5.0.0
pre-commit
pybind11
pypi-kenlm
pypinyin
python-dateutil
pyworld
resampy
==0.2.2
sacrebleu
scipy
sentencepiece
~=0.1.96
snakeviz
soundfile
~=0.10
sox
soxbindings
textgrid
timer
tqdm
typeguard
unidecode
visualdl
webrtcvad
yacs
~=0.1.8
yq
zhon
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录