Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
f55c4573
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f55c4573
编写于
6月 30, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
more backend api
上级
da6692c7
变更
19
隐藏空白更改
内联
并排
Showing
19 changed files
with
143 additions
and
134 deletions
+143
-134
cmake/summary.cmake
cmake/summary.cmake
+2
-1
paddlespeech/__init__.py
paddlespeech/__init__.py
+8
-1
paddlespeech/audio/__init__.py
paddlespeech/audio/__init__.py
+19
-3
paddlespeech/audio/_extension.py
paddlespeech/audio/_extension.py
+7
-7
paddlespeech/audio/backends/__init__.py
paddlespeech/audio/backends/__init__.py
+4
-6
paddlespeech/audio/backends/soundfile_backend.py
paddlespeech/audio/backends/soundfile_backend.py
+5
-73
paddlespeech/audio/backends/sox_backend.py
paddlespeech/audio/backends/sox_backend.py
+0
-13
paddlespeech/audio/compliance/librosa.py
paddlespeech/audio/compliance/librosa.py
+1
-1
paddlespeech/audio/datasets/dataset.py
paddlespeech/audio/datasets/dataset.py
+2
-3
paddlespeech/audio/datasets/rirs_noises.py
paddlespeech/audio/datasets/rirs_noises.py
+3
-5
paddlespeech/audio/datasets/voxceleb.py
paddlespeech/audio/datasets/voxceleb.py
+2
-3
paddlespeech/audio/sox_effects/__init__.py
paddlespeech/audio/sox_effects/__init__.py
+0
-13
paddlespeech/audio/utils/__init__.py
paddlespeech/audio/utils/__init__.py
+7
-0
paddlespeech/audio/utils/numeric.py
paddlespeech/audio/utils/numeric.py
+78
-0
paddlespeech/cli/vector/infer.py
paddlespeech/cli/vector/infer.py
+1
-1
paddlespeech/cls/exps/panns/deploy/predict.py
paddlespeech/cls/exps/panns/deploy/predict.py
+1
-1
paddlespeech/server/engine/vector/python/vector_engine.py
paddlespeech/server/engine/vector/python/vector_engine.py
+1
-1
paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
+1
-1
tools/setup_helpers/extension.py
tools/setup_helpers/extension.py
+1
-1
未找到文件。
cmake/summary.cmake
浏览文件 @
f55c4573
...
...
@@ -35,6 +35,7 @@ function (onnx_print_configuration_summary)
message
(
STATUS
" BUILD_ONNX_PYTHON :
${
BUILD_ONNX_PYTHON
}
"
)
message
(
STATUS
" Python version :
${
Python_VERSION
}
"
)
message
(
STATUS
" Python executable :
${
Python_EXECUTABLE
}
"
)
message
(
STATUS
" Python includes :
${
Python_INCLUDE_DIRS
}
"
)
message
(
STATUS
" Python includes :
${
Python_INCLUDE_DIR
}
"
)
message
(
STATUS
" Python libraries :
${
Python_LIBRARY
}
"
)
endfunction
()
\ No newline at end of file
paddlespeech/__init__.py
浏览文件 @
f55c4573
...
...
@@ -12,5 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
_locale
_locale
.
_getdefaultlocale
=
(
lambda
*
args
:
[
'en_US'
,
'utf8'
])
from
.
import
audio
# _init_audio_backend must be called after audio is imported
audio
.
backends
.
utils
.
_init_audio_backend
()
__all__
=
[
"audio"
]
paddlespeech/audio/__init__.py
浏览文件 @
f55c4573
...
...
@@ -11,12 +11,28 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.
import
compliance
from
.
import
datasets
from
.
import
features
from
.
import
functional
from
.
import
io
from
.
import
metric
from
.
import
sox_effects
from
.backends
import
load
from
.backends
import
save
from
.
import
utils
from
._ops
import
ops
from
paddlespeech.audio.backends
import
get_audio_backend
,
list_audio_backends
,
set_audio_backend
# Public names of the package. Every entry needs its own trailing comma:
# adjacent string literals are concatenated by Python, which would silently
# merge two names into a single bogus entry.
__all__ = [
    "io",
    "compliance",
    "datasets",
    "functional",
    "features",
    "utils",
    "ops",
    "list_audio_backends",
    "get_audio_backend",
    "set_audio_backend",
]
\ No newline at end of file
paddlespeech/audio/_extension.py
浏览文件 @
f55c4573
...
...
@@ -44,7 +44,7 @@ def _load_lib(lib: str) -> bool:
path
=
_get_lib_path
(
lib
)
if
not
path
.
exists
():
return
False
paddlespeech
.
ops
.
load_library
(
path
)
paddlespeech
.
audio
.
ops
.
load_library
(
path
)
return
True
...
...
@@ -56,7 +56,7 @@ def _init_ffmpeg():
if
_FFMPEG_INITIALIZED
:
return
if
not
paddlespeech
.
ops
.
paddlleaudio
.
is_ffmpeg_available
():
if
not
paddlespeech
.
audio
.
ops
.
paddlleaudio
.
is_ffmpeg_available
():
raise
RuntimeError
(
"paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
)
...
...
@@ -67,11 +67,11 @@ def _init_ffmpeg():
raise
ImportError
(
"FFmpeg libraries are not found. Please install FFmpeg."
)
from
err
import
paddllespeech
.
_paddlleaudio_ffmpeg
# noqa
import
paddllespeech
.
audio
.
_paddlleaudio_ffmpeg
# noqa
paddlespeech
.
ops
.
paddlleaudio
.
ffmpeg_init
()
if
paddlespeech
.
ops
.
paddlleaudio
.
ffmpeg_get_log_level
()
>
8
:
paddlespeech
.
ops
.
paddlleaudio
.
ffmpeg_set_log_level
(
8
)
paddlespeech
.
audio
.
ops
.
paddlleaudio
.
ffmpeg_init
()
if
paddlespeech
.
audio
.
ops
.
paddlleaudio
.
ffmpeg_get_log_level
()
>
8
:
paddlespeech
.
audio
.
ops
.
paddlleaudio
.
ffmpeg_set_log_level
(
8
)
_FFMPEG_INITIALIZED
=
True
...
...
@@ -84,7 +84,7 @@ def _init_extension():
_load_lib
(
"libpaddleaudio"
)
# This import is for initializing the methods registered via PyBind11
# This has to happen after the base library is loaded
from
paddlespeech
import
_paddleaudio
# noqa
from
paddlespeech
.audio
import
_paddleaudio
# noqa
# Because this part is executed as part of `import torchaudio`, we ignore the
# initialization failure.
...
...
paddlespeech/audio/backends/__init__.py
浏览文件 @
f55c4573
...
...
@@ -11,9 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.soundfile_backend
import
depth_convert
from
.soundfile_backend
import
load
from
.soundfile_backend
import
normalize
from
.soundfile_backend
import
resample
from
.soundfile_backend
import
save
from
.soundfile_backend
import
to_mono
# flake8: noqa
from
.
import
utils
from
.utils
import
get_audio_backend
,
list_audio_backends
,
set_audio_backend
\ No newline at end of file
paddlespeech/audio/backends/soundfile_backend.py
浏览文件 @
f55c4573
...
...
@@ -23,11 +23,11 @@ import soundfile as sf
from
scipy.io
import
wavfile
from
..utils
import
ParameterError
from
..utils
import
depth_convert
__all__
=
[
'resample'
,
'to_mono'
,
'depth_convert'
,
'normalize'
,
'save'
,
'load'
,
...
...
@@ -117,78 +117,6 @@ def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
return
y_out
def
_safe_cast
(
y
:
np
.
ndarray
,
dtype
:
Union
[
type
,
str
])
->
np
.
ndarray
:
"""Data type casting in a safe way, i.e., prevent overflow or underflow.
Args:
y (np.ndarray): Input waveform array in 1D or 2D.
dtype (Union[type, str]): Data type of waveform.
Returns:
np.ndarray: `y` after safe casting.
"""
if
'float'
in
str
(
y
.
dtype
):
return
np
.
clip
(
y
,
np
.
finfo
(
dtype
).
min
,
np
.
finfo
(
dtype
).
max
).
astype
(
dtype
)
else
:
return
np
.
clip
(
y
,
np
.
iinfo
(
dtype
).
min
,
np
.
iinfo
(
dtype
).
max
).
astype
(
dtype
)
def
depth_convert
(
y
:
np
.
ndarray
,
dtype
:
Union
[
type
,
str
])
->
np
.
ndarray
:
"""Convert audio array to target dtype safely. This function convert audio waveform to a target dtype, with addition steps of
preventing overflow/underflow and preserving audio range.
Args:
y (np.ndarray): Input waveform array in 1D or 2D.
dtype (Union[type, str]): Data type of waveform.
Returns:
np.ndarray: `y` after safe casting.
"""
SUPPORT_DTYPE
=
[
'int16'
,
'int8'
,
'float32'
,
'float64'
]
if
y
.
dtype
not
in
SUPPORT_DTYPE
:
raise
ParameterError
(
'Unsupported audio dtype, '
f
'y.dtype is
{
y
.
dtype
}
, supported dtypes are
{
SUPPORT_DTYPE
}
'
)
if
dtype
not
in
SUPPORT_DTYPE
:
raise
ParameterError
(
'Unsupported audio dtype, '
f
'target dtype is
{
dtype
}
, supported dtypes are
{
SUPPORT_DTYPE
}
'
)
if
dtype
==
y
.
dtype
:
return
y
if
dtype
==
'float64'
and
y
.
dtype
==
'float32'
:
return
_safe_cast
(
y
,
dtype
)
if
dtype
==
'float32'
and
y
.
dtype
==
'float64'
:
return
_safe_cast
(
y
,
dtype
)
if
dtype
==
'int16'
or
dtype
==
'int8'
:
if
y
.
dtype
in
[
'float64'
,
'float32'
]:
factor
=
np
.
iinfo
(
dtype
).
max
y
=
np
.
clip
(
y
*
factor
,
np
.
iinfo
(
dtype
).
min
,
np
.
iinfo
(
dtype
).
max
).
astype
(
dtype
)
y
=
y
.
astype
(
dtype
)
else
:
if
dtype
==
'int16'
and
y
.
dtype
==
'int8'
:
factor
=
np
.
iinfo
(
'int16'
).
max
/
np
.
iinfo
(
'int8'
).
max
-
EPS
y
=
y
.
astype
(
'float32'
)
*
factor
y
=
y
.
astype
(
'int16'
)
else
:
# dtype == 'int8' and y.dtype=='int16':
y
=
y
.
astype
(
'int32'
)
*
np
.
iinfo
(
'int8'
).
max
/
\
np
.
iinfo
(
'int16'
).
max
y
=
y
.
astype
(
'int8'
)
if
dtype
in
[
'float32'
,
'float64'
]:
org_dtype
=
y
.
dtype
y
=
y
.
astype
(
dtype
)
/
np
.
iinfo
(
org_dtype
).
max
return
y
def
sound_file_load
(
file
:
os
.
PathLike
,
offset
:
Optional
[
float
]
=
None
,
dtype
:
str
=
'int16'
,
...
...
@@ -323,3 +251,7 @@ def load(
y
=
depth_convert
(
y
,
dtype
)
return
y
,
r
# Stub: `info` never reads `filepath`; it unconditionally raises RuntimeError
# with the message "No audio I/O backend is available.".
def
info
(
filepath
:
str
)
->
None
:
raise
RuntimeError
(
"No audio I/O backend is available."
)
\ No newline at end of file
paddlespeech/audio/backends/sox_backend.py
已删除
100644 → 0
浏览文件 @
da6692c7
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/audio/compliance/librosa.py
浏览文件 @
f55c4573
...
...
@@ -22,7 +22,7 @@ import scipy
from
numpy.lib.stride_tricks
import
as_strided
from
scipy
import
signal
from
..
backend
s
import
depth_convert
from
..
util
s
import
depth_convert
from
..utils
import
ParameterError
__all__
=
[
...
...
paddlespeech/audio/datasets/dataset.py
浏览文件 @
f55c4573
...
...
@@ -16,7 +16,6 @@ from typing import List
import
numpy
as
np
import
paddle
from
..backends
import
load
as
load_audio
from
..compliance.kaldi
import
fbank
as
kaldi_fbank
from
..compliance.kaldi
import
mfcc
as
kaldi_mfcc
from
..compliance.librosa
import
melspectrogram
...
...
@@ -70,9 +69,9 @@ class AudioClassificationDataset(paddle.io.Dataset):
file
,
label
=
self
.
files
[
idx
],
self
.
labels
[
idx
]
if
self
.
sample_rate
is
None
:
waveform
,
sample_rate
=
load_audio
(
file
)
waveform
,
sample_rate
=
paddlespeech
.
audio
.
load
(
file
)
else
:
waveform
,
sample_rate
=
load_audio
(
file
,
sr
=
self
.
sample_rate
)
waveform
,
sample_rate
=
paddlespeech
.
audio
.
load
(
file
,
sr
=
self
.
sample_rate
)
feat_func
=
feat_funcs
[
self
.
feat_type
]
...
...
paddlespeech/audio/datasets/rirs_noises.py
浏览文件 @
f55c4573
...
...
@@ -20,8 +20,6 @@ from typing import List
from
paddle.io
import
Dataset
from
tqdm
import
tqdm
from
..backends
import
load
as
load_audio
from
..backends
import
save
as
save_wav
from
..utils
import
DATA_HOME
from
..utils.download
import
download_and_decompress
from
.dataset
import
feat_funcs
...
...
@@ -105,7 +103,7 @@ class OpenRIRNoise(Dataset):
for
field
in
type
(
sample
).
_fields
:
record
[
field
]
=
getattr
(
sample
,
field
)
waveform
,
sr
=
load_audio
(
record
[
'wav'
])
waveform
,
sr
=
paddlespeech
.
audio
.
load
(
record
[
'wav'
])
assert
self
.
feat_type
in
feat_funcs
.
keys
(),
\
f
"Unknown feat_type:
{
self
.
feat_type
}
, it must be one in
{
list
(
feat_funcs
.
keys
())
}
"
...
...
@@ -128,7 +126,7 @@ class OpenRIRNoise(Dataset):
def
_get_audio_info
(
self
,
wav_file
:
str
,
split_chunks
:
bool
)
->
List
[
List
[
str
]]:
waveform
,
sr
=
load_audio
(
wav_file
)
waveform
,
sr
=
paddlespeech
.
audio
.
load
(
wav_file
)
audio_id
=
wav_file
.
split
(
"/open_rir_noise/"
)[
-
1
].
split
(
"."
)[
0
]
audio_duration
=
waveform
.
shape
[
0
]
/
sr
...
...
@@ -143,7 +141,7 @@ class OpenRIRNoise(Dataset):
end_sample
=
int
(
float
(
e
)
*
sr
)
new_wav_file
=
os
.
path
.
join
(
self
.
base_path
,
audio_id
+
f
'_chunk_
{
idx
+
1
:
02
}
.wav'
)
save_wav
(
waveform
[
start_sample
:
end_sample
],
sr
,
new_wav_file
)
paddlespeech
.
audio
.
save
(
waveform
[
start_sample
:
end_sample
],
sr
,
new_wav_file
)
# id, duration, new_wav
ret
.
append
([
chunk
,
self
.
chunk_duration
,
new_wav_file
])
else
:
# Keep whole audio.
...
...
paddlespeech/audio/datasets/voxceleb.py
浏览文件 @
f55c4573
...
...
@@ -23,7 +23,6 @@ from paddle.io import Dataset
from
pathos.multiprocessing
import
Pool
from
tqdm
import
tqdm
from
..backends
import
load
as
load_audio
from
..utils
import
DATA_HOME
from
..utils
import
decompress
from
..utils.download
import
download_and_decompress
...
...
@@ -192,7 +191,7 @@ class VoxCeleb(Dataset):
for
field
in
type
(
sample
).
_fields
:
record
[
field
]
=
getattr
(
sample
,
field
)
waveform
,
sr
=
load_audio
(
record
[
'wav'
])
waveform
,
sr
=
paddlespeech
.
audio
.
load
(
record
[
'wav'
])
# random select a chunk audio samples from the audio
if
self
.
random_chunk
:
...
...
@@ -231,7 +230,7 @@ class VoxCeleb(Dataset):
def
_get_audio_info
(
self
,
wav_file
:
str
,
split_chunks
:
bool
)
->
List
[
List
[
str
]]:
waveform
,
sr
=
load_audio
(
wav_file
)
waveform
,
sr
=
paddlespeech
.
audio
.
load
(
wav_file
)
spk_id
,
sess_id
,
utt_id
=
wav_file
.
split
(
"/"
)[
-
3
:]
audio_id
=
'-'
.
join
([
spk_id
,
sess_id
,
utt_id
.
split
(
"."
)[
0
]])
audio_duration
=
waveform
.
shape
[
0
]
/
sr
...
...
paddlespeech/audio/sox_effects/__init__.py
已删除
100644 → 0
浏览文件 @
da6692c7
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/audio/utils/__init__.py
浏览文件 @
f55c4573
...
...
@@ -13,11 +13,18 @@
# limitations under the License.
from
...cli.utils
import
DATA_HOME
from
...cli.utils
import
MODEL_HOME
from
.download
import
decompress
from
.download
import
download_and_decompress
from
.download
import
load_state_dict_from_url
from
.error
import
ParameterError
from
.log
import
Logger
from
.log
import
logger
from
.time
import
seconds_to_hms
from
.time
import
Timer
from
.numeric
import
pcm16to32
from
.numeric
import
depth_convert
\ No newline at end of file
paddlespeech/audio/utils/numeric.py
浏览文件 @
f55c4573
...
...
@@ -12,7 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
from
typing
import
Union
__all__
=
[
"pcm16to32"
,
"depth_convert"
]
def
pcm16to32
(
audio
:
np
.
ndarray
)
->
np
.
ndarray
:
"""pcm int16 to float32
...
...
@@ -28,3 +33,76 @@ def pcm16to32(audio: np.ndarray) -> np.ndarray:
bits
=
np
.
iinfo
(
np
.
int16
).
bits
audio
=
audio
/
(
2
**
(
bits
-
1
))
return
audio
def _safe_cast(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Cast ``y`` to ``dtype``, clipping first so out-of-range values saturate.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Target data type of the waveform.

    Returns:
        np.ndarray: ``y`` clipped to the representable range of ``dtype`` and
            then cast to ``dtype``.
    """
    target = np.dtype(dtype)
    # The clipping bounds describe the *target* type, so the finfo/iinfo
    # choice has to follow the target: np.finfo rejects integer types and
    # np.iinfo rejects floating types.
    if np.issubdtype(target, np.floating):
        limits = np.finfo(target)
    else:
        limits = np.iinfo(target)
    return np.clip(y, limits.min, limits.max).astype(target)


def depth_convert(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Convert an audio array to a target dtype safely.

    This function converts an audio waveform to a target dtype, with
    additional steps that prevent overflow/underflow and preserve the audio
    range:

    * float -> float: clipped cast via ``_safe_cast``.
    * float -> int: samples are scaled by the target integer maximum and
      saturated to the target range.
    * int8 <-> int16: samples are rescaled by the ratio of the two maxima.
    * int -> float: samples are divided by the source integer maximum.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Target data type of the waveform, given as a
            dtype name (e.g. ``'int16'``) or a type (e.g. ``np.int16``).

    Returns:
        np.ndarray: ``y`` converted to ``dtype``. ``y`` itself is returned
            when it already has the requested dtype.

    Raises:
        ParameterError: If ``y.dtype`` or ``dtype`` is not one of
            int16, int8, float32, float64.
    """
    SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64']

    source = y.dtype.name
    # Normalize the target to its canonical name so that both 'int16' and
    # np.int16 are accepted. None is excluded explicitly because
    # np.dtype(None) silently resolves to float64.
    try:
        target = None if dtype is None else np.dtype(dtype).name
    except TypeError:
        target = None

    if source not in SUPPORT_DTYPE or target not in SUPPORT_DTYPE:
        # Imported here so the exception type is in scope on the error path.
        from .error import ParameterError
        if source not in SUPPORT_DTYPE:
            raise ParameterError(
                'Unsupported audio dtype, '
                f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')
        raise ParameterError(
            'Unsupported audio dtype, '
            f'target dtype is {dtype}, supported dtypes are {SUPPORT_DTYPE}')

    if target == source:
        return y

    float_names = ('float32', 'float64')

    if source in float_names and target in float_names:
        return _safe_cast(y, target)

    if target in ('int16', 'int8'):
        limits = np.iinfo(target)
        if source in float_names:
            # Scale by the integer maximum, then saturate before the cast.
            return np.clip(y * limits.max, limits.min,
                           limits.max).astype(target)
        if target == 'int16':
            # int8 -> int16. Saturate explicitly: -128 * (32767 / 127) lies
            # below the int16 minimum and would otherwise wrap around.
            factor = np.iinfo('int16').max / np.iinfo('int8').max
            scaled = y.astype('float32') * factor
            return np.clip(scaled, limits.min, limits.max).astype('int16')
        # int16 -> int8: widen first so the multiplication cannot overflow.
        scaled = y.astype('int32') * np.iinfo('int8').max / \
            np.iinfo('int16').max
        return scaled.astype('int8')

    # Remaining case: integer source, float target.
    return y.astype(target) / np.iinfo(source).max
\ No newline at end of file
paddlespeech/cli/vector/infer.py
浏览文件 @
f55c4573
...
...
@@ -27,7 +27,7 @@ from yacs.config import CfgNode
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..utils
import
stats_wrapper
from
paddlespeech.audio
.backends
import
load
as
load_audio
from
paddlespeech.audio
import
load
as
load_audio
from
paddlespeech.audio.compliance.librosa
import
melspectrogram
from
paddlespeech.vector.io.batch
import
feature_normalize
from
paddlespeech.vector.modules.sid_model
import
SpeakerIdetification
...
...
paddlespeech/cls/exps/panns/deploy/predict.py
浏览文件 @
f55c4573
...
...
@@ -18,7 +18,7 @@ import numpy as np
from
paddle
import
inference
from
scipy.special
import
softmax
from
paddlespeech.audio
.backends
import
load
as
load_audio
from
paddlespeech.audio
import
load
as
load_audio
from
paddlespeech.audio.datasets
import
ESC50
from
paddlespeech.audio.features
import
melspectrogram
...
...
paddlespeech/server/engine/vector/python/vector_engine.py
浏览文件 @
f55c4573
...
...
@@ -17,7 +17,7 @@ from collections import OrderedDict
import
numpy
as
np
import
paddle
from
paddlespeech.audio
.backends
import
load
as
load_audio
from
paddlespeech.audio
import
load
as
load_audio
from
paddlespeech.audio.compliance.librosa
import
melspectrogram
from
paddlespeech.cli.log
import
logger
from
paddlespeech.cli.vector.infer
import
VectorExecutor
...
...
paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
浏览文件 @
f55c4573
...
...
@@ -18,7 +18,7 @@ import time
import
paddle
from
yacs.config
import
CfgNode
from
paddlespeech.audio
.backends
import
load
as
load_audio
from
paddlespeech.audio
import
load
as
load_audio
from
paddlespeech.audio.compliance.librosa
import
melspectrogram
from
paddlespeech.s2t.utils.log
import
Log
from
paddlespeech.vector.io.batch
import
feature_normalize
...
...
tools/setup_helpers/extension.py
浏览文件 @
f55c4573
...
...
@@ -90,7 +90,7 @@ class CMakeBuild(build_ext):
f
"-DCMAKE_INSTALL_PREFIX=
{
extdir
}
"
,
"-DCMAKE_VERBOSE_MAKEFILE=ON"
,
f
"-DPython_INCLUDE_DIR=
{
distutils
.
sysconfig
.
get_python_inc
()
}
"
,
f
"-DP
YTHON
_LIBRARY=
{
distutils
.
sysconfig
.
get_config_var
(
'LIBDIR'
)
}
"
,
f
"-DP
ython
_LIBRARY=
{
distutils
.
sysconfig
.
get_config_var
(
'LIBDIR'
)
}
"
,
f
"-DBUILD_SOX:BOOL=
{
'ON'
if
_BUILD_SOX
else
'OFF'
}
"
,
f
"-DBUILD_MAD:BOOL=
{
'ON'
if
_BUILD_MAD
else
'OFF'
}
"
,
# f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录