Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
357a3648
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
357a3648
编写于
6月 29, 2022
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
pybind kaldi can make
上级
52477a10
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
220 addition
and
133 deletion
+220
-133
paddlespeech/audio/src/CMakeLists.txt
paddlespeech/audio/src/CMakeLists.txt
+25
-7
paddlespeech/audio/src/pybind/kaldi/feature_common.h
paddlespeech/audio/src/pybind/kaldi/feature_common.h
+8
-6
paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h
paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h
+26
-24
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc
+23
-19
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h
+24
-2
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc
+22
-7
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h
+40
-0
paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt
paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt
+0
-13
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
...h/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
+0
-24
paddlespeech/audio/src/pybind/pybind.cpp
paddlespeech/audio/src/pybind/pybind.cpp
+11
-2
paddlespeech/audio/src/pybind/sox/utils.h
paddlespeech/audio/src/pybind/sox/utils.h
+1
-4
paddlespeech/audio/third_party/CMakeLists.txt
paddlespeech/audio/third_party/CMakeLists.txt
+0
-1
paddlespeech/audio/third_party/kaldi/CMakeLists.txt
paddlespeech/audio/third_party/kaldi/CMakeLists.txt
+38
-23
tools/setup_helpers/extension.py
tools/setup_helpers/extension.py
+2
-1
未找到文件。
paddlespeech/audio/src/CMakeLists.txt
浏览文件 @
357a3648
...
@@ -35,11 +35,6 @@ if(BUILD_SOX)
...
@@ -35,11 +35,6 @@ if(BUILD_SOX)
list
(
list
(
APPEND
APPEND
LIBPADDLEAUDIO_SOURCES
LIBPADDLEAUDIO_SOURCES
# sox/io.cpp
# sox/utils.cpp
# sox/effects.cpp
# sox/effects_chain.cpp
# sox/types.cpp
)
)
list
(
list
(
APPEND
APPEND
...
@@ -49,6 +44,20 @@ if(BUILD_SOX)
...
@@ -49,6 +44,20 @@ if(BUILD_SOX)
endif
()
endif
()
if
(
BUILD_KALDI
)
list
(
APPEND
LIBPADDLEAUDIO_LINK_LIBRARIES
libkaldi
)
list
(
APPEND
LIBPADDLEAUDIO_COMPILE_DEFINITIONS
INCLUDE_KALDI
COMPILE_WITHOUT_OPENFST
)
endif
()
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
# END OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
...
@@ -79,9 +88,9 @@ define_library(
...
@@ -79,9 +88,9 @@ define_library(
)
)
if
(
APPLE
)
if
(
APPLE
)
set
(
TORCHAUDIO_LIBRARY lib
torch
audio CACHE INTERNAL
""
)
set
(
TORCHAUDIO_LIBRARY lib
paddle
audio CACHE INTERNAL
""
)
else
()
else
()
set
(
TORCHAUDIO_LIBRARY -Wl,--no-as-needed lib
torch
audio -Wl,--as-needed CACHE INTERNAL
""
)
set
(
TORCHAUDIO_LIBRARY -Wl,--no-as-needed lib
paddle
audio -Wl,--as-needed CACHE INTERNAL
""
)
endif
()
endif
()
################################################################################
################################################################################
...
@@ -136,6 +145,15 @@ if(BUILD_SOX)
...
@@ -136,6 +145,15 @@ if(BUILD_SOX)
pybind/sox/utils.cpp
pybind/sox/utils.cpp
)
)
endif
()
endif
()
if
(
BUILD_KALDI
)
list
(
APPEND
EXTENSION_SOURCES
pybind/kaldi/kaldi_feature_wrapper.cc
pybind/kaldi/kaldi_feature.cc
)
endif
()
#----------------------------------------------------------------------------#
#----------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
# END OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#
#----------------------------------------------------------------------------#
...
...
paddlespeech/audio/src/pybind/kaldi
_frontend
/feature_common.h
→
paddlespeech/audio/src/pybind/kaldi/feature_common.h
浏览文件 @
357a3648
...
@@ -19,6 +19,7 @@
...
@@ -19,6 +19,7 @@
#include "feat/feature-window.h"
#include "feat/feature-window.h"
namespace
paddleaudio
{
namespace
paddleaudio
{
namespace
kaldi
{
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
...
@@ -27,21 +28,22 @@ class StreamingFeatureTpl {
...
@@ -27,21 +28,22 @@ class StreamingFeatureTpl {
public:
public:
typedef
typename
F
::
Options
Options
;
typedef
typename
F
::
Options
Options
;
StreamingFeatureTpl
(
const
Options
&
opts
);
StreamingFeatureTpl
(
const
Options
&
opts
);
bool
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
bool
ComputeFeature
(
const
::
kaldi
::
VectorBase
<::
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>*
feats
);
void
Reset
()
{
remained_wav_
.
Resize
(
0
);
}
void
Reset
()
{
remained_wav_
.
Resize
(
0
);
}
int
Dim
()
{
return
computer_
.
Dim
();
}
int
Dim
()
{
return
computer_
.
Dim
();
}
private:
private:
bool
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
bool
Compute
(
const
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>&
waves
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
);
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>*
feats
);
Options
opts_
;
Options
opts_
;
kaldi
::
FeatureWindowFunction
window_function_
;
::
kaldi
::
FeatureWindowFunction
window_function_
;
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
remained_wav_
;
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
remained_wav_
;
F
computer_
;
F
computer_
;
};
};
}
// namespace kaldi
}
// namespace ppspeech
}
// namespace ppspeech
#include "feature_common_inl.h"
#include "feature_common_inl.h"
paddlespeech/audio/src/pybind/kaldi
_frontend
/feature_common_inl.h
→
paddlespeech/audio/src/pybind/kaldi/feature_common_inl.h
浏览文件 @
357a3648
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include "base/kaldi-common.h"
#include "base/kaldi-common.h"
namespace
paddleaudio
{
namespace
paddleaudio
{
namespace
kaldi
{
template
<
class
F
>
template
<
class
F
>
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
const
Options
&
opts
)
StreamingFeatureTpl
<
F
>::
StreamingFeatureTpl
(
const
Options
&
opts
)
...
@@ -24,21 +25,21 @@ StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
...
@@ -24,21 +25,21 @@ StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
template
<
class
F
>
template
<
class
F
>
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
bool
StreamingFeatureTpl
<
F
>::
ComputeFeature
(
const
kaldi
::
VectorBase
<
kaldi
::
BaseFloat
>&
wav
,
const
::
kaldi
::
VectorBase
<::
kaldi
::
BaseFloat
>&
wav
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>*
feats
)
{
// append remaned waves
// append remaned waves
kaldi
::
int32
wav_len
=
wav
.
Dim
();
::
kaldi
::
int32
wav_len
=
wav
.
Dim
();
if
(
wav_len
==
0
)
return
false
;
if
(
wav_len
==
0
)
return
false
;
kaldi
::
int32
left_len
=
remained_wav_
.
Dim
();
::
kaldi
::
int32
left_len
=
remained_wav_
.
Dim
();
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
waves
(
left_len
+
wav_len
);
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
waves
(
left_len
+
wav_len
);
waves
.
Range
(
0
,
left_len
).
CopyFromVec
(
remained_wav_
);
waves
.
Range
(
0
,
left_len
).
CopyFromVec
(
remained_wav_
);
waves
.
Range
(
left_len
,
wav_len
).
CopyFromVec
(
wav
);
waves
.
Range
(
left_len
,
wav_len
).
CopyFromVec
(
wav
);
// cache remaned waves
// cache remaned waves
kaldi
::
FrameExtractionOptions
frame_opts
=
computer_
.
GetFrameOptions
();
::
kaldi
::
FrameExtractionOptions
frame_opts
=
computer_
.
GetFrameOptions
();
kaldi
::
int32
num_frames
=
kaldi
::
NumFrames
(
waves
.
Dim
(),
frame_opts
);
::
kaldi
::
int32
num_frames
=
::
kaldi
::
NumFrames
(
waves
.
Dim
(),
frame_opts
);
kaldi
::
int32
frame_shift
=
frame_opts
.
WindowShift
();
::
kaldi
::
int32
frame_shift
=
frame_opts
.
WindowShift
();
kaldi
::
int32
left_samples
=
waves
.
Dim
()
-
frame_shift
*
num_frames
;
::
kaldi
::
int32
left_samples
=
waves
.
Dim
()
-
frame_shift
*
num_frames
;
remained_wav_
.
Resize
(
left_samples
);
remained_wav_
.
Resize
(
left_samples
);
remained_wav_
.
CopyFromVec
(
remained_wav_
.
CopyFromVec
(
waves
.
Range
(
frame_shift
*
num_frames
,
left_samples
));
waves
.
Range
(
frame_shift
*
num_frames
,
left_samples
));
...
@@ -51,26 +52,26 @@ bool StreamingFeatureTpl<F>::ComputeFeature(
...
@@ -51,26 +52,26 @@ bool StreamingFeatureTpl<F>::ComputeFeature(
// Compute feat
// Compute feat
template
<
class
F
>
template
<
class
F
>
bool
StreamingFeatureTpl
<
F
>::
Compute
(
bool
StreamingFeatureTpl
<
F
>::
Compute
(
const
kaldi
::
Vector
<
kaldi
::
BaseFloat
>&
waves
,
const
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>&
waves
,
kaldi
::
Vector
<
kaldi
::
BaseFloat
>*
feats
)
{
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>*
feats
)
{
kaldi
::
BaseFloat
vtln_warp
=
1.0
;
::
kaldi
::
BaseFloat
vtln_warp
=
1.0
;
const
kaldi
::
FrameExtractionOptions
&
frame_opts
=
const
::
kaldi
::
FrameExtractionOptions
&
frame_opts
=
computer_
.
GetFrameOptions
();
computer_
.
GetFrameOptions
();
kaldi
::
int32
num_samples
=
waves
.
Dim
();
::
kaldi
::
int32
num_samples
=
waves
.
Dim
();
kaldi
::
int32
frame_length
=
frame_opts
.
WindowSize
();
::
kaldi
::
int32
frame_length
=
frame_opts
.
WindowSize
();
kaldi
::
int32
sample_rate
=
frame_opts
.
samp_freq
;
::
kaldi
::
int32
sample_rate
=
frame_opts
.
samp_freq
;
if
(
num_samples
<
frame_length
)
{
if
(
num_samples
<
frame_length
)
{
return
false
;
return
false
;
}
}
kaldi
::
int32
num_frames
=
kaldi
::
NumFrames
(
num_samples
,
frame_opts
);
::
kaldi
::
int32
num_frames
=
::
kaldi
::
NumFrames
(
num_samples
,
frame_opts
);
feats
->
Resize
(
num_frames
*
Dim
());
feats
->
Resize
(
num_frames
*
Dim
());
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
window
;
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
window
;
bool
need_raw_log_energy
=
computer_
.
NeedRawLogEnergy
();
bool
need_raw_log_energy
=
computer_
.
NeedRawLogEnergy
();
for
(
kaldi
::
int32
frame
=
0
;
frame
<
num_frames
;
frame
++
)
{
for
(
::
kaldi
::
int32
frame
=
0
;
frame
<
num_frames
;
frame
++
)
{
kaldi
::
BaseFloat
raw_log_energy
=
0.0
;
::
kaldi
::
BaseFloat
raw_log_energy
=
0.0
;
kaldi
::
ExtractWindow
(
0
,
::
kaldi
::
ExtractWindow
(
0
,
waves
,
waves
,
frame
,
frame
,
frame_opts
,
frame_opts
,
...
@@ -78,14 +79,15 @@ bool StreamingFeatureTpl<F>::Compute(
...
@@ -78,14 +79,15 @@ bool StreamingFeatureTpl<F>::Compute(
&
window
,
&
window
,
need_raw_log_energy
?
&
raw_log_energy
:
NULL
);
need_raw_log_energy
?
&
raw_log_energy
:
NULL
);
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
this_feature
(
computer_
.
Dim
(),
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
this_feature
(
computer_
.
Dim
(),
kaldi
::
kUndefined
);
::
kaldi
::
kUndefined
);
computer_
.
Compute
(
raw_log_energy
,
vtln_warp
,
&
window
,
&
this_feature
);
computer_
.
Compute
(
raw_log_energy
,
vtln_warp
,
&
window
,
&
this_feature
);
kaldi
::
SubVector
<
kaldi
::
BaseFloat
>
output_row
(
::
kaldi
::
SubVector
<::
kaldi
::
BaseFloat
>
output_row
(
feats
->
Data
()
+
frame
*
Dim
(),
Dim
());
feats
->
Data
()
+
frame
*
Dim
(),
Dim
());
output_row
.
CopyFromVec
(
this_feature
);
output_row
.
CopyFromVec
(
this_feature
);
}
}
return
true
;
return
true
;
}
}
}
// namespace kaldi
}
// namespace paddleaudio
}
// namespace paddleaudio
paddlespeech/audio/src/pybind/kaldi
_frontend
/kaldi_feature.cc
→
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.cc
浏览文件 @
357a3648
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <pybind11/numpy.h>
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
namespace
paddleaudio
{
namespace
kaldi
{
namespace
py
=
pybind11
;
bool
InitFbank
(
float
samp_freq
,
// frame opts
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
...
@@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts
...
@@ -32,7 +43,7 @@ bool InitFbank(float samp_freq, // frame opts
bool
htk_compat
,
bool
htk_compat
,
bool
use_log_fbank
,
bool
use_log_fbank
,
bool
use_power
)
{
bool
use_power
)
{
kaldi
::
FbankOptions
opts
;
::
kaldi
::
FbankOptions
opts
;
opts
.
frame_opts
.
samp_freq
=
samp_freq
;
// frame opts
opts
.
frame_opts
.
samp_freq
=
samp_freq
;
// frame opts
opts
.
frame_opts
.
frame_shift_ms
=
frame_shift_ms
;
opts
.
frame_opts
.
frame_shift_ms
=
frame_shift_ms
;
opts
.
frame_opts
.
frame_length_ms
=
frame_length_ms
;
opts
.
frame_opts
.
frame_length_ms
=
frame_length_ms
;
...
@@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts
...
@@ -61,12 +72,12 @@ bool InitFbank(float samp_freq, // frame opts
opts
.
htk_compat
=
htk_compat
;
opts
.
htk_compat
=
htk_compat
;
opts
.
use_log_fbank
=
use_log_fbank
;
opts
.
use_log_fbank
=
use_log_fbank
;
opts
.
use_power
=
use_power
;
opts
.
use_power
=
use_power
;
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
InitFbank
(
opts
);
paddleaudio
::
kaldi
::
KaldiFeatureWrapper
::
GetInstance
()
->
InitFbank
(
opts
);
return
true
;
return
true
;
}
}
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
)
{
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
)
{
return
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ComputeFbank
(
wav
);
return
paddleaudio
::
kaldi
::
KaldiFeatureWrapper
::
GetInstance
()
->
ComputeFbank
(
wav
);
}
}
py
::
array_t
<
double
>
ComputeFbank
(
py
::
array_t
<
double
>
ComputeFbank
(
...
@@ -124,21 +135,14 @@ py::array_t<double> ComputeFbank(
...
@@ -124,21 +135,14 @@ py::array_t<double> ComputeFbank(
use_log_fbank
,
use_log_fbank
,
use_power
);
use_power
);
py
::
array_t
<
double
>
result
=
ComputeFbankStreaming
(
wav
);
py
::
array_t
<
double
>
result
=
ComputeFbankStreaming
(
wav
);
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
paddleaudio
::
kaldi
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
return
result
;
return
result
;
}
}
void
ResetFbank
()
{
void
ResetFbank
()
{
paddleaudio
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
paddleaudio
::
kaldi
::
KaldiFeatureWrapper
::
GetInstance
()
->
ResetFbank
();
}
}
PYBIND11_MODULE
(
kaldi_featurepy
,
m
)
{
}
// kaldi
m
.
doc
()
=
"kaldi_feature example"
;
}
// paddleaudio
m
.
def
(
"InitFbank"
,
&
InitFbank
,
"init fbank"
);
m
.
def
(
"ResetFbank"
,
&
ResetFbank
,
"reset fbank"
);
m
.
def
(
"ComputeFbank"
,
&
ComputeFbank
,
"compute fbank"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
ComputeFbankStreaming
,
"compute fbank streaming"
);
}
paddlespeech/audio/src/pybind/kaldi
_frontend
/kaldi_feature.h
→
paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h
浏览文件 @
357a3648
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/numpy.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/pybind11.h>
#include "kaldi_feature_wrapper.h"
#include "
paddlespeech/audio/src/pybind/kaldi/
kaldi_feature_wrapper.h"
namespace
py
=
pybind11
;
namespace
py
=
pybind11
;
namespace
paddleaudio
{
namespace
kaldi
{
bool
InitFbank
(
float
samp_freq
,
// frame opts
bool
InitFbank
(
float
samp_freq
,
// frame opts
float
frame_shift_ms
,
float
frame_shift_ms
,
float
frame_length_ms
,
float
frame_length_ms
,
...
@@ -41,7 +60,7 @@ py::array_t<double> ComputeFbank(
...
@@ -41,7 +60,7 @@ py::array_t<double> ComputeFbank(
bool
remove_dc_offset
,
bool
remove_dc_offset
,
std
::
string
window_type
,
// e.g. Hamming window
std
::
string
window_type
,
// e.g. Hamming window
bool
round_to_power_of_two
,
bool
round_to_power_of_two
,
kaldi
::
BaseFloat
blackman_coeff
,
::
kaldi
::
BaseFloat
blackman_coeff
,
bool
snip_edges
,
bool
snip_edges
,
bool
allow_downsample
,
bool
allow_downsample
,
bool
allow_upsample
,
bool
allow_upsample
,
...
@@ -68,3 +87,6 @@ void ResetFbank();
...
@@ -68,3 +87,6 @@ void ResetFbank();
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
);
py
::
array_t
<
double
>
ComputeFbankStreaming
(
const
py
::
array_t
<
double
>&
wav
);
py
::
array_t
<
double
>
TestFun
(
const
py
::
array_t
<
double
>&
wav
);
py
::
array_t
<
double
>
TestFun
(
const
py
::
array_t
<
double
>&
wav
);
}
// namespace kaldi
}
// namespace paddleaudio
\ No newline at end of file
paddlespeech/audio/src/pybind/kaldi
_frontend
/kaldi_feature_wrapper.cc
→
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.cc
浏览文件 @
357a3648
#include "kaldi_feature_wrapper.h"
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h"
namespace
paddleaudio
{
namespace
paddleaudio
{
namespace
kaldi
{
KaldiFeatureWrapper
*
KaldiFeatureWrapper
::
GetInstance
()
{
KaldiFeatureWrapper
*
KaldiFeatureWrapper
::
GetInstance
()
{
static
KaldiFeatureWrapper
instance
;
static
KaldiFeatureWrapper
instance
;
return
&
instance
;
return
&
instance
;
}
}
bool
KaldiFeatureWrapper
::
InitFbank
(
kaldi
::
FbankOptions
opts
)
{
bool
KaldiFeatureWrapper
::
InitFbank
(
::
kaldi
::
FbankOptions
opts
)
{
fbank_
.
reset
(
new
Fbank
(
opts
));
fbank_
.
reset
(
new
Fbank
(
opts
));
return
true
;
return
true
;
}
}
...
@@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
...
@@ -15,7 +30,7 @@ bool KaldiFeatureWrapper::InitFbank(kaldi::FbankOptions opts) {
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
py
::
array_t
<
double
>
KaldiFeatureWrapper
::
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
)
{
const
py
::
array_t
<
double
>
wav
)
{
py
::
buffer_info
info
=
wav
.
request
();
py
::
buffer_info
info
=
wav
.
request
();
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
input_wav
(
info
.
size
);
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
input_wav
(
info
.
size
);
double
*
wav_ptr
=
(
double
*
)
info
.
ptr
;
double
*
wav_ptr
=
(
double
*
)
info
.
ptr
;
for
(
int
idx
=
0
;
idx
<
info
.
size
;
++
idx
)
{
for
(
int
idx
=
0
;
idx
<
info
.
size
;
++
idx
)
{
input_wav
(
idx
)
=
*
wav_ptr
;
input_wav
(
idx
)
=
*
wav_ptr
;
...
@@ -23,7 +38,7 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
...
@@ -23,7 +38,7 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
}
}
kaldi
::
Vector
<
kaldi
::
BaseFloat
>
feats
;
::
kaldi
::
Vector
<::
kaldi
::
BaseFloat
>
feats
;
bool
flag
=
fbank_
->
ComputeFeature
(
input_wav
,
&
feats
);
bool
flag
=
fbank_
->
ComputeFeature
(
input_wav
,
&
feats
);
if
(
flag
==
false
||
feats
.
Dim
()
==
0
)
return
py
::
array_t
<
double
>
();
if
(
flag
==
false
||
feats
.
Dim
()
==
0
)
return
py
::
array_t
<
double
>
();
auto
result
=
py
::
array_t
<
double
>
(
feats
.
Dim
());
auto
result
=
py
::
array_t
<
double
>
(
feats
.
Dim
());
...
@@ -44,8 +59,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
...
@@ -44,8 +59,8 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
py::buffer_info info = wav.request();
py::buffer_info info = wav.request();
std::cout << info.size << std::endl;
std::cout << info.size << std::endl;
auto result = py::array_t<double>(info.size);
auto result = py::array_t<double>(info.size);
//
kaldi::Vector<
kaldi::BaseFloat> input_wav(info.size);
//
::kaldi::Vector<::
kaldi::BaseFloat> input_wav(info.size);
kaldi::Vector<double> input_wav(info.size);
::
kaldi::Vector<double> input_wav(info.size);
py::buffer_info info_re = result.request();
py::buffer_info info_re = result.request();
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
memcpy(input_wav.Data(), (double*)info.ptr, wav.nbytes());
...
@@ -55,5 +70,5 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
...
@@ -55,5 +70,5 @@ py::array_t<double> KaldiFeatureWrapper::ComputeFbank(
*/
*/
}
}
}
// namesapce kaldi
}
// namespace paddleaudio
}
// namespace paddleaudio
paddlespeech/audio/src/pybind/kaldi/kaldi_feature_wrapper.h
0 → 100644
浏览文件 @
357a3648
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "paddlespeech/audio/src/pybind/kaldi/feature_common.h"
namespace
paddleaudio
{
namespace
kaldi
{
typedef
StreamingFeatureTpl
<::
kaldi
::
FbankComputer
>
Fbank
;
class
KaldiFeatureWrapper
{
public:
static
KaldiFeatureWrapper
*
GetInstance
();
bool
InitFbank
(
::
kaldi
::
FbankOptions
opts
);
py
::
array_t
<
double
>
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
);
int
Dim
()
{
return
fbank_
->
Dim
();
}
void
ResetFbank
()
{
fbank_
->
Reset
();
}
private:
std
::
unique_ptr
<
paddleaudio
::
kaldi
::
Fbank
>
fbank_
;
};
}
// namespace kaldi
}
// namespace paddleaudio
paddlespeech/audio/src/pybind/kaldi_frontend/CMakeLists.txt
已删除
100644 → 0
浏览文件 @
52477a10
include_directories
(
${
CMAKE_CURRENT_SOURCE_DIR
}
)
add_library
(
kaldi_feature
kaldi_feature.cc
kaldi_feature_wrapper.cc
)
target_link_libraries
(
kaldi_feature kaldi-fbank
)
pybind11_add_module
(
kaldi_frontend kaldi_feature.cc kaldi_feature_wrapper.cc
)
target_link_libraries
(
kaldi_frontend PRIVATE kaldi_feature
)
paddlespeech/audio/src/pybind/kaldi_frontend/kaldi_feature_wrapper.h
已删除
100644 → 0
浏览文件 @
52477a10
#include "base/kaldi-common.h"
#include "feat/feature-fbank.h"
#include "feature_common.h"
#pragma once
namespace
paddleaudio
{
typedef
StreamingFeatureTpl
<
kaldi
::
FbankComputer
>
Fbank
;
class
KaldiFeatureWrapper
{
public:
static
KaldiFeatureWrapper
*
GetInstance
();
bool
InitFbank
(
kaldi
::
FbankOptions
opts
);
py
::
array_t
<
double
>
ComputeFbank
(
const
py
::
array_t
<
double
>
wav
);
int
Dim
()
{
return
fbank_
->
Dim
();
}
void
ResetFbank
()
{
fbank_
->
Reset
();
}
private:
std
::
unique_ptr
<
paddleaudio
::
Fbank
>
fbank_
;
};
}
// namespace paddleaudio
paddlespeech/audio/src/pybind/pybind.cpp
浏览文件 @
357a3648
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
All rights reserved.
//
All rights r
eserved.
//
Copyright (c) 2022 PaddlePaddle Authors. All Rights R
eserved.
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/sox/io.h"
#include "paddlespeech/audio/src/pybind/kaldi/kaldi_feature.h"
// Sox
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
PYBIND11_MODULE
(
_paddleaudio
,
m
)
{
m
.
def
(
"get_info_file"
,
m
.
def
(
"get_info_file"
,
&
paddleaudio
::
sox_io
::
get_info_file
,
&
paddleaudio
::
sox_io
::
get_info_file
,
...
@@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) {
...
@@ -10,4 +12,11 @@ PYBIND11_MODULE(_paddleaudio, m) {
m
.
def
(
"get_info_fileobj"
,
m
.
def
(
"get_info_fileobj"
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
&
paddleaudio
::
sox_io
::
get_info_fileobj
,
"Get metadata of audio in file object."
);
"Get metadata of audio in file object."
);
m
.
def
(
"InitFbank"
,
&
paddleaudio
::
kaldi
::
InitFbank
,
"init fbank"
);
m
.
def
(
"ResetFbank"
,
&
paddleaudio
::
kaldi
::
ResetFbank
,
"reset fbank"
);
m
.
def
(
"ComputeFbank"
,
&
paddleaudio
::
kaldi
::
ComputeFbank
,
"compute fbank"
);
m
.
def
(
"ComputeFbankStreaming"
,
&
paddleaudio
::
kaldi
::
ComputeFbankStreaming
,
"compute fbank streaming"
);
}
}
\ No newline at end of file
paddlespeech/audio/src/pybind/sox/utils.h
浏览文件 @
357a3648
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// Copyright (c) 2017 Facebook Inc. (Soumith Chintala),
// All rights reserved.
// All rights reserved.
#ifndef PADDLEAUDIO_PYBIND_SOX_UTILS_H
#pragma once
#define PADDLEAUDIO_PYBIND_SOX_UTILS_H
#include <pybind11/pybind11.h>
#include <pybind11/pybind11.h>
#include <sox.h>
#include <sox.h>
...
@@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding);
...
@@ -41,5 +40,3 @@ std::string get_encoding(sox_encoding_t encoding);
}
// namespace paddleaudio
}
// namespace paddleaudio
}
// namespace sox_utils
}
// namespace sox_utils
#endif
paddlespeech/audio/third_party/CMakeLists.txt
浏览文件 @
357a3648
...
@@ -12,5 +12,4 @@ endif()
...
@@ -12,5 +12,4 @@ endif()
################################################################################
################################################################################
if
(
BUILD_KALDI
)
if
(
BUILD_KALDI
)
add_subdirectory
(
kaldi
)
add_subdirectory
(
kaldi
)
message
(
STATUS
"Build Kaldi"
)
endif
()
endif
()
\ No newline at end of file
paddlespeech/audio/third_party/kaldi/CMakeLists.txt
浏览文件 @
357a3648
...
@@ -2,11 +2,6 @@
...
@@ -2,11 +2,6 @@
# compile kaldi without openfst
# compile kaldi without openfst
add_definitions
(
"-DCOMPILE_WITHOUT_OPENFST"
)
add_definitions
(
"-DCOMPILE_WITHOUT_OPENFST"
)
# include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/kaldi)
# include_directories(/usr/include/python3.7m)
set
(
INSTALL_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
)
# function (define_library name source include_dirs link_libraries compile_defs)
# function (define_library name source include_dirs link_libraries compile_defs)
# add_library(${name} INTERFACE ${source})
# add_library(${name} INTERFACE ${source})
# target_include_directories(${name} INTERFACE ${include_dirs})
# target_include_directories(${name} INTERFACE ${include_dirs})
...
@@ -24,18 +19,18 @@ set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
...
@@ -24,18 +19,18 @@ set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
# endfunction()
# endfunction()
# kaldi-base
# kaldi-base
add_library
(
kaldi-base
INTERFACE
add_library
(
kaldi-base
STATIC
base/io-funcs.cc
base/io-funcs.cc
base/kaldi-error.cc
base/kaldi-error.cc
base/kaldi-math.cc
base/kaldi-math.cc
base/kaldi-utils.cc
base/kaldi-utils.cc
base/timer.cc
base/timer.cc
)
)
target_include_directories
(
kaldi-base
INTERFACE
${
INSTALL_DIR
}
/base
)
target_include_directories
(
kaldi-base
PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_compile_definitions
(
kaldi-base INTERFACE
"-DCOMPILE_WITHOUT_OPENFST"
)
# kaldi-matrix
# kaldi-matrix
add_library
(
kaldi-matrix
INTERFACE
add_library
(
kaldi-matrix
STATIC
matrix/compressed-matrix.cc
matrix/compressed-matrix.cc
matrix/kaldi-matrix.cc
matrix/kaldi-matrix.cc
matrix/kaldi-vector.cc
matrix/kaldi-vector.cc
...
@@ -48,11 +43,12 @@ add_library(kaldi-matrix INTERFACE
...
@@ -48,11 +43,12 @@ add_library(kaldi-matrix INTERFACE
matrix/srfft.cc
matrix/srfft.cc
matrix/tp-matrix.cc
matrix/tp-matrix.cc
)
)
target_include_directories
(
kaldi-matrix INTERFACE
${
INSTALL_DIR
}
/matrix
)
target_include_directories
(
kaldi-matrix PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
kaldi-matrix INTERFACE gfortran kaldi-base libopenblas.a
)
target_link_libraries
(
kaldi-matrix PUBLIC gfortran kaldi-base libopenblas.a
)
# kaldi-util
# kaldi-util
add_library
(
kaldi-util
INTERFACE
add_library
(
kaldi-util
STATIC
util/kaldi-holder.cc
util/kaldi-holder.cc
util/kaldi-io.cc
util/kaldi-io.cc
util/kaldi-semaphore.cc
util/kaldi-semaphore.cc
...
@@ -63,11 +59,12 @@ add_library(kaldi-util INTERFACE
...
@@ -63,11 +59,12 @@ add_library(kaldi-util INTERFACE
util/simple-options.cc
util/simple-options.cc
util/text-utils.cc
util/text-utils.cc
)
)
target_include_directories
(
kaldi-util INTERFACE
${
INSTALL_DIR
}
/util
)
target_include_directories
(
kaldi-util PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
kaldi-util INTERFACE kaldi-base kaldi-matrix
)
target_link_libraries
(
kaldi-util PUBLIC kaldi-base kaldi-matrix
)
# kaldi-feat-common
# kaldi-feat-common
add_library
(
kaldi-feat-common
INTERFACE
add_library
(
kaldi-feat-common
STATIC
feat/wave-reader.cc
feat/wave-reader.cc
feat/signal.cc
feat/signal.cc
feat/feature-functions.cc
feat/feature-functions.cc
...
@@ -76,19 +73,37 @@ add_library(kaldi-feat-common INTERFACE
...
@@ -76,19 +73,37 @@ add_library(kaldi-feat-common INTERFACE
feat/mel-computations.cc
feat/mel-computations.cc
feat/cmvn.cc
feat/cmvn.cc
)
)
target_include_directories
(
kaldi-feat-common INTERFACE
${
INSTALL_DIR
}
/feat
)
target_include_directories
(
kaldi-feat-common PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
kaldi-feat-common INTERFACE kaldi-base kaldi-matrix kaldi-util
)
target_link_libraries
(
kaldi-feat-common PUBLIC kaldi-base kaldi-matrix kaldi-util
)
# kaldi-mfcc
# kaldi-mfcc
add_library
(
kaldi-mfcc
INTERFACE
add_library
(
kaldi-mfcc
STATIC
feat/feature-mfcc.cc
feat/feature-mfcc.cc
)
)
target_include_directories
(
kaldi-mfcc INTERFACE
${
INSTALL_DIR
}
/feat
)
target_include_directories
(
kaldi-mfcc PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
kaldi-mfcc INTERFACE kaldi-feat-common
)
target_link_libraries
(
kaldi-mfcc PUBLIC kaldi-feat-common
)
# kaldi-fbank
# kaldi-fbank
add_library
(
kaldi-fbank
INTERFACE
add_library
(
kaldi-fbank
STATIC
feat/feature-fbank.cc
feat/feature-fbank.cc
)
)
target_include_directories
(
kaldi-fbank INTERFACE
${
INSTALL_DIR
}
/feat
)
target_include_directories
(
kaldi-fbank PUBLIC
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
kaldi-fbank INTERFACE kaldi-feat-common
)
target_link_libraries
(
kaldi-fbank PUBLIC kaldi-feat-common
)
\ No newline at end of file
set
(
KALDI_LIBRARIES
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-base.a
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-matrix.a
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-util.a
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-feat-common.a
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-mfcc.a
${
CMAKE_CURRENT_BINARY_DIR
}
/libkaldi-fbank.a
)
add_library
(
libkaldi INTERFACE
)
add_dependencies
(
libkaldi kaldi-base kaldi-matrix kaldi-util kaldi-feat-common kaldi-mfcc kaldi-fbank
)
target_include_directories
(
libkaldi INTERFACE
${
CMAKE_CURRENT_SOURCE_DIR
}
)
target_link_libraries
(
libkaldi INTERFACE
${
KALDI_LIBRARIES
}
)
target_compile_definitions
(
libkaldi INTERFACE
"-DCOMPILE_WITHOUT_OPENFST"
)
\ No newline at end of file
tools/setup_helpers/extension.py
浏览文件 @
357a3648
...
@@ -88,7 +88,8 @@ class CMakeBuild(build_ext):
...
@@ -88,7 +88,8 @@ class CMakeBuild(build_ext):
# f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}",
# f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}",
f
"-DCMAKE_INSTALL_PREFIX=
{
extdir
}
"
,
f
"-DCMAKE_INSTALL_PREFIX=
{
extdir
}
"
,
"-DCMAKE_VERBOSE_MAKEFILE=ON"
,
"-DCMAKE_VERBOSE_MAKEFILE=ON"
,
f
"-DPython_INCLUDE_DIRS=
{
distutils
.
sysconfig
.
get_python_inc
()
}
"
,
f
"-DPython_INCLUDE_DIR=
{
distutils
.
sysconfig
.
get_python_inc
()
}
"
,
f
"-DPYTHON_LIBRARY=
{
distutils
.
sysconfig
.
get_config_var
(
'LIBDIR'
)
}
"
,
f
"-DBUILD_SOX:BOOL=
{
'ON'
if
_BUILD_SOX
else
'OFF'
}
"
,
f
"-DBUILD_SOX:BOOL=
{
'ON'
if
_BUILD_SOX
else
'OFF'
}
"
,
f
"-DBUILD_MAD:BOOL=
{
'ON'
if
_BUILD_MAD
else
'OFF'
}
"
,
f
"-DBUILD_MAD:BOOL=
{
'ON'
if
_BUILD_MAD
else
'OFF'
}
"
,
# f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
# f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录