Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
fb853167
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
fb853167
编写于
11月 05, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
format code
上级
7b3a901b
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
46 addition
and
35 deletion
+46
-35
paddlespeech/s2t/exps/u2/model.py
paddlespeech/s2t/exps/u2/model.py
+4
-4
paddlespeech/s2t/exps/u2_kaldi/model.py
paddlespeech/s2t/exps/u2_kaldi/model.py
+4
-4
paddlespeech/s2t/exps/u2_st/model.py
paddlespeech/s2t/exps/u2_st/model.py
+4
-4
paddlespeech/s2t/transform/cmvn.py
paddlespeech/s2t/transform/cmvn.py
+8
-3
paddlespeech/s2t/transform/perturb.py
paddlespeech/s2t/transform/perturb.py
+2
-0
paddlespeech/s2t/transform/transformation.py
paddlespeech/s2t/transform/transformation.py
+1
-2
paddlespeech/s2t/utils/ctc_utils.py
paddlespeech/s2t/utils/ctc_utils.py
+2
-1
utils/remove_longshortdata.py
utils/remove_longshortdata.py
+21
-17
未找到文件。
paddlespeech/s2t/exps/u2/model.py
浏览文件 @
fb853167
...
...
@@ -575,10 +575,10 @@ class U2Tester(U2Trainer):
@
paddle
.
no_grad
()
def
align
(
self
):
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
paddlespeech/s2t/exps/u2_kaldi/model.py
浏览文件 @
fb853167
...
...
@@ -528,10 +528,10 @@ class U2Tester(U2Trainer):
@
paddle
.
no_grad
()
def
align
(
self
):
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
paddlespeech/s2t/exps/u2_st/model.py
浏览文件 @
fb853167
...
...
@@ -543,10 +543,10 @@ class U2STTester(U2STTrainer):
@
paddle
.
no_grad
()
def
align
(
self
):
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
ctc_utils
.
ctc_align
(
self
.
config
,
self
.
model
,
self
.
align_loader
,
self
.
config
.
decoding
.
batch_size
,
self
.
config
.
collator
.
stride_ms
,
self
.
vocab_list
,
self
.
args
.
result_file
)
def
load_inferspec
(
self
):
"""infer model and input spec.
...
...
paddlespeech/s2t/transform/cmvn.py
浏览文件 @
fb853167
...
...
@@ -14,10 +14,12 @@
# Modified from espnet(https://github.com/espnet/espnet)
import
io
import
json
import
h5py
import
kaldiio
import
numpy
as
np
class
CMVN
():
"Apply Global/Spk CMVN/iverserCMVN."
...
...
@@ -158,11 +160,14 @@ class UtteranceCMVN():
return
x
class
GlobalCMVN
():
"Apply Global CMVN"
def
__init__
(
self
,
cmvn_path
,
norm_means
=
True
,
norm_vars
=
True
,
std_floor
=
1.0e-20
):
def
__init__
(
self
,
cmvn_path
,
norm_means
=
True
,
norm_vars
=
True
,
std_floor
=
1.0e-20
):
self
.
cmvn_path
=
cmvn_path
self
.
norm_means
=
norm_means
self
.
norm_vars
=
norm_vars
...
...
@@ -189,4 +194,4 @@ class GlobalCMVN():
if
self
.
norm_vars
:
x
=
np
.
divide
(
x
,
self
.
std
)
return
x
\ No newline at end of file
return
x
paddlespeech/s2t/transform/perturb.py
浏览文件 @
fb853167
...
...
@@ -17,6 +17,7 @@ import numpy
import
scipy
import
soundfile
import
soxbindings
as
sox
from
paddlespeech.s2t.io.reader
import
SoundHDF5File
...
...
@@ -171,6 +172,7 @@ class SpeedPerturbationSox():
upper=
{
self
.
upper
}
,
keep_length=
{
self
.
keep_length
}
,
sample_rate=
{
self
.
sr
}
)"""
else
:
return
f
"""
{
self
.
__class__
.
__name__
}
(
utt2ratio=
{
self
.
utt2ratio_file
}
,
...
...
paddlespeech/s2t/transform/transformation.py
浏览文件 @
fb853167
...
...
@@ -46,8 +46,7 @@ import_alias = dict(
wpe
=
"paddlespeech.s2t.transform.wpe:WPE"
,
channel_selector
=
"paddlespeech.s2t.transform.channel_selector:ChannelSelector"
,
fbank_kaldi
=
"paddlespeech.s2t.transform.spectrogram:LogMelSpectrogramKaldi"
,
cmvn_json
=
"paddlespeech.s2t.transform.cmvn:GlobalCMVN"
)
cmvn_json
=
"paddlespeech.s2t.transform.cmvn:GlobalCMVN"
)
class
Transformation
():
...
...
paddlespeech/s2t/utils/ctc_utils.py
浏览文件 @
fb853167
...
...
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
from
typing
import
List
from
pathlib
import
Path
from
typing
import
List
import
numpy
as
np
import
paddle
...
...
utils/remove_longshortdata.py
浏览文件 @
fb853167
#!/usr/bin/env python3
"""remove longshort data from manifest"""
import
logging
import
argparse
import
logging
import
jsonlines
from
paddlespeech.s2t.utils.cli_utils
import
get_commandline_args
...
...
@@ -23,17 +24,19 @@ def get_parser():
parser
.
add_argument
(
"--verbose"
,
"-V"
,
default
=
0
,
type
=
int
,
help
=
"Verbose option"
)
parser
.
add_argument
(
"--iaxis"
,
default
=
0
,
type
=
int
,
help
=
"multi inputs index, 0 is the first"
)
parser
.
add_argument
(
"--oaxis"
,
default
=
0
,
type
=
int
,
help
=
"multi outputs index, 0 is the first"
)
parser
.
add_argument
(
"--maxframes"
,
default
=
2000
,
type
=
int
,
help
=
"maxframes"
)
parser
.
add_argument
(
"--minframes"
,
default
=
10
,
type
=
int
,
help
=
"minframes"
)
"--iaxis"
,
default
=
0
,
type
=
int
,
help
=
"multi inputs index, 0 is the first"
)
parser
.
add_argument
(
"--maxchars"
,
default
=
200
,
type
=
int
,
help
=
"max tokens"
)
parser
.
add_argument
(
"--minchars"
,
default
=
0
,
type
=
int
,
help
=
"min tokens"
)
"--oaxis"
,
default
=
0
,
type
=
int
,
help
=
"multi outputs index, 0 is the first"
)
parser
.
add_argument
(
"--maxframes"
,
default
=
2000
,
type
=
int
,
help
=
"maxframes"
)
parser
.
add_argument
(
"--minframes"
,
default
=
10
,
type
=
int
,
help
=
"minframes"
)
parser
.
add_argument
(
"--maxchars"
,
default
=
200
,
type
=
int
,
help
=
"max tokens"
)
parser
.
add_argument
(
"--minchars"
,
default
=
0
,
type
=
int
,
help
=
"min tokens"
)
parser
.
add_argument
(
"--stride_ms"
,
default
=
10
,
type
=
int
,
help
=
"stride in ms unit."
)
parser
.
add_argument
(
...
...
@@ -54,7 +57,7 @@ def filter_input(args, line):
nframe
=
tmp
[
'shape'
][
0
]
*
1000
/
args
.
stride_ms
else
:
nframe
=
tmp
[
'shape'
][
0
]
if
nframe
<
args
.
minframes
or
nframe
>
args
.
maxframes
:
return
True
else
:
...
...
@@ -67,7 +70,7 @@ def filter_output(args, line):
return
True
else
:
return
False
def
main
():
args
=
get_parser
().
parse_args
()
...
...
@@ -78,15 +81,15 @@ def main():
else
:
logging
.
basicConfig
(
level
=
logging
.
WARN
,
format
=
logfmt
)
logging
.
info
(
get_commandline_args
())
with
jsonlines
.
open
(
args
.
rspecifier
,
'r'
)
as
reader
:
lines
=
list
(
reader
)
logging
.
info
(
f
"Example:
{
len
(
lines
)
}
"
)
feat
=
lines
[
0
][
'input'
][
args
.
iaxis
][
'feat'
]
args
.
soud
=
False
args
.
soud
=
False
if
feat
.
split
(
'.'
)[
-
1
]
not
in
'ark, scp'
:
args
.
sound
=
True
count
=
0
filter
=
0
with
jsonlines
.
open
(
args
.
wspecifier_or_wxfilename
,
'w'
)
as
writer
:
...
...
@@ -98,5 +101,6 @@ def main():
count
+=
1
logging
.
info
(
f
"Example after filter:
{
count
}
\{filter}"
)
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录