Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
9abe33b4
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
接近 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9abe33b4
编写于
9月 22, 2021
作者:
H
Hui Zhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add score_sclite
上级
ae87bc8c
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
105 additions
and
2 deletions
+105
-2
examples/librispeech/s2/path.sh
examples/librispeech/s2/path.sh
+1
-1
tools/Makefile
tools/Makefile
+36
-0
utils/filter.py
utils/filter.py
+66
-0
utils/score_sclite.sh
utils/score_sclite.sh
+2
-1
未找到文件。
examples/librispeech/s2/path.sh
浏览文件 @
9abe33b4
export
MAIN_ROOT
=
`
realpath
${
PWD
}
/../../../
`
export
PATH
=
${
MAIN_ROOT
}
:
${
PWD
}
/utils:
${
PATH
}
export
PATH
=
${
MAIN_ROOT
}
:
${
MAIN_ROOT
}
/tools/sckt/bin/sclite:
${
PWD
}
/utils:
${
PATH
}
export
LC_ALL
=
C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
...
...
tools/Makefile
浏览文件 @
9abe33b4
...
...
@@ -39,3 +39,39 @@ mfa.done:
test
-d
montreal-forced-aligner
||
wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
tar
xvf montreal-forced-aligner_linux.tar.gz
touch
mfa.done
# Keep the existing target 'sclite' to avoid breaking the users who might have
# scripted it in.
.PHONY
:
sclite sctk_cleaned sctk_made
sclite sctk_made
:
sctk/.compiled
sctk/.compiled
:
sctk
rm
-f
sctk/.compiled
$(SCTK_MKENV)
$(MAKE)
-C
sctk config
$(SCTK_MKENV)
$(MAKE)
-C
sctk all doc
$(MAKE)
-C
sctk
install
touch
sctk/.compiled
# The GitHub archive unpacks into SCTK-{40-character-long-hash}/
sctk
:
sctk-$(SCTK_GITHASH).tar.gz
tar
zxvf sctk-
$(SCTK_GITHASH)
.tar.gz
rm
-rf
sctk-
$(SCTK_GITHASH)
sctk
mv
SCTK-
$(SCTK_GITHASH)
*
sctk-
$(SCTK_GITHASH)
ln
-s
sctk-
$(SCTK_GITHASH)
sctk
touch
sctk-
$(SCTK_GITHASH)
.tar.gz
sctk-$(SCTK_GITHASH).tar.gz
:
if
[
-d
'
$(DOWNLOAD_DIR)
'
]
;
then
\
cp
-p
'
$(DOWNLOAD_DIR)
/sctk-
$(SCTK_GITHASH)
.tar.gz'
.
;
\
else
\
$(WGET)
-nv
-T
10
-t
3
-O
sctk-
$(SCTK_GITHASH)
.tar.gz
\
https://github.com/usnistgov/SCTK/archive/
$(SCTK_GITHASH)
.tar.gz
;
\
fi
sctk_cleaned
:
-
for
d
in
sctk/ sctk-
*
/
;
do
\
[
!
-f
$$
d/.compiled
]
||
$(MAKE)
-C
$$
d clean
;
\
rm
-f
$$
d/.compiled
;
\
done
\ No newline at end of file
utils/filter.py
0 → 100644
浏览文件 @
9abe33b4
#!/usr/bin/env python3
# Apache 2.0
import
argparse
import
codecs
import
sys
# True when running under Python 2; used to decide whether stdout must be
# wrapped directly (Python 2) or through its binary ``buffer`` (Python 3).
is_python2 = sys.version_info[0] == 2
def get_parser():
    """Build the command-line parser for the word filter.

    Returns:
        argparse.ArgumentParser: parser accepting an optional
        ``--exclude``/``-v`` flag followed by two positional arguments,
        ``filt`` (the filter list file) and ``infile`` (the text to filter).
    """
    parser = argparse.ArgumentParser(
        description="filter words in a text file",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--exclude",
        "-v",
        dest="exclude",
        action="store_true",
        help="exclude filter words",
    )
    parser.add_argument("filt", type=str, help="filter list")
    parser.add_argument("infile", type=str, help="input file")
    return parser
def main(args=None):
    """Parse command-line arguments and run the filter.

    Args:
        args: list of argument strings without the program name. When
            None, argparse falls back to ``sys.argv[1:]``.
    """
    parsed = get_parser().parse_args(args)
    filter_file(parsed.infile, parsed.filt, parsed.exclude)
def filter_file(infile, filt, exclude):
    """Filter the words of a text file against a word list and print the result.

    Both files are read as UTF-8. Every line of ``infile`` is split on
    whitespace and written to standard output as UTF-8, one output line per
    input line, with the words joined by single spaces.

    Args:
        infile: path of the text file to filter.
        filt: path of the filter list, one entry per line (surrounding
            whitespace is stripped).
        exclude: when true, words found in the filter list are replaced by
            an empty string; otherwise words NOT in the list are replaced
            by ``<UNK>``.
    """
    with codecs.open(filt, "r", encoding="utf-8") as vocabfile:
        vocab = {entry.strip() for entry in vocabfile}

    # Write through a local UTF-8 writer instead of rebinding sys.stdout, so
    # the caller's stdout object is left untouched after this call.
    if sys.version_info[0] == 2:
        raw = sys.stdout
    else:
        raw = getattr(sys.stdout, "buffer", None)
    if raw is None:
        # stdout was replaced by a text-only stream (no binary layer to
        # encode into); write the text to it directly.
        out = sys.stdout
    else:
        # Push out any text already pending on stdout so it is not
        # reordered after the bytes written below.
        sys.stdout.flush()
        out = codecs.getwriter("utf-8")(raw)

    with codecs.open(infile, "r", encoding="utf-8") as textfile:
        for line in textfile:
            words = line.strip().split()
            if exclude:
                # Excluded words leave an empty placeholder, so the joined
                # line keeps the separators around them (existing output
                # format, preserved on purpose).
                kept = [word if word not in vocab else "" for word in words]
            else:
                kept = [word if word in vocab else "<UNK>" for word in words]
            out.write(" ".join(kept) + "\n")
    out.flush()
# Script entry point: forward the command-line arguments (without the
# program name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
utils/score_sclite.sh
浏览文件 @
9abe33b4
...
...
@@ -5,6 +5,7 @@
[
-f
./path.sh
]
&&
.
./path.sh
# non language symbol
nlsyms
=
""
wer
=
false
bpe
=
""
...
...
@@ -24,7 +25,7 @@ fi
dir
=
$1
dic
=
$2
c
oncatjson.py
${
dir
}
/data.
*
.json
>
${
dir
}
/data.json
c
at
${
dir
}
/data.
*
.json
>
${
dir
}
/data.json
if
[
$num_spkrs
-eq
1
]
;
then
json2trn.py
${
dir
}
/data.json
${
dic
}
--num-spkrs
${
num_spkrs
}
--refs
${
dir
}
/ref.trn
--hyps
${
dir
}
/hyp.trn
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录