PaddlePaddle / DeepSpeech
Commit a9422260 (unverified)
Authored on Feb 24, 2022 by Hui Zhang; committed by GitHub on Feb 24, 2022

Merge pull request #1483 from zh794390558/doc

[doc] update ctc loss compare

Parents: fe350ddd, 6b1fe701
Showing 10 changed files with 109 additions and 114 deletions (+109 -114).
.pre-commit-config.yaml (+2, -1)
dataset/voxceleb/voxceleb1.py (+4, -0)
docs/topic/ctc/ctc_loss_compare.ipynb (+73, -77)
examples/ami/sd0/local/ami_prepare.py (+6, -8)
examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py (+18, -13)
paddlespeech/__init__.py (+0, -11)
paddlespeech/cli/asr/infer.py (+2, -1)
paddlespeech/s2t/io/utility.py (+1, -1)
paddlespeech/t2s/datasets/dataset.py (+1, -1)
utils/DER.py (+2, -1)
.pre-commit-config.yaml

@@ -50,12 +50,13 @@ repos:
         entry: bash .pre-commit-hooks/clang-format.hook -i
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
+        exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
     -   id: copyright_checker
         name: copyright_checker
         entry: python .pre-commit-hooks/copyright-check.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
-        exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$
+        exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
 -   repo: https://github.com/asottile/reorder_python_imports
     rev: v2.4.0
     hooks:
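Both exclude values above are Python regular expressions tested against repository-relative paths. A small standalone check of the updated copyright_checker pattern, with plain re.search standing in for the hook runner's matching and purely hypothetical sample paths:

import re

# Updated copyright_checker exclude pattern from the hunk above.
exclude = re.compile(
    r"(?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$")

samples = [
    "speechx/speechx/kaldi/base/kaldi-error.cc",   # hypothetical path: now excluded
    "third_party/ctc_decoders/setup.py",           # hypothetical path: excluded
    "paddlespeech/s2t/io/utility.py",              # hypothetical path: still checked
]
for path in samples:
    skipped = exclude.search(path) is not None
    print(f"{path}: {'skipped' if skipped else 'checked'}")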
dataset/voxceleb/voxceleb1.py

@@ -80,6 +80,7 @@ parser.add_argument(
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []

@@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix):
        print(f"{total_text / total_sec} text/sec", file=f)
        print(f"{total_sec / total_num} sec/utt", file=f)


def prepare_dataset(base_url, data_list, target_dir, manifest_path,
                    target_data):
    if not os.path.exists(target_dir):

@@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
    # create the manifest file
    create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)


def main():
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

@@ -184,5 +187,6 @@ def main():
    print("Manifest prepare done!")


if __name__ == '__main__':
    main()
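The two print lines in the second hunk are per-manifest summary statistics. A minimal standalone sketch of how such figures are derived from accumulated totals; the counter values and output filename below are made up, not taken from the script:

# Minimal sketch, not the repo's exact code: totals accumulated while writing
# manifest lines, then the same summary lines printed in the hunk above.
total_num = 3          # hypothetical utterance count
total_sec = 24.6       # hypothetical total audio duration in seconds
total_text = 30        # hypothetical total number of text tokens

with open("manifest.voxceleb1.meta", "w") as f:   # hypothetical output path
    print(f"{total_num} utts", file=f)
    print(f"{total_sec} sec", file=f)
    print(f"{total_text} text", file=f)
    print(f"{total_text / total_sec} text/sec", file=f)
    print(f"{total_sec / total_num} sec/utt", file=f)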
docs/topic/ctc/ctc_loss_compare.ipynb
...
...
@@ -30,12 +30,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Cloning into 'warp-ctc'...\n",
"remote: Enumerating objects: 829, done.\u001b[K\n",
"remote: Total 829 (delta 0), reused 0 (delta 0), pack-reused 829\u001b[K\n",
"Receiving objects: 100% (829/829), 388.85 KiB | 140.00 KiB/s, done.\n",
"Resolving deltas: 100% (419/419), done.\n",
"Checking connectivity... done.\n"
"fatal: destination path 'warp-ctc' already exists and is not an empty directory.\r\n"
]
}
],
...
...
@@ -99,30 +94,6 @@
"name": "stdout",
"output_type": "stream",
"text": [
"-- The C compiler identification is GNU 5.4.0\n",
"-- The CXX compiler identification is GNU 5.4.0\n",
"-- Check for working C compiler: /usr/bin/cc\n",
"-- Check for working C compiler: /usr/bin/cc -- works\n",
"-- Detecting C compiler ABI info\n",
"-- Detecting C compiler ABI info - done\n",
"-- Detecting C compile features\n",
"-- Detecting C compile features - done\n",
"-- Check for working CXX compiler: /usr/bin/c++\n",
"-- Check for working CXX compiler: /usr/bin/c++ -- works\n",
"-- Detecting CXX compiler ABI info\n",
"-- Detecting CXX compiler ABI info - done\n",
"-- Detecting CXX compile features\n",
"-- Detecting CXX compile features - done\n",
"-- Looking for pthread.h\n",
"-- Looking for pthread.h - found\n",
"-- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n",
"-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed\n",
"-- Looking for pthread_create in pthreads\n",
"-- Looking for pthread_create in pthreads - not found\n",
"-- Looking for pthread_create in pthread\n",
"-- Looking for pthread_create in pthread - found\n",
"-- Found Threads: TRUE \n",
"-- Found CUDA: /usr/local/cuda (found suitable version \"10.2\", minimum required is \"6.5\") \n",
"-- cuda found TRUE\n",
"-- Building shared library with GPU support\n",
"-- Configuring done\n",
...
...
@@ -145,20 +116,11 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[ 11%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_reduce.cu.o\u001b[0m\n",
"[ 22%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_ctc_entrypoint.cu.o\u001b[0m\n",
"\u001b[35m\u001b[1mScanning dependencies of target warpctc\u001b[0m\n",
"[ 33%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n",
"[ 11%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n",
"[ 33%] Built target warpctc\n",
"[ 44%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/test_gpu.dir/tests/test_gpu_generated_test_gpu.cu.o\u001b[0m\n",
"\u001b[35m\u001b[1mScanning dependencies of target test_cpu\u001b[0m\n",
"[ 55%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/test_cpu.cpp.o\u001b[0m\n",
"[ 66%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/random.cpp.o\u001b[0m\n",
"[ 77%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n",
"[ 44%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n",
"[ 55%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n",
"[ 77%] Built target test_cpu\n",
"\u001b[35m\u001b[1mScanning dependencies of target test_gpu\u001b[0m\n",
"[ 88%] \u001b[32mBuilding CXX object CMakeFiles/test_gpu.dir/tests/random.cpp.o\u001b[0m\n",
"[100%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n",
"[100%] Built target test_gpu\n"
]
}
...
...
@@ -169,7 +131,7 @@
},
{
"cell_type": "code",
"execution_count":
10
,
"execution_count":
9
,
"id": "31761a31",
"metadata": {},
"outputs": [
...
...
@@ -187,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 1
1
,
"execution_count": 1
0
,
"id": "f53316f6",
"metadata": {},
"outputs": [
...
...
@@ -205,7 +167,7 @@
},
{
"cell_type": "code",
"execution_count": 1
2
,
"execution_count": 1
1
,
"id": "084f1e49",
"metadata": {},
"outputs": [
...
...
@@ -216,29 +178,20 @@
"running install\n",
"running bdist_egg\n",
"running egg_info\n",
"creating warpctc_pytorch.egg-info\n",
"writing warpctc_pytorch.egg-info/PKG-INFO\n",
"writing dependency_links to warpctc_pytorch.egg-info/dependency_links.txt\n",
"writing top-level names to warpctc_pytorch.egg-info/top_level.txt\n",
"writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n",
"writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n",
"installing library code to build/bdist.linux-x86_64/egg\n",
"running install_lib\n",
"running build_py\n",
"creating build\n",
"creating build/lib.linux-x86_64-3.9\n",
"creating build/lib.linux-x86_64-3.9/warpctc_pytorch\n",
"copying warpctc_pytorch/__init__.py -> build/lib.linux-x86_64-3.9/warpctc_pytorch\n",
"running build_ext\n",
"building 'warpctc_pytorch._warp_ctc' extension\n",
"creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9\n",
"creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src\n",
"Emitting ninja build file /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/build.ninja...\n",
"Compiling objects...\n",
"Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
"
[1/1] c++ -MMD -MF /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o.d -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -I/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/TH -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/include/python3.9 -c -c /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/src/binding.cpp -o /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -std=c++14 -fPIC -DWARPCTC_ENABLE_GPU -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE=\"_gcc\"' '-DPYBIND11_STDLIB=\"_libstdcpp\"' '-DPYBIND11_BUILD_ABI=\"_cxxabi1011\"' -DTORCH_EXTENSION_NAME=_warp_ctc -D_GLIBCXX_USE_CXX11_ABI=0
\n",
"
ninja: no work to do.
\n",
"g++ -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -shared -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -L/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/lib -L/usr/local/cuda/lib64 -lwarpctc -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n",
"creating build/bdist.linux-x86_64\n",
"creating build/bdist.linux-x86_64/egg\n",
"creating build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
"copying build/lib.linux-x86_64-3.9/warpctc_pytorch/__init__.py -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
...
...
@@ -254,7 +207,6 @@
"writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt\n",
"zip_safe flag not set; analyzing archive contents...\n",
"warpctc_pytorch.__pycache__._warp_ctc.cpython-39: module references __file__\n",
"creating dist\n",
"creating 'dist/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n",
"removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n",
"Processing warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n",
...
...
@@ -275,7 +227,7 @@
},
{
"cell_type": "code",
"execution_count": 1
3
,
"execution_count": 1
2
,
"id": "ee4ca9e3",
"metadata": {},
"outputs": [
...
...
@@ -293,7 +245,7 @@
},
{
"cell_type": "code",
"execution_count": 1
4
,
"execution_count": 1
3
,
"id": "59255ed8",
"metadata": {},
"outputs": [
...
...
@@ -311,21 +263,14 @@
},
{
"cell_type": "code",
"execution_count":
15
,
"execution_count":
22
,
"id": "1dae09b9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n"
]
}
],
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"import warpctc_pytorch as wp\n",
"import paddle.nn as pn\n",
"import paddle"
...
...
@@ -333,7 +278,7 @@
},
{
"cell_type": "code",
"execution_count": 1
6
,
"execution_count": 1
5
,
"id": "83d0762e",
"metadata": {},
"outputs": [
...
...
@@ -343,7 +288,7 @@
"'1.10.0+cu102'"
]
},
"execution_count": 1
6
,
"execution_count": 1
5
,
"metadata": {},
"output_type": "execute_result"
}
...
...
@@ -354,17 +299,17 @@
},
{
"cell_type": "code",
"execution_count": 1
7
,
"execution_count": 1
6
,
"id": "62501e2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'2.2.
0
'"
"'2.2.
1
'"
]
},
"execution_count": 1
7
,
"execution_count": 1
6
,
"metadata": {},
"output_type": "execute_result"
}
...
...
@@ -375,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 1
8
,
"execution_count": 1
7
,
"id": "9e8e0f40",
"metadata": {},
"outputs": [
...
...
@@ -392,6 +337,7 @@
}
],
"source": [
"# warpctc_pytorch CTCLoss\n",
"probs = torch.FloatTensor([[\n",
" [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
" ]]).transpose(0, 1).contiguous()\n",
...
...
@@ -412,7 +358,7 @@
},
{
"cell_type": "code",
"execution_count": 1
9
,
"execution_count": 1
8
,
"id": "2cd46569",
"metadata": {},
"outputs": [
...
...
@@ -428,6 +374,7 @@
}
],
"source": [
"# pytorch CTCLoss\n",
"probs = torch.FloatTensor([[\n",
" [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
" ]]).transpose(0, 1).contiguous()\n",
...
...
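For reference alongside the warpctc cell, a self-contained sketch of the torch.nn.CTCLoss variant this notebook compares. Blank index 0, log_softmax on the raw scores, and the sum-then-divide-by-batch reduction are assumptions chosen to mirror the warpctc cell, not lines quoted from the truncated notebook source:

import torch
import torch.nn as nn

# Same two-frame toy batch as the notebook cells: shape (T=2, N=1, C=5).
probs = torch.FloatTensor([[
    [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]
]]).transpose(0, 1).contiguous()
probs.requires_grad_(True)

log_probs = torch.log_softmax(probs, dim=-1)          # torch's CTCLoss expects log-probabilities
labels = torch.tensor([1, 2], dtype=torch.long)       # flattened targets for the batch
probs_sizes = torch.tensor([2], dtype=torch.long)     # input length per utterance
label_sizes = torch.tensor([2], dtype=torch.long)     # target length per utterance

ctc_loss = nn.CTCLoss(blank=0, reduction='sum')
cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes) / probs.size(1)
print(cost.item())
cost.backward()
print(probs.grad.numpy())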
@@ -449,7 +396,7 @@
},
{
"cell_type": "code",
"execution_count": 2
2
,
"execution_count": 2
7
,
"id": "85c3461a",
"metadata": {},
"outputs": [
...
...
@@ -467,6 +414,7 @@
}
],
"source": [
"# Paddle CTCLoss\n",
"paddle.set_device('cpu')\n",
"probs = paddle.to_tensor([[\n",
" [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1],\n",
...
...
@@ -490,7 +438,55 @@
{
"cell_type": "code",
"execution_count": null,
"id": "d390cd91",
"id": "8cdf76c2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 26,
"id": "2c305eaf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([2, 1, 5])\n",
"2.4628584384918213\n",
"[[[ 0.17703117 -0.7081247 0.17703117 0.17703117 0.17703117]]\n",
"\n",
" [[ 0.17703117 0.17703117 -0.7081247 0.17703117 0.17703117]]]\n"
]
}
],
"source": [
"# warpctc_pytorch CTCLoss, log_softmax idempotent\n",
"probs = torch.FloatTensor([[\n",
" [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
" ]]).transpose(0, 1).contiguous()\n",
"print(probs.size())\n",
"labels = torch.IntTensor([1, 2])\n",
"label_sizes = torch.IntTensor([2])\n",
"probs_sizes = torch.IntTensor([2])\n",
"probs.requires_grad_(True)\n",
"bs = probs.size(1)\n",
"\n",
"ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n",
"\n",
"log_probs = torch.log_softmax(probs, axis=-1)\n",
"cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes)\n",
"cost = cost.sum() / bs\n",
"print(cost.item())\n",
"cost.backward()\n",
"print(probs.grad.numpy())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "443336f0",
"metadata": {},
"outputs": [],
"source": []
...
...
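The new cell's "log_softmax idempotent" remark rests on shift invariance: log_softmax(x) = x - logsumexp(x) only subtracts a per-frame constant, and softmax ignores per-frame constants, so normalizing already-normalized scores changes nothing. A small numeric check of that property, independent of warp-ctc:

import torch

# Shift invariance behind the "log_softmax idempotent" cell above: applying
# log_softmax once or twice (or before a loss that softmaxes internally)
# yields the same distribution, because the second pass subtracts
# logsumexp(log_softmax(x)) = log(1) = 0.
x = torch.FloatTensor([[0.1, 0.6, 0.1, 0.1, 0.1],
                       [0.1, 0.1, 0.6, 0.1, 0.1]])

once = torch.log_softmax(x, dim=-1)
twice = torch.log_softmax(once, dim=-1)
print(torch.allclose(once, twice))                                             # True
print(torch.allclose(torch.softmax(x, dim=-1), torch.softmax(once, dim=-1)))   # True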
examples/ami/sd0/local/ami_prepare.py

@@ -22,19 +22,17 @@ Authors
 * qingenz123@126.com (Qingen ZHAO) 2022
 """
-import os
-import logging
 import argparse
-import xml.etree.ElementTree as et
 import glob
 import json
-from ami_splits import get_AMI_split
+import logging
+import os
+import xml.etree.ElementTree as et
 from distutils.util import strtobool
-from dataio import (
-    load_pkl,
-    save_pkl,
-)
+from ami_splits import get_AMI_split
+from dataio import load_pkl
+from dataio import save_pkl

 logger = logging.getLogger(__name__)

 SAMPLERATE = 16000
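The import changes above are mechanical output of the reorder_python_imports hook configured in .pre-commit-config.yaml. An illustrative snippet of the convention it enforces; the module names are stdlib and third-party stand-ins, not this file's imports:

# Illustrative only: reorder_python_imports keeps plain `import` statements
# ahead of `from` imports within a section, sorts each group alphabetically,
# and rewrites multi-name `from` imports as one name per line.
import json
import os
from collections import defaultdict
from collections import namedtuple

import numpy as np   # third-party section follows the standard library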
examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py

@@ -12,28 +12,30 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Make VoxCeleb1 trial of kaldi format
this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt
to kaldi trial format
"""
import argparse
import codecs
import os

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--voxceleb_trial", default="voxceleb1_test_v2", type=str,
                    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt")
parser.add_argument("--trial", default="data/test/trial", type=str,
                    help="Kaldi format trial file")
args = parser.parse_args()


def main(voxceleb_trial, trial):
    """
    VoxCeleb provide several trial file, which format is different with kaldi format.

@@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
    """
    print("Start convert the voxceleb trial to kaldi format")
    if not os.path.exists(voxceleb_trial):
        raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial))
    trial_dirname = os.path.dirname(trial)
    if not os.path.exists(trial_dirname):

@@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
    with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
            codecs.open(trial, 'w', encoding='utf-8') as w:
        for line in f:
            target_or_nontarget, path1, path2 = line.strip().split()
            utt_id1 = "-".join(path1.split("/"))
            utt_id2 = "-".join(path2.split("/"))
            target = "nontarget"

@@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
            w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
    print("Convert the voxceleb trial to kaldi format successfully")


if __name__ == "__main__":
    main(args.voxceleb_trial, args.trial)
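Concretely, the loop above maps one VoxCeleb trial line to one Kaldi trial line. A standalone walk-through on a made-up line; the utterance paths and the "1 means target" convention are illustrative assumptions, not values read from the dataset files:

# Illustrative only: one hypothetical VoxCeleb trial line
# ("<1|0> <path1> <path2>") converted to the Kaldi trial format
# ("<utt_id1> <utt_id2> <target|nontarget>") the way the loop above does.
line = "1 id10270/x6uYqmx31kE/00001.wav id10270/GWXujl-xAVM/00017.wav"

target_or_nontarget, path1, path2 = line.strip().split()
utt_id1 = "-".join(path1.split("/"))
utt_id2 = "-".join(path2.split("/"))
target = "target" if target_or_nontarget == "1" else "nontarget"

print("{} {} {}".format(utt_id1, utt_id2, target))
# -> id10270-x6uYqmx31kE-00001.wav id10270-GWXujl-xAVM-00017.wav target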
paddlespeech/__init__.py

@@ -11,14 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
paddlespeech/cli/asr/infer.py

@@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor):
    def _check(self, audio_file: str, sample_rate: int, force_yes: bool):
        self.sample_rate = sample_rate
        if self.sample_rate != 16000 and self.sample_rate != 8000:
            logger.error("invalid sample rate, please input --sr 8000 or --sr 16000")
            return False

        if isinstance(audio_file, (str, os.PathLike)):
paddlespeech/s2t/io/utility.py

@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
 from io import BytesIO
+from typing import List

 import numpy as np
paddlespeech/t2s/datasets/dataset.py

@@ -258,4 +258,4 @@ class ChainDataset(Dataset):
                 return dataset[i]
             i -= len(dataset)

-        raise IndexError("dataset index out of range")
\ No newline at end of file
+        raise IndexError("dataset index out of range")
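The changed lines sit at the tail of ChainDataset's index lookup. A minimal self-contained sketch of that chained-indexing pattern, with simplified names rather than the repo's exact class:

# Minimal sketch of the chained-index lookup ending in the hunk above: walk the
# member datasets in order, subtracting each length from the global index until
# it falls inside one of them; past the end, raise IndexError.
class ChainDataset:
    def __init__(self, *datasets):
        self._datasets = list(datasets)

    def __len__(self):
        return sum(len(d) for d in self._datasets)

    def __getitem__(self, i):
        for dataset in self._datasets:
            if i < len(dataset):
                return dataset[i]
            i -= len(dataset)

        raise IndexError("dataset index out of range")


chained = ChainDataset([10, 11, 12], [20, 21])
print(len(chained), chained[3])   # 5 20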
utils/DER.py

@@ -23,10 +23,11 @@ Credits
 This code is adapted from https://github.com/nryant/dscore
 """
 import argparse
-from distutils.util import strtobool
 import os
 import re
 import subprocess
+from distutils.util import strtobool

 import numpy as np

 FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")