Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
6ec69212
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6ec69212
编写于
6月 29, 2022
作者:
H
huangyuxin
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'webdataset' of
https://github.com/Jackwaterveg/DeepSpeech
into webdataset
上级
1dd23a8b
429221dc
变更
6
显示空白变更内容
内联
并排
Showing
6 changed file
with
13 addition
and
8 deletion
+13
-8
examples/wenetspeech/asr1/conf/conformer.yaml
examples/wenetspeech/asr1/conf/conformer.yaml
+1
-1
examples/wenetspeech/asr1/local/train.sh
examples/wenetspeech/asr1/local/train.sh
+1
-2
paddlespeech/audio/streamdata/shardlists.py
paddlespeech/audio/streamdata/shardlists.py
+2
-0
paddlespeech/audio/streamdata/utils.py
paddlespeech/audio/streamdata/utils.py
+7
-3
paddlespeech/cli/asr/infer.py
paddlespeech/cli/asr/infer.py
+1
-1
paddlespeech/s2t/io/dataloader.py
paddlespeech/s2t/io/dataloader.py
+1
-1
未找到文件。
examples/wenetspeech/asr1/conf/conformer.yaml
浏览文件 @
6ec69212
...
@@ -67,7 +67,7 @@ maxlen_out: 150 # if output length(number of tokens) > maxlen-out, data is auto
...
@@ -67,7 +67,7 @@ maxlen_out: 150 # if output length(number of tokens) > maxlen-out, data is auto
resample_rate
:
16000
resample_rate
:
16000
shuffle_size
:
1500
shuffle_size
:
1500
sort_size
:
1000
sort_size
:
1000
num_workers
:
0
num_workers
:
8
prefetch_factor
:
10
prefetch_factor
:
10
dist_sampler
:
True
dist_sampler
:
True
num_encs
:
1
num_encs
:
1
...
...
examples/wenetspeech/asr1/local/train.sh
浏览文件 @
6ec69212
...
@@ -45,8 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
...
@@ -45,8 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
--benchmark-batch-size
${
benchmark_batch_size
}
\
--benchmark-batch-size
${
benchmark_batch_size
}
\
--benchmark-max-step
${
benchmark_max_step
}
--benchmark-max-step
${
benchmark_max_step
}
else
else
#NCCL_SOCKET_IFNAME=eth0
NCCL_SOCKET_IFNAME
=
eth0 python3
-m
paddle.distributed.launch
--gpus
=
${
CUDA_VISIBLE_DEVICES
}
${
ips_config
}
${
BIN_DIR
}
/train.py
\
python3
-m
paddle.distributed.launch
--gpus
=
${
CUDA_VISIBLE_DEVICES
}
${
ips_config
}
${
BIN_DIR
}
/train.py
\
--ngpu
${
ngpu
}
\
--ngpu
${
ngpu
}
\
--seed
${
seed
}
\
--seed
${
seed
}
\
--config
${
config_path
}
\
--config
${
config_path
}
\
...
...
paddlespeech/audio/streamdata/shardlists.py
浏览文件 @
6ec69212
...
@@ -65,6 +65,7 @@ class SimpleShardList(IterableDataset):
...
@@ -65,6 +65,7 @@ class SimpleShardList(IterableDataset):
def
split_by_node
(
src
,
group
=
None
):
def
split_by_node
(
src
,
group
=
None
):
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
(
group
=
group
)
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
(
group
=
group
)
logger
.
info
(
f
"world_size:
{
world_size
}
, rank:
{
rank
}
"
)
if
world_size
>
1
:
if
world_size
>
1
:
for
s
in
islice
(
src
,
rank
,
None
,
world_size
):
for
s
in
islice
(
src
,
rank
,
None
,
world_size
):
yield
s
yield
s
...
@@ -83,6 +84,7 @@ def single_node_only(src, group=None):
...
@@ -83,6 +84,7 @@ def single_node_only(src, group=None):
def
split_by_worker
(
src
):
def
split_by_worker
(
src
):
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
()
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
()
logger
.
info
(
f
"num_workers:
{
num_workers
}
, worker:
{
worker
}
"
)
if
num_workers
>
1
:
if
num_workers
>
1
:
for
s
in
islice
(
src
,
worker
,
None
,
num_workers
):
for
s
in
islice
(
src
,
worker
,
None
,
num_workers
):
yield
s
yield
s
...
...
paddlespeech/audio/streamdata/utils.py
浏览文件 @
6ec69212
...
@@ -16,6 +16,9 @@ import re
...
@@ -16,6 +16,9 @@ import re
import
sys
import
sys
from
typing
import
Any
,
Callable
,
Iterator
,
Optional
,
Union
from
typing
import
Any
,
Callable
,
Iterator
,
Optional
,
Union
from
..utils.log
import
Logger
logger
=
Logger
(
__name__
)
def
make_seed
(
*
args
):
def
make_seed
(
*
args
):
seed
=
0
seed
=
0
...
@@ -112,13 +115,14 @@ def paddle_worker_info(group=None):
...
@@ -112,13 +115,14 @@ def paddle_worker_info(group=None):
num_workers
=
int
(
os
.
environ
[
"NUM_WORKERS"
])
num_workers
=
int
(
os
.
environ
[
"NUM_WORKERS"
])
else
:
else
:
try
:
try
:
import
paddle.io.get_worker_info
from
paddle.io
import
get_worker_info
worker_info
=
paddle
.
io
.
get_worker_info
()
worker_info
=
paddle
.
io
.
get_worker_info
()
if
worker_info
is
not
None
:
if
worker_info
is
not
None
:
worker
=
worker_info
.
id
worker
=
worker_info
.
id
num_workers
=
worker_info
.
num_workers
num_workers
=
worker_info
.
num_workers
except
ModuleNotFoundError
:
except
ModuleNotFoundError
as
E
:
pass
logger
.
info
(
f
"not found
{
E
}
"
)
exit
(
-
1
)
return
rank
,
world_size
,
worker
,
num_workers
return
rank
,
world_size
,
worker
,
num_workers
...
...
paddlespeech/cli/asr/infer.py
浏览文件 @
6ec69212
...
@@ -33,7 +33,7 @@ from ..log import logger
...
@@ -33,7 +33,7 @@ from ..log import logger
from
..utils
import
CLI_TIMER
from
..utils
import
CLI_TIMER
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
..utils
import
timer_register
from
..utils
import
timer_register
from
paddlespeech.
s2t.audio
.transformation
import
Transformation
from
paddlespeech.
audio.transform
.transformation
import
Transformation
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.frontend.featurizer.text_featurizer
import
TextFeaturizer
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
from
paddlespeech.s2t.utils.utility
import
UpdateConfig
...
...
paddlespeech/s2t/io/dataloader.py
浏览文件 @
6ec69212
...
@@ -104,7 +104,7 @@ class StreamDataLoader():
...
@@ -104,7 +104,7 @@ class StreamDataLoader():
if
self
.
dist_sampler
:
if
self
.
dist_sampler
:
base_dataset
=
streamdata
.
DataPipeline
(
base_dataset
=
streamdata
.
DataPipeline
(
streamdata
.
SimpleShardList
(
shardlist
),
streamdata
.
SimpleShardList
(
shardlist
),
streamdata
.
split_by_node
,
streamdata
.
split_by_node
if
train_mode
else
streamdata
.
placeholder
()
,
streamdata
.
split_by_worker
,
streamdata
.
split_by_worker
,
streamdata
.
tarfile_to_samples
(
streamdata
.
reraise_exception
)
streamdata
.
tarfile_to_samples
(
streamdata
.
reraise_exception
)
)
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录