Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
429221dc
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
429221dc
编写于
6月 29, 2022
作者:
H
huangyuxin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adopt multi machine training
上级
ac1b3016
变更
4
显示空白变更内容
内联
并排
Showing
4 changed files
with
11 additions
and
6 deletions
+11
-6
examples/wenetspeech/asr1/conf/conformer.yaml
examples/wenetspeech/asr1/conf/conformer.yaml
+1
-1
examples/wenetspeech/asr1/local/train.sh
examples/wenetspeech/asr1/local/train.sh
+1
-2
paddlespeech/audio/streamdata/shardlists.py
paddlespeech/audio/streamdata/shardlists.py
+2
-0
paddlespeech/audio/streamdata/utils.py
paddlespeech/audio/streamdata/utils.py
+7
-3
未找到文件。
examples/wenetspeech/asr1/conf/conformer.yaml
浏览文件 @
429221dc
...
@@ -67,7 +67,7 @@ maxlen_out: 150 # if output length(number of tokens) > maxlen-out, data is auto
...
@@ -67,7 +67,7 @@ maxlen_out: 150 # if output length(number of tokens) > maxlen-out, data is auto
resample_rate
:
16000
resample_rate
:
16000
shuffle_size
:
1500
shuffle_size
:
1500
sort_size
:
1000
sort_size
:
1000
num_workers
:
0
num_workers
:
8
prefetch_factor
:
10
prefetch_factor
:
10
dist_sampler
:
True
dist_sampler
:
True
num_encs
:
1
num_encs
:
1
...
...
examples/wenetspeech/asr1/local/train.sh
浏览文件 @
429221dc
...
@@ -45,8 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
...
@@ -45,8 +45,7 @@ python3 -u ${BIN_DIR}/train.py \
--benchmark-batch-size
${
benchmark_batch_size
}
\
--benchmark-batch-size
${
benchmark_batch_size
}
\
--benchmark-max-step
${
benchmark_max_step
}
--benchmark-max-step
${
benchmark_max_step
}
else
else
#NCCL_SOCKET_IFNAME=eth0
NCCL_SOCKET_IFNAME
=
eth0 python3
-m
paddle.distributed.launch
--gpus
=
${
CUDA_VISIBLE_DEVICES
}
${
ips_config
}
${
BIN_DIR
}
/train.py
\
python3
-m
paddle.distributed.launch
--gpus
=
${
CUDA_VISIBLE_DEVICES
}
${
ips_config
}
${
BIN_DIR
}
/train.py
\
--ngpu
${
ngpu
}
\
--ngpu
${
ngpu
}
\
--seed
${
seed
}
\
--seed
${
seed
}
\
--config
${
config_path
}
\
--config
${
config_path
}
\
...
...
paddlespeech/audio/streamdata/shardlists.py
浏览文件 @
429221dc
...
@@ -65,6 +65,7 @@ class SimpleShardList(IterableDataset):
...
@@ -65,6 +65,7 @@ class SimpleShardList(IterableDataset):
def
split_by_node
(
src
,
group
=
None
):
def
split_by_node
(
src
,
group
=
None
):
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
(
group
=
group
)
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
(
group
=
group
)
logger
.
info
(
f
"world_size:
{
world_size
}
, rank:
{
rank
}
"
)
if
world_size
>
1
:
if
world_size
>
1
:
for
s
in
islice
(
src
,
rank
,
None
,
world_size
):
for
s
in
islice
(
src
,
rank
,
None
,
world_size
):
yield
s
yield
s
...
@@ -83,6 +84,7 @@ def single_node_only(src, group=None):
...
@@ -83,6 +84,7 @@ def single_node_only(src, group=None):
def
split_by_worker
(
src
):
def
split_by_worker
(
src
):
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
()
rank
,
world_size
,
worker
,
num_workers
=
utils
.
paddle_worker_info
()
logger
.
info
(
f
"num_workers:
{
num_workers
}
, worker:
{
worker
}
"
)
if
num_workers
>
1
:
if
num_workers
>
1
:
for
s
in
islice
(
src
,
worker
,
None
,
num_workers
):
for
s
in
islice
(
src
,
worker
,
None
,
num_workers
):
yield
s
yield
s
...
...
paddlespeech/audio/streamdata/utils.py
浏览文件 @
429221dc
...
@@ -16,6 +16,9 @@ import re
...
@@ -16,6 +16,9 @@ import re
import
sys
import
sys
from
typing
import
Any
,
Callable
,
Iterator
,
Optional
,
Union
from
typing
import
Any
,
Callable
,
Iterator
,
Optional
,
Union
from
..utils.log
import
Logger
logger
=
Logger
(
__name__
)
def
make_seed
(
*
args
):
def
make_seed
(
*
args
):
seed
=
0
seed
=
0
...
@@ -112,13 +115,14 @@ def paddle_worker_info(group=None):
...
@@ -112,13 +115,14 @@ def paddle_worker_info(group=None):
num_workers
=
int
(
os
.
environ
[
"NUM_WORKERS"
])
num_workers
=
int
(
os
.
environ
[
"NUM_WORKERS"
])
else
:
else
:
try
:
try
:
import
paddle.io.get_worker_info
from
paddle.io
import
get_worker_info
worker_info
=
paddle
.
io
.
get_worker_info
()
worker_info
=
paddle
.
io
.
get_worker_info
()
if
worker_info
is
not
None
:
if
worker_info
is
not
None
:
worker
=
worker_info
.
id
worker
=
worker_info
.
id
num_workers
=
worker_info
.
num_workers
num_workers
=
worker_info
.
num_workers
except
ModuleNotFoundError
:
except
ModuleNotFoundError
as
E
:
pass
logger
.
info
(
f
"not found
{
E
}
"
)
exit
(
-
1
)
return
rank
,
world_size
,
worker
,
num_workers
return
rank
,
world_size
,
worker
,
num_workers
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录