Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
47999c42
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
47999c42
编写于
2月 19, 2019
作者:
Y
Yan Xu
提交者:
GitHub
2月 19, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
cherry pick: multiple process launch utily test=release/1.3 (#15741)
上级
687fb401
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
37 addition
and
17 deletion
+37
-17
python/paddle/__init__.py
python/paddle/__init__.py
+1
-0
python/paddle/distributed/__init__.py
python/paddle/distributed/__init__.py
+13
-0
python/paddle/distributed/launch.py
python/paddle/distributed/launch.py
+22
-16
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+0
-1
python/setup.py.in
python/setup.py.in
+1
-0
未找到文件。
python/paddle/__init__.py
浏览文件 @
47999c42
...
...
@@ -25,4 +25,5 @@ import paddle.reader
import
paddle.dataset
import
paddle.batch
import
paddle.compat
import
paddle.distributed
batch
=
batch
.
batch
python/paddle/distributed/__init__.py
0 → 100644
浏览文件 @
47999c42
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
tools/run_mp
.py
→
python/paddle/distributed/launch
.py
浏览文件 @
47999c42
...
...
@@ -37,7 +37,7 @@ default_envs = {
GPUS
=
8
def
start_procs
(
gpus
,
cmd
,
log_dir
):
def
start_procs
(
gpus
,
entrypoint
,
entrypoint_args
,
log_dir
):
procs
=
[]
log_fns
=
[]
os
.
system
(
"mkdir -p %s"
%
log_dir
)
...
...
@@ -73,12 +73,11 @@ def start_procs(gpus, cmd, log_dir):
"PADDLE_TRAINER_ENDPOINTS"
:
all_nodes_devices_endpoints
})
print
(
"starting process "
,
i
,
cmd
,
curr_env
)
print
(
"starting process "
,
i
,
entrypoint
,
entrypoint_args
,
curr_env
)
fn
=
open
(
"%s/workerlog.%d"
%
(
log_dir
,
i
),
"w"
)
log_fns
.
append
(
fn
)
procs
.
append
(
subprocess
.
Popen
(
cmd
.
strip
().
split
(
" "
),
stdout
=
fn
,
stderr
=
fn
,
env
=
curr_env
))
cmd
=
[
sys
.
executable
,
"-u"
,
entrypoint
]
+
entrypoint_args
procs
.
append
(
subprocess
.
Popen
(
cmd
,
stdout
=
fn
,
stderr
=
fn
,
env
=
curr_env
))
for
i
in
range
(
gpus
):
try
:
...
...
@@ -89,7 +88,8 @@ def start_procs(gpus, cmd, log_dir):
pass
def
main
():
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
'''start paddle training using multi-process mode.
NOTE: your train program ***must*** run as distributed nccl2 mode,
...
...
@@ -108,21 +108,27 @@ POD_IP (current node ip address, not needed for local training)
type
=
int
,
default
=
8
,
help
=
'start number of processes for every gpu'
)
parser
.
add_argument
(
'--cmd'
,
type
=
str
,
default
=
""
,
help
=
'command to run for each process, e.g. python train.py --lr 0.1'
)
parser
.
add_argument
(
'--log_dir'
,
type
=
str
,
default
=
"mylog"
,
help
=
'directory to put logs per process.'
)
args
=
parser
.
parse_args
()
if
args
.
cmd
==
""
:
parser
.
print_help
()
exit
(
0
)
start_procs
(
args
.
gpus
,
args
.
cmd
,
args
.
log_dir
)
parser
.
add_argument
(
'entrypoint_script'
,
type
=
str
,
help
=
"The entrypoint script to be launched in parallel,"
"followed by all the arguments for each process,"
"e.g. train.py --lr 0.1"
)
parser
.
add_argument
(
'entrypoint_args'
,
nargs
=
argparse
.
REMAINDER
)
return
parser
.
parse_args
()
def
main
():
args
=
parse_args
()
# launch multiple training process
start_procs
(
args
.
gpus
,
args
.
entrypoint_script
,
args
.
entrypoint_args
,
args
.
log_dir
)
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/__init__.py
浏览文件 @
47999c42
...
...
@@ -161,7 +161,6 @@ def __bootstrap__():
'times_excess_than_required_tmp_allocation'
,
'enable_inplace_whitelist'
]
core
.
init_gflags
([
sys
.
argv
[
0
]]
+
[
"--tryfromenv="
+
","
.
join
(
read_env_flags
)])
core
.
init_glog
(
sys
.
argv
[
0
])
...
...
python/setup.py.in
浏览文件 @
47999c42
...
...
@@ -100,6 +100,7 @@ packages=['paddle',
'paddle.utils',
'paddle.dataset',
'paddle.reader',
'paddle.distributed',
'paddle.fluid',
'paddle.fluid.imperative',
'paddle.fluid.proto',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录