Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
5e513928
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
5e513928
编写于
4月 04, 2019
作者:
X
xjqbest
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix runtime error
test=develop
上级
514d727a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
35 addition
and
19 deletion
+35
-19
paddle/fluid/framework/fleet/fleet_wrapper.cc
paddle/fluid/framework/fleet/fleet_wrapper.cc
+1
-0
paddle/fluid/framework/io/shell.cc
paddle/fluid/framework/io/shell.cc
+1
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+1
-1
python/paddle/fluid/incubate/fleet/parameter_server/__init__.py
.../paddle/fluid/incubate/fleet/parameter_server/__init__.py
+16
-6
python/paddle/fluid/tests/unittests/test_dataset.py
python/paddle/fluid/tests/unittests/test_dataset.py
+12
-8
python/paddle/fluid/trainer_desc.py
python/paddle/fluid/trainer_desc.py
+1
-1
python/paddle/fluid/trainer_factory.py
python/paddle/fluid/trainer_factory.py
+3
-2
未找到文件。
paddle/fluid/framework/fleet/fleet_wrapper.cc
浏览文件 @
5e513928
...
...
@@ -237,6 +237,7 @@ void FleetWrapper::PushDenseParamSync(
std
::
vector
<
paddle
::
ps
::
Region
>
regions
;
for
(
auto
&
t
:
var_names
)
{
Variable
*
var
=
scope
.
FindVar
(
t
);
CHECK
(
var
!=
nullptr
)
<<
"var["
<<
t
<<
"] not found"
;
LoDTensor
*
tensor
=
var
->
GetMutable
<
LoDTensor
>
();
float
*
g
=
tensor
->
mutable_data
<
float
>
(
place
);
paddle
::
ps
::
Region
reg
(
g
,
tensor
->
numel
());
...
...
paddle/fluid/framework/io/shell.cc
浏览文件 @
5e513928
...
...
@@ -126,7 +126,7 @@ static int shell_popen_fork_internal(const char* real_cmd, bool do_read,
}
close_open_fds_internal
();
if
(
execl
(
"/bin/
sh"
,
"
sh"
,
"-c"
,
real_cmd
,
NULL
)
<
0
)
{
if
(
execl
(
"/bin/
bash"
,
"ba
sh"
,
"-c"
,
real_cmd
,
NULL
)
<
0
)
{
return
-
1
;
}
exit
(
127
);
...
...
python/paddle/fluid/executor.py
浏览文件 @
5e513928
...
...
@@ -712,7 +712,7 @@ class Executor(object):
if
dataset
==
None
:
raise
RuntimeError
(
"dataset is needed and should be initialized"
)
if
self
.
place
==
paddle
.
fluid
.
CUDAPlace
(
):
if
not
isinstance
(
self
.
place
,
core
.
CPUPlace
):
raise
RuntimeError
(
"infer_from_dataset is verified on CPUPlace"
"We will open CUDAPlace in the future"
)
...
...
python/paddle/fluid/incubate/fleet/parameter_server/__init__.py
浏览文件 @
5e513928
...
...
@@ -123,18 +123,23 @@ class Fleet(object):
print
(
"You should run DistributedOptimizer.minimize() first"
)
sys
.
exit
(
-
1
)
def
init_worker
(
self
,
programs
):
def
init_worker
(
self
,
programs
,
scopes
=
None
):
"""
init_worker(): will be called by user. When a user knows current process is_server(), he/she
should call init_worker() to initialize global information about worker and connect
worker with pserver.
worker with pserver.
You should run startup program before init_worker.
Args:
programs(Program|list): a Program or a list of Programs
scopes(Scope|list): a Scope or a list of Scopes, default None.
"""
if
not
isinstance
(
programs
,
list
):
programs
=
[
programs
]
if
scopes
is
None
:
scopes
=
[
fluid
.
global_scope
()]
*
len
(
programs
)
if
len
(
scopes
)
!=
len
(
programs
):
print
(
"You should make sure len(scopes) == len(programs) or set scopes None"
)
sys
.
exit
(
-
1
)
if
self
.
_opt_info
:
if
"fleet_desc"
in
self
.
_opt_info
:
self
.
_dist_desc_str
=
text_format
.
MessageToString
(
...
...
@@ -160,7 +165,7 @@ class Fleet(object):
self
.
role_maker_
.
_barrier_worker
()
if
self
.
role_maker_
.
_is_first_worker
():
tables
=
self
.
_dist_desc
.
trainer_param
.
dense_table
for
prog
in
programs
:
for
prog
,
scope
in
zip
(
programs
,
scopes
)
:
prog_id
=
str
(
id
(
prog
))
prog_conf
=
self
.
_opt_info
[
'program_configs'
][
prog_id
]
prog_tables
=
{}
...
...
@@ -174,8 +179,13 @@ class Fleet(object):
continue
var_name_list
=
[]
for
i
in
range
(
0
,
len
(
table
.
dense_variable_name
)):
var_name_list
.
append
(
table
.
dense_variable_name
[
i
])
self
.
_fleet_ptr
.
init_model
(
prog
.
desc
,
var_name
=
table
.
dense_variable_name
[
i
]
if
scope
.
find_var
(
var_name
)
is
None
:
print
(
"var "
+
var_name
+
" not found in scope, "
"you should run startup program first"
)
sys
.
exit
(
-
1
)
var_name_list
.
append
(
var_name
)
self
.
_fleet_ptr
.
init_model
(
scope
,
int
(
table
.
table_id
),
var_name_list
)
# barrier for init model done
...
...
python/paddle/fluid/tests/unittests/test_dataset.py
浏览文件 @
5e513928
...
...
@@ -107,10 +107,12 @@ class TestDataset(unittest.TestCase):
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
for
i
in
range
(
2
):
try
:
exe
.
train_from_dataset
(
fluid
.
default_main_program
(),
dataset
)
except
:
self
.
assertTrue
(
False
)
#try:
exe
.
train_from_dataset
(
fluid
.
default_main_program
(),
dataset
)
#except ImportError as e:
# pass
#except Exception as e:
# self.assertTrue(False)
os
.
remove
(
"./test_in_memory_dataset_run_a.txt"
)
os
.
remove
(
"./test_in_memory_dataset_run_b.txt"
)
...
...
@@ -149,10 +151,12 @@ class TestDataset(unittest.TestCase):
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
for
i
in
range
(
2
):
try
:
exe
.
train_from_dataset
(
fluid
.
default_main_program
(),
dataset
)
except
:
self
.
assertTrue
(
False
)
#try:
exe
.
train_from_dataset
(
fluid
.
default_main_program
(),
dataset
)
#except ImportError as e:
# pass
#except Exception as e:
# self.assertTrue(False)
os
.
remove
(
"./test_queue_dataset_run_a.txt"
)
os
.
remove
(
"./test_queue_dataset_run_b.txt"
)
...
...
python/paddle/fluid/trainer_desc.py
浏览文件 @
5e513928
...
...
@@ -23,7 +23,7 @@ class TrainerDesc(object):
with open(proto_file, 'r') as f:
text_format.Parse(f.read(), self.proto_desc)
'''
from
proto
import
trainer_desc_pb2
from
.
proto
import
trainer_desc_pb2
self
.
proto_desc
=
trainer_desc_pb2
.
TrainerDesc
()
import
multiprocessing
as
mp
# set default thread num == cpu count
...
...
python/paddle/fluid/trainer_factory.py
浏览文件 @
5e513928
...
...
@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
.trainer_desc
import
MultiTrainer
,
DistMultiTrainer
from
.device_worker
import
Hogwild
,
DownpourSGD
__all__
=
[
"TrainerFactory"
]
...
...
@@ -20,8 +23,6 @@ class TrainerFactory(object):
pass
def
_create_trainer
(
self
,
opt_info
=
None
):
from
.trainer_desc
import
MultiTrainer
,
DistMultiTrainer
from
.device_worker
import
Hogwild
,
DownpourSGD
trainer
=
None
device_worker
=
None
if
opt_info
==
None
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录