Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
016a0687
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
016a0687
编写于
12月 10, 2018
作者:
H
heqiaozhi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
stop server
上级
8e3fe2d7
变更
5
隐藏空白更改
内联
并排
Showing 5 changed files with 23 additions and 10 deletions
+23
-10
paddle/fluid/framework/async_executor.cc
paddle/fluid/framework/async_executor.cc
+4
-0
paddle/fluid/framework/async_executor.h
paddle/fluid/framework/async_executor.h
+1
-0
paddle/fluid/framework/executor_thread_worker.cc
paddle/fluid/framework/executor_thread_worker.cc
+9
-9
paddle/fluid/pybind/async_executor_py.cc
paddle/fluid/pybind/async_executor_py.cc
+1
-0
python/paddle/fluid/async_executor.py
python/paddle/fluid/async_executor.py
+8
-1
未找到文件。
paddle/fluid/framework/async_executor.cc
浏览文件 @
016a0687
...
...
@@ -83,6 +83,10 @@ uint64_t AsyncExecutor::StartServer() {
return
_pslib_ptr
->
run_server
();
}
// Stops the server by forwarding the request to the pslib handle held in
// _pslib_ptr. Takes no arguments and returns nothing.
// NOTE(review): _pslib_ptr is dereferenced without a check here; presumably
// it is set up before this is called -- confirm against the init path.
void AsyncExecutor::StopServer() {
  _pslib_ptr->stop_server();
}
// Hands the host signature list to the pslib handle via gather_servers().
//
// host_sign_list: its underlying contiguous buffer (data()) is passed on.
// node_num:       forwarded unchanged as the second argument.
// NOTE(review): presumably node_num is the number of valid entries in
// host_sign_list; nothing here verifies that -- confirm with callers.
void AsyncExecutor::GatherServers(std::vector<uint64_t>& host_sign_list,
                                  int node_num) {
  uint64_t* host_signs = host_sign_list.data();
  _pslib_ptr->gather_servers(host_signs, node_num);
}
...
...
paddle/fluid/framework/async_executor.h
浏览文件 @
016a0687
...
...
@@ -67,6 +67,7 @@ class AsyncExecutor {
void
InitWorker
(
const
std
::
string
&
dist_desc
,
std
::
vector
<
uint64_t
>&
host_sign_list
,
int
node_num
,
int
index
);
//void ConfigWorker() {}
uint64_t
StartServer
();
void
StopServer
();
void
GatherServers
(
std
::
vector
<
uint64_t
>&
host_sign_list
,
int
node_num
);
void
InitModel
();
void
SaveModel
(
const
std
::
string
&
path
);
...
...
paddle/fluid/framework/executor_thread_worker.cc
浏览文件 @
016a0687
...
...
@@ -569,7 +569,6 @@ void AsyncExecutorThreadWorker::FillSparse(int table_id) {
}
void
AsyncExecutorThreadWorker
::
PushSparse
(
int
table_id
)
{
auto
slot_dim
=
_param_config
->
slot_dim
;
//TODO
auto
fea_dim
=
_param_config
->
fea_dim
;
//_current_train_job.fea_dim();TODO
auto
&
features
=
_features
[
table_id
];
...
...
@@ -592,19 +591,20 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) {
}
Variable
*
g_var
=
thread_scope_
->
FindVar
(
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]);
LoDTensor
*
g_tensor
=
g_var
->
GetMutable
<
LoDTensor
>
();
//int count = g_tensor->numel();
float
*
g
=
g_tensor
->
data
<
float
>
();
/*
if (FLAGS_scale_sparse_gradient_with_batch_size) {
Eigen::Map<Eigen::MatrixXf> g_mat(g, 1, tensor->numel());
g_mat *= _batch_size;
if
(
g_tensor
==
NULL
)
{
LOG
(
ERROR
)
<<
"var["
<<
_param_config
->
gradient_var
[
table_id
][
slot_idx
-
1
]
<<
"] not found"
;
exit
(
-
1
);
}
*/
float
*
g
=
g_tensor
->
data
<
float
>
();
Variable
*
var
=
thread_scope_
->
FindVar
(
feed_vec
[
slot_idx
]);
LoDTensor
*
tensor
=
var
->
GetMutable
<
LoDTensor
>
();
if
(
tensor
==
NULL
)
{
LOG
(
ERROR
)
<<
"var["
<<
feed_vec
[
slot_idx
]
<<
"] not found"
;
exit
(
-
1
);
}
int
len
=
tensor
->
lod
()[
0
].
back
();
//assert(slot_dim * len == count
);
assert
(
slot_dim
*
len
==
g_tensor
->
numel
()
);
int64_t
*
ids
=
tensor
->
data
<
int64_t
>
();
for
(
auto
id_idx
=
0u
;
id_idx
<
len
;
++
id_idx
){
if
(
ids
[
id_idx
]
==
0
)
{
...
...
paddle/fluid/pybind/async_executor_py.cc
浏览文件 @
016a0687
...
...
@@ -51,6 +51,7 @@ void BindAsyncExecutor(py::module* m) {
.
def
(
"init_server"
,
&
framework
::
AsyncExecutor
::
InitServer
)
.
def
(
"init_worker"
,
&
framework
::
AsyncExecutor
::
InitWorker
)
.
def
(
"start_server"
,
&
framework
::
AsyncExecutor
::
StartServer
)
.
def
(
"stop_server"
,
&
framework
::
AsyncExecutor
::
StopServer
)
.
def
(
"gather_servers"
,
&
framework
::
AsyncExecutor
::
GatherServers
)
.
def
(
"init_model"
,
&
framework
::
AsyncExecutor
::
InitModel
)
.
def
(
"save_model"
,
&
framework
::
AsyncExecutor
::
SaveModel
);
...
...
python/paddle/fluid/async_executor.py
浏览文件 @
016a0687
...
...
@@ -151,7 +151,10 @@ class AsyncExecutor(object):
self
.
executor
.
run_from_files
(
program_desc
,
data_feed
.
desc
(),
filelist
,
thread_num
,
fetch_var_names
,
debug
)
self
.
instance
.
barrier_all
()
self
.
instance
.
barrier_all
()
#worker do all things
if
self
.
instance
.
is_first_worker
():
self
.
executor
.
stop_server
()
self
.
instance
.
barrier_all
()
#sync
def
config_distributed_nodes
(
self
,
dist_opt
):
...
...
@@ -164,6 +167,9 @@ class AsyncExecutor(object):
# Accessor: returns the object currently stored in self.instance, unchanged.
def
get_instance
(
self
):
return
self
.
instance
#def stop_server(self):
# self.executor.stop_server()
def
init_server
(
self
,
dist_desc
):
self
.
executor
.
init_server
(
dist_desc
,
self
.
instance
.
_rankid
)
ip
=
self
.
executor
.
start_server
()
...
...
@@ -174,6 +180,7 @@ class AsyncExecutor(object):
self
.
instance
.
barrier_all
()
#wait all worker start
self
.
instance
.
barrier_all
()
#wait init model
self
.
instance
.
barrier_all
()
#wait worker do all things
self
.
instance
.
barrier_all
()
#sync
def
init_worker
(
self
,
dist_desc
):
self
.
instance
.
barrier_all
()
#wait all server start
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录