Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
c3efabeb
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c3efabeb
编写于
8月 23, 2021
作者:
S
seemingwang
提交者:
GitHub
8月 23, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
set node feature (#34994)
上级
77a8a394
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
224 addition
and
0 deletion
+224
-0
paddle/fluid/distributed/service/graph_brpc_client.cc
paddle/fluid/distributed/service/graph_brpc_client.cc
+96
-0
paddle/fluid/distributed/service/graph_brpc_client.h
paddle/fluid/distributed/service/graph_brpc_client.h
+5
-0
paddle/fluid/distributed/service/graph_brpc_server.cc
paddle/fluid/distributed/service/graph_brpc_server.cc
+42
-0
paddle/fluid/distributed/service/graph_brpc_server.h
paddle/fluid/distributed/service/graph_brpc_server.h
+4
-0
paddle/fluid/distributed/service/graph_py_service.cc
paddle/fluid/distributed/service/graph_py_service.cc
+13
-0
paddle/fluid/distributed/service/graph_py_service.h
paddle/fluid/distributed/service/graph_py_service.h
+3
-0
paddle/fluid/distributed/service/sendrecv.proto
paddle/fluid/distributed/service/sendrecv.proto
+1
-0
paddle/fluid/distributed/table/common_graph_table.cc
paddle/fluid/distributed/table/common_graph_table.cc
+28
-0
paddle/fluid/distributed/table/common_graph_table.h
paddle/fluid/distributed/table/common_graph_table.h
+6
-0
paddle/fluid/distributed/test/graph_node_test.cc
paddle/fluid/distributed/test/graph_node_test.cc
+11
-0
paddle/fluid/pybind/fleet_py.cc
paddle/fluid/pybind/fleet_py.cc
+15
-0
未找到文件。
paddle/fluid/distributed/service/graph_brpc_client.cc
浏览文件 @
c3efabeb
...
...
@@ -479,6 +479,102 @@ std::future<int32_t> GraphBrpcClient::pull_graph_list(
closure
);
return
fut
;
}
std
::
future
<
int32_t
>
GraphBrpcClient
::
set_node_feat
(
const
uint32_t
&
table_id
,
const
std
::
vector
<
uint64_t
>
&
node_ids
,
const
std
::
vector
<
std
::
string
>
&
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>
&
features
)
{
std
::
vector
<
int
>
request2server
;
std
::
vector
<
int
>
server2request
(
server_size
,
-
1
);
for
(
int
query_idx
=
0
;
query_idx
<
node_ids
.
size
();
++
query_idx
)
{
int
server_index
=
get_server_index_by_id
(
node_ids
[
query_idx
]);
if
(
server2request
[
server_index
]
==
-
1
)
{
server2request
[
server_index
]
=
request2server
.
size
();
request2server
.
push_back
(
server_index
);
}
}
size_t
request_call_num
=
request2server
.
size
();
std
::
vector
<
std
::
vector
<
uint64_t
>>
node_id_buckets
(
request_call_num
);
std
::
vector
<
std
::
vector
<
int
>>
query_idx_buckets
(
request_call_num
);
std
::
vector
<
std
::
vector
<
std
::
vector
<
std
::
string
>>>
features_idx_buckets
(
request_call_num
);
for
(
int
query_idx
=
0
;
query_idx
<
node_ids
.
size
();
++
query_idx
)
{
int
server_index
=
get_server_index_by_id
(
node_ids
[
query_idx
]);
int
request_idx
=
server2request
[
server_index
];
node_id_buckets
[
request_idx
].
push_back
(
node_ids
[
query_idx
]);
query_idx_buckets
[
request_idx
].
push_back
(
query_idx
);
if
(
features_idx_buckets
[
request_idx
].
size
()
==
0
)
{
features_idx_buckets
[
request_idx
].
resize
(
feature_names
.
size
());
}
for
(
int
feat_idx
=
0
;
feat_idx
<
feature_names
.
size
();
++
feat_idx
)
{
features_idx_buckets
[
request_idx
][
feat_idx
].
push_back
(
features
[
feat_idx
][
query_idx
]);
}
}
DownpourBrpcClosure
*
closure
=
new
DownpourBrpcClosure
(
request_call_num
,
[
&
,
node_id_buckets
,
query_idx_buckets
,
request_call_num
](
void
*
done
)
{
int
ret
=
0
;
auto
*
closure
=
(
DownpourBrpcClosure
*
)
done
;
size_t
fail_num
=
0
;
for
(
int
request_idx
=
0
;
request_idx
<
request_call_num
;
++
request_idx
)
{
if
(
closure
->
check_response
(
request_idx
,
PS_GRAPH_SET_NODE_FEAT
)
!=
0
)
{
++
fail_num
;
}
if
(
fail_num
==
request_call_num
)
{
ret
=
-
1
;
}
}
closure
->
set_promise_value
(
ret
);
});
auto
promise
=
std
::
make_shared
<
std
::
promise
<
int32_t
>>
();
closure
->
add_promise
(
promise
);
std
::
future
<
int
>
fut
=
promise
->
get_future
();
for
(
int
request_idx
=
0
;
request_idx
<
request_call_num
;
++
request_idx
)
{
int
server_index
=
request2server
[
request_idx
];
closure
->
request
(
request_idx
)
->
set_cmd_id
(
PS_GRAPH_SET_NODE_FEAT
);
closure
->
request
(
request_idx
)
->
set_table_id
(
table_id
);
closure
->
request
(
request_idx
)
->
set_client_id
(
_client_id
);
size_t
node_num
=
node_id_buckets
[
request_idx
].
size
();
closure
->
request
(
request_idx
)
->
add_params
((
char
*
)
node_id_buckets
[
request_idx
].
data
(),
sizeof
(
uint64_t
)
*
node_num
);
std
::
string
joint_feature_name
=
paddle
::
string
::
join_strings
(
feature_names
,
'\t'
);
closure
->
request
(
request_idx
)
->
add_params
(
joint_feature_name
.
c_str
(),
joint_feature_name
.
size
());
// set features
std
::
string
set_feature
=
""
;
for
(
size_t
feat_idx
=
0
;
feat_idx
<
feature_names
.
size
();
++
feat_idx
)
{
for
(
size_t
node_idx
=
0
;
node_idx
<
node_num
;
++
node_idx
)
{
size_t
feat_len
=
features_idx_buckets
[
request_idx
][
feat_idx
][
node_idx
].
size
();
set_feature
.
append
((
char
*
)
&
feat_len
,
sizeof
(
size_t
));
set_feature
.
append
(
features_idx_buckets
[
request_idx
][
feat_idx
][
node_idx
].
data
(),
feat_len
);
}
}
closure
->
request
(
request_idx
)
->
add_params
(
set_feature
.
c_str
(),
set_feature
.
size
());
GraphPsService_Stub
rpc_stub
=
getServiceStub
(
get_cmd_channel
(
server_index
));
closure
->
cntl
(
request_idx
)
->
set_log_id
(
butil
::
gettimeofday_ms
());
rpc_stub
.
service
(
closure
->
cntl
(
request_idx
),
closure
->
request
(
request_idx
),
closure
->
response
(
request_idx
),
closure
);
}
return
fut
;
}
int32_t
GraphBrpcClient
::
initialize
()
{
// set_shard_num(_config.shard_num());
BrpcPsClient
::
initialize
();
...
...
paddle/fluid/distributed/service/graph_brpc_client.h
浏览文件 @
c3efabeb
...
...
@@ -79,6 +79,11 @@ class GraphBrpcClient : public BrpcPsClient {
const
std
::
vector
<
std
::
string
>&
feature_names
,
std
::
vector
<
std
::
vector
<
std
::
string
>>&
res
);
virtual
std
::
future
<
int32_t
>
set_node_feat
(
const
uint32_t
&
table_id
,
const
std
::
vector
<
uint64_t
>&
node_ids
,
const
std
::
vector
<
std
::
string
>&
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>&
features
);
virtual
std
::
future
<
int32_t
>
clear_nodes
(
uint32_t
table_id
);
virtual
std
::
future
<
int32_t
>
add_graph_node
(
uint32_t
table_id
,
std
::
vector
<
uint64_t
>&
node_id_list
,
...
...
paddle/fluid/distributed/service/graph_brpc_server.cc
浏览文件 @
c3efabeb
...
...
@@ -16,6 +16,7 @@
#include "paddle/fluid/distributed/service/brpc_ps_server.h"
#include <thread> // NOLINT
#include <utility>
#include "butil/endpoint.h"
#include "iomanip"
#include "paddle/fluid/distributed/service/brpc_ps_client.h"
...
...
@@ -157,6 +158,8 @@ int32_t GraphBrpcService::initialize() {
&
GraphBrpcService
::
add_graph_node
;
_service_handler_map
[
PS_GRAPH_REMOVE_GRAPH_NODE
]
=
&
GraphBrpcService
::
remove_graph_node
;
_service_handler_map
[
PS_GRAPH_SET_NODE_FEAT
]
=
&
GraphBrpcService
::
graph_set_node_feat
;
// shard初始化,server启动后才可从env获取到server_list的shard信息
initialize_shard_info
();
...
...
@@ -400,5 +403,44 @@ int32_t GraphBrpcService::graph_get_node_feat(Table *table,
return
0
;
}
int32_t
GraphBrpcService
::
graph_set_node_feat
(
Table
*
table
,
const
PsRequestMessage
&
request
,
PsResponseMessage
&
response
,
brpc
::
Controller
*
cntl
)
{
CHECK_TABLE_EXIST
(
table
,
request
,
response
)
if
(
request
.
params_size
()
<
3
)
{
set_response_code
(
response
,
-
1
,
"graph_set_node_feat request requires at least 2 arguments"
);
return
0
;
}
size_t
node_num
=
request
.
params
(
0
).
size
()
/
sizeof
(
uint64_t
);
uint64_t
*
node_data
=
(
uint64_t
*
)(
request
.
params
(
0
).
c_str
());
std
::
vector
<
uint64_t
>
node_ids
(
node_data
,
node_data
+
node_num
);
std
::
vector
<
std
::
string
>
feature_names
=
paddle
::
string
::
split_string
<
std
::
string
>
(
request
.
params
(
1
),
"
\t
"
);
std
::
vector
<
std
::
vector
<
std
::
string
>>
features
(
feature_names
.
size
(),
std
::
vector
<
std
::
string
>
(
node_num
));
const
char
*
buffer
=
request
.
params
(
2
).
c_str
();
for
(
size_t
feat_idx
=
0
;
feat_idx
<
feature_names
.
size
();
++
feat_idx
)
{
for
(
size_t
node_idx
=
0
;
node_idx
<
node_num
;
++
node_idx
)
{
size_t
feat_len
=
*
(
size_t
*
)(
buffer
);
buffer
+=
sizeof
(
size_t
);
auto
feat
=
std
::
string
(
buffer
,
feat_len
);
features
[
feat_idx
][
node_idx
]
=
feat
;
buffer
+=
feat_len
;
}
}
((
GraphTable
*
)
table
)
->
set_node_feat
(
node_ids
,
feature_names
,
features
);
return
0
;
}
}
// namespace distributed
}
// namespace paddle
paddle/fluid/distributed/service/graph_brpc_server.h
浏览文件 @
c3efabeb
...
...
@@ -83,9 +83,13 @@ class GraphBrpcService : public PsBaseService {
const
PsRequestMessage
&
request
,
PsResponseMessage
&
response
,
brpc
::
Controller
*
cntl
);
int32_t
graph_get_node_feat
(
Table
*
table
,
const
PsRequestMessage
&
request
,
PsResponseMessage
&
response
,
brpc
::
Controller
*
cntl
);
int32_t
graph_set_node_feat
(
Table
*
table
,
const
PsRequestMessage
&
request
,
PsResponseMessage
&
response
,
brpc
::
Controller
*
cntl
);
int32_t
clear_nodes
(
Table
*
table
,
const
PsRequestMessage
&
request
,
PsResponseMessage
&
response
,
brpc
::
Controller
*
cntl
);
int32_t
add_graph_node
(
Table
*
table
,
const
PsRequestMessage
&
request
,
...
...
paddle/fluid/distributed/service/graph_py_service.cc
浏览文件 @
c3efabeb
...
...
@@ -330,6 +330,19 @@ std::vector<std::vector<std::string>> GraphPyClient::get_node_feat(
return
v
;
}
void
GraphPyClient
::
set_node_feat
(
std
::
string
node_type
,
std
::
vector
<
uint64_t
>
node_ids
,
std
::
vector
<
std
::
string
>
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>
features
)
{
if
(
this
->
table_id_map
.
count
(
node_type
))
{
uint32_t
table_id
=
this
->
table_id_map
[
node_type
];
auto
status
=
worker_ptr
->
set_node_feat
(
table_id
,
node_ids
,
feature_names
,
features
);
status
.
wait
();
}
return
;
}
std
::
vector
<
FeatureNode
>
GraphPyClient
::
pull_graph_list
(
std
::
string
name
,
int
server_index
,
int
start
,
int
size
,
...
...
paddle/fluid/distributed/service/graph_py_service.h
浏览文件 @
c3efabeb
...
...
@@ -155,6 +155,9 @@ class GraphPyClient : public GraphPyService {
std
::
vector
<
std
::
vector
<
std
::
string
>>
get_node_feat
(
std
::
string
node_type
,
std
::
vector
<
uint64_t
>
node_ids
,
std
::
vector
<
std
::
string
>
feature_names
);
void
set_node_feat
(
std
::
string
node_type
,
std
::
vector
<
uint64_t
>
node_ids
,
std
::
vector
<
std
::
string
>
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>
features
);
std
::
vector
<
FeatureNode
>
pull_graph_list
(
std
::
string
name
,
int
server_index
,
int
start
,
int
size
,
int
step
=
1
);
::
paddle
::
distributed
::
PSParameter
GetWorkerProto
();
...
...
paddle/fluid/distributed/service/sendrecv.proto
浏览文件 @
c3efabeb
...
...
@@ -55,6 +55,7 @@ enum PsCmdID {
PS_GRAPH_CLEAR
=
34
;
PS_GRAPH_ADD_GRAPH_NODE
=
35
;
PS_GRAPH_REMOVE_GRAPH_NODE
=
36
;
PS_GRAPH_SET_NODE_FEAT
=
37
;
}
message
PsRequestMessage
{
...
...
paddle/fluid/distributed/table/common_graph_table.cc
浏览文件 @
c3efabeb
...
...
@@ -469,6 +469,34 @@ int32_t GraphTable::get_node_feat(const std::vector<uint64_t> &node_ids,
return
0
;
}
int32_t
GraphTable
::
set_node_feat
(
const
std
::
vector
<
uint64_t
>
&
node_ids
,
const
std
::
vector
<
std
::
string
>
&
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>
&
res
)
{
size_t
node_num
=
node_ids
.
size
();
std
::
vector
<
std
::
future
<
int
>>
tasks
;
for
(
size_t
idx
=
0
;
idx
<
node_num
;
++
idx
)
{
uint64_t
node_id
=
node_ids
[
idx
];
tasks
.
push_back
(
_shards_task_pool
[
get_thread_pool_index
(
node_id
)]
->
enqueue
(
[
&
,
idx
,
node_id
]()
->
int
{
size_t
index
=
node_id
%
this
->
shard_num
-
this
->
shard_start
;
auto
node
=
shards
[
index
].
add_feature_node
(
node_id
);
node
->
set_feature_size
(
this
->
feat_name
.
size
());
for
(
int
feat_idx
=
0
;
feat_idx
<
feature_names
.
size
();
++
feat_idx
)
{
const
std
::
string
&
feature_name
=
feature_names
[
feat_idx
];
if
(
feat_id_map
.
find
(
feature_name
)
!=
feat_id_map
.
end
())
{
node
->
set_feature
(
feat_id_map
[
feature_name
],
res
[
feat_idx
][
idx
]);
}
}
return
0
;
}));
}
for
(
size_t
idx
=
0
;
idx
<
node_num
;
++
idx
)
{
tasks
[
idx
].
get
();
}
return
0
;
}
std
::
pair
<
int32_t
,
std
::
string
>
GraphTable
::
parse_feature
(
std
::
string
feat_str
)
{
// Return (feat_id, btyes) if name are in this->feat_name, else return (-1,
...
...
paddle/fluid/distributed/table/common_graph_table.h
浏览文件 @
c3efabeb
...
...
@@ -46,6 +46,7 @@ class GraphShard {
}
return
res
;
}
GraphNode
*
add_graph_node
(
uint64_t
id
);
FeatureNode
*
add_feature_node
(
uint64_t
id
);
Node
*
find_node
(
uint64_t
id
);
...
...
@@ -122,6 +123,11 @@ class GraphTable : public SparseTable {
const
std
::
vector
<
std
::
string
>
&
feature_names
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
&
res
);
virtual
int32_t
set_node_feat
(
const
std
::
vector
<
uint64_t
>
&
node_ids
,
const
std
::
vector
<
std
::
string
>
&
feature_names
,
const
std
::
vector
<
std
::
vector
<
std
::
string
>>
&
res
);
protected:
std
::
vector
<
GraphShard
>
shards
;
size_t
shard_start
,
shard_end
,
server_num
,
shard_num_per_table
,
shard_num
;
...
...
paddle/fluid/distributed/test/graph_node_test.cc
浏览文件 @
c3efabeb
...
...
@@ -558,6 +558,17 @@ void RunBrpcPushSparse() {
VLOG
(
0
)
<<
"get_node_feat: "
<<
node_feat
[
1
][
0
];
VLOG
(
0
)
<<
"get_node_feat: "
<<
node_feat
[
1
][
1
];
node_feat
[
1
][
0
]
=
"helloworld"
;
client1
.
set_node_feat
(
std
::
string
(
"user"
),
node_ids
,
feature_names
,
node_feat
);
// sleep(5);
node_feat
=
client1
.
get_node_feat
(
std
::
string
(
"user"
),
node_ids
,
feature_names
);
VLOG
(
0
)
<<
"get_node_feat: "
<<
node_feat
[
1
][
0
];
ASSERT_TRUE
(
node_feat
[
1
][
0
]
==
"helloworld"
);
// Test string
node_ids
.
clear
();
node_ids
.
push_back
(
37
);
...
...
paddle/fluid/pybind/fleet_py.cc
浏览文件 @
c3efabeb
...
...
@@ -205,6 +205,7 @@ void BindGraphPyClient(py::module* m) {
.
def
(
"pull_graph_list"
,
&
GraphPyClient
::
pull_graph_list
)
.
def
(
"start_client"
,
&
GraphPyClient
::
start_client
)
.
def
(
"batch_sample_neighboors"
,
&
GraphPyClient
::
batch_sample_neighboors
)
.
def
(
"remove_graph_node"
,
&
GraphPyClient
::
remove_graph_node
)
.
def
(
"random_sample_nodes"
,
&
GraphPyClient
::
random_sample_nodes
)
.
def
(
"stop_server"
,
&
GraphPyClient
::
stop_server
)
.
def
(
"get_node_feat"
,
...
...
@@ -221,6 +222,20 @@ void BindGraphPyClient(py::module* m) {
}
return
bytes_feats
;
})
.
def
(
"set_node_feat"
,
[](
GraphPyClient
&
self
,
std
::
string
node_type
,
std
::
vector
<
uint64_t
>
node_ids
,
std
::
vector
<
std
::
string
>
feature_names
,
std
::
vector
<
std
::
vector
<
py
::
bytes
>>
bytes_feats
)
{
std
::
vector
<
std
::
vector
<
std
::
string
>>
feats
(
bytes_feats
.
size
());
for
(
int
i
=
0
;
i
<
bytes_feats
.
size
();
++
i
)
{
for
(
int
j
=
0
;
j
<
bytes_feats
[
i
].
size
();
++
j
)
{
feats
[
i
].
push_back
(
std
::
string
(
bytes_feats
[
i
][
j
]));
}
}
self
.
set_node_feat
(
node_type
,
node_ids
,
feature_names
,
feats
);
return
;
})
.
def
(
"bind_local_server"
,
&
GraphPyClient
::
bind_local_server
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录