Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PGL
提交
b46b2b1a
P
PGL
项目概览
PaddlePaddle
/
PGL
通知
76
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
11
列表
看板
标记
里程碑
合并请求
1
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PGL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
11
Issue
11
列表
看板
标记
里程碑
合并请求
1
合并请求
1
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
b46b2b1a
编写于
2月 14, 2020
作者:
L
liweibin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add pinsage sampling
上级
0bd10e14
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
163 additions
and
2 deletions
+163
-2
pgl/sample.py
pgl/sample.py
+163
-2
未找到文件。
pgl/sample.py
浏览文件 @
b46b2b1a
...
...
@@ -24,10 +24,29 @@ from pgl import graph_kernel
# Names exported by ``from <this module> import *``.  Each entry must be a
# separate, comma-terminated literal: two adjacent string literals would be
# silently concatenated into a single (non-existent) name.
__all__ = [
    'graphsage_sample',
    'node2vec_sample',
    'deepwalk_sample',
    'metapath_randomwalk',
    'pinsage_sample',
]
def traverse(item):
    """Yield the leaf elements of ``item`` depth-first, left to right.

    Lists and numpy arrays are descended into recursively; every other
    object (tuples included) is treated as a leaf and yielded unchanged.
    """
    # Leaf case first: anything that is not a list / ndarray is emitted as-is.
    if not isinstance(item, (list, np.ndarray)):
        yield item
        return
    for child in item:
        for leaf in traverse(child):
            yield leaf
def flat_node_and_edge(nodes, eids, weights=None):
    """Flatten nested node / edge-id / weight containers into flat lists.

    Args:
        nodes: Nested lists / arrays of node ids; duplicates are removed
            (the resulting order is whatever ``set`` iteration yields).
        eids: Nested lists / arrays of edge entries; order and duplicates
            are preserved.
        weights: Optional nested lists / arrays flattened the same way as
            ``eids``; ``None`` is passed through untouched.

    Return:
        A tuple ``(nodes, eids, weights)`` of the flattened values.
    """
    unique_nodes = list(set(traverse(nodes)))
    flat_eids = list(traverse(eids))
    flat_weights = None if weights is None else list(traverse(weights))
    return unique_nodes, flat_eids, flat_weights
def
edge_hash
(
src
,
dst
):
"""edge_hash
"""
...
...
@@ -88,7 +107,6 @@ def graphsage_sample(graph, nodes, samples, ignore_edges=[]):
start_nodes
=
list
(
nodes_set
-
last_nodes_set
)
layer_nodes
=
[
nodes
]
+
layer_nodes
layer_eids
=
[
eids
]
+
layer_eids
log
.
debug
(
"flat time: %s"
%
(
time
.
time
()
-
start
))
start
=
time
.
time
()
# Find new nodes
...
...
@@ -317,3 +335,146 @@ def metapath_randomwalk(graph,
cur_nodes
=
np
.
array
(
nxt_cur_nodes
)
return
walk
def random_walk_with_start_prob(graph, nodes, max_depth, proba=0.5):
    """Random walk that may jump back to its origin node before each step.

    Every walk starts at one entry of ``nodes``.  From the second step on,
    each walker is first moved back to its own start node with probability
    ``proba`` (the jump itself is not recorded in the walk); then every
    walker standing on a node with at least one successor moves to a
    uniformly chosen successor, which is appended to its walk.

    Args:
        graph: Object providing ``outdegree(nodes)`` and ``successor(nodes)``.
        nodes: Walk starting from nodes.
        max_depth: Max walking depth (number of step attempts).
        proba: The probability to return to the origin node.

    Return:
        A list of walks; ``walk[i]`` starts with ``nodes[i]``.
    """
    walk = [[origin] for origin in nodes]
    walk_ids = np.arange(0, len(nodes))
    origins = np.array(nodes)
    current = np.array(nodes)

    for depth in range(max_depth):
        if depth >= 1:
            # Send a random subset of walkers back to where they started.
            restart = np.random.rand(current.shape[0]) < proba
            current[restart] = origins[restart]

        degrees = graph.outdegree(current)
        movable = (degrees != 0)
        if not np.any(movable):
            # Every walker is stuck on a dead end; a restart in the next
            # iteration may free some of them again.
            continue

        active_ids = walk_ids[movable]
        degrees = degrees[movable]
        neighbours = graph.successor(current[movable])
        # Uniform choice of one successor index per movable walker.
        picks = np.floor(np.random.rand(degrees.shape[0]) * degrees).astype(
            "int64")

        for candidates, pick, wid in zip(neighbours, picks, active_ids):
            chosen = candidates[pick]
            walk[wid].append(chosen)
            current[wid] = chosen
    return walk
def pinsage_sample(graph,
                   nodes,
                   samples,
                   top_k=10,
                   proba=0.5,
                   norm_bais=1.0,
                   ignore_edges=set()):
    """Implement of pinsage sample (neighbor sampling by random walks).

    Layers are processed from the last entry of ``samples`` to the first.
    For every frontier node a random walk with return-to-origin is run; the
    ``top_k`` most visited nodes of that walk become its sampled neighbors
    and an edge ``(visited_node, frontier_node)`` is emitted for each one,
    weighted by ``(count + norm_bais) / (sum(counts) + k * norm_bais)``
    where ``k`` is the number of neighbors kept for that frontier node.

    Args:
        graph: A pgl graph instance.
        nodes: Sample starting from nodes.
        samples: A list with one entry per layer; ``samples[i]`` is passed
            to the random walk as its depth for layer ``i``.
        top_k: Select the top_k visit count nodes to construct the edges.
        proba: The probability to return the origin node.
        norm_bais: The normalization bias added to every visit count.
        ignore_edges: Collection of edges (src, dst) that will be ignored.
            The default is only read, never mutated.

    Return:
        A list of ``len(samples)`` subgraphs.  All of them are built on the
        node set of the outermost layer and carry a ``"weight"`` edge
        feature and an ``"index"`` node feature.
    """
    num_layers = len(samples)
    start_nodes = nodes
    edges, weights = [], []
    layer_nodes, layer_edges, layer_weights = [], [], []
    ignore_edge_set = {edge_hash(src, dst) for src, dst in ignore_edges}

    for layer_idx in reversed(range(num_layers)):
        if len(start_nodes) == 0:
            # Frontier exhausted: repeat the current state so that every
            # per-layer list still ends up with num_layers entries.
            layer_nodes = [nodes] + layer_nodes
            layer_edges = [edges] + layer_edges
            # Previously assigned to a misspelled name, which left
            # layer_weights too short and broke the subgraph loop below.
            layer_weights = [weights] + layer_weights
            continue

        walks = random_walk_with_start_prob(
            graph, start_nodes, samples[layer_idx], proba=proba)

        pred_nodes, pred_edges, pred_weights = [], [], []
        for node, walk in zip(start_nodes, walks):
            # walk[0] is the start node itself; only the visited nodes count.
            walk_nodes = [
                visited for visited in walk[1:]
                if not (ignore_edge_set and visited != node and
                        edge_hash(visited, node) in ignore_edge_set)
            ]

            # Rows of (node, visit_count), sorted by ascending count so the
            # last top_k rows are the most visited nodes.
            unique, counts = np.unique(walk_nodes, return_counts=True)
            frequencies = np.asarray((unique, counts)).T
            frequencies = frequencies[np.argsort(frequencies[:, 1])]
            frequencies = frequencies[-1 * top_k:, :]
            top_nodes = frequencies[:, 0].tolist()
            top_counts = frequencies[:, 1].tolist()

            pred_nodes.extend(top_nodes)
            pred_edges.extend((visited, node) for visited in top_nodes)

            # Smoothed normalization: the weights of one start node sum to 1.
            count_sum = sum(top_counts) + len(top_counts) * norm_bais
            walk_weights = (np.array(top_counts) + norm_bais) / count_sum
            pred_weights.extend(walk_weights.tolist())

        last_node_set = set(nodes)
        nodes, edges, weights = flat_node_and_edge(
            [nodes, pred_nodes], [edges, pred_edges], [weights, pred_weights])

        layer_edges = [edges] + layer_edges
        layer_weights = [weights] + layer_weights
        layer_nodes = [nodes] + layer_nodes

        # Only nodes discovered in this layer are expanded in the next one.
        start_nodes = list(set(nodes) - last_node_set)

    subgraphs = []
    for i in range(num_layers):
        edge_feat_dict = {
            "weight": np.array(
                layer_weights[i], dtype='float32')
        }
        subgraph = graph.subgraph(
            nodes=layer_nodes[0],
            edges=layer_edges[i],
            edge_feats=edge_feat_dict)
        subgraph.node_feat["index"] = np.array(layer_nodes[0], dtype="int64")
        subgraphs.append(subgraph)

    return subgraphs
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录