Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PGL
提交
a580ad0c
P
PGL
项目概览
PaddlePaddle
/
PGL
通知
76
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
11
列表
看板
标记
里程碑
合并请求
1
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PGL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
11
Issue
11
列表
看板
标记
里程碑
合并请求
1
合并请求
1
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a580ad0c
编写于
1月 20, 2020
作者:
L
liweibin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
reduce memory of graph in multiprocess sampling
上级
cfd11f59
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
45 addition
and
62 deletion
+45
-62
pgl/graph.py
pgl/graph.py
+26
-30
pgl/graph_kernel.pyx
pgl/graph_kernel.pyx
+19
-32
未找到文件。
pgl/graph.py
浏览文件 @
a580ad0c
...
...
@@ -43,8 +43,8 @@ class EdgeIndex(object):
"""
def
__init__
(
self
,
u
,
v
,
num_nodes
):
self
.
_
v
,
self
.
_eid
,
self
.
_degree
,
self
.
_sorted_u
,
\
self
.
_sorted_v
,
self
.
_sorted_eid
=
graph_kernel
.
build_index
(
u
,
v
,
num_nodes
)
self
.
_
degree
,
self
.
_sorted_v
,
self
.
_sorted_u
,
\
self
.
_sorted_eid
,
self
.
_indptr
=
graph_kernel
.
build_index
(
u
,
v
,
num_nodes
)
@
property
def
degree
(
self
):
...
...
@@ -52,17 +52,25 @@ class EdgeIndex(object):
"""
return
self
.
_degree
@
property
def
v
(
self
):
"""Return the compressed v.
def
view_v
(
self
,
u
=
None
):
"""Return the compressed v for given u.
"""
return
self
.
_v
if
u
is
None
:
return
np
.
split
(
self
.
_sorted_v
,
self
.
_indptr
[
1
:])
else
:
u
=
np
.
array
(
u
,
dtype
=
"int64"
)
return
graph_kernel
.
slice_by_index
(
self
.
_sorted_v
,
self
.
_indptr
,
index
=
u
)
@
property
def
eid
(
self
):
"""Return the edge id.
def
view_eid
(
self
,
u
=
None
):
"""Return the compressed edge id for given u.
"""
return
self
.
_eid
if
u
is
None
:
return
np
.
split
(
self
.
_sorted_eid
,
self
.
_indptr
[
1
:])
else
:
u
=
np
.
array
(
u
,
dtype
=
"int64"
)
return
graph_kernel
.
slice_by_index
(
self
.
_sorted_eid
,
self
.
_indptr
,
index
=
u
)
def
triples
(
self
):
"""Return the sorted (u, v, eid) tuples.
...
...
@@ -287,17 +295,11 @@ class Graph(object):
[]]
"""
if
nodes
is
None
:
if
return_eids
:
return
self
.
adj_src_index
.
v
,
self
.
adj_src_index
.
eid
else
:
return
self
.
adj_src_index
.
v
if
return_eids
:
return
self
.
adj_src_index
.
view_v
(
nodes
),
self
.
adj_src_index
.
view_eid
(
nodes
)
else
:
if
return_eids
:
return
self
.
adj_src_index
.
v
[
nodes
],
self
.
adj_src_index
.
eid
[
nodes
]
else
:
return
self
.
adj_src_index
.
v
[
nodes
]
return
self
.
adj_src_index
.
view_v
(
nodes
)
def
sample_successor
(
self
,
nodes
,
...
...
@@ -385,17 +387,11 @@ class Graph(object):
[2]]
"""
if
nodes
is
None
:
if
return_eids
:
return
self
.
adj_dst_index
.
v
,
self
.
adj_dst_index
.
eid
else
:
return
self
.
adj_dst_index
.
v
if
return_eids
:
return
self
.
adj_dst_index
.
view_v
(
nodes
),
self
.
adj_dst_index
.
view_eid
(
nodes
)
else
:
if
return_eids
:
return
self
.
adj_dst_index
.
v
[
nodes
],
self
.
adj_dst_index
.
eid
[
nodes
]
else
:
return
self
.
adj_dst_index
.
v
[
nodes
]
return
self
.
adj_dst_index
.
view_v
(
nodes
)
def
sample_predecessor
(
self
,
nodes
,
...
...
pgl/graph_kernel.pyx
浏览文件 @
a580ad0c
...
...
@@ -53,14 +53,21 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u,
_tmp_eid
[
indptr
[
u
[
i
]]
+
count
[
u
[
i
]]]
=
i
_tmp_u
[
indptr
[
u
[
i
]]
+
count
[
u
[
i
]]]
=
u
[
i
]
count
[
u
[
i
]]
+=
1
return
degree
,
_tmp_v
,
_tmp_u
,
_tmp_eid
,
indptr
cdef
list
output_eid
=
[]
cdef
list
output_v
=
[]
for
i
in
xrange
(
n_size
):
output_eid
.
append
(
_tmp_eid
[
indptr
[
i
]:
indptr
[
i
+
1
]])
output_v
.
append
(
_tmp_v
[
indptr
[
i
]:
indptr
[
i
+
1
]])
return
np
.
array
(
output_v
),
np
.
array
(
output_eid
),
degree
,
_tmp_u
,
_tmp_v
,
_tmp_eid
@
cython
.
boundscheck
(
False
)
@
cython
.
wraparound
(
False
)
def
slice_by_index
(
np
.
ndarray
[
np
.
int64_t
,
ndim
=
1
]
u
,
np
.
ndarray
[
np
.
int64_t
,
ndim
=
1
]
indptr
,
np
.
ndarray
[
np
.
int64_t
,
ndim
=
1
]
index
):
cdef
list
output
=
[]
cdef
long
long
i
cdef
long
long
h
=
len
(
index
)
cdef
long
long
j
for
i
in
xrange
(
h
):
j
=
index
[
i
]
output
.
append
(
u
[
indptr
[
j
]:
indptr
[
j
+
1
]])
return
np
.
array
(
output
)
@
cython
.
boundscheck
(
False
)
@
cython
.
wraparound
(
False
)
...
...
@@ -253,22 +260,10 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
@
cython
.
boundscheck
(
False
)
@
cython
.
wraparound
(
False
)
def
skip_gram_gen_pair
(
vector
[
long
long
]
walk_path
,
long
win_size
=
5
):
"""Return node paris generated by skip-gram algorithm.
This function will auto remove the pair which src node is the same
as dst node.
Args:
walk_path: List of nodes as a walk path.
win_size: the windows size used in skip-gram.
Return:
A tuple of (src node list, dst node list).
"""
def
skip_gram_gen_pair
(
vector
[
long
long
]
walk
,
long
win_size
=
5
):
cdef
vector
[
long
long
]
src
cdef
vector
[
long
long
]
dst
cdef
long
long
l
=
len
(
walk
_path
)
cdef
long
long
l
=
len
(
walk
)
cdef
long
long
real_win_size
,
left
,
right
,
i
cdef
np
.
ndarray
[
np
.
int64_t
,
ndim
=
1
]
rnd
=
np
.
random
.
randint
(
1
,
win_size
+
1
,
dtype
=
np
.
int64
,
size
=
l
)
...
...
@@ -282,23 +277,15 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
if
right
>=
l
:
right
=
l
-
1
for
j
in
xrange
(
left
,
right
+
1
):
if
walk
_path
[
i
]
==
walk_path
[
j
]:
if
walk
[
i
]
==
walk
[
j
]:
continue
src
.
push_back
(
walk
_path
[
i
])
dst
.
push_back
(
walk
_path
[
j
])
src
.
push_back
(
walk
[
i
])
dst
.
push_back
(
walk
[
j
])
return
src
,
dst
@
cython
.
boundscheck
(
False
)
@
cython
.
wraparound
(
False
)
def
alias_sample_build_table
(
np
.
ndarray
[
np
.
float64_t
,
ndim
=
1
]
probs
):
"""Return the alias table and event table for alias sampling.
Args:
porobs: A list of float numbers as the probability.
Return:
A tuple of (alias table, event table).
"""
cdef
long
long
l
=
len
(
probs
)
cdef
np
.
ndarray
[
np
.
float64_t
,
ndim
=
1
]
alias
=
probs
*
l
cdef
np
.
ndarray
[
np
.
int64_t
,
ndim
=
1
]
events
=
np
.
zeros
(
l
,
dtype
=
np
.
int64
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录