提交 a580ad0c 编写于 作者: L liweibin

reduce memory of graph in multiprocess sampling

上级 cfd11f59
......@@ -43,8 +43,8 @@ class EdgeIndex(object):
"""
def __init__(self, u, v, num_nodes):
self._v, self._eid, self._degree, self._sorted_u,\
self._sorted_v, self._sorted_eid = graph_kernel.build_index(u, v, num_nodes)
self._degree, self._sorted_v, self._sorted_u, \
self._sorted_eid, self._indptr = graph_kernel.build_index(u, v, num_nodes)
@property
def degree(self):
......@@ -52,17 +52,25 @@ class EdgeIndex(object):
"""
return self._degree
@property
def v(self):
"""Return the compressed v.
def view_v(self, u=None):
"""Return the compressed v for given u.
"""
return self._v
if u is None:
return np.split(self._sorted_v, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_v, self._indptr, index=u)
@property
def eid(self):
"""Return the edge id.
def view_eid(self, u=None):
"""Return the compressed edge id for given u.
"""
return self._eid
if u is None:
return np.split(self._sorted_eid, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_eid, self._indptr, index=u)
def triples(self):
"""Return the sorted (u, v, eid) tuples.
......@@ -287,17 +295,11 @@ class Graph(object):
[]]
"""
if nodes is None:
if return_eids:
return self.adj_src_index.v, self.adj_src_index.eid
else:
return self.adj_src_index.v
if return_eids:
return self.adj_src_index.view_v(
nodes), self.adj_src_index.view_eid(nodes)
else:
if return_eids:
return self.adj_src_index.v[nodes], self.adj_src_index.eid[
nodes]
else:
return self.adj_src_index.v[nodes]
return self.adj_src_index.view_v(nodes)
def sample_successor(self,
nodes,
......@@ -385,17 +387,11 @@ class Graph(object):
[2]]
"""
if nodes is None:
if return_eids:
return self.adj_dst_index.v, self.adj_dst_index.eid
else:
return self.adj_dst_index.v
if return_eids:
return self.adj_dst_index.view_v(
nodes), self.adj_dst_index.view_eid(nodes)
else:
if return_eids:
return self.adj_dst_index.v[nodes], self.adj_dst_index.eid[
nodes]
else:
return self.adj_dst_index.v[nodes]
return self.adj_dst_index.view_v(nodes)
def sample_predecessor(self,
nodes,
......
......@@ -53,14 +53,21 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u,
_tmp_eid[indptr[u[i]] + count[u[i]]] = i
_tmp_u[indptr[u[i]] + count[u[i]]] = u[i]
count[u[i]] += 1
return degree, _tmp_v, _tmp_u, _tmp_eid, indptr
cdef list output_eid = []
cdef list output_v = []
for i in xrange(n_size):
output_eid.append(_tmp_eid[indptr[i]:indptr[i+1]])
output_v.append(_tmp_v[indptr[i]:indptr[i+1]])
return np.array(output_v), np.array(output_eid), degree, _tmp_u, _tmp_v, _tmp_eid
@cython.boundscheck(False)
@cython.wraparound(False)
def slice_by_index(np.ndarray[np.int64_t, ndim=1] u,
np.ndarray[np.int64_t, ndim=1] indptr,
np.ndarray[np.int64_t, ndim=1] index):
cdef list output = []
cdef long long i
cdef long long h = len(index)
cdef long long j
for i in xrange(h):
j = index[i]
output.append(u[indptr[j]:indptr[j+1]])
return np.array(output)
@cython.boundscheck(False)
@cython.wraparound(False)
......@@ -253,22 +260,10 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
@cython.boundscheck(False)
@cython.wraparound(False)
def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
"""Return node paris generated by skip-gram algorithm.
This function will auto remove the pair which src node is the same
as dst node.
Args:
walk_path: List of nodes as a walk path.
win_size: the windows size used in skip-gram.
Return:
A tuple of (src node list, dst node list).
"""
def skip_gram_gen_pair(vector[long long] walk, long win_size=5):
cdef vector[long long] src
cdef vector[long long] dst
cdef long long l = len(walk_path)
cdef long long l = len(walk)
cdef long long real_win_size, left, right, i
cdef np.ndarray[np.int64_t, ndim=1] rnd = np.random.randint(1, win_size+1,
dtype=np.int64, size=l)
......@@ -282,23 +277,15 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
if right >= l:
right = l - 1
for j in xrange(left, right+1):
if walk_path[i] == walk_path[j]:
if walk[i] == walk[j]:
continue
src.push_back(walk_path[i])
dst.push_back(walk_path[j])
src.push_back(walk[i])
dst.push_back(walk[j])
return src, dst
@cython.boundscheck(False)
@cython.wraparound(False)
def alias_sample_build_table(np.ndarray[np.float64_t, ndim=1] probs):
"""Return the alias table and event table for alias sampling.
Args:
porobs: A list of float numbers as the probability.
Return:
A tuple of (alias table, event table).
"""
cdef long long l = len(probs)
cdef np.ndarray[np.float64_t, ndim=1] alias = probs * l
cdef np.ndarray[np.int64_t, ndim=1] events = np.zeros(l, dtype=np.int64)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册