提交 a580ad0c 编写于 作者: L liweibin

reduce memory of graph in multiprocess sampling

上级 cfd11f59
...@@ -43,8 +43,8 @@ class EdgeIndex(object): ...@@ -43,8 +43,8 @@ class EdgeIndex(object):
""" """
def __init__(self, u, v, num_nodes): def __init__(self, u, v, num_nodes):
self._v, self._eid, self._degree, self._sorted_u,\ self._degree, self._sorted_v, self._sorted_u, \
self._sorted_v, self._sorted_eid = graph_kernel.build_index(u, v, num_nodes) self._sorted_eid, self._indptr = graph_kernel.build_index(u, v, num_nodes)
@property @property
def degree(self): def degree(self):
...@@ -52,17 +52,25 @@ class EdgeIndex(object): ...@@ -52,17 +52,25 @@ class EdgeIndex(object):
""" """
return self._degree return self._degree
@property def view_v(self, u=None):
def v(self): """Return the compressed v for given u.
"""Return the compressed v.
""" """
return self._v if u is None:
return np.split(self._sorted_v, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_v, self._indptr, index=u)
@property def view_eid(self, u=None):
def eid(self): """Return the compressed edge id for given u.
"""Return the edge id.
""" """
return self._eid if u is None:
return np.split(self._sorted_eid, self._indptr[1:])
else:
u = np.array(u, dtype="int64")
return graph_kernel.slice_by_index(
self._sorted_eid, self._indptr, index=u)
def triples(self): def triples(self):
"""Return the sorted (u, v, eid) tuples. """Return the sorted (u, v, eid) tuples.
...@@ -287,17 +295,11 @@ class Graph(object): ...@@ -287,17 +295,11 @@ class Graph(object):
[]] []]
""" """
if nodes is None: if return_eids:
if return_eids: return self.adj_src_index.view_v(
return self.adj_src_index.v, self.adj_src_index.eid nodes), self.adj_src_index.view_eid(nodes)
else:
return self.adj_src_index.v
else: else:
if return_eids: return self.adj_src_index.view_v(nodes)
return self.adj_src_index.v[nodes], self.adj_src_index.eid[
nodes]
else:
return self.adj_src_index.v[nodes]
def sample_successor(self, def sample_successor(self,
nodes, nodes,
...@@ -385,17 +387,11 @@ class Graph(object): ...@@ -385,17 +387,11 @@ class Graph(object):
[2]] [2]]
""" """
if nodes is None: if return_eids:
if return_eids: return self.adj_dst_index.view_v(
return self.adj_dst_index.v, self.adj_dst_index.eid nodes), self.adj_dst_index.view_eid(nodes)
else:
return self.adj_dst_index.v
else: else:
if return_eids: return self.adj_dst_index.view_v(nodes)
return self.adj_dst_index.v[nodes], self.adj_dst_index.eid[
nodes]
else:
return self.adj_dst_index.v[nodes]
def sample_predecessor(self, def sample_predecessor(self,
nodes, nodes,
......
...@@ -53,14 +53,21 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u, ...@@ -53,14 +53,21 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u,
_tmp_eid[indptr[u[i]] + count[u[i]]] = i _tmp_eid[indptr[u[i]] + count[u[i]]] = i
_tmp_u[indptr[u[i]] + count[u[i]]] = u[i] _tmp_u[indptr[u[i]] + count[u[i]]] = u[i]
count[u[i]] += 1 count[u[i]] += 1
return degree, _tmp_v, _tmp_u, _tmp_eid, indptr
cdef list output_eid = [] @cython.boundscheck(False)
cdef list output_v = [] @cython.wraparound(False)
for i in xrange(n_size): def slice_by_index(np.ndarray[np.int64_t, ndim=1] u,
output_eid.append(_tmp_eid[indptr[i]:indptr[i+1]]) np.ndarray[np.int64_t, ndim=1] indptr,
output_v.append(_tmp_v[indptr[i]:indptr[i+1]]) np.ndarray[np.int64_t, ndim=1] index):
return np.array(output_v), np.array(output_eid), degree, _tmp_u, _tmp_v, _tmp_eid cdef list output = []
cdef long long i
cdef long long h = len(index)
cdef long long j
for i in xrange(h):
j = index[i]
output.append(u[indptr[j]:indptr[j+1]])
return np.array(output)
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
...@@ -253,22 +260,10 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa ...@@ -253,22 +260,10 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5): def skip_gram_gen_pair(vector[long long] walk, long win_size=5):
Return node pairs generated by the skip-gram algorithm.
This function automatically removes any pair whose src node is the same
as its dst node.
Args:
walk_path: List of nodes as a walk path.
win_size: the window size used in skip-gram.
Return:
A tuple of (src node list, dst node list).
"""
cdef vector[long long] src cdef vector[long long] src
cdef vector[long long] dst cdef vector[long long] dst
cdef long long l = len(walk_path) cdef long long l = len(walk)
cdef long long real_win_size, left, right, i cdef long long real_win_size, left, right, i
cdef np.ndarray[np.int64_t, ndim=1] rnd = np.random.randint(1, win_size+1, cdef np.ndarray[np.int64_t, ndim=1] rnd = np.random.randint(1, win_size+1,
dtype=np.int64, size=l) dtype=np.int64, size=l)
...@@ -282,23 +277,15 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5): ...@@ -282,23 +277,15 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
if right >= l: if right >= l:
right = l - 1 right = l - 1
for j in xrange(left, right+1): for j in xrange(left, right+1):
if walk_path[i] == walk_path[j]: if walk[i] == walk[j]:
continue continue
src.push_back(walk_path[i]) src.push_back(walk[i])
dst.push_back(walk_path[j]) dst.push_back(walk[j])
return src, dst return src, dst
@cython.boundscheck(False) @cython.boundscheck(False)
@cython.wraparound(False) @cython.wraparound(False)
def alias_sample_build_table(np.ndarray[np.float64_t, ndim=1] probs): def alias_sample_build_table(np.ndarray[np.float64_t, ndim=1] probs):
"""Return the alias table and event table for alias sampling.
Args:
probs: A list of float numbers as the probability.
Return:
A tuple of (alias table, event table).
"""
cdef long long l = len(probs) cdef long long l = len(probs)
cdef np.ndarray[np.float64_t, ndim=1] alias = probs * l cdef np.ndarray[np.float64_t, ndim=1] alias = probs * l
cdef np.ndarray[np.int64_t, ndim=1] events = np.zeros(l, dtype=np.int64) cdef np.ndarray[np.int64_t, ndim=1] events = np.zeros(l, dtype=np.int64)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册