diff --git a/pgl/graph.py b/pgl/graph.py
index 8af2038f6378bef688f3530757bef3bd2a11b0c2..cc96a243a93859b94b5026a087cdec9c86f4682c 100644
--- a/pgl/graph.py
+++ b/pgl/graph.py
@@ -43,8 +43,8 @@ class EdgeIndex(object):
     """
 
     def __init__(self, u, v, num_nodes):
-        self._v, self._eid, self._degree, self._sorted_u,\
-        self._sorted_v, self._sorted_eid = graph_kernel.build_index(u, v, num_nodes)
+        self._degree, self._sorted_v, self._sorted_u, \
+        self._sorted_eid, self._indptr = graph_kernel.build_index(u, v, num_nodes)
 
     @property
     def degree(self):
@@ -52,17 +52,25 @@ class EdgeIndex(object):
         """
         return self._degree
 
-    @property
-    def v(self):
-        """Return the compressed v.
+    def view_v(self, u=None):
+        """Return the compressed v for given u (all nodes if u is None).
         """
-        return self._v
+        if u is None:
+            return np.split(self._sorted_v, self._indptr[1:-1])
+        else:
+            u = np.array(u, dtype="int64")
+            return graph_kernel.slice_by_index(
+                self._sorted_v, self._indptr, index=u)
 
-    @property
-    def eid(self):
-        """Return the edge id.
+    def view_eid(self, u=None):
+        """Return the compressed edge id for given u (all nodes if u is None).
         """
-        return self._eid
+        if u is None:
+            return np.split(self._sorted_eid, self._indptr[1:-1])
+        else:
+            u = np.array(u, dtype="int64")
+            return graph_kernel.slice_by_index(
+                self._sorted_eid, self._indptr, index=u)
 
     def triples(self):
         """Return the sorted (u, v, eid) tuples.
@@ -287,17 +295,11 @@ class Graph(object):
                        []]
 
         """
-        if nodes is None:
-            if return_eids:
-                return self.adj_src_index.v, self.adj_src_index.eid
-            else:
-                return self.adj_src_index.v
+        if return_eids:
+            return self.adj_src_index.view_v(
+                nodes), self.adj_src_index.view_eid(nodes)
         else:
-            if return_eids:
-                return self.adj_src_index.v[nodes], self.adj_src_index.eid[
-                    nodes]
-            else:
-                return self.adj_src_index.v[nodes]
+            return self.adj_src_index.view_v(nodes)
 
     def sample_successor(self,
                          nodes,
@@ -385,17 +387,11 @@ class Graph(object):
                        [2]]
 
         """
-        if nodes is None:
-            if return_eids:
-                return self.adj_dst_index.v, self.adj_dst_index.eid
-            else:
-                return self.adj_dst_index.v
+        if return_eids:
+            return self.adj_dst_index.view_v(
+                nodes), self.adj_dst_index.view_eid(nodes)
         else:
-            if return_eids:
-                return self.adj_dst_index.v[nodes], self.adj_dst_index.eid[
-                    nodes]
-            else:
-                return self.adj_dst_index.v[nodes]
+            return self.adj_dst_index.view_v(nodes)
 
     def sample_predecessor(self,
                            nodes,
diff --git a/pgl/graph_kernel.pyx b/pgl/graph_kernel.pyx
index c69d4e920b0ea54717996b4dc99e8a954b94d9bd..adcd343edba32ca53e50c08e49f724b4044959df 100644
--- a/pgl/graph_kernel.pyx
+++ b/pgl/graph_kernel.pyx
@@ -53,14 +53,33 @@ def build_index(np.ndarray[np.int64_t, ndim=1] u,
         _tmp_eid[indptr[u[i]] + count[u[i]]] = i
         _tmp_u[indptr[u[i]] + count[u[i]]] = u[i]
         count[u[i]] += 1
+    return degree, _tmp_v, _tmp_u, _tmp_eid, indptr
 
-    cdef list output_eid = []
-    cdef list output_v = []
-    for i in xrange(n_size):
-        output_eid.append(_tmp_eid[indptr[i]:indptr[i+1]])
-        output_v.append(_tmp_v[indptr[i]:indptr[i+1]])
-    return np.array(output_v), np.array(output_eid), degree, _tmp_u, _tmp_v, _tmp_eid
-
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def slice_by_index(np.ndarray[np.int64_t, ndim=1] u,
+        np.ndarray[np.int64_t, ndim=1] indptr,
+        np.ndarray[np.int64_t, ndim=1] index):
+    """Return the slices u[indptr[j]:indptr[j+1]] for every j in index.
+
+    Args:
+        u: 1-D int64 array to slice.
+        indptr: 1-D int64 array of slice boundaries into u.
+        index: 1-D int64 array of positions j to look up in indptr.
+
+    Return:
+        A 1-D object array whose i-th element is the slice for index[i].
+    """
+    cdef long long i
+    cdef long long h = len(index)
+    cdef long long j
+    # Fill a pre-sized object array: np.array(list_of_slices) collapses
+    # equal-length slices to 2-D and rejects ragged ones on recent NumPy.
+    cdef np.ndarray output = np.empty(h, dtype=object)
+    for i in xrange(h):
+        j = index[i]
+        output[i] = u[indptr[j]:indptr[j+1]]
+    return output
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
@@ -253,22 +272,22 @@ def sample_subset_with_eid(list nids, list eids, long long maxdegree, shuffle=Fa
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
-    """Return node paris generated by skip-gram algorithm.
+def skip_gram_gen_pair(vector[long long] walk, long win_size=5):
+    """Return node pairs generated by skip-gram algorithm.
 
     This function will auto remove the pair which src node is the same
     as dst node.
 
     Args:
-        walk_path: List of nodes as a walk path.
-        win_size: the windows size used in skip-gram.
+        walk: List of nodes as a walk path.
+        win_size: the window size used in skip-gram.
 
     Return:
         A tuple of (src node list, dst node list).
     """
     cdef vector[long long] src
     cdef vector[long long] dst
-    cdef long long l = len(walk_path)
+    cdef long long l = len(walk)
     cdef long long real_win_size, left, right, i
     cdef np.ndarray[np.int64_t, ndim=1] rnd = np.random.randint(1, win_size+1,
                                             dtype=np.int64, size=l)
@@ -282,23 +301,23 @@ def skip_gram_gen_pair(vector[long long] walk_path, long win_size=5):
         if right >= l:
             right = l - 1
         for j in xrange(left, right+1):
-            if walk_path[i] == walk_path[j]:
+            if walk[i] == walk[j]:
                 continue
-            src.push_back(walk_path[i])
-            dst.push_back(walk_path[j])
+            src.push_back(walk[i])
+            dst.push_back(walk[j])
     return src, dst
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def alias_sample_build_table(np.ndarray[np.float64_t, ndim=1] probs):
     """Return the alias table and event table for alias sampling.
 
     Args:
-        porobs: A list of float numbers as the probability.
+        probs: 1-D float64 array of event probabilities.
 
     Return:
         A tuple of (alias table, event table).
     """
     cdef long long l = len(probs)
     cdef np.ndarray[np.float64_t, ndim=1] alias = probs * l
     cdef np.ndarray[np.int64_t, ndim=1] events = np.zeros(l, dtype=np.int64)