未验证 提交 99d8ba47 编写于 作者: S Siming Dai 提交者: GitHub

[geometric] fix english doc (#46485) (#47317)

* fix geometric doc
上级 d5c6386c
...@@ -32,16 +32,15 @@ def segment_sum(data, segment_ids, name=None): ...@@ -32,16 +32,15 @@ def segment_sum(data, segment_ids, name=None):
Args: Args:
data (Tensor): A tensor, available data type float32, float64, int32, int64, float16. data (Tensor): A tensor, available data type float32, float64, int32, int64, float16.
segment_ids (Tensor): A 1-D tensor, which have the same size segment_ids (Tensor): A 1-D tensor, which have the same size
with the first dimension of input data. with the first dimension of input data.
Available data type is int32, int64. Available data type is int32, int64.
name (str, optional): Name for the operation (optional, default is None). name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
output (Tensor): the reduced result. - output (Tensor), the reduced result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -54,29 +53,30 @@ def segment_sum(data, segment_ids, name=None): ...@@ -54,29 +53,30 @@ def segment_sum(data, segment_ids, name=None):
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.segment_pool(data, segment_ids, "SUM")[0] return _C_ops.segment_pool(data, segment_ids, "SUM")[0]
if _in_legacy_dygraph(): if _in_legacy_dygraph():
out, tmp = _legacy_C_ops.segment_pool(data, segment_ids, 'pooltype', out, tmp = _legacy_C_ops.segment_pool(
"SUM") data, segment_ids, 'pooltype', "SUM"
)
return out return out
check_variable_and_dtype( check_variable_and_dtype(
data, "X", ("float32", "float64", "int32", "int64", "float16"), data,
"segment_pool") "X",
check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), ("float32", "float64", "int32", "int64", "float16"),
"segment_pool") "segment_pool",
)
check_variable_and_dtype(
segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool"
)
helper = LayerHelper("segment_sum", **locals()) helper = LayerHelper("segment_sum", **locals())
out = helper.create_variable_for_type_inference(dtype=data.dtype) out = helper.create_variable_for_type_inference(dtype=data.dtype)
summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
helper.append_op(type="segment_pool", helper.append_op(
inputs={ type="segment_pool",
"X": data, inputs={"X": data, "SegmentIds": segment_ids},
"SegmentIds": segment_ids outputs={"Out": out, "SummedIds": summed_ids},
}, attrs={"pooltype": "SUM"},
outputs={ )
"Out": out,
"SummedIds": summed_ids
},
attrs={"pooltype": "SUM"})
return out return out
...@@ -84,7 +84,7 @@ def segment_mean(data, segment_ids, name=None): ...@@ -84,7 +84,7 @@ def segment_mean(data, segment_ids, name=None):
r""" r"""
Segment mean Operator. Segment mean Operator.
Ihis operator calculate the mean value of input `data` which This operator calculate the mean value of input `data` which
with the same index in `segment_ids`. with the same index in `segment_ids`.
It computes a tensor such that $out_i = \\frac{1}{n_i} \\sum_{j} data[j]$ It computes a tensor such that $out_i = \\frac{1}{n_i} \\sum_{j} data[j]$
where sum is over j such that 'segment_ids[j] == i' and $n_i$ is the number where sum is over j such that 'segment_ids[j] == i' and $n_i$ is the number
...@@ -92,17 +92,16 @@ def segment_mean(data, segment_ids, name=None): ...@@ -92,17 +92,16 @@ def segment_mean(data, segment_ids, name=None):
Args: Args:
data (tensor): a tensor, available data type float32, float64, int32, int64, float16. data (tensor): a tensor, available data type float32, float64, int32, int64, float16.
segment_ids (tensor): a 1-d tensor, which have the same size segment_ids (tensor): a 1-d tensor, which have the same size
with the first dimension of input data. with the first dimension of input data.
available data type is int32, int64. available data type is int32, int64.
name (str, optional): Name for the operation (optional, default is None). name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
output (Tensor): the reduced result. - output (Tensor), the reduced result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -116,29 +115,30 @@ def segment_mean(data, segment_ids, name=None): ...@@ -116,29 +115,30 @@ def segment_mean(data, segment_ids, name=None):
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.segment_pool(data, segment_ids, "MEAN")[0] return _C_ops.segment_pool(data, segment_ids, "MEAN")[0]
if _in_legacy_dygraph(): if _in_legacy_dygraph():
out, tmp = _legacy_C_ops.segment_pool(data, segment_ids, 'pooltype', out, tmp = _legacy_C_ops.segment_pool(
"MEAN") data, segment_ids, 'pooltype', "MEAN"
)
return out return out
check_variable_and_dtype( check_variable_and_dtype(
data, "X", ("float32", "float64", "int32", "int64", "float16"), data,
"segment_pool") "X",
check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), ("float32", "float64", "int32", "int64", "float16"),
"segment_pool") "segment_pool",
)
check_variable_and_dtype(
segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool"
)
helper = LayerHelper("segment_mean", **locals()) helper = LayerHelper("segment_mean", **locals())
out = helper.create_variable_for_type_inference(dtype=data.dtype) out = helper.create_variable_for_type_inference(dtype=data.dtype)
summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
helper.append_op(type="segment_pool", helper.append_op(
inputs={ type="segment_pool",
"X": data, inputs={"X": data, "SegmentIds": segment_ids},
"SegmentIds": segment_ids outputs={"Out": out, "SummedIds": summed_ids},
}, attrs={"pooltype": "MEAN"},
outputs={ )
"Out": out,
"SummedIds": summed_ids
},
attrs={"pooltype": "MEAN"})
return out return out
...@@ -154,16 +154,15 @@ def segment_min(data, segment_ids, name=None): ...@@ -154,16 +154,15 @@ def segment_min(data, segment_ids, name=None):
Args: Args:
data (tensor): a tensor, available data type float32, float64, int32, int64, float16. data (tensor): a tensor, available data type float32, float64, int32, int64, float16.
segment_ids (tensor): a 1-d tensor, which have the same size segment_ids (tensor): a 1-d tensor, which have the same size
with the first dimension of input data. with the first dimension of input data.
available data type is int32, int64. available data type is int32, int64.
name (str, optional): Name for the operation (optional, default is None). name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
output (Tensor): the reduced result. - output (Tensor), the reduced result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -177,29 +176,30 @@ def segment_min(data, segment_ids, name=None): ...@@ -177,29 +176,30 @@ def segment_min(data, segment_ids, name=None):
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.segment_pool(data, segment_ids, "MIN")[0] return _C_ops.segment_pool(data, segment_ids, "MIN")[0]
if _in_legacy_dygraph(): if _in_legacy_dygraph():
out, tmp = _legacy_C_ops.segment_pool(data, segment_ids, 'pooltype', out, tmp = _legacy_C_ops.segment_pool(
"MIN") data, segment_ids, 'pooltype', "MIN"
)
return out return out
check_variable_and_dtype( check_variable_and_dtype(
data, "X", ("float32", "float64", "int32", "int64", "float16"), data,
"segment_pool") "X",
check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), ("float32", "float64", "int32", "int64", "float16"),
"segment_pool") "segment_pool",
)
check_variable_and_dtype(
segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool"
)
helper = LayerHelper("segment_min", **locals()) helper = LayerHelper("segment_min", **locals())
out = helper.create_variable_for_type_inference(dtype=data.dtype) out = helper.create_variable_for_type_inference(dtype=data.dtype)
summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
helper.append_op(type="segment_pool", helper.append_op(
inputs={ type="segment_pool",
"X": data, inputs={"X": data, "SegmentIds": segment_ids},
"SegmentIds": segment_ids outputs={"Out": out, "SummedIds": summed_ids},
}, attrs={"pooltype": "MIN"},
outputs={ )
"Out": out,
"SummedIds": summed_ids
},
attrs={"pooltype": "MIN"})
return out return out
...@@ -215,16 +215,15 @@ def segment_max(data, segment_ids, name=None): ...@@ -215,16 +215,15 @@ def segment_max(data, segment_ids, name=None):
Args: Args:
data (tensor): a tensor, available data type float32, float64, int32, int64, float16. data (tensor): a tensor, available data type float32, float64, int32, int64, float16.
segment_ids (tensor): a 1-d tensor, which have the same size segment_ids (tensor): a 1-d tensor, which have the same size
with the first dimension of input data. with the first dimension of input data.
available data type is int32, int64. available data type is int32, int64.
name (str, optional): Name for the operation (optional, default is None). name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
output (Tensor): the reduced result. - output (Tensor), the reduced result.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -238,27 +237,28 @@ def segment_max(data, segment_ids, name=None): ...@@ -238,27 +237,28 @@ def segment_max(data, segment_ids, name=None):
if in_dygraph_mode(): if in_dygraph_mode():
return _C_ops.segment_pool(data, segment_ids, "MAX")[0] return _C_ops.segment_pool(data, segment_ids, "MAX")[0]
if _in_legacy_dygraph(): if _in_legacy_dygraph():
out, tmp = _legacy_C_ops.segment_pool(data, segment_ids, 'pooltype', out, tmp = _legacy_C_ops.segment_pool(
"MAX") data, segment_ids, 'pooltype', "MAX"
)
return out return out
check_variable_and_dtype( check_variable_and_dtype(
data, "X", ("float32", "float64", "int32", "int64", "float16"), data,
"segment_pool") "X",
check_variable_and_dtype(segment_ids, "SegmentIds", ("int32", "int64"), ("float32", "float64", "int32", "int64", "float16"),
"segment_pool") "segment_pool",
)
check_variable_and_dtype(
segment_ids, "SegmentIds", ("int32", "int64"), "segment_pool"
)
helper = LayerHelper("segment_max", **locals()) helper = LayerHelper("segment_max", **locals())
out = helper.create_variable_for_type_inference(dtype=data.dtype) out = helper.create_variable_for_type_inference(dtype=data.dtype)
summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype) summed_ids = helper.create_variable_for_type_inference(dtype=data.dtype)
helper.append_op(type="segment_pool", helper.append_op(
inputs={ type="segment_pool",
"X": data, inputs={"X": data, "SegmentIds": segment_ids},
"SegmentIds": segment_ids outputs={"Out": out, "SummedIds": summed_ids},
}, attrs={"pooltype": "MAX"},
outputs={ )
"Out": out,
"SummedIds": summed_ids
},
attrs={"pooltype": "MAX"})
return out return out
...@@ -22,161 +22,144 @@ from paddle import _C_ops, _legacy_C_ops ...@@ -22,161 +22,144 @@ from paddle import _C_ops, _legacy_C_ops
__all__ = [] __all__ = []
def reindex_graph(x, def reindex_graph(
neighbors, x, neighbors, count, value_buffer=None, index_buffer=None, name=None
count, ):
value_buffer=None,
index_buffer=None,
name=None):
""" """
Reindex Graph API. Reindex Graph API.
This API is mainly used in Graph Learning domain, which should be used This API is mainly used in Graph Learning domain, which should be used
in conjunction with `graph_sample_neighbors` API. And the main purpose in conjunction with `paddle.geometric.sample_neighbors` API. And the main purpose
is to reindex the ids information of the input nodes, and return the is to reindex the ids information of the input nodes, and return the
corresponding graph edges after reindex. corresponding graph edges after reindex.
**Notes**: Take input nodes x = [0, 1, 2] as an example. If we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2],
The number in x should be unique, otherwise it would cause potential errors. then we know that the neighbors of 0 is [8, 9], the neighbors of 1 is [0, 4, 7], and the neighbors of 2 is [6, 7].
We will reindex all the nodes from 0. Then after graph_reindex, we will have 3 different outputs: reindex_src: [3, 4, 0, 5, 6, 7, 6], reindex_dst: [0, 0, 1, 1, 1, 2, 2]
and out_nodes: [0, 1, 2, 8, 9, 4, 7, 6]. We can see that the numbers in `reindex_src` and `reindex_dst` is the corresponding index
Take input nodes x = [0, 1, 2] as an example.
If we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2],
then we know that the neighbors of 0 is [8, 9], the neighbors of 1
is [0, 4, 7], and the neighbors of 2 is [6, 7].
Then after graph_reindex, we will have 3 different outputs:
1. reindex_src: [3, 4, 0, 5, 6, 7, 6]
2. reindex_dst: [0, 0, 1, 1, 1, 2, 2]
3. out_nodes: [0, 1, 2, 8, 9, 4, 7, 6]
We can see that the numbers in `reindex_src` and `reindex_dst` is the corresponding index
of nodes in `out_nodes`. of nodes in `out_nodes`.
Note:
The number in x should be unique, otherwise it would cause potential errors. We will reindex all the nodes from 0.
Args: Args:
x (Tensor): The input nodes which we sample neighbors for. The available x (Tensor): The input nodes which we sample neighbors for. The available
data type is int32, int64. data type is int32, int64.
neighbors (Tensor): The neighbors of the input nodes `x`. The data type neighbors (Tensor): The neighbors of the input nodes `x`. The data type
should be the same with `x`. should be the same with `x`.
count (Tensor): The neighbor count of the input nodes `x`. And the count (Tensor): The neighbor count of the input nodes `x`. And the
data type should be int32. data type should be int32.
value_buffer (Tensor|None): Value buffer for hashtable. The data type should be int32, value_buffer (Tensor|None): Value buffer for hashtable. The data type should be int32,
and should be filled with -1. Only useful for gpu version. and should be filled with -1. Only useful for gpu version.
index_buffer (Tensor|None): Index buffer for hashtable. The data type should be int32, index_buffer (Tensor|None): Index buffer for hashtable. The data type should be int32,
and should be filled with -1. Only useful for gpu version. and should be filled with -1. Only useful for gpu version.
`value_buffer` and `index_buffer` should be both not None `value_buffer` and `index_buffer` should be both not None
if you want to speed up by using hashtable buffer. if you want to speed up by using hashtable buffer.
name (str, optional): Name for the operation (optional, default is None). name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
reindex_src (Tensor): The source node index of graph edges after reindex. - reindex_src (Tensor), the source node index of graph edges after reindex.
reindex_dst (Tensor): The destination node index of graph edges after reindex.
out_nodes (Tensor): The index of unique input nodes and neighbors before reindex,
where we put the input nodes `x` in the front, and put neighbor
nodes in the back.
Examples: - reindex_dst (Tensor), the destination node index of graph edges after reindex.
.. code-block:: python
import paddle - out_nodes (Tensor), the index of unique input nodes and neighbors before reindex, where we put the input nodes `x` in the front, and put neighbor nodes in the back.
x = [0, 1, 2] Examples:
neighbors = [8, 9, 0, 4, 7, 6, 7] .. code-block:: python
count = [2, 3, 2]
x = paddle.to_tensor(x, dtype="int64")
neighbors = paddle.to_tensor(neighbors, dtype="int64")
count = paddle.to_tensor(count, dtype="int32")
reindex_src, reindex_dst, out_nodes = \ import paddle
paddle.geometric.reindex_graph(x, neighbors, count) x = [0, 1, 2]
# reindex_src: [3, 4, 0, 5, 6, 7, 6] neighbors = [8, 9, 0, 4, 7, 6, 7]
# reindex_dst: [0, 0, 1, 1, 1, 2, 2] count = [2, 3, 2]
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6] x = paddle.to_tensor(x, dtype="int64")
neighbors = paddle.to_tensor(neighbors, dtype="int64")
count = paddle.to_tensor(count, dtype="int32")
reindex_src, reindex_dst, out_nodes = paddle.geometric.reindex_graph(x, neighbors, count)
# reindex_src: [3, 4, 0, 5, 6, 7, 6]
# reindex_dst: [0, 0, 1, 1, 1, 2, 2]
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6]
""" """
use_buffer_hashtable = True if value_buffer is not None \ use_buffer_hashtable = (
and index_buffer is not None else False True if value_buffer is not None and index_buffer is not None else False
)
if _non_static_mode(): if _non_static_mode():
reindex_src, reindex_dst, out_nodes = \ reindex_src, reindex_dst, out_nodes = _legacy_C_ops.graph_reindex(
_legacy_C_ops.graph_reindex(x, neighbors, count, value_buffer, index_buffer, x,
"flag_buffer_hashtable", use_buffer_hashtable) neighbors,
count,
value_buffer,
index_buffer,
"flag_buffer_hashtable",
use_buffer_hashtable,
)
return reindex_src, reindex_dst, out_nodes return reindex_src, reindex_dst, out_nodes
check_variable_and_dtype(x, "X", ("int32", "int64"), "graph_reindex") check_variable_and_dtype(x, "X", ("int32", "int64"), "graph_reindex")
check_variable_and_dtype(neighbors, "Neighbors", ("int32", "int64"), check_variable_and_dtype(
"graph_reindex") neighbors, "Neighbors", ("int32", "int64"), "graph_reindex"
)
check_variable_and_dtype(count, "Count", ("int32"), "graph_reindex") check_variable_and_dtype(count, "Count", ("int32"), "graph_reindex")
if use_buffer_hashtable: if use_buffer_hashtable:
check_variable_and_dtype(value_buffer, "HashTable_Value", ("int32"), check_variable_and_dtype(
"graph_reindex") value_buffer, "HashTable_Value", ("int32"), "graph_reindex"
check_variable_and_dtype(index_buffer, "HashTable_Index", ("int32"), )
"graph_reindex") check_variable_and_dtype(
index_buffer, "HashTable_Index", ("int32"), "graph_reindex"
)
helper = LayerHelper("reindex_graph", **locals()) helper = LayerHelper("reindex_graph", **locals())
reindex_src = helper.create_variable_for_type_inference(dtype=x.dtype) reindex_src = helper.create_variable_for_type_inference(dtype=x.dtype)
reindex_dst = helper.create_variable_for_type_inference(dtype=x.dtype) reindex_dst = helper.create_variable_for_type_inference(dtype=x.dtype)
out_nodes = helper.create_variable_for_type_inference(dtype=x.dtype) out_nodes = helper.create_variable_for_type_inference(dtype=x.dtype)
helper.append_op(type="graph_reindex", helper.append_op(
inputs={ type="graph_reindex",
"X": inputs={
x, "X": x,
"Neighbors": "Neighbors": neighbors,
neighbors, "Count": count,
"Count": "HashTable_Value": value_buffer if use_buffer_hashtable else None,
count, "HashTable_Index": index_buffer if use_buffer_hashtable else None,
"HashTable_Value": },
value_buffer if use_buffer_hashtable else None, outputs={
"HashTable_Index": "Reindex_Src": reindex_src,
index_buffer if use_buffer_hashtable else None, "Reindex_Dst": reindex_dst,
}, "Out_Nodes": out_nodes,
outputs={ },
"Reindex_Src": reindex_src, attrs={"flag_buffer_hashtable": use_buffer_hashtable},
"Reindex_Dst": reindex_dst, )
"Out_Nodes": out_nodes
},
attrs={"flag_buffer_hashtable": use_buffer_hashtable})
return reindex_src, reindex_dst, out_nodes return reindex_src, reindex_dst, out_nodes
def reindex_heter_graph(x, def reindex_heter_graph(
neighbors, x, neighbors, count, value_buffer=None, index_buffer=None, name=None
count, ):
value_buffer=None,
index_buffer=None,
name=None):
""" """
Reindex HeterGraph API. Reindex HeterGraph API.
This API is mainly used in Graph Learning domain, which should be used This API is mainly used in Graph Learning domain, which should be used
in conjunction with `graph_sample_neighbors` API. And the main purpose in conjunction with `paddle.geometric.sample_neighbors` API. And the main purpose
is to reindex the ids information of the input nodes, and return the is to reindex the ids information of the input nodes, and return the
corresponding graph edges after reindex. corresponding graph edges after reindex.
**Notes**: Take input nodes x = [0, 1, 2] as an example. For graph A, suppose we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2],
The number in x should be unique, otherwise it would cause potential errors. then we know that the neighbors of 0 is [8, 9], the neighbors of 1 is [0, 4, 7], and the neighbors of 2 is [6, 7]. For graph B,
We support multi-edge-types neighbors reindexing in reindex_heter_graph api. suppose we have neighbors = [0, 2, 3, 5, 1], and count = [1, 3, 1], then we know that the neighbors of 0 is [0], the neighbors of 1 is [2, 3, 5],
We will reindex all the nodes from 0. and the neighbors of 3 is [1]. We will get following outputs: reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1], reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2]
and out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5].
Take input nodes x = [0, 1, 2] as an example.
For graph A, suppose we have neighbors = [8, 9, 0, 4, 7, 6, 7], and count = [2, 3, 2], Note:
then we know that the neighbors of 0 is [8, 9], the neighbors of 1 The number in x should be unique, otherwise it would cause potential errors. We support multi-edge-types neighbors reindexing in reindex_heter_graph api. We will reindex all the nodes from 0.
is [0, 4, 7], and the neighbors of 2 is [6, 7].
For graph B, suppose we have neighbors = [0, 2, 3, 5, 1], and count = [1, 3, 1],
then we know that the neighbors of 0 is [0], the neighbors of 1 is [2, 3, 5],
and the neighbors of 3 is [1].
We will get following outputs:
1. reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1]
2. reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2]
3. out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5]
Args: Args:
x (Tensor): The input nodes which we sample neighbors for. The available x (Tensor): The input nodes which we sample neighbors for. The available
data type is int32, int64. data type is int32, int64.
neighbors (list|tuple): The neighbors of the input nodes `x` from different graphs. neighbors (list|tuple): The neighbors of the input nodes `x` from different graphs.
The data type should be the same with `x`. The data type should be the same with `x`.
count (list|tuple): The neighbor counts of the input nodes `x` from different graphs. count (list|tuple): The neighbor counts of the input nodes `x` from different graphs.
And the data type should be int32. And the data type should be int32.
value_buffer (Tensor|None): Value buffer for hashtable. The data type should be int32, value_buffer (Tensor|None): Value buffer for hashtable. The data type should be int32,
and should be filled with -1. Only useful for gpu version. and should be filled with -1. Only useful for gpu version.
...@@ -188,48 +171,52 @@ def reindex_heter_graph(x, ...@@ -188,48 +171,52 @@ def reindex_heter_graph(x,
For more information, please refer to :ref:`api_guide_Name`. For more information, please refer to :ref:`api_guide_Name`.
Returns: Returns:
reindex_src (Tensor): The source node index of graph edges after reindex. - reindex_src (Tensor), the source node index of graph edges after reindex.
reindex_dst (Tensor): The destination node index of graph edges after reindex.
out_nodes (Tensor): The index of unique input nodes and neighbors before reindex,
where we put the input nodes `x` in the front, and put neighbor
nodes in the back.
Examples:
.. code-block:: python - reindex_dst (Tensor), the destination node index of graph edges after reindex.
import paddle - out_nodes (Tensor), the index of unique input nodes and neighbors before reindex,
where we put the input nodes `x` in the front, and put neighbor
nodes in the back.
x = [0, 1, 2] Examples:
neighbors_a = [8, 9, 0, 4, 7, 6, 7] .. code-block:: python
count_a = [2, 3, 2]
x = paddle.to_tensor(x, dtype="int64")
neighbors_a = paddle.to_tensor(neighbors_a, dtype="int64")
count_a = paddle.to_tensor(count_a, dtype="int32")
neighbors_b = [0, 2, 3, 5, 1]
count_b = [1, 3, 1]
neighbors_b = paddle.to_tensor(neighbors_b, dtype="int64")
count_b = paddle.to_tensor(count_b, dtype="int32")
neighbors = [neighbors_a, neighbors_b] import paddle
count = [count_a, count_b] x = [0, 1, 2]
reindex_src, reindex_dst, out_nodes = \ neighbors_a = [8, 9, 0, 4, 7, 6, 7]
paddle.geometric.reindex_heter_graph(x, neighbors, count) count_a = [2, 3, 2]
# reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1] x = paddle.to_tensor(x, dtype="int64")
# reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2] neighbors_a = paddle.to_tensor(neighbors_a, dtype="int64")
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5] count_a = paddle.to_tensor(count_a, dtype="int32")
neighbors_b = [0, 2, 3, 5, 1]
count_b = [1, 3, 1]
neighbors_b = paddle.to_tensor(neighbors_b, dtype="int64")
count_b = paddle.to_tensor(count_b, dtype="int32")
neighbors = [neighbors_a, neighbors_b]
count = [count_a, count_b]
reindex_src, reindex_dst, out_nodes = paddle.geometric.reindex_heter_graph(x, neighbors, count)
# reindex_src: [3, 4, 0, 5, 6, 7, 6, 0, 2, 8, 9, 1]
# reindex_dst: [0, 0, 1, 1, 1, 2, 2, 0, 1, 1, 1, 2]
# out_nodes: [0, 1, 2, 8, 9, 4, 7, 6, 3, 5]
""" """
use_buffer_hashtable = True if value_buffer is not None \ use_buffer_hashtable = (
and index_buffer is not None else False True if value_buffer is not None and index_buffer is not None else False
)
if _non_static_mode(): if _non_static_mode():
neighbors = paddle.concat(neighbors, axis=0) neighbors = paddle.concat(neighbors, axis=0)
count = paddle.concat(count, axis=0) count = paddle.concat(count, axis=0)
reindex_src, reindex_dst, out_nodes = \ reindex_src, reindex_dst, out_nodes = _legacy_C_ops.graph_reindex(
_legacy_C_ops.graph_reindex(x, neighbors, count, value_buffer, index_buffer, x,
"flag_buffer_hashtable", use_buffer_hashtable) neighbors,
count,
value_buffer,
index_buffer,
"flag_buffer_hashtable",
use_buffer_hashtable,
)
return reindex_src, reindex_dst, out_nodes return reindex_src, reindex_dst, out_nodes
if isinstance(neighbors, Variable): if isinstance(neighbors, Variable):
...@@ -241,15 +228,18 @@ def reindex_heter_graph(x, ...@@ -241,15 +228,18 @@ def reindex_heter_graph(x,
count = paddle.concat(count, axis=0) count = paddle.concat(count, axis=0)
check_variable_and_dtype(x, "X", ("int32", "int64"), "heter_graph_reindex") check_variable_and_dtype(x, "X", ("int32", "int64"), "heter_graph_reindex")
check_variable_and_dtype(neighbors, "Neighbors", ("int32", "int64"), check_variable_and_dtype(
"graph_reindex") neighbors, "Neighbors", ("int32", "int64"), "graph_reindex"
)
check_variable_and_dtype(count, "Count", ("int32"), "graph_reindex") check_variable_and_dtype(count, "Count", ("int32"), "graph_reindex")
if use_buffer_hashtable: if use_buffer_hashtable:
check_variable_and_dtype(value_buffer, "HashTable_Value", ("int32"), check_variable_and_dtype(
"graph_reindex") value_buffer, "HashTable_Value", ("int32"), "graph_reindex"
check_variable_and_dtype(index_buffer, "HashTable_Index", ("int32"), )
"graph_reindex") check_variable_and_dtype(
index_buffer, "HashTable_Index", ("int32"), "graph_reindex"
)
helper = LayerHelper("reindex_heter_graph", **locals()) helper = LayerHelper("reindex_heter_graph", **locals())
reindex_src = helper.create_variable_for_type_inference(dtype=x.dtype) reindex_src = helper.create_variable_for_type_inference(dtype=x.dtype)
...@@ -257,23 +247,20 @@ def reindex_heter_graph(x, ...@@ -257,23 +247,20 @@ def reindex_heter_graph(x,
out_nodes = helper.create_variable_for_type_inference(dtype=x.dtype) out_nodes = helper.create_variable_for_type_inference(dtype=x.dtype)
neighbors = paddle.concat(neighbors, axis=0) neighbors = paddle.concat(neighbors, axis=0)
count = paddle.concat(count, axis=0) count = paddle.concat(count, axis=0)
helper.append_op(type="graph_reindex", helper.append_op(
inputs={ type="graph_reindex",
"X": inputs={
x, "X": x,
"Neighbors": "Neighbors": neighbors,
neighbors, "Count": count,
"Count": "HashTable_Value": value_buffer if use_buffer_hashtable else None,
count, "HashTable_Index": index_buffer if use_buffer_hashtable else None,
"HashTable_Value": },
value_buffer if use_buffer_hashtable else None, outputs={
"HashTable_Index": "Reindex_Src": reindex_src,
index_buffer if use_buffer_hashtable else None, "Reindex_Dst": reindex_dst,
}, "Out_Nodes": out_nodes,
outputs={ },
"Reindex_Src": reindex_src, attrs={"flag_buffer_hashtable": use_buffer_hashtable},
"Reindex_Dst": reindex_dst, )
"Out_Nodes": out_nodes
},
attrs={"flag_buffer_hashtable": use_buffer_hashtable})
return reindex_src, reindex_dst, out_nodes return reindex_src, reindex_dst, out_nodes
...@@ -21,25 +21,27 @@ from paddle import _C_ops, _legacy_C_ops ...@@ -21,25 +21,27 @@ from paddle import _C_ops, _legacy_C_ops
__all__ = [] __all__ = []
def sample_neighbors(row, def sample_neighbors(
colptr, row,
input_nodes, colptr,
sample_size=-1, input_nodes,
eids=None, sample_size=-1,
return_eids=False, eids=None,
perm_buffer=None, return_eids=False,
name=None): perm_buffer=None,
name=None,
):
""" """
Graph Sample Neighbors API. Graph Sample Neighbors API.
This API is mainly used in Graph Learning domain, and the main purpose is to This API is mainly used in Graph Learning domain, and the main purpose is to
provide high performance of graph sampling method. For example, we get the provide high performance of graph sampling method. For example, we get the
CSC(Compressed Sparse Column) format of the input graph edges as `row` and CSC(Compressed Sparse Column) format of the input graph edges as `row` and
`colptr`, so as to convert graph data into a suitable format for sampling. `colptr`, so as to convert graph data into a suitable format for sampling.
`input_nodes` means the nodes we need to sample neighbors, and `sample_sizes` `input_nodes` means the nodes we need to sample neighbors, and `sample_sizes`
means the number of neighbors and number of layers we want to sample. means the number of neighbors and number of layers we want to sample.
Besides, we support fisher-yates sampling in GPU version. Besides, we support fisher-yates sampling in GPU version.
Args: Args:
row (Tensor): One of the components of the CSC format of the input graph, and row (Tensor): One of the components of the CSC format of the input graph, and
...@@ -50,10 +52,10 @@ def sample_neighbors(row, ...@@ -50,10 +52,10 @@ def sample_neighbors(row,
The data type should be the same with `row`. The data type should be the same with `row`.
def sample_neighbors(
    row,
    colptr,
    input_nodes,
    sample_size=-1,
    eids=None,
    return_eids=False,
    perm_buffer=None,
    name=None,
):
    """
    Sample neighbors of the given input nodes from a graph stored in
    compressed sparse column form (``row`` holds the row indices of the
    edges, ``colptr`` the per-node offsets into ``row``).

    Args:
        row (Tensor): A 1-D tensor of edge row indices. Available data
            type is int32, int64.
        colptr (Tensor): A 1-D tensor of compressed column offsets, the
            same data type as ``row``.
        input_nodes (Tensor): The input nodes we need to sample neighbors
            for, and the data type should be the same with ``row``.
        sample_size (int): The number of neighbors we need to sample.
            Default value is -1, which means returning all the neighbors
            of the input nodes.
        eids (Tensor): The eid information of the input graph. If
            ``return_eids`` is True, then ``eids`` should not be None.
            The data type should be the same with ``row``. Default is None.
        return_eids (bool): Whether to return eid information of sample
            edges. Default is False.
        perm_buffer (Tensor): Permutation buffer for fisher-yates sampling.
            Only used when it is not None; the data type should be the
            same with ``row``. Default is None.
        name (str, optional): Name for the operation (optional, default
            is None). For more information, please refer to
            :ref:`api_guide_Name`.

    Returns:
        - out_neighbors (Tensor), the sample neighbors of the input nodes.

        - out_count (Tensor), the number of sampling neighbors of each input
          node, and the shape should be the same with ``input_nodes``.

        - out_eids (Tensor), if ``return_eids`` is True, we will return the
          eid information of the sample edges.

    Raises:
        ValueError: If ``return_eids`` is True but ``eids`` is None.

    Examples:
        .. code-block:: python

            import paddle

            # edges: (3, 0), (7, 0), (0, 1), (9, 1), (1, 2), (4, 3), (2, 4),
            #        (9, 5), (3, 5), (9, 6), (1, 6), (9, 8), (7, 8)
            row = [3, 7, 0, 9, 1, 4, 2, 9, 3, 9, 1, 9, 7]
            colptr = [0, 2, 4, 5, 6, 7, 9, 11, 11, 13, 13]
            nodes = [0, 8, 1, 2]
            sample_size = 2
            row = paddle.to_tensor(row, dtype="int64")
            colptr = paddle.to_tensor(colptr, dtype="int64")
            nodes = paddle.to_tensor(nodes, dtype="int64")
            out_neighbors, out_count = paddle.geometric.sample_neighbors(row, colptr, nodes, sample_size=sample_size)

    """
    if return_eids:
        if eids is None:
            # Plain string literal: no placeholders, so no f-prefix (F541).
            raise ValueError(
                "`eids` should not be None if `return_eids` is True."
            )

    # Fisher-Yates permutation buffer is only engaged when caller supplies one.
    use_perm_buffer = perm_buffer is not None

    if _non_static_mode():
        # Dynamic graph: dispatch straight to the legacy C++ op. Attribute
        # name/value pairs are passed positionally after the tensor inputs.
        (
            out_neighbors,
            out_count,
            out_eids,
        ) = _legacy_C_ops.graph_sample_neighbors(
            row,
            colptr,
            input_nodes,
            eids,
            perm_buffer,
            "sample_size",
            sample_size,
            "return_eids",
            return_eids,
            "flag_perm_buffer",
            use_perm_buffer,
        )
        if return_eids:
            return out_neighbors, out_count, out_eids
        return out_neighbors, out_count

    # Static graph: validate dtypes, then append the op to the program.
    check_variable_and_dtype(
        row, "Row", ("int32", "int64"), "graph_sample_neighbors"
    )
    check_variable_and_dtype(
        colptr, "Col_Ptr", ("int32", "int64"), "graph_sample_neighbors"
    )
    check_variable_and_dtype(
        input_nodes, "X", ("int32", "int64"), "graph_sample_neighbors"
    )
    if return_eids:
        check_variable_and_dtype(
            eids, "Eids", ("int32", "int64"), "graph_sample_neighbors"
        )
    if use_perm_buffer:
        check_variable_and_dtype(
            perm_buffer,
            "Perm_Buffer",
            ("int32", "int64"),
            "graph_sample_neighbors",
        )

    helper = LayerHelper("sample_neighbors", **locals())
    out_neighbors = helper.create_variable_for_type_inference(dtype=row.dtype)
    out_count = helper.create_variable_for_type_inference(dtype=row.dtype)
    out_eids = helper.create_variable_for_type_inference(dtype=row.dtype)
    helper.append_op(
        type="graph_sample_neighbors",
        inputs={
            "Row": row,
            "Col_Ptr": colptr,
            "X": input_nodes,
            # Optional inputs are wired only when their feature flag is on.
            "Eids": eids if return_eids else None,
            "Perm_Buffer": perm_buffer if use_perm_buffer else None,
        },
        outputs={
            "Out": out_neighbors,
            "Out_Count": out_count,
            "Out_Eids": out_eids,
        },
        attrs={
            "sample_size": sample_size,
            "return_eids": return_eids,
            "flag_perm_buffer": use_perm_buffer,
        },
    )
    if return_eids:
        return out_neighbors, out_count, out_eids
    return out_neighbors, out_count
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册