magicwindyyd / mindspore (fork of MindSpore / mindspore)
Commit 54145604, authored Jul 11, 2020 by mindspore-ci-bot; committed by Gitee on Jul 11, 2020.
!2934 GraphData support random_walk and get_edge_feature
Merge pull request !2934 from heleiwang/gnn_july
Parents: 95e75c31, 7a046a1d
Showing 11 changed files with 240 additions and 77 deletions (+240 -77):
mindspore/ccsrc/dataset/api/python_bindings.cc  +6 -0
mindspore/ccsrc/dataset/engine/gnn/graph.cc  +101 -48
mindspore/ccsrc/dataset/engine/gnn/graph.h  +16 -1
mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc  +18 -12
mindspore/ccsrc/dataset/engine/gnn/graph_loader.h  +10 -8
mindspore/dataset/engine/graphdata.py  +38 -2
mindspore/dataset/engine/validators.py  +23 -1
tests/ut/cpp/dataset/gnn_graph_test.cc  +14 -2
tests/ut/data/mindrecord/testGraphData/testdata  +0 -0
tests/ut/data/mindrecord/testGraphData/testdata.db  +0 -0
tests/ut/python/dataset/test_graphdata.py  +14 -3
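Reviewer note: a minimal end-to-end sketch of the new user-facing API, pieced together from the docstring and the Python unit test added in this change (the dataset path is a placeholder; the bundled test graph has 40 edges of type 0):

import mindspore.dataset as ds

g = ds.GraphData('dataset_file', 2)           # placeholder file name, 2 parallel workers
edges = g.get_all_edges(0)                    # all edges of edge type 0
features = g.get_edge_feature(edges, [1, 2])  # request feature types 1 and 2
# One numpy.ndarray per requested feature type; with the bundled test graph,
# features[0].shape == (40,) and features[1].shape == (40,).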
mindspore/ccsrc/dataset/api/python_bindings.cc (view file @ 54145604)
@@ -833,6 +833,12 @@ void bindGraphData(py::module *m) {
         THROW_IF_ERROR(g.GetNodeFeature(node_list, feature_types, &out));
         return out.getRow();
       })
+      .def("get_edge_feature",
+           [](gnn::Graph &g, std::shared_ptr<Tensor> edge_list, std::vector<gnn::FeatureType> feature_types) {
+             TensorRow out;
+             THROW_IF_ERROR(g.GetEdgeFeature(edge_list, feature_types, &out));
+             return out.getRow();
+           })
       .def("graph_info", [](gnn::Graph &g) {
         py::dict out;
mindspore/ccsrc/dataset/engine/gnn/graph.cc (view file @ 54145604)
@@ -125,13 +125,8 @@ Status Graph::GetNodesFromEdges(const std::vector<EdgeIdType> &edge_list, std::s
 Status Graph::GetAllNeighbors(const std::vector<NodeIdType> &node_list, NodeType neighbor_type,
                               std::shared_ptr<Tensor> *out) {
-  if (node_list.empty()) {
-    RETURN_STATUS_UNEXPECTED("Input node_list is empty.");
-  }
-  if (node_type_map_.find(neighbor_type) == node_type_map_.end()) {
-    std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type);
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
+  CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
+  RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type));
   std::vector<std::vector<NodeIdType>> neighbors;
   size_t max_neighbor_num = 0;
@@ -161,6 +156,14 @@ Status Graph::CheckSamplesNum(NodeIdType samples_num) {
   return Status::OK();
 }
 
+Status Graph::CheckNeighborType(NodeType neighbor_type) {
+  if (node_type_map_.find(neighbor_type) == node_type_map_.end()) {
+    std::string err_msg = "Invalid neighbor type:" + std::to_string(neighbor_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  }
+  return Status::OK();
+}
+
 Status Graph::GetSampledNeighbors(const std::vector<NodeIdType> &node_list,
                                   const std::vector<NodeIdType> &neighbor_nums,
                                   const std::vector<NodeType> &neighbor_types, std::shared_ptr<Tensor> *out) {
@@ -171,10 +174,7 @@ Status Graph::GetSampledNeighbors(const std::vector<NodeIdType> &node_list,
     RETURN_IF_NOT_OK(CheckSamplesNum(num));
   }
   for (const auto &type : neighbor_types) {
-    if (node_type_map_.find(type) == node_type_map_.end()) {
-      std::string err_msg = "Invalid neighbor type:" + std::to_string(type);
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
+    RETURN_IF_NOT_OK(CheckNeighborType(type));
   }
   std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size());
   for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) {
@@ -228,44 +228,36 @@ Status Graph::GetNegSampledNeighbors(const std::vector<NodeIdType> &node_list, N
                                      NodeType neg_neighbor_type, std::shared_ptr<Tensor> *out) {
   CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
   RETURN_IF_NOT_OK(CheckSamplesNum(samples_num));
-  if (node_type_map_.find(neg_neighbor_type) == node_type_map_.end()) {
-    std::string err_msg = "Invalid neighbor type:" + std::to_string(neg_neighbor_type);
-    RETURN_STATUS_UNEXPECTED(err_msg);
-  }
-  std::vector<std::vector<NodeIdType>> neighbors_vec;
-  neighbors_vec.resize(node_list.size());
+  RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type));
+  std::vector<std::vector<NodeIdType>> neg_neighbors_vec;
+  neg_neighbors_vec.resize(node_list.size());
   for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) {
     std::shared_ptr<Node> node;
     RETURN_IF_NOT_OK(GetNodeByNodeId(node_list[node_idx], &node));
     std::vector<NodeIdType> neighbors;
     RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors));
-    std::unordered_set<NodeIdType> exclude_node;
+    std::unordered_set<NodeIdType> exclude_nodes;
     std::transform(neighbors.begin(), neighbors.end(),
-                   std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_node, exclude_node.begin()),
+                   std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
                    [](const NodeIdType node) { return node; });
-    auto itr = node_type_map_.find(neg_neighbor_type);
-    if (itr == node_type_map_.end()) {
-      std::string err_msg = "Invalid node type:" + std::to_string(neg_neighbor_type);
-      RETURN_STATUS_UNEXPECTED(err_msg);
+    const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type];
+    neg_neighbors_vec[node_idx].emplace_back(node->id());
+    if (all_nodes.size() > exclude_nodes.size()) {
+      while (neg_neighbors_vec[node_idx].size() < samples_num + 1) {
+        RETURN_IF_NOT_OK(NegativeSample(all_nodes, exclude_nodes, samples_num - neg_neighbors_vec[node_idx].size(),
+                                        &neg_neighbors_vec[node_idx]));
+      }
     } else {
-      neighbors_vec[node_idx].emplace_back(node->id());
-      if (itr->second.size() > exclude_node.size()) {
-        while (neighbors_vec[node_idx].size() < samples_num + 1) {
-          RETURN_IF_NOT_OK(NegativeSample(itr->second, exclude_node, samples_num - neighbors_vec[node_idx].size(),
-                                          &neighbors_vec[node_idx]));
-        }
-      } else {
-        MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id()
-                      << " neg_neighbor_type:" << neg_neighbor_type;
-        // If there are no negative neighbors, they are filled with kDefaultNodeId
-        for (int32_t i = 0; i < samples_num; ++i) {
-          neighbors_vec[node_idx].emplace_back(kDefaultNodeId);
-        }
+      MS_LOG(DEBUG) << "There are no negative neighbors. node_id:" << node->id()
+                    << " neg_neighbor_type:" << neg_neighbor_type;
+      // If there are no negative neighbors, they are filled with kDefaultNodeId
+      for (int32_t i = 0; i < samples_num; ++i) {
+        neg_neighbors_vec[node_idx].emplace_back(kDefaultNodeId);
       }
     }
   }
-  RETURN_IF_NOT_OK(CreateTensorByVector<NodeIdType>(neighbors_vec, DataType(DataType::DE_INT32), out));
+  RETURN_IF_NOT_OK(CreateTensorByVector<NodeIdType>(neg_neighbors_vec, DataType(DataType::DE_INT32), out));
   return Status::OK();
 }
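Reviewer note: how this routine surfaces in Python, as a hedged sketch. The get_neg_sampled_neighbors argument order shown here (node list, number of samples, negative neighbor type) is an assumption based on the existing checker and is not shown in this diff:

import mindspore.dataset as ds

g = ds.GraphData('dataset_file', 2)              # placeholder path
nodes = g.get_all_nodes(1)                       # nodes of type 1
neg = g.get_neg_sampled_neighbors(nodes, 3, 2)   # 3 negative samples per node, neighbor type 2
# Each row begins with the input node id (emplace_back(node->id()) above); nodes with
# no candidate negatives are padded with kDefaultNodeId by the else branch above.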
@@ -280,8 +272,19 @@ Status Graph::RandomWalk(const std::vector<NodeIdType> &node_list, const std::ve
 }
 
 Status Graph::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
-  auto itr = default_feature_map_.find(feature_type);
-  if (itr == default_feature_map_.end()) {
+  auto itr = default_node_feature_map_.find(feature_type);
+  if (itr == default_node_feature_map_.end()) {
+    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  } else {
+    *out_feature = itr->second;
+  }
+  return Status::OK();
+}
+
+Status Graph::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
+  auto itr = default_edge_feature_map_.find(feature_type);
+  if (itr == default_edge_feature_map_.end()) {
     std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
     RETURN_STATUS_UNEXPECTED(err_msg);
   } else {
@@ -295,7 +298,7 @@ Status Graph::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::ve
   if (!nodes || nodes->Size() == 0) {
     RETURN_STATUS_UNEXPECTED("Input nodes is empty");
   }
-  CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Inpude feature_types is empty");
+  CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
   TensorRow tensors;
   for (const auto &f_type : feature_types) {
     std::shared_ptr<Feature> default_feature;
@@ -340,6 +343,45 @@ Status Graph::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::ve
+Status Graph::GetEdgeFeature(const std::shared_ptr<Tensor> &edges, const std::vector<FeatureType> &feature_types,
+                             TensorRow *out) {
+  if (!edges || edges->Size() == 0) {
+    RETURN_STATUS_UNEXPECTED("Input edges is empty");
+  }
+  CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
+  TensorRow tensors;
+  for (const auto &f_type : feature_types) {
+    std::shared_ptr<Feature> default_feature;
+    // If no feature can be obtained, fill in the default value
+    RETURN_IF_NOT_OK(GetEdgeDefaultFeature(f_type, &default_feature));
+    TensorShape shape(default_feature->Value()->shape());
+    auto shape_vec = edges->shape().AsVector();
+    dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
+    shape = shape.PrependDim(size);
+    std::shared_ptr<Tensor> fea_tensor;
+    RETURN_IF_NOT_OK(
+      Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
+    dsize_t index = 0;
+    for (auto edge_itr = edges->begin<EdgeIdType>(); edge_itr != edges->end<EdgeIdType>(); ++edge_itr) {
+      std::shared_ptr<Edge> edge;
+      RETURN_IF_NOT_OK(GetEdgeByEdgeId(*edge_itr, &edge));
+      std::shared_ptr<Feature> feature;
+      if (!edge->GetFeatures(f_type, &feature).IsOk()) {
+        feature = default_feature;
+      }
+      RETURN_IF_NOT_OK(fea_tensor->InsertTensor({index}, feature->Value()));
+      index++;
+    }
+    TensorShape reshape(edges->shape());
+    for (auto s : default_feature->Value()->shape().AsVector()) {
+      reshape = reshape.AppendDim(s);
+    }
+    RETURN_IF_NOT_OK(fea_tensor->Reshape(reshape));
+    fea_tensor->Squeeze();
+    tensors.push_back(fea_tensor);
+  }
+  *out = std::move(tensors);
+  return Status::OK();
+}
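Reviewer note: the shape handling above (PrependDim with the flattened edge count, then Reshape and Squeeze) lays the result out per edge, and missing per-edge features fall back to the default feature. A sketch of the observable effect, consistent with the new C++ and Python tests below (placeholder path):

import mindspore.dataset as ds

g = ds.GraphData('dataset_file', 2)        # placeholder path
edges = g.get_all_edges(0)
features = g.get_edge_feature(edges, [1])
# For a scalar per-edge feature the returned array is 1-D with one entry per edge;
# the bundled test graph gives features[0].shape == (40,).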
@@ -405,7 +447,8 @@ Status Graph::LoadNodeAndEdge() {
   RETURN_IF_NOT_OK(gl.InitAndLoad());
   // get all maps
   RETURN_IF_NOT_OK(gl.GetNodesAndEdges(&node_id_map_, &edge_id_map_, &node_type_map_, &edge_type_map_,
-                                       &node_feature_map_, &edge_feature_map_, &default_feature_map_));
+                                       &node_feature_map_, &edge_feature_map_, &default_node_feature_map_,
+                                       &default_edge_feature_map_));
   return Status::OK();
 }
@@ -420,18 +463,33 @@ Status Graph::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) {
   return Status::OK();
 }
 
+Status Graph::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) {
+  auto itr = edge_id_map_.find(id);
+  if (itr == edge_id_map_.end()) {
+    std::string err_msg = "Invalid edge id:" + std::to_string(id);
+    RETURN_STATUS_UNEXPECTED(err_msg);
+  } else {
+    *edge = itr->second;
+  }
+  return Status::OK();
+}
+
 Graph::RandomWalkBase::RandomWalkBase(Graph *graph)
     : graph_(graph), step_home_param_(1.0), step_away_param_(1.0), default_node_(-1), num_walks_(1), num_workers_(1) {}
 
 Status Graph::RandomWalkBase::Build(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path,
                                     float step_home_param, float step_away_param, const NodeIdType default_node,
                                     int32_t num_walks, int32_t num_workers) {
+  CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
   node_list_ = node_list;
   if (meta_path.empty() || meta_path.size() > kMaxNumWalks) {
     std::string err_msg = "Failed, meta path required between 1 and " + std::to_string(kMaxNumWalks) +
                           ". The size of input path is " + std::to_string(meta_path.size());
     RETURN_STATUS_UNEXPECTED(err_msg);
   }
+  for (const auto &type : meta_path) {
+    RETURN_IF_NOT_OK(graph_->CheckNeighborType(type));
+  }
   meta_path_ = meta_path;
   if (step_home_param < kGnnEpsilon || step_away_param < kGnnEpsilon) {
     std::string err_msg = "Failed, step_home_param and step_away_param required greater than " +
@@ -500,15 +558,10 @@ Status Graph::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::ve
 }
 
 Status Graph::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) {
-  // Repeatedly simulate random walks from each node
-  std::vector<uint32_t> permutation(node_list_.size());
-  std::iota(permutation.begin(), permutation.end(), 0);
   for (int32_t i = 0; i < num_walks_; i++) {
-    unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
-    std::shuffle(permutation.begin(), permutation.end(), std::default_random_engine(seed));
-    for (const auto &i_perm : permutation) {
+    for (const auto &node : node_list_) {
       std::vector<NodeIdType> walk;
-      RETURN_IF_NOT_OK(Node2vecWalk(node_list_[i_perm], &walk));
+      RETURN_IF_NOT_OK(Node2vecWalk(node, &walk));
       walks->push_back(walk);
     }
   }
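Reviewer note: the random-walk entry point exercised by the Python tests later in this diff, as a hedged sketch. The defaults noted in comments come from the RandomWalkBase constructor above; the exact Python parameter names are not visible in this diff and are assumed:

import mindspore.dataset as ds

g = ds.GraphData('social_data_file', 1)   # placeholder; the tests use SOCIAL_DATA_FILE
nodes = g.get_all_nodes(1)                # 33 nodes in the test graph
meta_path = [1 for _ in range(39)]        # node type to follow at each of the 39 steps
walks = g.random_walk(nodes, meta_path)   # step_home_param/step_away_param default to 1.0, default node to -1
# walks.shape == (33, 40): one row per start node, the start node plus 39 sampled steps.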
mindspore/ccsrc/dataset/engine/gnn/graph.h (view file @ 54145604)
@@ -211,12 +211,24 @@ class Graph {
   // @return Status - The error code return
   Status GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature);
 
+  // Get the default feature of a edge
+  // @param FeatureType feature_type -
+  // @param std::shared_ptr<Feature> *out_feature - Returned feature
+  // @return Status - The error code return
+  Status GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature);
+
   // Find node object using node id
   // @param NodeIdType id -
   // @param std::shared_ptr<Node> *node - Returned node object
   // @return Status - The error code return
   Status GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node);
 
+  // Find edge object using edge id
+  // @param EdgeIdType id -
+  // @param std::shared_ptr<Node> *edge - Returned edge object
+  // @return Status - The error code return
+  Status GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge);
+
   // Negative sampling
   // @param std::vector<NodeIdType> &input_data - The data set to be sampled
   // @param std::unordered_set<NodeIdType> &exclude_data - Data to be excluded
@@ -228,6 +240,8 @@ class Graph {
   Status CheckSamplesNum(NodeIdType samples_num);
 
+  Status CheckNeighborType(NodeType neighbor_type);
+
   std::string dataset_file_;
   int32_t num_workers_;  // The number of worker threads
   std::mt19937 rnd_;
@@ -242,7 +256,8 @@ class Graph {
   std::unordered_map<NodeType, std::unordered_set<FeatureType>> node_feature_map_;
   std::unordered_map<EdgeType, std::unordered_set<FeatureType>> edge_feature_map_;
-  std::unordered_map<FeatureType, std::shared_ptr<Feature>> default_feature_map_;
+  std::unordered_map<FeatureType, std::shared_ptr<Feature>> default_node_feature_map_;
+  std::unordered_map<FeatureType, std::shared_ptr<Feature>> default_edge_feature_map_;
 };
 }  // namespace gnn
 }  // namespace dataset
mindspore/ccsrc/dataset/engine/gnn/graph_loader.cc (view file @ 54145604)
@@ -41,7 +41,8 @@ GraphLoader::GraphLoader(std::string mr_filepath, int32_t num_workers)
 Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, NodeTypeMap *n_type_map,
                                      EdgeTypeMap *e_type_map, NodeFeatureMap *n_feature_map,
-                                     EdgeFeatureMap *e_feature_map, DefaultFeatureMap *default_feature_map) {
+                                     EdgeFeatureMap *e_feature_map, DefaultNodeFeatureMap *default_node_feature_map,
+                                     DefaultEdgeFeatureMap *default_edge_feature_map) {
   for (std::deque<std::shared_ptr<Node>> &dq : n_deques_) {
     while (dq.empty() == false) {
       std::shared_ptr<Node> node_ptr = dq.front();
@@ -70,7 +71,7 @@ Status GraphLoader::GetNodesAndEdges(NodeIdMap *n_id_map, EdgeIdMap *e_id_map, N
   for (auto &itr : *n_type_map) itr.second.shrink_to_fit();
   for (auto &itr : *e_type_map) itr.second.shrink_to_fit();
 
-  MergeFeatureMaps(n_feature_map, e_feature_map, default_feature_map);
+  MergeFeatureMaps(n_feature_map, e_feature_map, default_node_feature_map, default_edge_feature_map);
   return Status::OK();
 }
@@ -81,7 +82,8 @@ Status GraphLoader::InitAndLoad() {
   e_deques_.resize(num_workers_);
   n_feature_maps_.resize(num_workers_);
   e_feature_maps_.resize(num_workers_);
-  default_feature_maps_.resize(num_workers_);
+  default_node_feature_maps_.resize(num_workers_);
+  default_edge_feature_maps_.resize(num_workers_);
   TaskGroup vg;
 
   shard_reader_ = std::make_unique<ShardReader>();
@@ -109,7 +111,7 @@ Status GraphLoader::InitAndLoad() {
 Status GraphLoader::LoadNode(const std::vector<uint8_t> &col_blob, const mindrecord::json &col_jsn,
                              std::shared_ptr<Node> *node, NodeFeatureMap *feature_map,
-                             DefaultFeatureMap *default_feature) {
+                             DefaultNodeFeatureMap *default_feature) {
   NodeIdType node_id = col_jsn["first_id"];
   NodeType node_type = static_cast<NodeType>(col_jsn["type"]);
   (*node) = std::make_shared<LocalNode>(node_id, node_type);
@@ -133,7 +135,7 @@ Status GraphLoader::LoadNode(const std::vector<uint8_t> &col_blob, const mindrec
 Status GraphLoader::LoadEdge(const std::vector<uint8_t> &col_blob, const mindrecord::json &col_jsn,
                              std::shared_ptr<Edge> *edge, EdgeFeatureMap *feature_map,
-                             DefaultFeatureMap *default_feature) {
+                             DefaultEdgeFeatureMap *default_feature) {
   EdgeIdType edge_id = col_jsn["first_id"];
   EdgeType edge_type = static_cast<EdgeType>(col_jsn["type"]);
   NodeIdType src_id = col_jsn["second_id"], dst_id = col_jsn["third_id"];
@@ -214,13 +216,13 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) {
     std::string attr = col_jsn["attribute"];
     if (attr == "n") {
       std::shared_ptr<Node> node_ptr;
-      RETURN_IF_NOT_OK(LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]),
-                                &default_feature_maps_[worker_id]));
+      RETURN_IF_NOT_OK(LoadNode(col_blob, col_jsn, &node_ptr, &(n_feature_maps_[worker_id]),
+                                &default_node_feature_maps_[worker_id]));
       n_deques_[worker_id].emplace_back(node_ptr);
     } else if (attr == "e") {
       std::shared_ptr<Edge> edge_ptr;
-      RETURN_IF_NOT_OK(LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]),
-                                &default_feature_maps_[worker_id]));
+      RETURN_IF_NOT_OK(LoadEdge(col_blob, col_jsn, &edge_ptr, &(e_feature_maps_[worker_id]),
+                                &default_edge_feature_maps_[worker_id]));
       e_deques_[worker_id].emplace_back(edge_ptr);
     } else {
       MS_LOG(WARNING) << "attribute:" << attr << " is neither edge nor node.";
@@ -233,7 +235,8 @@ Status GraphLoader::WorkerEntry(int32_t worker_id) {
 }
 
 void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap *e_feature_map,
-                                   DefaultFeatureMap *default_feature_map) {
+                                   DefaultNodeFeatureMap *default_node_feature_map,
+                                   DefaultEdgeFeatureMap *default_edge_feature_map) {
   for (int wkr_id = 0; wkr_id < num_workers_; wkr_id++) {
     for (auto &m : n_feature_maps_[wkr_id]) {
       for (auto &n : m.second) (*n_feature_map)[m.first].insert(n);
@@ -241,8 +244,11 @@ void GraphLoader::MergeFeatureMaps(NodeFeatureMap *n_feature_map, EdgeFeatureMap
     for (auto &m : e_feature_maps_[wkr_id]) {
       for (auto &n : m.second) (*e_feature_map)[m.first].insert(n);
     }
-    for (auto &m : default_feature_maps_[wkr_id]) {
-      (*default_feature_map)[m.first] = m.second;
+    for (auto &m : default_node_feature_maps_[wkr_id]) {
+      (*default_node_feature_map)[m.first] = m.second;
+    }
+    for (auto &m : default_edge_feature_maps_[wkr_id]) {
+      (*default_edge_feature_map)[m.first] = m.second;
     }
   }
   n_feature_maps_.clear();
mindspore/ccsrc/dataset/engine/gnn/graph_loader.h (view file @ 54145604)
@@ -43,7 +43,8 @@ using NodeTypeMap = std::unordered_map<NodeType, std::vector<NodeIdType>>;
 using EdgeTypeMap = std::unordered_map<EdgeType, std::vector<EdgeIdType>>;
 using NodeFeatureMap = std::unordered_map<NodeType, std::unordered_set<FeatureType>>;
 using EdgeFeatureMap = std::unordered_map<EdgeType, std::unordered_set<FeatureType>>;
-using DefaultFeatureMap = std::unordered_map<FeatureType, std::shared_ptr<Feature>>;
+using DefaultNodeFeatureMap = std::unordered_map<FeatureType, std::shared_ptr<Feature>>;
+using DefaultEdgeFeatureMap = std::unordered_map<FeatureType, std::shared_ptr<Feature>>;
 
 // this class interfaces with the underlying storage format (mindrecord)
 // it returns raw nodes and edges via GetNodesAndEdges
@@ -63,7 +64,7 @@ class GraphLoader {
   // random order. src_node and dst_node in Edge are node_id only with -1 as type.
   // features attached to each node and edge are expected to be filled correctly
   Status GetNodesAndEdges(NodeIdMap *, EdgeIdMap *, NodeTypeMap *, EdgeTypeMap *, NodeFeatureMap *, EdgeFeatureMap *,
-                          DefaultFeatureMap *);
+                          DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *);
 
  private:
   //
@@ -77,19 +78,19 @@ class GraphLoader {
   // @param mindrecord::json &jsn - contains raw data
   // @param std::shared_ptr<Node> *node - return value
   // @param NodeFeatureMap *feature_map -
-  // @param DefaultFeatureMap *default_feature -
+  // @param DefaultNodeFeatureMap *default_feature -
   // @return Status - the status code
   Status LoadNode(const std::vector<uint8_t> &blob, const mindrecord::json &jsn, std::shared_ptr<Node> *node,
-                  NodeFeatureMap *feature_map, DefaultFeatureMap *default_feature);
+                  NodeFeatureMap *feature_map, DefaultNodeFeatureMap *default_feature);
 
   // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
   // @param mindrecord::json &jsn - contains raw data
   // @param std::shared_ptr<Edge> *edge - return value, the edge ptr, edge is not yet connected
   // @param FeatureMap *feature_map
-  // @param DefaultFeatureMap *default_feature -
+  // @param DefaultEdgeFeatureMap *default_feature -
   // @return Status - the status code
   Status LoadEdge(const std::vector<uint8_t> &blob, const mindrecord::json &jsn, std::shared_ptr<Edge> *edge,
-                  EdgeFeatureMap *feature_map, DefaultFeatureMap *default_feature);
+                  EdgeFeatureMap *feature_map, DefaultEdgeFeatureMap *default_feature);
 
   // @param std::string key - column name
   // @param std::vector<uint8_t> &blob - contains data in blob field in mindrecord
@@ -108,7 +109,7 @@ class GraphLoader {
                     std::shared_ptr<Tensor> *tensor);
 
   // merge NodeFeatureMap and EdgeFeatureMap of each worker into 1
-  void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultFeatureMap *);
+  void MergeFeatureMaps(NodeFeatureMap *, EdgeFeatureMap *, DefaultNodeFeatureMap *, DefaultEdgeFeatureMap *);
 
   const int32_t num_workers_;
   std::atomic_int row_id_;
@@ -118,7 +119,8 @@ class GraphLoader {
   std::vector<std::deque<std::shared_ptr<Edge>>> e_deques_;
   std::vector<NodeFeatureMap> n_feature_maps_;
   std::vector<EdgeFeatureMap> e_feature_maps_;
-  std::vector<DefaultFeatureMap> default_feature_maps_;
+  std::vector<DefaultNodeFeatureMap> default_node_feature_maps_;
+  std::vector<DefaultEdgeFeatureMap> default_edge_feature_maps_;
   const std::vector<std::string> keys_;
 };
 }  // namespace gnn
mindspore/dataset/engine/graphdata.py (view file @ 54145604)
@@ -22,7 +22,8 @@ from mindspore._c_dataengine import Tensor
 from .validators import check_gnn_graphdata, check_gnn_get_all_nodes, check_gnn_get_all_edges, \
     check_gnn_get_nodes_from_edges, check_gnn_get_all_neighbors, check_gnn_get_sampled_neighbors, \
-    check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_random_walk
+    check_gnn_get_neg_sampled_neighbors, check_gnn_get_node_feature, check_gnn_get_edge_feature, \
+    check_gnn_random_walk
 
 
 class GraphData:
@@ -127,7 +128,13 @@ class GraphData:
     @check_gnn_get_sampled_neighbors
     def get_sampled_neighbors(self, node_list, neighbor_nums, neighbor_types):
         """
-        Get sampled neighbor information, maximum support 6-hop sampling.
+        Get sampled neighbor information.
+
+        The api supports multi-hop neighbor sampling. That is, the previous sampling result is used as the input of
+        next-hop sampling. A maximum of 6-hop are allowed.
+
+        The sampling result is tiled into a list in the format of [input node, 1-hop sampling result,
+        2-hop samling result ...]
 
         Args:
             node_list (list or numpy.ndarray): The given list of nodes.
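A small worked example of the tiled layout described in the new docstring (placeholder path; node ids and types are illustrative):

import mindspore.dataset as ds

g = ds.GraphData('dataset_file', 2)
# Sample 2 one-hop neighbors per input node, then 2 two-hop neighbors per one-hop result.
result = g.get_sampled_neighbors([0, 1], [2, 2], [1, 1])
# Each input node contributes a tiled segment of
#   [input node, 2 one-hop samples, 2 x 2 = 4 two-hop samples], i.e. 1 + 2 + 4 = 7 ids.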
@@ -207,6 +214,35 @@ class GraphData:
             Tensor(node_list), feature_types)]
 
+    @check_gnn_get_edge_feature
+    def get_edge_feature(self, edge_list, feature_types):
+        """
+        Get `feature_types` feature of the edges in `edge_list`.
+
+        Args:
+            edge_list (list or numpy.ndarray): The given list of edges.
+            feature_types (list or ndarray): The given list of feature types.
+
+        Returns:
+            numpy.ndarray: array of features.
+
+        Examples:
+            >>> import mindspore.dataset as ds
+            >>> data_graph = ds.GraphData('dataset_file', 2)
+            >>> edges = data_graph.get_all_edges(0)
+            >>> features = data_graph.get_edge_feature(edges, [1])
+
+        Raises:
+            TypeError: If `edge_list` is not list or ndarray.
+            TypeError: If `feature_types` is not list or ndarray.
+        """
+        if isinstance(edge_list, list):
+            edge_list = np.array(edge_list, dtype=np.int32)
+        return [
+            t.as_array() for t in self._graph.get_edge_feature(
+                Tensor(edge_list), feature_types)]
+
     def graph_info(self):
         """
         Get the meta information of the graph, including the number of nodes, the type of nodes,
mindspore/dataset/engine/validators.py (view file @ 54145604)
@@ -816,7 +816,7 @@ def check_gnn_graphdata(method):
         check_file(dataset_file)
         if num_parallel_workers is not None:
             type_check(num_parallel_workers, (int,), "num_parallel_workers")
             check_num_parallel_workers(num_parallel_workers)
         return method(self, *args, **kwargs)
 
     return new_method
@@ -989,6 +989,28 @@ def check_gnn_get_node_feature(method):
     return new_method
 
 
+def check_gnn_get_edge_feature(method):
+    """A wrapper that wrap a parameter checker to the GNN `get_edge_feature` function."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [edge_list, feature_types], _ = parse_user_args(method, *args, **kwargs)
+
+        type_check(edge_list, (list, np.ndarray), "edge_list")
+        if isinstance(edge_list, list):
+            check_aligned_list(edge_list, 'edge_list', int)
+        elif isinstance(edge_list, np.ndarray):
+            if not edge_list.dtype == np.int32:
+                raise TypeError("Each member in {0} should be of type int32. Got {1}.".format(
+                    edge_list, edge_list.dtype))
+
+        check_gnn_list_or_ndarray(feature_types, 'feature_types')
+
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
 def check_numpyslicesdataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(NumpySlicesDataset)."""
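Reviewer note: a sketch of what the new checker accepts and rejects, following the branches above (placeholder path; edge ids are illustrative):

import numpy as np
import mindspore.dataset as ds

g = ds.GraphData('dataset_file', 2)
g.get_edge_feature([0, 1, 2], [1])                             # list of ints: accepted, converted to int32 downstream
g.get_edge_feature(np.array([0, 1, 2], dtype=np.int32), [1])   # int32 ndarray: passes the dtype check
g.get_edge_feature(np.array([0, 1, 2], dtype=np.int64), [1])   # raises TypeError: members must be int32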
tests/ut/cpp/dataset/gnn_graph_test.cc (view file @ 54145604)
@@ -49,9 +49,10 @@ TEST_F(MindDataTestGNNGraph, TestGraphLoader) {
   EdgeTypeMap e_type_map;
   NodeFeatureMap n_feature_map;
   EdgeFeatureMap e_feature_map;
-  DefaultFeatureMap default_feature_map;
+  DefaultNodeFeatureMap default_node_feature_map;
+  DefaultEdgeFeatureMap default_edge_feature_map;
   EXPECT_TRUE(gl.GetNodesAndEdges(&n_id_map, &e_id_map, &n_type_map, &e_type_map, &n_feature_map, &e_feature_map,
-                                  &default_feature_map)
+                                  &default_node_feature_map, &default_edge_feature_map)
                 .IsOk());
   EXPECT_EQ(n_id_map.size(), 20);
   EXPECT_EQ(e_id_map.size(), 40);
@@ -119,6 +120,17 @@ TEST_F(MindDataTestGNNGraph, TestGetSampledNeighbors) {
   std::transform(edges->begin<EdgeIdType>(), edges->end<EdgeIdType>(), edge_list.begin(),
                  [](const EdgeIdType edge) { return edge; });
 
+  TensorRow edge_features;
+  s = graph.GetEdgeFeature(edges, meta_info.edge_feature_type, &edge_features);
+  EXPECT_TRUE(s.IsOk());
+  EXPECT_TRUE(edge_features[0]->ToString() ==
+              "Tensor (shape: <40>, Type: int32)\n"
+              "[0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0]");
+  EXPECT_TRUE(edge_features[1]->ToString() ==
+              "Tensor (shape: <40>, Type: float32)\n"
+              "[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8,1.9,2,2.1,2.2,2.3,2.4,2.5,2.6,2."
+              "7,2.8,2.9,3,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8,3.9,4]");
+
   std::shared_ptr<Tensor> nodes;
   s = graph.GetNodesFromEdges(edge_list, &nodes);
   EXPECT_TRUE(s.IsOk());
tests/ut/data/mindrecord/testGraphData/testdata (view file @ 54145604)
Binary file; preview not available.

tests/ut/data/mindrecord/testGraphData/testdata.db (view file @ 54145604)
Binary file; preview not available.
tests/ut/python/dataset/test_graphdata.py (view file @ 54145604)
@@ -125,7 +125,7 @@ def test_graphdata_graphinfo():
     assert graph_info['node_num'] == {1: 10, 2: 10}
     assert graph_info['edge_num'] == {0: 40}
     assert graph_info['node_feature_type'] == [1, 2, 3, 4]
-    assert graph_info['edge_feature_type'] == []
+    assert graph_info['edge_feature_type'] == [1, 2]
 
 
 class RandomBatchedSampler(ds.Sampler):
@@ -204,7 +204,6 @@ def test_graphdata_randomwalkdefault():
     logger.info('test randomwalk with default parameters.\n')
     g = ds.GraphData(SOCIAL_DATA_FILE, 1)
     nodes = g.get_all_nodes(1)
-    print(len(nodes))
     assert len(nodes) == 33
 
     meta_path = [1 for _ in range(39)]
@@ -219,7 +218,6 @@ def test_graphdata_randomwalk():
     logger.info('test random walk with given parameters.\n')
     g = ds.GraphData(SOCIAL_DATA_FILE, 1)
    nodes = g.get_all_nodes(1)
-    print(len(nodes))
     assert len(nodes) == 33
 
     meta_path = [1 for _ in range(39)]
@@ -227,6 +225,18 @@ def test_graphdata_randomwalk():
     assert walks.shape == (33, 40)
 
 
+def test_graphdata_getedgefeature():
+    """
+    Test get edge feature
+    """
+    logger.info('test get_edge_feature.\n')
+    g = ds.GraphData(DATASET_FILE)
+    edges = g.get_all_edges(0)
+    features = g.get_edge_feature(edges, [1, 2])
+    assert features[0].shape == (40,)
+    assert features[1].shape == (40,)
+
+
 if __name__ == '__main__':
     test_graphdata_getfullneighbor()
     test_graphdata_getnodefeature_input_check()
@@ -236,3 +246,4 @@ if __name__ == '__main__':
     test_graphdata_generatordataset()
     test_graphdata_randomwalkdefault()
     test_graphdata_randomwalk()
+    test_graphdata_getedgefeature()