Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PGL
提交
756edcf6
P
PGL
项目概览
PaddlePaddle
/
PGL
通知
76
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
11
列表
看板
标记
里程碑
合并请求
1
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PGL
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
11
Issue
11
列表
看板
标记
里程碑
合并请求
1
合并请求
1
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
756edcf6
编写于
4月 27, 2020
作者:
W
Webbley
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove unused file
上级
346f3c6d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
2 addition
and
415 deletion
+2
-415
ogb_examples/graphproppred/main_pgl.py
ogb_examples/graphproppred/main_pgl.py
+0
-197
ogb_examples/graphproppred/mol/README.md
ogb_examples/graphproppred/mol/README.md
+2
-2
ogb_examples/graphproppred/ogbg_ppi.py
ogb_examples/graphproppred/ogbg_ppi.py
+0
-216
未找到文件。
ogb_examples/graphproppred/main_pgl.py
已删除
100644 → 0
浏览文件 @
346f3c6d
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test ogb
"""
import
argparse
import
ssl
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
# SSL
import
torch
import
pgl
import
numpy
as
np
import
paddle.fluid
as
fluid
from
pgl.contrib.ogb.graphproppred.dataset_pgl
import
PglGraphPropPredDataset
from
pgl.utils
import
paddle_helper
from
ogb.graphproppred
import
Evaluator
from
pgl.contrib.ogb.graphproppred.mol_encoder
import
AtomEncoder
,
BondEncoder
def train(exe, batch_size, graph_wrapper, train_program, splitted_idx,
          dataset, evaluator, fetch_loss, fetch_pred):
    """Run one shuffled pass over the "train" split and print the train metric.

    Args:
        exe: Object whose ``run(program, feed=..., fetch_list=...)`` executes
            one step and returns the fetched values.
        batch_size: Number of graphs taken from the shuffled order per step.
        graph_wrapper: Object whose ``to_feed`` turns a batched graph into a
            feed dict.
        train_program: Program handed to ``exe.run`` for every step.
        splitted_idx: Mapping of split name to indices; only ``"train"`` is
            read here.
        dataset: Indexable returning ``(graphs, labels)`` for a set of indices.
        evaluator: Object whose ``eval`` receives ``y_true`` and ``y_pred``.
        fetch_loss: Fetch target for the loss.
        fetch_pred: Fetch target for the predictions.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Prints the first loss element every 100 batches and, after the pass, the
    evaluator result labelled "train". Returns None.
    """
    if batch_size < 1:
        # A non-positive step would never advance through the split.
        raise ValueError("batch_size must be >= 1, got %s" % batch_size)

    graphs, labels = dataset[splitted_idx["train"]]
    perm = np.arange(0, len(graphs))
    np.random.shuffle(perm)
    pred_output = np.zeros_like(labels, dtype="float32")

    for batch_no, start_batch in enumerate(range(0, len(perm), batch_size)):
        batch_index = perm[start_batch:start_batch + batch_size]
        batch_graph = pgl.graph.MultiGraph(graphs[batch_index])

        batch_label = labels[batch_index]
        # NaN never equals itself, so the mask is 1.0 where a label is present
        # and 0.0 where it is NaN; it is fed as the per-entry loss weight.
        batch_valid = (batch_label == batch_label).astype("float32")
        batch_label = np.nan_to_num(batch_label).astype("float32")

        feed_dict = graph_wrapper.to_feed(batch_graph)
        feed_dict["label"] = batch_label
        feed_dict["weight"] = batch_valid
        loss, pred = exe.run(train_program,
                             feed=feed_dict,
                             fetch_list=[fetch_loss, fetch_pred])
        if batch_no % 100 == 0:
            print("loss: %s" % loss[0])
        # Write predictions back at the shuffled positions so they line up
        # with `labels` for the evaluator.
        pred_output[batch_index] = pred

    print("train", evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def evaluate(exe, batch_size, graph_wrapper, val_program, splitted_idx,
             dataset, mode, evaluator, fetch_pred):
    """Predict over the split named by ``mode`` and print its metric.

    Args:
        exe: Object whose ``run(program, feed=..., fetch_list=...)`` executes
            one step and returns the fetched values.
        batch_size: Number of graphs per step.
        graph_wrapper: Object whose ``to_feed`` turns a batched graph into a
            feed dict.
        val_program: Program handed to ``exe.run`` for every step.
        splitted_idx: Mapping of split name to indices; ``splitted_idx[mode]``
            is read.
        dataset: Indexable returning ``(graphs, labels)`` for a set of indices.
        mode: Split key, also used as the label of the printed result.
        evaluator: Object whose ``eval`` receives ``y_true`` and ``y_pred``.
        fetch_pred: Fetch target for the predictions.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Prints ``mode`` followed by the evaluator result. Returns None.
    """
    if batch_size < 1:
        # A non-positive step would never advance through the split.
        raise ValueError("batch_size must be >= 1, got %s" % batch_size)

    graphs, labels = dataset[splitted_idx[mode]]
    perm = np.arange(0, len(graphs))
    pred_output = np.zeros_like(labels, dtype="float32")

    for start_batch in range(0, len(perm), batch_size):
        batch_index = perm[start_batch:start_batch + batch_size]
        batch_graph = pgl.graph.MultiGraph(graphs[batch_index])
        feed_dict = graph_wrapper.to_feed(batch_graph)
        # Only one target is fetched, so the predictions are element 0 of the
        # returned list.
        pred = exe.run(val_program, feed=feed_dict, fetch_list=[fetch_pred])
        pred_output[batch_index] = pred[0]

    print(mode, evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def send_func(src_feat, dst_feat, edge_feat):
    """Build an edge message: source feature "h" plus edge feature "h".

    ``dst_feat`` is accepted but not read.
    """
    from_source = src_feat["h"]
    from_edge = edge_feat["h"]
    return from_source + from_edge
class GNNModel(object):
    """Graph network: encoded atom/bond features, sum message passing, fc head."""

    def __init__(self, name, emb_dim, num_task, num_layers):
        """Store the hyper-parameters and build the atom and bond encoders.

        Both encoders are created with the same ``name`` and ``emb_dim``.
        """
        self.name = name
        self.emb_dim = emb_dim
        self.num_task = num_task
        self.num_layers = num_layers

        encoder_kwargs = dict(name=name, emb_dim=emb_dim)
        self.atom_encoder = AtomEncoder(**encoder_kwargs)
        self.bond_encoder = BondEncoder(**encoder_kwargs)

    def forward(self, graph):
        """Return the output of the final fc layer (size ``num_task``).

        Node and edge "feat" features are encoded once. Each of the
        ``num_layers`` rounds sends ``send_func`` messages, sums them per
        node, adds the previous node state and applies a relu fc layer of
        size ``emb_dim``. Node states are then average-pooled per graph
        before the final fc layer.
        """
        node_state = self.atom_encoder(graph.node_feat['feat'])
        edge_state = self.bond_encoder(graph.edge_feat['feat'])

        for layer in range(self.num_layers):
            messages = graph.send(
                send_func,
                nfeat_list=[("h", node_state)],
                efeat_list=[("h", edge_state)])
            # Residual-style update: aggregated messages plus the old state.
            combined = graph.recv(messages, 'sum') + node_state
            layer_name = self.name + '_%s' % layer
            node_state = fluid.layers.fc(combined,
                                         size=self.emb_dim,
                                         name=layer_name,
                                         act="relu")

        pooled = pgl.layers.graph_pooling(graph, node_state, "average")
        return fluid.layers.fc(pooled, self.num_task, name="final")
def main():
    """Train and evaluate GNNModel on an OGB graph-property dataset.

    Command-line flags:
        --use_cuda: run on ``fluid.CUDAPlace(0)`` instead of ``fluid.CPUPlace()``.
        --epochs: number of train/valid/test rounds (default 100).
        --dataset: name given to PglGraphPropPredDataset and Evaluator
            (default "ogbg-molhiv").
        --batch_size: graphs per step for training and evaluation
            (default 128).
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument(
        '--epochs',
        type=int,
        default=100,
        help='number of epochs to train (default: 100)')
    parser.add_argument(
        '--dataset',
        type=str,
        default="ogbg-molhiv",
        help='dataset name (default: ogbg-molhiv)')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=128,
        help='number of graphs per batch (default: 128)')
    args = parser.parse_args()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    # Dataset loading and the split indices both come from the dataset object.
    dataset = PglGraphPropPredDataset(name=args.dataset)
    splitted_idx = dataset.get_idx_split()

    # The evaluator is selected by dataset name.
    evaluator = Evaluator(args.dataset)

    # A two-graph sample batch is used only to describe the node/edge
    # features to GraphWrapper below; its labels are not needed.
    sample_graphs, _ = dataset[:2]
    graph_data = pgl.graph.MultiGraph(sample_graphs)

    # Cast both feature arrays to int64 before reading their feature info.
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype("int64")
    graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64")

    train_program = fluid.Program()
    startup_program = fluid.Program()

    model = GNNModel(
        name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            "graph",
            place=place,
            node_feat=graph_data.node_feat_info(),
            edge_feat=graph_data.edge_feat_info())
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    # Clone before the loss and optimizer are added, so the evaluation
    # program contains only the forward pass.
    val_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        train_label = fluid.layers.data(
            name="label", dtype="float32", shape=[None, dataset.num_tasks])
        train_weight = fluid.layers.data(
            name="weight", dtype="float32", shape=[None, dataset.num_tasks])
        # The weight is the 0/1 validity mask fed by train(), so entries with
        # missing labels contribute nothing to the summed loss.
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=pred, label=train_label) * train_weight
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    for epoch in range(1, args.epochs + 1):
        print("Epoch", epoch)
        train(exe, args.batch_size, gw, train_program, splitted_idx, dataset,
              evaluator, train_loss_t, sigmoid_pred)
        evaluate(exe, args.batch_size, gw, val_program, splitted_idx, dataset,
                 "valid", evaluator, sigmoid_pred)
        evaluate(exe, args.batch_size, gw, val_program, splitted_idx, dataset,
                 "test", evaluator, sigmoid_pred)


if __name__ == "__main__":
    main()
ogb_examples/graphproppred/mol/README.md
浏览文件 @
756edcf6
...
...
@@ -4,8 +4,8 @@
### Requirements
-
paddlpaddle 1.7.1
-
pgl 1.0.
1
-
paddlepaddle
>=
1.7.1
-
pgl 1.0.
2
-
ogb
NOTE: To install the version of ogb that is fitted for this project, run the command below.
...
...
ogb_examples/graphproppred/ogbg_ppi.py
已删除
100644 → 0
浏览文件 @
346f3c6d
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""test ogb
"""
import
argparse
import
ssl
ssl
.
_create_default_https_context
=
ssl
.
_create_unverified_context
# SSL
import
torch
import
pgl
import
numpy
as
np
import
paddle.fluid
as
fluid
from
pgl.contrib.ogb.graphproppred.dataset_pgl
import
PglGraphPropPredDataset
from
pgl.utils
import
paddle_helper
from
ogb.graphproppred
import
Evaluator
from
pgl.contrib.ogb.graphproppred.mol_encoder
import
AtomEncoder
,
BondEncoder
def train(exe, batch_size, graph_wrapper, train_program, splitted_idx,
          dataset, evaluator, fetch_loss, fetch_pred):
    """Run one shuffled pass over the "train" split and print the train metric.

    Args:
        exe: Object whose ``run(program, feed=..., fetch_list=...)`` executes
            one step and returns the fetched values.
        batch_size: Number of graphs taken from the shuffled order per step.
        graph_wrapper: Object whose ``to_feed`` turns a batched graph into a
            feed dict.
        train_program: Program handed to ``exe.run`` for every step.
        splitted_idx: Mapping of split name to indices; only ``"train"`` is
            read here.
        dataset: Indexable returning ``(graphs, labels)`` for a set of indices.
        evaluator: Object whose ``eval`` receives ``y_true`` and ``y_pred``.
        fetch_loss: Fetch target for the loss.
        fetch_pred: Fetch target for the predictions.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Prints the first loss element every 100 batches and, after the pass, the
    evaluator result labelled "train". Returns None.
    """
    if batch_size < 1:
        # A non-positive step would never advance through the split.
        raise ValueError("batch_size must be >= 1, got %s" % batch_size)

    graphs, labels = dataset[splitted_idx["train"]]
    perm = np.arange(0, len(graphs))
    np.random.shuffle(perm)
    pred_output = np.zeros_like(labels, dtype="float32")

    for batch_no, start_batch in enumerate(range(0, len(perm), batch_size)):
        batch_index = perm[start_batch:start_batch + batch_size]
        batch_graph = pgl.graph.MultiGraph(graphs[batch_index])

        batch_label = labels[batch_index]
        # NaN never equals itself, so the mask is 1.0 where a label is present
        # and 0.0 where it is NaN; it is fed as the per-entry loss weight.
        batch_valid = (batch_label == batch_label).astype("float32")
        batch_label = np.nan_to_num(batch_label).astype("float32")

        feed_dict = graph_wrapper.to_feed(batch_graph)
        feed_dict["label"] = batch_label
        feed_dict["weight"] = batch_valid
        loss, pred = exe.run(train_program,
                             feed=feed_dict,
                             fetch_list=[fetch_loss, fetch_pred])
        if batch_no % 100 == 0:
            print("loss: %s" % loss[0])
        # Write predictions back at the shuffled positions so they line up
        # with `labels` for the evaluator.
        pred_output[batch_index] = pred

    print("train", evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def evaluate(exe, batch_size, graph_wrapper, val_program, splitted_idx,
             dataset, mode, evaluator, fetch_pred):
    """Predict over the split named by ``mode`` and print its metric.

    Args:
        exe: Object whose ``run(program, feed=..., fetch_list=...)`` executes
            one step and returns the fetched values.
        batch_size: Number of graphs per step.
        graph_wrapper: Object whose ``to_feed`` turns a batched graph into a
            feed dict.
        val_program: Program handed to ``exe.run`` for every step.
        splitted_idx: Mapping of split name to indices; ``splitted_idx[mode]``
            is read.
        dataset: Indexable returning ``(graphs, labels)`` for a set of indices.
        mode: Split key, also used as the label of the printed result.
        evaluator: Object whose ``eval`` receives ``y_true`` and ``y_pred``.
        fetch_pred: Fetch target for the predictions.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    Prints ``mode`` followed by the evaluator result. Returns None.
    """
    if batch_size < 1:
        # A non-positive step would never advance through the split.
        raise ValueError("batch_size must be >= 1, got %s" % batch_size)

    graphs, labels = dataset[splitted_idx[mode]]
    perm = np.arange(0, len(graphs))
    pred_output = np.zeros_like(labels, dtype="float32")

    for start_batch in range(0, len(perm), batch_size):
        batch_index = perm[start_batch:start_batch + batch_size]
        batch_graph = pgl.graph.MultiGraph(graphs[batch_index])
        feed_dict = graph_wrapper.to_feed(batch_graph)
        # Only one target is fetched, so the predictions are element 0 of the
        # returned list.
        pred = exe.run(val_program, feed=feed_dict, fetch_list=[fetch_pred])
        pred_output[batch_index] = pred[0]

    print(mode, evaluator.eval({"y_true": labels, "y_pred": pred_output}))
def send_func(src_feat, dst_feat, edge_feat):
    """Build an edge message: source feature "h" plus edge feature "h".

    ``dst_feat`` is accepted but not read.
    """
    from_source = src_feat["h"]
    from_edge = edge_feat["h"]
    return from_source + from_edge
class GNNModel(object):
    """Graph network: embedded nodes, fc-encoded edges, sum message passing."""

    def __init__(self, name, emb_dim, num_task, num_layers):
        """Store the hyper-parameters and build the atom and bond encoders.

        Both encoders are created with the same ``name`` and ``emb_dim``.
        """
        self.name = name
        self.emb_dim = emb_dim
        self.num_task = num_task
        self.num_layers = num_layers

        encoder_kwargs = dict(name=name, emb_dim=emb_dim)
        self.atom_encoder = AtomEncoder(**encoder_kwargs)
        self.bond_encoder = BondEncoder(**encoder_kwargs)

    def edges_encoder(self, inputs, name):
        """Apply an fc layer of size ``emb_dim`` to ``inputs``.

        ``name`` is used as the name of the layer's ``param_attr``.
        """
        weight_attr = fluid.ParamAttr(name=name)
        projected = fluid.layers.fc(inputs,
                                    size=self.emb_dim,
                                    param_attr=weight_attr)
        return projected

    def forward(self, graph):
        """Return the output of the final fc layer (size ``num_task``).

        The atom and bond encoders built in ``__init__`` are not called here:
        node "feat" goes through an embedding of size ``[1, emb_dim]`` and
        edge "feat" through ``edges_encoder``. Each of the ``num_layers``
        rounds sends ``send_func`` messages, sums them per node, adds the
        previous node state and applies a relu fc layer of size ``emb_dim``.
        Node states are then average-pooled per graph before the final fc
        layer.
        """
        node_state = fluid.layers.embedding(
            graph.node_feat['feat'], size=[1, self.emb_dim])
        edge_state = self.edges_encoder(
            graph.edge_feat['feat'], name="edges_encoder")

        for layer in range(self.num_layers):
            messages = graph.send(
                send_func,
                nfeat_list=[("h", node_state)],
                efeat_list=[("h", edge_state)])
            # Residual-style update: aggregated messages plus the old state.
            combined = graph.recv(messages, 'sum') + node_state
            layer_name = self.name + '_%s' % layer
            node_state = fluid.layers.fc(combined,
                                         size=self.emb_dim,
                                         name=layer_name,
                                         act="relu")

        pooled = pgl.layers.graph_pooling(graph, node_state, "average")
        return fluid.layers.fc(pooled, self.num_task, name="final")
def main():
    """Train and evaluate GNNModel on an OGB graph-property dataset.

    Graphs that arrive without node features get an all-zero "feat" column
    before training.

    Command-line flags:
        --use_cuda: run on ``fluid.CUDAPlace(0)`` instead of ``fluid.CPUPlace()``.
        --epochs: number of train/valid/test rounds (default 100).
        --dataset: name given to PglGraphPropPredDataset and Evaluator
            (default "ogbg-molhiv").
        --batch_size: graphs per step for training and evaluation
            (default 128).
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Graph Dataset')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument(
        '--epochs',
        type=int,
        default=100,
        help='number of epochs to train (default: 100)')
    parser.add_argument(
        '--dataset',
        type=str,
        default="ogbg-molhiv",
        help='dataset name (default: ogbg-molhiv)')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=128,
        help='number of graphs per batch (default: 128)')
    args = parser.parse_args()

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

    # Dataset loading and the split indices both come from the dataset object.
    dataset = PglGraphPropPredDataset(name=args.dataset)

    # Give feature-less graphs a single zero-valued node feature column.
    # NOTE(review): this relies on dataset[i] returning the stored graph
    # object, so the in-place assignment persists - confirm against the
    # dataset class. Index-based access is kept because only __getitem__ and
    # __len__ are known to be supported.
    for index in range(len(dataset)):
        graph, _ = dataset[index]
        if not graph.node_feat:
            graph.node_feat['feat'] = np.zeros(
                (graph.num_nodes, 1), dtype=int)

    splitted_idx = dataset.get_idx_split()

    # The evaluator is selected by dataset name.
    evaluator = Evaluator(args.dataset)

    # A two-graph sample batch is used only to describe the node/edge
    # features to GraphWrapper below; its labels are not needed.
    sample_graphs, _ = dataset[:2]
    graph_data = pgl.graph.MultiGraph(sample_graphs)

    # Edge features are cast to float32 and node features to int64 before
    # their feature info is read.
    graph_data.edge_feat["feat"] = graph_data.edge_feat["feat"].astype(
        "float32")
    graph_data.node_feat["feat"] = graph_data.node_feat["feat"].astype("int64")

    train_program = fluid.Program()
    startup_program = fluid.Program()

    model = GNNModel(
        name="gnn", num_task=dataset.num_tasks, emb_dim=64, num_layers=2)

    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            "graph",
            place=place,
            node_feat=graph_data.node_feat_info(),
            edge_feat=graph_data.edge_feat_info())
        pred = model.forward(gw)
        sigmoid_pred = fluid.layers.sigmoid(pred)

    # Clone before the loss and optimizer are added, so the evaluation
    # program contains only the forward pass.
    val_program = train_program.clone(for_test=True)

    with fluid.program_guard(train_program, startup_program):
        train_label = fluid.layers.data(
            name="label", dtype="float32", shape=[None, dataset.num_tasks])
        train_weight = fluid.layers.data(
            name="weight", dtype="float32", shape=[None, dataset.num_tasks])
        # The weight is the 0/1 validity mask fed by train(), so entries with
        # missing labels contribute nothing to the summed loss.
        train_loss_t = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=pred, label=train_label) * train_weight
        train_loss_t = fluid.layers.reduce_sum(train_loss_t)

        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(train_loss_t)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    for epoch in range(1, args.epochs + 1):
        print("Epoch", epoch)
        train(exe, args.batch_size, gw, train_program, splitted_idx, dataset,
              evaluator, train_loss_t, sigmoid_pred)
        evaluate(exe, args.batch_size, gw, val_program, splitted_idx, dataset,
                 "valid", evaluator, sigmoid_pred)
        evaluate(exe, args.batch_size, gw, val_program, splitted_idx, dataset,
                 "test", evaluator, sigmoid_pred)


if __name__ == "__main__":
    main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录